mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
feat: Update dockerfile to support readability
This commit is contained in:
parent
2a68af1b94
commit
5dc7e63792
2 changed files with 8 additions and 3 deletions
|
@@ -57,6 +57,11 @@ RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip >
|
||||||
&& npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
|
&& npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
|
||||||
&& chmod +x SingleFile-master/cli/single-file
|
&& chmod +x SingleFile-master/cli/single-file
|
||||||
|
|
||||||
|
RUN wget -qO - https://github.com/pirate/readability-extractor/archive/master.zip > readability.zip \
|
||||||
|
&& unzip -q readability.zip \
|
||||||
|
&& npm install --prefix readability-extractor-master --production > /dev/null 2>&1 \
|
||||||
|
&& chmod +x readability-extractor-master/readability-extractor
|
||||||
|
|
||||||
# Run everything from here on out as non-privileged user
|
# Run everything from here on out as non-privileged user
|
||||||
RUN groupadd --system archivebox \
|
RUN groupadd --system archivebox \
|
||||||
&& useradd --system --create-home --gid archivebox --groups audio,video archivebox
|
&& useradd --system --create-home --gid archivebox --groups audio,video archivebox
|
||||||
|
@@ -74,7 +79,8 @@ EXPOSE 8000
|
||||||
ENV IN_DOCKER=True \
|
ENV IN_DOCKER=True \
|
||||||
CHROME_BINARY=google-chrome \
|
CHROME_BINARY=google-chrome \
|
||||||
CHROME_SANDBOX=False \
|
CHROME_SANDBOX=False \
|
||||||
SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file"
|
SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file" \
|
||||||
|
READABILITY_BINARY="$EXTRA_PATH/readability-extractor-master/readability-extractor"
|
||||||
|
|
||||||
RUN env ALLOW_ROOT=True archivebox version
|
RUN env ALLOW_ROOT=True archivebox version
|
||||||
|
|
||||||
|
|
|
@@ -50,8 +50,7 @@ def should_save_readability(link: Link, out_dir: Optional[str]=None) -> bool:
|
||||||
if is_static_file(link.url):
|
if is_static_file(link.url):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
output = Path(out_dir or link.link_dir) / 'readability.json'
|
output = Path(out_dir or link.link_dir) / 'readability'
|
||||||
print(output, SAVE_READABILITY)
|
|
||||||
return SAVE_READABILITY and (not output.exists())
|
return SAVE_READABILITY and (not output.exists())
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue