feat: Update dockerfile to support readability

This commit is contained in:
Cristian 2020-08-11 11:52:43 -05:00
parent 2a68af1b94
commit 5dc7e63792
2 changed files with 8 additions and 3 deletions

View file

@ -57,6 +57,11 @@ RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip >
&& npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
&& chmod +x SingleFile-master/cli/single-file
# Install the readability-extractor CLI: download the upstream archive, unpack
# it, install its production npm dependencies, and mark the entry script
# executable. The zip is deleted in this same RUN so it does not persist in the
# image layer.
# NOTE(review): the archive comes from the moving `master` branch with no
# checksum, so rebuilds are not reproducible — consider pinning a tag/commit.
RUN wget -qO - https://github.com/pirate/readability-extractor/archive/master.zip > readability.zip \
    && unzip -q readability.zip \
    && rm readability.zip \
    && npm install --prefix readability-extractor-master --production > /dev/null 2>&1 \
    && chmod +x readability-extractor-master/readability-extractor
# Run everything from here on out as a non-privileged user: create the
# `archivebox` system group, then a system user of the same name with a home
# directory, that primary group, and membership in the audio and video groups.
RUN groupadd --system archivebox \
    && useradd --system --gid archivebox --groups audio,video --create-home archivebox
@ -74,7 +79,8 @@ EXPOSE 8000
# Environment baked into the image: IN_DOCKER, the Chrome binary name and
# sandbox setting, and the paths (under $EXTRA_PATH) to the SingleFile and
# readability-extractor executables.
# NOTE(review): SINGLEFILE_BINARY appears twice below. The first copy has no
# trailing backslash and the second one does; this looks like the removed/added
# pair of a diff hunk. Read literally as a Dockerfile, the first copy would end
# the ENV instruction early — confirm that only the second copy is kept.
ENV IN_DOCKER=True \
CHROME_BINARY=google-chrome \
CHROME_SANDBOX=False \
SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file"
SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file" \
READABILITY_BINARY="$EXTRA_PATH/readability-extractor-master/readability-extractor"
# Invoke `archivebox version` once during the build, with ALLOW_ROOT=True set
# for this single command only (presumably because this step still runs as
# root — confirm). A non-zero exit fails the build.
RUN ALLOW_ROOT=True archivebox version

View file

@ -50,8 +50,7 @@ def should_save_readability(link: Link, out_dir: Optional[str]=None) -> bool:
if is_static_file(link.url):
return False
output = Path(out_dir or link.link_dir) / 'readability.json'
print(output, SAVE_READABILITY)
output = Path(out_dir or link.link_dir) / 'readability'
return SAVE_READABILITY and (not output.exists())