mirror of
https://github.com/sissbruecker/linkding
synced 2024-11-21 19:03:02 +00:00
Remove ads and cookie banners from HTML snapshots (#695)
* integrate ublock with single-file * reuse chromium profile
This commit is contained in:
parent
22a1fc80ad
commit
25470edb2c
8 changed files with 89 additions and 10 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -191,3 +191,6 @@ typings/
|
|||
/tmp
|
||||
# Database file
|
||||
/data
|
||||
# ublock + chromium
|
||||
/uBlock0.chromium
|
||||
/chromium-profile
|
||||
|
|
|
@ -18,11 +18,12 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
def create_snapshot(url: str, filepath: str):
|
||||
singlefile_path = settings.LD_SINGLEFILE_PATH
|
||||
# parse string to list of arguments
|
||||
singlefile_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
|
||||
# parse options to list of arguments
|
||||
ublock_options = shlex.split(settings.LD_SINGLEFILE_UBLOCK_OPTIONS)
|
||||
custom_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
|
||||
temp_filepath = filepath + ".tmp"
|
||||
# concat lists
|
||||
args = [singlefile_path] + singlefile_options + [url, temp_filepath]
|
||||
args = [singlefile_path] + ublock_options + custom_options + [url, temp_filepath]
|
||||
try:
|
||||
# Use start_new_session=True to create a new process group
|
||||
process = subprocess.Popen(args, start_new_session=True)
|
||||
|
|
|
@ -61,6 +61,10 @@ class SingleFileServiceTestCase(TestCase):
|
|||
|
||||
expected_args = [
|
||||
"single-file",
|
||||
'--browser-arg="--headless=new"',
|
||||
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||
'--browser-arg="--no-sandbox"',
|
||||
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||
"http://example.com",
|
||||
self.html_filepath + ".tmp",
|
||||
]
|
||||
|
@ -79,6 +83,10 @@ class SingleFileServiceTestCase(TestCase):
|
|||
|
||||
expected_args = [
|
||||
"single-file",
|
||||
'--browser-arg="--headless=new"',
|
||||
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||
'--browser-arg="--no-sandbox"',
|
||||
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||
"--some-option",
|
||||
"some value",
|
||||
"--another-option",
|
||||
|
@ -97,9 +105,9 @@ class SingleFileServiceTestCase(TestCase):
|
|||
with mock.patch("subprocess.Popen", return_value=mock_process):
|
||||
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
||||
|
||||
mock_process.wait.assert_called_with(timeout=60)
|
||||
mock_process.wait.assert_called_with(timeout=120)
|
||||
|
||||
@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=120)
|
||||
@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=180)
|
||||
def test_create_snapshot_custom_timeout_setting(self):
|
||||
mock_process = mock.Mock()
|
||||
mock_process.wait.return_value = 0
|
||||
|
@ -108,4 +116,4 @@ class SingleFileServiceTestCase(TestCase):
|
|||
with mock.patch("subprocess.Popen", return_value=mock_process):
|
||||
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
||||
|
||||
mock_process.wait.assert_called_with(timeout=120)
|
||||
mock_process.wait.assert_called_with(timeout=180)
|
||||
|
|
|
@ -9,6 +9,8 @@ mkdir -p data
|
|||
mkdir -p data/favicons
|
||||
# Create assets folder if it does not exist
|
||||
mkdir -p data/assets
|
||||
# Create chromium profile folder if it does not exist
|
||||
mkdir -p chromium-profile
|
||||
|
||||
# Generate secret key file if it does not exist
|
||||
python manage.py generate_secret_key
|
||||
|
@ -21,8 +23,9 @@ python manage.py create_initial_superuser
|
|||
# Migrate legacy background tasks to Huey
|
||||
python manage.py migrate_tasks
|
||||
|
||||
# Ensure the DB folder is owned by the right user
|
||||
# Ensure folders are owned by the right user
|
||||
chown -R www-data: /etc/linkding/data
|
||||
chown -R www-data: /etc/linkding/chromium-profile
|
||||
|
||||
# Start background task processor using supervisord, unless explicitly disabled
|
||||
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
|
||||
|
|
|
@ -99,10 +99,29 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
|
|||
CMD ["./bootstrap.sh"]
|
||||
|
||||
|
||||
FROM node:18-alpine AS ublock-build
|
||||
WORKDIR /etc/linkding
|
||||
# Install necessary tools
|
||||
RUN apk add --no-cache curl jq unzip
|
||||
# Fetch the latest release tag
|
||||
# Download the library
|
||||
# Unzip the library
|
||||
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
|
||||
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
|
||||
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
|
||||
unzip uBlock0.zip
|
||||
# Patch assets.json to enable easylist-cookies by default
|
||||
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
|
||||
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
|
||||
mv temp.json ./uBlock0.chromium/assets/assets.json
|
||||
|
||||
|
||||
FROM linkding AS linkding-plus
|
||||
# install node, chromium
|
||||
RUN apk update && apk add nodejs npm chromium
|
||||
# install single-file from fork for now, which contains several hotfixes
|
||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
|
||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
|
||||
# copy uBlock0
|
||||
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
|
||||
# enable snapshot support
|
||||
ENV LD_ENABLE_SNAPSHOTS=True
|
||||
|
|
|
@ -96,6 +96,24 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
|
|||
|
||||
CMD ["./bootstrap.sh"]
|
||||
|
||||
|
||||
FROM node:18-alpine AS ublock-build
|
||||
WORKDIR /etc/linkding
|
||||
# Install necessary tools
|
||||
RUN apk add --no-cache curl jq unzip
|
||||
# Fetch the latest release tag
|
||||
# Download the library
|
||||
# Unzip the library
|
||||
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
|
||||
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
|
||||
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
|
||||
unzip uBlock0.zip
|
||||
# Patch assets.json to enable easylist-cookies by default
|
||||
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
|
||||
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
|
||||
mv temp.json ./uBlock0.chromium/assets/assets.json
|
||||
|
||||
|
||||
FROM linkding AS linkding-plus
|
||||
# install chromium
|
||||
RUN apt-get update && apt-get -y install chromium
|
||||
|
@ -106,6 +124,8 @@ RUN apt-get install -y gnupg2 apt-transport-https ca-certificates && \
|
|||
echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
|
||||
apt-get update && apt-get install -y nodejs
|
||||
# install single-file from fork for now, which contains several hotfixes
|
||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
|
||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
|
||||
# copy uBlock0
|
||||
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
|
||||
# enable snapshot support
|
||||
ENV LD_ENABLE_SNAPSHOTS=True
|
||||
|
|
13
scripts/setup-ublock.sh
Executable file
13
scripts/setup-ublock.sh
Executable file
|
@ -0,0 +1,13 @@
|
|||
# Remove any previous extraction so unzip starts from a clean directory.
# The name must match the case used by the rest of this script (./uBlock0.chromium);
# on case-sensitive filesystems a lowercase "ublock0.chromium" would not match.
rm -rf uBlock0.chromium
|
||||
|
||||
TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name')
|
||||
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip
|
||||
curl -L -o uBlock0.zip $DOWNLOAD_URL
|
||||
unzip uBlock0.zip
|
||||
rm uBlock0.zip
|
||||
|
||||
curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
|
||||
jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json
|
||||
mv temp.json ./uBlock0.chromium/assets/assets.json
|
||||
|
||||
mkdir -p chromium-profile
|
|
@ -12,6 +12,7 @@ https://docs.djangoproject.com/en/2.2/ref/settings/
|
|||
|
||||
import json
|
||||
import os
|
||||
import shlex
|
||||
|
||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
@ -294,8 +295,19 @@ LD_ENABLE_SNAPSHOTS = os.getenv("LD_ENABLE_SNAPSHOTS", False) in (
|
|||
"1",
|
||||
)
|
||||
LD_SINGLEFILE_PATH = os.getenv("LD_SINGLEFILE_PATH", "single-file")
|
||||
LD_SINGLEFILE_UBLOCK_OPTIONS = os.getenv(
|
||||
"LD_SINGLEFILE_UBLOCK_OPTIONS",
|
||||
shlex.join(
|
||||
[
|
||||
'--browser-arg="--headless=new"',
|
||||
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||
'--browser-arg="--no-sandbox"',
|
||||
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||
]
|
||||
),
|
||||
)
|
||||
LD_SINGLEFILE_OPTIONS = os.getenv("LD_SINGLEFILE_OPTIONS", "")
|
||||
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 60))
|
||||
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 120))
|
||||
|
||||
# Monolith isn't used at the moment, as the local snapshot implementation
|
||||
# switched to single-file after the prototype. Keeping this around in case
|
||||
|
|
Loading…
Reference in a new issue