ArchiveBox/tests/fixtures.py
Ross Williams 310b4d1242 Add htmltotext extractor
Saves HTML text nodes and selected element attributes in
`htmltotext.txt` for each Snapshot. Primarily intended to be used
for search indexing.
2023-10-23 21:42:32 -04:00

29 lines
708 B
Python

import os
import subprocess
import pytest
@pytest.fixture
def process(tmp_path):
os.chdir(tmp_path)
process = subprocess.run(['archivebox', 'init'], capture_output=True)
return process
@pytest.fixture
def disable_extractors_dict():
env = os.environ.copy()
env.update({
"USE_WGET": "false",
"USE_SINGLEFILE": "false",
"USE_READABILITY": "false",
"USE_MERCURY": "false",
"SAVE_HTMLTOTEXT": "false",
"SAVE_PDF": "false",
"SAVE_SCREENSHOT": "false",
"SAVE_DOM": "false",
"SAVE_HEADERS": "false",
"USE_GIT": "false",
"SAVE_MEDIA": "false",
"SAVE_ARCHIVE_DOT_ORG": "false"
})
return env