rename OUTPUT_DIR to DATA_DIR

This commit is contained in:
Nick Sweeting 2024-09-30 17:44:18 -07:00
parent 363a499289
commit b913e6f426
No known key found for this signature in database
28 changed files with 128 additions and 138 deletions

View file

@@ -16,7 +16,7 @@ if str(PACKAGE_DIR) not in sys.path:
from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
os.environ['OUTPUT_DIR'] = str(DATA_DIR)
os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR)
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
# print('INSTALLING MONKEY PATCHES')

View file

@@ -8,10 +8,11 @@ import argparse
from typing import List, Optional, IO
from ..main import add
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
from ..main import add
from ..parsers import PARSERS
from ..config.legacy import OUTPUT_DIR, ONLY_NEW
from ..logging_util import SmartFormatter, accept_stdin, stderr
@@ -32,7 +33,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--update', #'-u',
action='store_true',
default=not ONLY_NEW, # when ONLY_NEW=True we skip updating old links
default=not ARCHIVING_CONFIG.ONLY_NEW, # when ONLY_NEW=True we skip updating old links
help="Also retry previously skipped/failed links when adding new links",
)
parser.add_argument(
@@ -117,7 +118,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
init=command.init,
extractors=command.extract,
parser=command.parser,
out_dir=pwd or OUTPUT_DIR,
out_dir=pwd or DATA_DIR,
)

View file

@@ -5,12 +5,13 @@ __command__ = 'archivebox config'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import config
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..main import config
from ..logging_util import SmartFormatter, accept_stdin
@@ -56,7 +57,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
get=command.get,
set=command.set,
reset=command.reset,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,12 +5,12 @@ __command__ = 'archivebox help'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import help
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..main import help
from ..logging_util import SmartFormatter, reject_stdin
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.parse_args(args or ())
reject_stdin(__command__, stdin)
help(out_dir=pwd or OUTPUT_DIR)
help(out_dir=Path(pwd) if pwd else DATA_DIR)
if __name__ == '__main__':

View file

@@ -10,7 +10,7 @@ from typing import Optional, List, IO
from ..main import init
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
@@ -44,7 +44,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
force=command.force,
quick=command.quick,
setup=command.setup,
out_dir=pwd or OUTPUT_DIR,
out_dir=pwd or DATA_DIR,
)

View file

@@ -5,12 +5,12 @@ __command__ = 'archivebox list'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import list_all
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..main import list_all
from ..index import (
LINK_FILTERS,
get_indexed_folders,
@@ -131,7 +131,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
json=command.json,
html=command.html,
with_headers=command.with_headers,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)
raise SystemExit(not matching_folders)

View file

@@ -4,19 +4,19 @@ __package__ = 'archivebox.cli'
__command__ = 'archivebox manage'
import sys
from pathlib import Path
from typing import Optional, List, IO
from ..main import manage
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..main import manage
@docstring(manage.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
manage(
args=args,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -9,10 +9,10 @@ import argparse
from pathlib import Path
from typing import List, Optional, IO
from ..main import oneshot
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, accept_stdin, stderr
from ..main import oneshot
@docstring(oneshot.__doc__)
@@ -46,7 +46,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--out-dir',
type=str,
default=OUTPUT_DIR,
default=DATA_DIR,
help= "Path to save the single archive folder to, e.g. ./example.com_archive"
)
command = parser.parse_args(args or ())

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox remove'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import remove
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, accept_stdin
from ..main import remove
@docstring(remove.__doc__)
@@ -74,7 +74,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
after=command.after,
yes=command.yes,
delete=command.delete,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox schedule'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import schedule
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
from ..main import schedule
@docstring(schedule.__doc__)
@@ -108,7 +108,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
overwrite=command.overwrite,
update=command.update,
import_path=command.import_path,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox server'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import server
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR, BIND_ADDR
from archivebox.config import DATA_DIR, SERVER_CONFIG
from ..logging_util import SmartFormatter, reject_stdin
from ..main import server
@docstring(server.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
@@ -25,7 +25,7 @@
'runserver_args',
nargs='*',
type=str,
default=[BIND_ADDR],
default=[SERVER_CONFIG.BIND_ADDR],
help='Arguments to pass to Django runserver'
)
parser.add_argument(
@@ -68,7 +68,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
init=command.init,
quick_init=command.quick_init,
createsuperuser=command.createsuperuser,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox setup'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import setup
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
from ..main import setup
@docstring(setup.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
setup(
# force=command.force,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox shell'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import shell
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
from ..main import shell
@docstring(shell.__doc__)
@@ -26,7 +26,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
reject_stdin(__command__, stdin)
shell(
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox status'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import status
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
from ..main import status
@docstring(status.__doc__)
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.parse_args(args or ())
reject_stdin(__command__, stdin)
status(out_dir=pwd or OUTPUT_DIR)
status(out_dir=Path(pwd) if pwd else DATA_DIR)
if __name__ == '__main__':

View file

@@ -5,12 +5,11 @@ __command__ = 'archivebox update'
import sys
import argparse
from pathlib import Path
from typing import List, Optional, IO
from ..main import update
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..index import (
LINK_FILTERS,
get_indexed_folders,
@@ -25,6 +24,7 @@ from ..index import (
get_unrecognized_folders,
)
from ..logging_util import SmartFormatter, accept_stdin
from ..main import update
@docstring(update.__doc__)
@@ -127,7 +127,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
status=command.status,
after=command.after,
before=command.before,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
extractors=command.extract,
)

View file

@@ -5,13 +5,13 @@ __command__ = 'archivebox version'
import sys
import argparse
from pathlib import Path
from typing import Optional, List, IO
from ..main import version
from archivebox.misc.util import docstring
from ..config.legacy import OUTPUT_DIR
from archivebox.config import DATA_DIR
from ..logging_util import SmartFormatter, reject_stdin
from ..main import version
@docstring(version.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
version(
quiet=command.quiet,
out_dir=pwd or OUTPUT_DIR,
out_dir=Path(pwd) if pwd else DATA_DIR,
)

View file

@@ -15,7 +15,7 @@ TEST_CONFIG = {
'USE_COLOR': 'False',
'SHOW_PROGRESS': 'False',
'OUTPUT_DIR': 'data.tests',
'DATA_DIR': 'data.tests',
'SAVE_ARCHIVE_DOT_ORG': 'False',
'SAVE_TITLE': 'False',
@@ -27,12 +27,12 @@ TEST_CONFIG = {
'USE_YOUTUBEDL': 'False',
}
OUTPUT_DIR = 'data.tests'
DATA_DIR = 'data.tests'
os.environ.update(TEST_CONFIG)
from ..main import init
from ..index import load_main_index
from ..config.legacy import (
from archivebox.config.constants import (
SQL_INDEX_FILENAME,
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
@@ -101,22 +101,22 @@ def output_hidden(show_failing=True):
class TestInit(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)
def tearDown(self):
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
shutil.rmtree(DATA_DIR, ignore_errors=True)
def test_basic_init(self):
with output_hidden():
archivebox_init.main([])
assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
assert len(load_main_index(out_dir=DATA_DIR)) == 0
def test_conflicting_init(self):
with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
f.write('test')
try:
@@ -126,11 +126,11 @@ class TestInit(unittest.TestCase):
except SystemExit:
pass
assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
assert not (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
try:
load_main_index(out_dir=OUTPUT_DIR)
load_main_index(out_dir=DATA_DIR)
assert False, 'load_main_index should raise an exception when no index is present'
except Exception:
pass
@@ -138,36 +138,36 @@ class TestInit(unittest.TestCase):
def test_no_dirty_state(self):
with output_hidden():
init()
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
shutil.rmtree(DATA_DIR, ignore_errors=True)
with output_hidden():
init()
class TestAdd(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)
with output_hidden():
init()
def tearDown(self):
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
shutil.rmtree(DATA_DIR, ignore_errors=True)
def test_add_arg_url(self):
with output_hidden():
archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 30
def test_add_arg_file(self):
test_file = Path(OUTPUT_DIR) / 'test.txt'
test_file = Path(DATA_DIR) / 'test.txt'
with open(test_file, 'w+', encoding='utf') as f:
f.write(test_urls)
with output_hidden():
archivebox_add.main([test_file])
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 12
os.remove(test_file)
@@ -175,40 +175,40 @@ class TestAdd(unittest.TestCase):
with output_hidden():
archivebox_add.main([], stdin=test_urls)
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 12
class TestRemove(unittest.TestCase):
def setUp(self):
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)
with output_hidden():
init()
archivebox_add.main([], stdin=test_urls)
# def tearDown(self):
# shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
# shutil.rmtree(DATA_DIR, ignore_errors=True)
def test_remove_exact(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 11
def test_remove_regex(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 4
def test_remove_domain(self):
with output_hidden():
archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
all_links = load_main_index(out_dir=OUTPUT_DIR)
all_links = load_main_index(out_dir=DATA_DIR)
assert len(all_links) == 10
def test_remove_none(self):

View file

@@ -36,7 +36,6 @@ class ConfigDict(BaseConfig, benedict, total=False):
IN_DOCKER: bool
PACKAGE_DIR: Path
OUTPUT_DIR: Path
CONFIG_FILE: Path
ONLY_NEW: bool
TIMEOUT: int

View file

@@ -60,7 +60,6 @@ class ConstantsDict(Mapping):
LIB_DIR_NAME: str = 'lib'
TMP_DIR_NAME: str = 'tmp'
OUTPUT_DIR: Path = DATA_DIR
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME

View file

@@ -44,7 +44,7 @@ import django
from django.db.backends.sqlite3.base import Database as sqlite3
from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
from .constants import CONSTANTS, TIMEZONE
from .constants import *
from .config_stubs import (
ConfigValue,
@@ -57,8 +57,9 @@ from ..misc.logging import (
)
from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
from ..plugins_auth.ldap.apps import LDAP_CONFIG
from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
ANSI = SHELL_CONFIG.ANSI
LDAP = LDAP_CONFIG.LDAP_ENABLED
@@ -331,7 +332,7 @@ def load_config_val(key: str,
def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
config_path = CONSTANTS.CONFIG_FILE
if config_path.exists():
@@ -351,7 +352,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic
def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
from archivebox.misc.system import atomic_write
@@ -785,7 +786,7 @@ def bump_startup_progress_bar():
def setup_django_minimal():
# sys.path.append(str(CONSTANTS.PACKAGE_DIR))
# os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
# os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
# django.setup()
raise Exception('dont use this anymore')

View file

@@ -21,8 +21,7 @@ from django import forms
from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
from archivebox.config import VERSION
from archivebox.config import VERSION, DATA_DIR
from archivebox.misc.util import htmldecode, urldecode
from core.models import Snapshot, ArchiveResult, Tag
@@ -536,11 +535,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
links = [snapshot.as_link() for snapshot in queryset]
if len(links) < 3:
# run syncronously if there are only 1 or 2 links
archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
else:
# otherwise run in a background worker
result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": CONFIG.OUTPUT_DIR})
result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
messages.success(
request,
mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
@@ -552,7 +551,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
def update_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR})
result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
messages.success(
request,
@@ -581,7 +580,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
def overwrite_snapshots(self, request, queryset):
links = [snapshot.as_link() for snapshot in queryset]
result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR})
result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
messages.success(
request,
@@ -592,7 +591,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
description="☠️ Delete"
)
def delete_snapshots(self, request, queryset):
remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
messages.success(
request,
mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
@@ -732,7 +731,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
)
def output_summary(self, result):
snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
output_str = format_html(
'<pre style="display: inline-block">{}</pre><br/>',
result.output,

View file

@@ -243,7 +243,7 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i
log_indexing_process_finished()
@enforce_types
def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
def load_main_index(out_dir: Path | str=DATA_DIR, warn: bool=True) -> List[Link]:
"""parse and load existing index with any new links from import_path merged in"""
from core.models import Snapshot
try:

View file

@@ -8,18 +8,15 @@ from typing import List, Tuple, Iterator
from django.db.models import QuerySet
from django.db import transaction
from .schema import Link
from archivebox.misc.util import enforce_types, parse_date
from ..config.legacy import (
OUTPUT_DIR,
TAG_SEPARATOR_PATTERN,
)
from archivebox.config import DATA_DIR, GENERAL_CONFIG
from .schema import Link
### Main Links Index
@enforce_types
def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
def parse_sql_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
from core.models import Snapshot
return (
@@ -28,7 +25,7 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
)
@enforce_types
def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=DATA_DIR) -> None:
if atomic:
with transaction.atomic():
return snapshots.delete()
@@ -44,7 +41,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
info['created_by_id'] = created_by_id or get_or_create_system_user_pk()
tag_list = list(dict.fromkeys(
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')
))
info.pop('tags')
@@ -95,7 +92,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
@enforce_types
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
def write_sql_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
for link in links:
# with transaction.atomic():
# write_link_to_sql_index(link)
@@ -103,7 +100,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by
@enforce_types
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
def write_sql_link_details(link: Link, out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
from core.models import Snapshot
# with transaction.atomic():
@@ -120,7 +117,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
snap.title = link.title
tag_list = list(
{tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')}
{tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')}
| set(snap.tags.values_list('name', flat=True))
)
@@ -130,7 +127,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
@enforce_types
def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
def list_migrations(out_dir: Path=DATA_DIR) -> List[Tuple[bool, str]]:
from django.core.management import call_command
out = StringIO()
call_command("showmigrations", list=True, stdout=out)
@@ -146,7 +143,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
return migrations
@enforce_types
def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]:
from django.core.management import call_command
out1, out2 = StringIO(), StringIO()
@@ -160,6 +157,6 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
]
@enforce_types
def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
def get_admins(out_dir: Path=DATA_DIR) -> List[str]:
from django.contrib.auth.models import User
return User.objects.filter(is_superuser=True)

View file

@@ -13,7 +13,6 @@ IN_DOCKER=False
IN_QEMU=False
PUID=501
PGID=20
OUTPUT_DIR=/opt/archivebox/data
CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
ONLY_NEW=True
TIMEOUT=60
@@ -173,7 +172,6 @@ IN_DOCKER = false
IN_QEMU = false
PUID = 501
PGID = 20
OUTPUT_DIR = "/opt/archivebox/data"
CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
ONLY_NEW = true
TIMEOUT = 60

View file

@@ -13,21 +13,16 @@ from typing import IO, Tuple, List, Optional
from datetime import datetime, timezone
from pathlib import Path
from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
from archivebox.misc.system import atomic_write
from ..config.legacy import (
ANSI,
OUTPUT_DIR,
SOURCES_DIR_NAME,
TIMEOUT,
stderr,
hint,
)
from archivebox.misc.logging import stderr, hint
from archivebox.misc.util import (
basename,
htmldecode,
download_url,
enforce_types,
)
from ..index.schema import Link
from ..logging_util import TimedProgress, log_source_saved
@@ -38,7 +33,6 @@ from . import pocket_html
from . import pinboard_rss
from . import shaarli_rss
from . import medium_rss
from . import netscape_html
from . import generic_rss
from . import generic_json
@@ -79,7 +73,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
parse a list of URLS without touching the filesystem
"""
timer = TimedProgress(TIMEOUT * 4)
timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
#urls = list(map(lambda x: x + "\n", urls))
file = StringIO()
file.writelines(urls)
@@ -98,7 +92,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut
RSS feed, bookmarks export, or text file
"""
timer = TimedProgress(TIMEOUT * 4)
timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
with open(source_file, 'r', encoding='utf-8') as file:
links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser)
@@ -148,9 +142,9 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
@enforce_types
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=DATA_DIR) -> str:
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
source_path = str(CONSTANTS.SOURCES_DIR / filename.format(ts=ts))
referenced_texts = ''
@@ -167,10 +161,10 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
@enforce_types
def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
def save_file_as_source(path: str, timeout: int=ARCHIVING_CONFIG.TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=DATA_DIR) -> str:
"""download a given url's content into output/sources/domain-<timestamp>.txt"""
ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
source_path = str(CONSTANTS.SOURCES_DIR / filename.format(basename=basename(path), ts=ts))
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
# Source is a URL that needs to be downloaded
@@ -183,9 +177,9 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
except Exception as e:
timer.end()
print('{}[!] Failed to download {}{}\n'.format(
ANSI['red'],
SHELL_CONFIG.ANSI['red'],
path,
ANSI['reset'],
SHELL_CONFIG.ANSI['reset'],
))
print(' ', e)
raise e

View file

@@ -1,10 +1,11 @@
import time
import uuid
from functools import wraps
from django.db import connection, transaction
from django.utils import timezone
from huey.exceptions import TaskLockedException
from archivebox.config import CONSTANTS
class SqliteSemaphore:
def __init__(self, db_path, table_name, name, value=1, timeout=None):
self.db_path = db_path
@@ -68,7 +69,8 @@ class SqliteSemaphore:
return cursor.rowcount > 0
LOCKS_DB_PATH = settings.CONFIG.OUTPUT_DIR / 'locks.sqlite3'
LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
"""

View file

@@ -2,7 +2,6 @@ from pathlib import Path
from archivebox.config import DATA_DIR, CONSTANTS
OUTPUT_DIR = DATA_DIR
LOGS_DIR = CONSTANTS.LOGS_DIR
TMP_DIR = CONSTANTS.TMP_DIR

View file

@@ -2,7 +2,7 @@
socket = 127.0.0.1:3031
chdir = ../
http = 0.0.0.0:8001
env = OUTPUT_DIR=./data
env = DATA_DIR=./data
wsgi-file = archivebox/core/wsgi.py
processes = 4
threads = 1