mirror of https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-21 19:53:06 +00:00
rename OUTPUT_DIR to DATA_DIR
This commit is contained in:
parent 363a499289
commit b913e6f426
28 changed files with 128 additions and 138 deletions
@@ -16,7 +16,7 @@ if str(PACKAGE_DIR) not in sys.path:
 from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR  # noqa

-os.environ['OUTPUT_DIR'] = str(DATA_DIR)
+os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR)
 os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'

 # print('INSTALLING MONKEY PATCHES')

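Note: anything outside this repo that still reads the old OUTPUT_DIR environment variable will stop seeing a value once only ARCHIVEBOX_DATA_DIR is exported. A minimal compatibility sketch for an external script (the helper name and fallback default are assumptions, not part of this commit):

    import os
    from pathlib import Path

    def get_data_dir() -> Path:
        # prefer the new name, fall back to the legacy one, then to ./data
        raw = (os.environ.get('ARCHIVEBOX_DATA_DIR')
               or os.environ.get('OUTPUT_DIR')
               or './data')
        return Path(raw).expanduser().resolve()
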
@@ -8,10 +8,11 @@ import argparse
 from typing import List, Optional, IO

-from ..main import add
+from archivebox.misc.util import docstring
+from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
+
+from ..main import add
 from ..parsers import PARSERS
-from ..config.legacy import OUTPUT_DIR, ONLY_NEW
 from ..logging_util import SmartFormatter, accept_stdin, stderr

@@ -32,7 +33,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--update', #'-u',
         action='store_true',
-        default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
+        default=not ARCHIVING_CONFIG.ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
         help="Also retry previously skipped/failed links when adding new links",
     )
     parser.add_argument(

@@ -117,7 +118,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         init=command.init,
         extractors=command.extract,
         parser=command.parser,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=pwd or DATA_DIR,
     )

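Note: the recurring pattern in these CLI files is swapping loose module-level constants (ONLY_NEW, TIMEOUT, BIND_ADDR) for attributes on namespaced config objects (ARCHIVING_CONFIG.ONLY_NEW, SERVER_CONFIG.BIND_ADDR). A rough standalone sketch of that shape, with illustrative fields rather than the project's actual definitions:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class ArchivingConfig:
        ONLY_NEW: bool = True   # skip re-archiving links that already have snapshots
        TIMEOUT: int = 60       # per-extractor timeout, in seconds

    # a single shared instance replaces the scattered constants
    ARCHIVING_CONFIG = ArchivingConfig()

    default_update = not ARCHIVING_CONFIG.ONLY_NEW  # mirrors the argparse default above
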
@@ -5,12 +5,13 @@ __command__ = 'archivebox config'

 import sys
 import argparse
+from pathlib import Path

 from typing import Optional, List, IO

-from ..main import config
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import config
 from ..logging_util import SmartFormatter, accept_stdin

@@ -56,7 +57,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         get=command.get,
         set=command.set,
         reset=command.reset,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

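Note: call sites change from "pwd or OUTPUT_DIR" to "Path(pwd) if pwd else DATA_DIR". Since pwd arrives as an optional string, the old "or" idiom passed a plain str through to code expecting a Path; the new form normalizes the type. A standalone illustration (not project code):

    from pathlib import Path

    DATA_DIR = Path('/opt/archivebox/data')  # assumed value for the example

    def resolve_out_dir(pwd: str | None) -> Path:
        # 'pwd or DATA_DIR' would return the raw string whenever pwd is set;
        # converting explicitly keeps the result a Path either way
        return Path(pwd) if pwd else DATA_DIR

    assert isinstance(resolve_out_dir('/tmp/archive'), Path)
    assert resolve_out_dir(None) == DATA_DIR
    assert resolve_out_dir('') == DATA_DIR  # empty string is falsy, so it falls back too
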
@@ -5,12 +5,12 @@ __command__ = 'archivebox help'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import help
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import help
 from ..logging_util import SmartFormatter, reject_stdin

@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)

-    help(out_dir=pwd or OUTPUT_DIR)
+    help(out_dir=Path(pwd) if pwd else DATA_DIR)


 if __name__ == '__main__':

@@ -10,7 +10,7 @@ from typing import Optional, List, IO

 from ..main import init
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin

@@ -44,7 +44,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         force=command.force,
         quick=command.quick,
         setup=command.setup,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=pwd or DATA_DIR,
     )

@@ -5,12 +5,12 @@ __command__ = 'archivebox list'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import list_all
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import list_all
 from ..index import (
     LINK_FILTERS,
     get_indexed_folders,

@@ -131,7 +131,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         json=command.json,
         html=command.html,
         with_headers=command.with_headers,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )
     raise SystemExit(not matching_folders)

@@ -4,19 +4,19 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox manage'

 import sys

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import manage
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import manage


 @docstring(manage.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     manage(
         args=args,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -9,10 +9,10 @@ import argparse
 from pathlib import Path
 from typing import List, Optional, IO

-from ..main import oneshot
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin, stderr
+from ..main import oneshot


 @docstring(oneshot.__doc__)

@@ -46,7 +46,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--out-dir',
         type=str,
-        default=OUTPUT_DIR,
+        default=DATA_DIR,
         help= "Path to save the single archive folder to, e.g. ./example.com_archive"
     )
     command = parser.parse_args(args or ())

@@ -5,13 +5,13 @@ __command__ = 'archivebox remove'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import remove
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import remove


 @docstring(remove.__doc__)

@@ -74,7 +74,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         after=command.after,
         yes=command.yes,
         delete=command.delete,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox schedule'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import schedule
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import schedule


 @docstring(schedule.__doc__)

@@ -108,7 +108,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         overwrite=command.overwrite,
         update=command.update,
         import_path=command.import_path,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox server'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import server
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR, BIND_ADDR
+from archivebox.config import DATA_DIR, SERVER_CONFIG
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import server

 @docstring(server.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:

@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         'runserver_args',
         nargs='*',
         type=str,
-        default=[BIND_ADDR],
+        default=[SERVER_CONFIG.BIND_ADDR],
         help='Arguments to pass to Django runserver'
     )
     parser.add_argument(

@@ -68,7 +68,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         init=command.init,
         quick_init=command.quick_init,
         createsuperuser=command.createsuperuser,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox setup'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import setup
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import setup


 @docstring(setup.__doc__)

@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional

     setup(
         # force=command.force,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox shell'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import shell
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import shell


 @docstring(shell.__doc__)

@@ -26,7 +26,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     reject_stdin(__command__, stdin)

     shell(
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox status'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import status
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import status


 @docstring(status.__doc__)

@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.parse_args(args or ())
     reject_stdin(__command__, stdin)

-    status(out_dir=pwd or OUTPUT_DIR)
+    status(out_dir=Path(pwd) if pwd else DATA_DIR)


 if __name__ == '__main__':

@@ -5,12 +5,11 @@ __command__ = 'archivebox update'

 import sys
 import argparse

 from pathlib import Path
 from typing import List, Optional, IO

-from ..main import update
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..index import (
     LINK_FILTERS,
     get_indexed_folders,

@@ -25,6 +24,7 @@ from ..index import (
     get_unrecognized_folders,
 )
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import update


 @docstring(update.__doc__)

@@ -127,7 +127,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         status=command.status,
         after=command.after,
         before=command.before,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
         extractors=command.extract,
     )

@@ -5,13 +5,13 @@ __command__ = 'archivebox version'

 import sys
 import argparse

 from pathlib import Path
 from typing import Optional, List, IO

-from ..main import version
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import version


 @docstring(version.__doc__)

@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional

     version(
         quiet=command.quiet,
-        out_dir=pwd or OUTPUT_DIR,
+        out_dir=Path(pwd) if pwd else DATA_DIR,
     )

@@ -15,7 +15,7 @@ TEST_CONFIG = {
     'USE_COLOR': 'False',
     'SHOW_PROGRESS': 'False',

-    'OUTPUT_DIR': 'data.tests',
+    'DATA_DIR': 'data.tests',

     'SAVE_ARCHIVE_DOT_ORG': 'False',
     'SAVE_TITLE': 'False',

@@ -27,12 +27,12 @@ TEST_CONFIG = {
     'USE_YOUTUBEDL': 'False',
 }

-OUTPUT_DIR = 'data.tests'
+DATA_DIR = 'data.tests'
 os.environ.update(TEST_CONFIG)

 from ..main import init
 from ..index import load_main_index
-from ..config.legacy import (
+from archivebox.config.constants import (
     SQL_INDEX_FILENAME,
     JSON_INDEX_FILENAME,
     HTML_INDEX_FILENAME,

@@ -101,22 +101,22 @@ def output_hidden(show_failing=True):

 class TestInit(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)

     def tearDown(self):
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)

     def test_basic_init(self):
         with output_hidden():
             archivebox_init.main([])

-        assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
-        assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
-        assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
-        assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
+        assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
+        assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
+        assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
+        assert len(load_main_index(out_dir=DATA_DIR)) == 0

     def test_conflicting_init(self):
-        with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
+        with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
             f.write('test')

         try:

@@ -126,11 +126,11 @@ class TestInit(unittest.TestCase):
         except SystemExit:
             pass

-        assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
-        assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
-        assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
+        assert not (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
         try:
-            load_main_index(out_dir=OUTPUT_DIR)
+            load_main_index(out_dir=DATA_DIR)
             assert False, 'load_main_index should raise an exception when no index is present'
         except Exception:
             pass

@@ -138,36 +138,36 @@ class TestInit(unittest.TestCase):
     def test_no_dirty_state(self):
         with output_hidden():
             init()
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)
         with output_hidden():
             init()


 class TestAdd(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)
         with output_hidden():
             init()

     def tearDown(self):
-        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+        shutil.rmtree(DATA_DIR, ignore_errors=True)

     def test_add_arg_url(self):
         with output_hidden():
             archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 30

     def test_add_arg_file(self):
-        test_file = Path(OUTPUT_DIR) / 'test.txt'
+        test_file = Path(DATA_DIR) / 'test.txt'
         with open(test_file, 'w+', encoding='utf') as f:
             f.write(test_urls)

         with output_hidden():
             archivebox_add.main([test_file])

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 12
         os.remove(test_file)

@@ -175,40 +175,40 @@ class TestAdd(unittest.TestCase):
         with output_hidden():
             archivebox_add.main([], stdin=test_urls)

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 12


 class TestRemove(unittest.TestCase):
     def setUp(self):
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        os.makedirs(DATA_DIR, exist_ok=True)
         with output_hidden():
             init()
             archivebox_add.main([], stdin=test_urls)

     # def tearDown(self):
-    #     shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+    #     shutil.rmtree(DATA_DIR, ignore_errors=True)


     def test_remove_exact(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 11

     def test_remove_regex(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 4

     def test_remove_domain(self):
         with output_hidden():
             archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])

-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=DATA_DIR)
         assert len(all_links) == 10

     def test_remove_none(self):

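Note: the tests depend on ordering: TEST_CONFIG (including the renamed DATA_DIR key) must be pushed into os.environ before the archivebox modules are imported, because config values are read at import time. A minimal sketch of that constraint:

    import os

    # must run before importing anything that reads config at import time,
    # otherwise those modules capture the real data dir instead of the test one
    os.environ.update({'DATA_DIR': 'data.tests', 'USE_COLOR': 'False'})

    # only now is it safe to do e.g.:
    # from archivebox.main import init
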
@@ -36,7 +36,6 @@ class ConfigDict(BaseConfig, benedict, total=False):
     IN_DOCKER: bool

     PACKAGE_DIR: Path
-    OUTPUT_DIR: Path
     CONFIG_FILE: Path
     ONLY_NEW: bool
     TIMEOUT: int

@@ -60,7 +60,6 @@ class ConstantsDict(Mapping):
     LIB_DIR_NAME: str = 'lib'
     TMP_DIR_NAME: str = 'tmp'

-    OUTPUT_DIR: Path = DATA_DIR
     ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
     SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
     PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME

@@ -44,7 +44,7 @@ import django
 from django.db.backends.sqlite3.base import Database as sqlite3


-from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
+from .constants import CONSTANTS, TIMEZONE
 from .constants import *
 from .config_stubs import (
     ConfigValue,

@@ -57,8 +57,9 @@ from ..misc.logging import (
 )

 from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
-from ..plugins_auth.ldap.apps import LDAP_CONFIG
-from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
+from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
+from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG

 ANSI = SHELL_CONFIG.ANSI
 LDAP = LDAP_CONFIG.LDAP_ENABLED

@@ -331,7 +332,7 @@ def load_config_val(key: str,


 def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
-    """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+    """load the ini-formatted config file from DATA_DIR/Archivebox.conf"""

     config_path = CONSTANTS.CONFIG_FILE
     if config_path.exists():

@@ -351,7 +352,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic


 def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
-    """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+    """load the ini-formatted config file from DATA_DIR/Archivebox.conf"""

     from archivebox.misc.system import atomic_write

@@ -785,7 +786,7 @@ def bump_startup_progress_bar():

 def setup_django_minimal():
     # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
-    # os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
+    # os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
     # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
     # django.setup()
     raise Exception('dont use this anymore')

@@ -21,8 +21,7 @@ from django import forms
 from signal_webhooks.admin import WebhookAdmin
 from signal_webhooks.utils import get_webhook_model

-from archivebox.config import VERSION
-
+from archivebox.config import VERSION, DATA_DIR
 from archivebox.misc.util import htmldecode, urldecode

 from core.models import Snapshot, ArchiveResult, Tag

@@ -536,11 +535,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         links = [snapshot.as_link() for snapshot in queryset]
         if len(links) < 3:
             # run syncronously if there are only 1 or 2 links
-            archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
+            archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
             messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
         else:
             # otherwise run in a background worker
-            result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": CONFIG.OUTPUT_DIR})
+            result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
             messages.success(
                 request,
                 mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),

@@ -552,7 +551,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     def update_snapshots(self, request, queryset):
         links = [snapshot.as_link() for snapshot in queryset]

-        result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR})
+        result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})

         messages.success(
             request,

@@ -581,7 +580,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     def overwrite_snapshots(self, request, queryset):
         links = [snapshot.as_link() for snapshot in queryset]

-        result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR})
+        result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})

         messages.success(
             request,

@@ -592,7 +591,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         description="☠️ Delete"
     )
     def delete_snapshots(self, request, queryset):
-        remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
+        remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
         messages.success(
             request,
             mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),

@@ -732,7 +731,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
     )

     def output_summary(self, result):
-        snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
+        snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
         output_str = format_html(
             '<pre style="display: inline-block">{}</pre><br/>',
             result.output,

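Note: the admin action keeps its small/large batch split: fewer than three selected snapshots are archived synchronously, larger selections are queued for a background worker. The dispatch logic reduced to its shape, with stand-in stubs for the real helpers:

    def archive_links(links, **kwargs):
        print(f'sync: archiving {len(links)} links', kwargs)  # stand-in for the real archiver

    def bg_archive_links(args, kwargs):
        print(f'background: queued {len(args[0])} links', kwargs)  # stand-in for the background task

    def fetch_titles(links, out_dir='/opt/archivebox/data'):
        if len(links) < 3:
            # run synchronously if there are only 1 or 2 links
            archive_links(links, overwrite=True, methods=('title', 'favicon'), out_dir=out_dir)
        else:
            # otherwise hand off to a background worker
            bg_archive_links((links,), kwargs={'overwrite': True, 'methods': ['title', 'favicon'], 'out_dir': out_dir})

    fetch_titles(['https://example.com'])
    fetch_titles(['a', 'b', 'c', 'd'])
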
@@ -243,7 +243,7 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i
     log_indexing_process_finished()

 @enforce_types
-def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
+def load_main_index(out_dir: Path | str=DATA_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
     from core.models import Snapshot
     try:

@@ -8,18 +8,15 @@ from typing import List, Tuple, Iterator
 from django.db.models import QuerySet
 from django.db import transaction

-from .schema import Link
 from archivebox.misc.util import enforce_types, parse_date
-from ..config.legacy import (
-    OUTPUT_DIR,
-    TAG_SEPARATOR_PATTERN,
-)
+from archivebox.config import DATA_DIR, GENERAL_CONFIG
+
+from .schema import Link

 ### Main Links Index

 @enforce_types
-def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
+def parse_sql_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
     from core.models import Snapshot

     return (

@@ -28,7 +25,7 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
     )

 @enforce_types
-def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=DATA_DIR) -> None:
     if atomic:
         with transaction.atomic():
             return snapshots.delete()

@@ -44,7 +41,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
     info['created_by_id'] = created_by_id or get_or_create_system_user_pk()

     tag_list = list(dict.fromkeys(
-        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
+        tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')
     ))
     info.pop('tags')

@@ -95,7 +92,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):

 @enforce_types
-def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
     for link in links:
         # with transaction.atomic():
         #     write_link_to_sql_index(link)

@@ -103,7 +100,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by

 @enforce_types
-def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_link_details(link: Link, out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
     from core.models import Snapshot

     # with transaction.atomic():

@@ -120,7 +117,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
             snap.title = link.title

         tag_list = list(
-            {tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')}
+            {tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')}
             | set(snap.tags.values_list('name', flat=True))
         )

@@ -130,7 +127,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:

 @enforce_types
-def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
+def list_migrations(out_dir: Path=DATA_DIR) -> List[Tuple[bool, str]]:
     from django.core.management import call_command
     out = StringIO()
     call_command("showmigrations", list=True, stdout=out)

@@ -146,7 +143,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
     return migrations

 @enforce_types
-def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]:
     from django.core.management import call_command
     out1, out2 = StringIO(), StringIO()

@@ -160,6 +157,6 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
     ]

 @enforce_types
-def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def get_admins(out_dir: Path=DATA_DIR) -> List[str]:
     from django.contrib.auth.models import User
     return User.objects.filter(is_superuser=True)

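Note: tag splitting now reads its separator from GENERAL_CONFIG.TAG_SEPARATOR_PATTERN. For reference, the dict.fromkeys trick in write_link_to_sql_index deduplicates tags while preserving first-seen order; a standalone illustration (the pattern value here is an assumption, not taken from this diff):

    import re

    TAG_SEPARATOR_PATTERN = r'[,;]'  # hypothetical separator pattern for illustration

    raw_tags = 'news, tech; news,  python'
    tag_list = list(dict.fromkeys(
        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, raw_tags) if tag.strip()
    ))
    print(tag_list)  # ['news', 'tech', 'python']
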
@@ -13,7 +13,6 @@ IN_DOCKER=False
 IN_QEMU=False
 PUID=501
 PGID=20
-OUTPUT_DIR=/opt/archivebox/data
 CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
 ONLY_NEW=True
 TIMEOUT=60

@@ -173,7 +172,6 @@ IN_DOCKER = false
 IN_QEMU = false
 PUID = 501
 PGID = 20
-OUTPUT_DIR = "/opt/archivebox/data"
 CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
 ONLY_NEW = true
 TIMEOUT = 60

@@ -13,21 +13,16 @@ from typing import IO, Tuple, List, Optional
 from datetime import datetime, timezone
 from pathlib import Path

+from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
 from archivebox.misc.system import atomic_write
-from ..config.legacy import (
-    ANSI,
-    OUTPUT_DIR,
-    SOURCES_DIR_NAME,
-    TIMEOUT,
-    stderr,
-    hint,
-)
+from archivebox.misc.logging import stderr, hint
 from archivebox.misc.util import (
     basename,
     htmldecode,
     download_url,
     enforce_types,
 )

 from ..index.schema import Link
 from ..logging_util import TimedProgress, log_source_saved

@@ -38,7 +33,6 @@ from . import pocket_html
 from . import pinboard_rss
 from . import shaarli_rss
 from . import medium_rss
-
 from . import netscape_html
 from . import generic_rss
 from . import generic_json

@@ -79,7 +73,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
     parse a list of URLS without touching the filesystem
     """

-    timer = TimedProgress(TIMEOUT * 4)
+    timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
     #urls = list(map(lambda x: x + "\n", urls))
     file = StringIO()
     file.writelines(urls)

@@ -98,7 +92,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut
     RSS feed, bookmarks export, or text file
     """

-    timer = TimedProgress(TIMEOUT * 4)
+    timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
     with open(source_file, 'r', encoding='utf-8') as file:
         links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser)

@@ -148,9 +142,9 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,

 @enforce_types
-def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=DATA_DIR) -> str:
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-    source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
+    source_path = str(CONSTANTS.SOURCES_DIR / filename.format(ts=ts))

     referenced_texts = ''

@@ -167,10 +161,10 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:

 @enforce_types
-def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_file_as_source(path: str, timeout: int=ARCHIVING_CONFIG.TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=DATA_DIR) -> str:
     """download a given url's content into output/sources/domain-<timestamp>.txt"""
     ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-    source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
+    source_path = str(CONSTANTS.SOURCES_DIR / filename.format(basename=basename(path), ts=ts))

     if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         # Source is a URL that needs to be downloaded

@@ -183,9 +177,9 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
         except Exception as e:
             timer.end()
             print('{}[!] Failed to download {}{}\n'.format(
-                ANSI['red'],
+                SHELL_CONFIG.ANSI['red'],
                 path,
-                ANSI['reset'],
+                SHELL_CONFIG.ANSI['reset'],
             ))
             print('    ', e)
             raise e

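Note: save_text_as_source/save_file_as_source now build paths from CONSTANTS.SOURCES_DIR instead of joining OUTPUT_DIR with a directory-name constant by hand. The timestamped filename scheme works like this (standalone sketch; the SOURCES_DIR value is assumed):

    from datetime import datetime, timezone
    from pathlib import Path

    SOURCES_DIR = Path('/opt/archivebox/data/sources')  # assumed location

    ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
    source_path = str(SOURCES_DIR / '{ts}-stdin.txt'.format(ts=ts))
    # e.g. '/opt/archivebox/data/sources/1700000000-stdin.txt'
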
@@ -1,10 +1,11 @@
 import time
 import uuid
 from functools import wraps
 from django.db import connection, transaction
 from django.utils import timezone
 from huey.exceptions import TaskLockedException
+from archivebox.config import CONSTANTS

 class SqliteSemaphore:
     def __init__(self, db_path, table_name, name, value=1, timeout=None):
         self.db_path = db_path

@@ -68,7 +69,8 @@ class SqliteSemaphore:
         return cursor.rowcount > 0


-LOCKS_DB_PATH = settings.CONFIG.OUTPUT_DIR / 'locks.sqlite3'
+LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'


 def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
     """

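Note: LOCKS_DB_PATH is now derived from the main database file's location rather than a config attribute, so the lock database always sits next to index.sqlite3 no matter how the data dir was configured. Sketch of the derivation, with an assumed stand-in for CONSTANTS:

    from pathlib import Path
    from types import SimpleNamespace

    # assumed stand-in for archivebox.config.CONSTANTS
    CONSTANTS = SimpleNamespace(DATABASE_FILE=Path('/opt/archivebox/data/index.sqlite3'))

    LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
    print(LOCKS_DB_PATH)  # /opt/archivebox/data/locks.sqlite3
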
@@ -2,7 +2,6 @@ from pathlib import Path

 from archivebox.config import DATA_DIR, CONSTANTS

-OUTPUT_DIR = DATA_DIR
 LOGS_DIR = CONSTANTS.LOGS_DIR
 TMP_DIR = CONSTANTS.TMP_DIR

@@ -2,7 +2,7 @@
 socket = 127.0.0.1:3031
 chdir = ../
 http = 0.0.0.0:8001
-env = OUTPUT_DIR=./data
+env = DATA_DIR=./data
 wsgi-file = archivebox/core/wsgi.py
 processes = 4
 threads = 1