From c30ae1d2cbb5234c94dfa0d6ccff8d5946f3f8d0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 20 Aug 2024 19:28:28 -0700 Subject: [PATCH] add created_by_id to all Snapshot creation functions --- archivebox/core/views.py | 1 + archivebox/index/__init__.py | 8 ++++---- archivebox/index/sql.py | 14 +++++++++----- archivebox/main.py | 5 +++-- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 02f67ffa..c5c09b09 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -470,6 +470,7 @@ class AddView(UserPassesTestMixin, FormView): "parser": parser, "update_all": False, "out_dir": OUTPUT_DIR, + "created_by_id": self.request.user.pk, } if extractors: input_kwargs.update({"extractors": extractors}) diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py index fb3688f3..1bc5a104 100644 --- a/archivebox/index/__init__.py +++ b/archivebox/index/__init__.py @@ -225,21 +225,21 @@ def timed_index_update(out_path: Path): @enforce_types -def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None: +def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None: """Writes links to sqlite3 file for a given list of links""" log_indexing_process_started(len(links)) try: with timed_index_update(out_dir / SQL_INDEX_FILENAME): - write_sql_main_index(links, out_dir=out_dir) + write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes except (KeyboardInterrupt, SystemExit): stderr('[!] Warning: Still writing index to disk...', color='lightyellow') stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.') with timed_index_update(out_dir / SQL_INDEX_FILENAME): - write_sql_main_index(links, out_dir=out_dir) + write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id) os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes raise SystemExit(0) @@ -268,7 +268,7 @@ def load_main_index_meta(out_dir: Path=OUTPUT_DIR) -> Optional[dict]: @enforce_types -def parse_links_from_source(source_path: str, root_url: Optional[str]=None, parser: str="auto") -> Tuple[List[Link], List[Link]]: +def parse_links_from_source(source_path: str, root_url: Optional[str]=None, parser: str="auto") -> List[Link]: from ..parsers import parse_links diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index b1497977..3e9ddc77 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -35,10 +35,12 @@ def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: return snapshots.delete() @enforce_types -def write_link_to_sql_index(link: Link): +def write_link_to_sql_index(link: Link, created_by_id: int | None=None): from core.models import Snapshot, ArchiveResult info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} + info['created_by_id'] = created_by_id + tag_list = list(dict.fromkeys( tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '') )) @@ -68,6 +70,7 @@ def write_link_to_sql_index(link: Link): 'cmd_version': entry.get('cmd_version') or 'unknown', 'pwd': entry['pwd'], 'status': entry['status'], + 'created_by_id': created_by_id, } ) else: @@ -82,6 +85,7 @@ def write_link_to_sql_index(link: Link): 'cmd_version': entry.cmd_version or 'unknown', 'pwd': entry.pwd, 'status': entry.status, + 'created_by_id': created_by_id, } ) @@ -89,15 +93,15 @@ def write_link_to_sql_index(link: Link): @enforce_types -def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None: +def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None: for link in links: # with transaction.atomic(): # write_link_to_sql_index(link) - write_link_to_sql_index(link) + write_link_to_sql_index(link, created_by_id=created_by_id) @enforce_types -def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None: +def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None: from core.models import Snapshot # with transaction.atomic(): @@ -109,7 +113,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None: try: snap = Snapshot.objects.get(url=link.url) except Snapshot.DoesNotExist: - snap = write_link_to_sql_index(link) + snap = write_link_to_sql_index(link, created_by_id=created_by_id) snap.title = link.title diff --git a/archivebox/main.py b/archivebox/main.py index 1beefdd3..b2bc1ce4 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -595,6 +595,7 @@ def add(urls: Union[str, List[str]], init: bool=False, extractors: str="", parser: str="auto", + created_by_id: int | None=None, out_dir: Path=OUTPUT_DIR) -> List[Link]: """Add a new URL or list of URLs to your archive""" @@ -639,11 +640,11 @@ def add(urls: Union[str, List[str]], new_links = dedupe_links(all_links, imported_links) - write_main_index(links=new_links, out_dir=out_dir) + write_main_index(links=new_links, out_dir=out_dir, created_by_id=created_by_id) all_links = load_main_index(out_dir=out_dir) tags = [ - Tag.objects.get_or_create(name=name.strip())[0] + Tag.objects.get_or_create(name=name.strip(), defaults={'created_by_id': created_by_id})[0] for name in tag.split(',') if name.strip() ]