add created_by_id to all Snapshot creation functions

This commit is contained in:
Nick Sweeting 2024-08-20 19:28:28 -07:00
parent aa282daadf
commit c30ae1d2cb
No known key found for this signature in database
4 changed files with 17 additions and 11 deletions

View file

@@ -470,6 +470,7 @@ class AddView(UserPassesTestMixin, FormView):
"parser": parser, "parser": parser,
"update_all": False, "update_all": False,
"out_dir": OUTPUT_DIR, "out_dir": OUTPUT_DIR,
"created_by_id": self.request.user.pk,
} }
if extractors: if extractors:
input_kwargs.update({"extractors": extractors}) input_kwargs.update({"extractors": extractors})

View file

@@ -225,21 +225,21 @@ def timed_index_update(out_path: Path):
@enforce_types @enforce_types
def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None: def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
"""Writes links to sqlite3 file for a given list of links""" """Writes links to sqlite3 file for a given list of links"""
log_indexing_process_started(len(links)) log_indexing_process_started(len(links))
try: try:
with timed_index_update(out_dir / SQL_INDEX_FILENAME): with timed_index_update(out_dir / SQL_INDEX_FILENAME):
write_sql_main_index(links, out_dir=out_dir) write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
stderr('[!] Warning: Still writing index to disk...', color='lightyellow') stderr('[!] Warning: Still writing index to disk...', color='lightyellow')
stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.') stderr(' Run archivebox init to fix any inconsistencies from an ungraceful exit.')
with timed_index_update(out_dir / SQL_INDEX_FILENAME): with timed_index_update(out_dir / SQL_INDEX_FILENAME):
write_sql_main_index(links, out_dir=out_dir) write_sql_main_index(links, out_dir=out_dir, created_by_id=created_by_id)
os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes os.chmod(out_dir / SQL_INDEX_FILENAME, int(OUTPUT_PERMISSIONS, base=8)) # set here because we don't write it with atomic writes
raise SystemExit(0) raise SystemExit(0)
@@ -268,7 +268,7 @@ def load_main_index_meta(out_dir: Path=OUTPUT_DIR) -> Optional[dict]:
@enforce_types @enforce_types
def parse_links_from_source(source_path: str, root_url: Optional[str]=None, parser: str="auto") -> Tuple[List[Link], List[Link]]: def parse_links_from_source(source_path: str, root_url: Optional[str]=None, parser: str="auto") -> List[Link]:
from ..parsers import parse_links from ..parsers import parse_links

View file

@@ -35,10 +35,12 @@ def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir:
return snapshots.delete() return snapshots.delete()
@enforce_types @enforce_types
def write_link_to_sql_index(link: Link): def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
from core.models import Snapshot, ArchiveResult from core.models import Snapshot, ArchiveResult
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys} info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
info['created_by_id'] = created_by_id
tag_list = list(dict.fromkeys( tag_list = list(dict.fromkeys(
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '') tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
)) ))
@@ -68,6 +70,7 @@ def write_link_to_sql_index(link: Link):
'cmd_version': entry.get('cmd_version') or 'unknown', 'cmd_version': entry.get('cmd_version') or 'unknown',
'pwd': entry['pwd'], 'pwd': entry['pwd'],
'status': entry['status'], 'status': entry['status'],
'created_by_id': created_by_id,
} }
) )
else: else:
@@ -82,6 +85,7 @@ def write_link_to_sql_index(link: Link):
'cmd_version': entry.cmd_version or 'unknown', 'cmd_version': entry.cmd_version or 'unknown',
'pwd': entry.pwd, 'pwd': entry.pwd,
'status': entry.status, 'status': entry.status,
'created_by_id': created_by_id,
} }
) )
@@ -89,15 +93,15 @@ def write_link_to_sql_index(link: Link):
@enforce_types @enforce_types
def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None: def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
for link in links: for link in links:
# with transaction.atomic(): # with transaction.atomic():
# write_link_to_sql_index(link) # write_link_to_sql_index(link)
write_link_to_sql_index(link) write_link_to_sql_index(link, created_by_id=created_by_id)
@enforce_types @enforce_types
def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None: def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
from core.models import Snapshot from core.models import Snapshot
# with transaction.atomic(): # with transaction.atomic():
@@ -109,7 +113,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
try: try:
snap = Snapshot.objects.get(url=link.url) snap = Snapshot.objects.get(url=link.url)
except Snapshot.DoesNotExist: except Snapshot.DoesNotExist:
snap = write_link_to_sql_index(link) snap = write_link_to_sql_index(link, created_by_id=created_by_id)
snap.title = link.title snap.title = link.title

View file

@@ -595,6 +595,7 @@ def add(urls: Union[str, List[str]],
init: bool=False, init: bool=False,
extractors: str="", extractors: str="",
parser: str="auto", parser: str="auto",
created_by_id: int | None=None,
out_dir: Path=OUTPUT_DIR) -> List[Link]: out_dir: Path=OUTPUT_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive""" """Add a new URL or list of URLs to your archive"""
@@ -639,11 +640,11 @@ def add(urls: Union[str, List[str]],
new_links = dedupe_links(all_links, imported_links) new_links = dedupe_links(all_links, imported_links)
write_main_index(links=new_links, out_dir=out_dir) write_main_index(links=new_links, out_dir=out_dir, created_by_id=created_by_id)
all_links = load_main_index(out_dir=out_dir) all_links = load_main_index(out_dir=out_dir)
tags = [ tags = [
Tag.objects.get_or_create(name=name.strip())[0] Tag.objects.get_or_create(name=name.strip(), defaults={'created_by_id': created_by_id})[0]
for name in tag.split(',') for name in tag.split(',')
if name.strip() if name.strip()
] ]