mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
automatically create storage directories and symlinks based on ulid
This commit is contained in:
parent
33bc4622a0
commit
ce833e8ead
1 changed files with 57 additions and 4 deletions
|
@ -21,7 +21,7 @@ from django.contrib.auth.models import User # noqa
|
|||
|
||||
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
|
||||
from ..system import get_dir_size
|
||||
from ..util import parse_date, base_url, hashurl
|
||||
from ..util import parse_date, base_url, hashurl, domain
|
||||
from ..index.schema import Link
|
||||
from ..index.html import snapshot_icons
|
||||
from ..extractors import get_default_archive_methods, ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
|
||||
|
@ -206,9 +206,7 @@ class Snapshot(models.Model):
|
|||
@cached_property
|
||||
def url_hash(self):
|
||||
# return hashurl(self.url)
|
||||
url_hash = hashlib.new('sha256')
|
||||
url_hash.update(self.url.encode('utf-8'))
|
||||
return url_hash.hexdigest()[:16]
|
||||
return hashlib.sha256(self.url.encode('utf-8')).hexdigest()[:16].upper()
|
||||
|
||||
@cached_property
|
||||
def base_url(self):
|
||||
|
@ -301,6 +299,31 @@ class Snapshot(models.Model):
|
|||
self.tags.add(*tags_id)
|
||||
|
||||
|
||||
def get_storage_dir(self, create=True, symlink=True) -> Path:
|
||||
date_str = self.added.strftime('%Y%m%d')
|
||||
domain_str = domain(self.url)
|
||||
abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
|
||||
|
||||
if create and not abs_storage_dir.is_dir():
|
||||
abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if symlink:
|
||||
LINK_PATHS = [
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
|
||||
]
|
||||
for link_path in LINK_PATHS:
|
||||
link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
link_path.symlink_to(abs_storage_dir)
|
||||
except FileExistsError:
|
||||
link_path.unlink()
|
||||
link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
return abs_storage_dir
|
||||
|
||||
class ArchiveResultManager(models.Manager):
|
||||
def indexable(self, sorted: bool = True):
|
||||
INDEXABLE_METHODS = [ r[0] for r in ARCHIVE_METHODS_INDEXING_PRECEDENCE ]
|
||||
|
@ -397,3 +420,33 @@ class ArchiveResult(models.Model):
|
|||
|
||||
def output_exists(self) -> bool:
|
||||
return Path(self.output_path()).exists()
|
||||
|
||||
|
||||
def get_storage_dir(self, create=True, symlink=True):
|
||||
date_str = self.snapshot.added.strftime('%Y%m%d')
|
||||
domain_str = domain(self.snapshot.url)
|
||||
abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / str(self.ulid)
|
||||
|
||||
if create and not abs_storage_dir.is_dir():
|
||||
abs_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if symlink:
|
||||
LINK_PATHS = [
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
|
||||
Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
|
||||
]
|
||||
for link_path in LINK_PATHS:
|
||||
link_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
link_path.symlink_to(abs_storage_dir)
|
||||
except FileExistsError:
|
||||
link_path.unlink()
|
||||
link_path.symlink_to(abs_storage_dir)
|
||||
|
||||
return abs_storage_dir
|
||||
|
||||
def symlink_index(self, create=True):
|
||||
abs_result_dir = self.get_storage_dir(create=create)
|
||||
|
|
Loading…
Reference in a new issue