mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2025-02-16 13:28:29 +00:00
add ModelWithOutputDir base model to manage output directories and index writing
This commit is contained in:
parent
c8b830b8dc
commit
af21c3428b
1 changed files with 105 additions and 0 deletions
|
@@ -386,10 +386,115 @@ class ModelWithHealthStats(models.Model):
|
|||
|
||||
|
||||
|
||||
class ModelWithOutputDir(ABIDModel):
    """Abstract base model for objects that own an output directory on disk.

    The directory location is derived from the model type and the object's
    ABID (``DATA_DIR / '<type>s' / '<abid>'``). The class also provides hooks
    for writing index files into that directory and for migrating directories
    laid out by older versions. Most of those hooks are currently placeholders
    that only print a trace line.
    """

    class Meta:
        abstract = True

    # Planned fields, not enabled yet:
    # output_dir = models.FilePathField(path=CONSTANTS.DATA_DIR, max_length=200, blank=True, null=True)
    # output_files = models.JSONField(default=dict)

    def save(self, *args, write_indexes=False, **kwargs) -> None:
        """Call the parent ``save()``, then optionally write the on-disk indexes.

        Args:
            *args: forwarded unchanged to the parent ``save()``.
            write_indexes: when true, ``self.write_indexes()`` is called after
                the parent ``save()`` returns.
            **kwargs: forwarded unchanged to the parent ``save()``.
        """
        super().save(*args, **kwargs)
        if write_indexes:
            self.write_indexes()

    @property
    def output_dir_type(self) -> str:
        """Get the model type parent directory name that holds this object's data e.g. 'archiveresults'"""
        # An `output_dir_parent` attribute on the object overrides the default,
        # which is the model name taken from the model's _meta.
        parent_dir = getattr(self, 'output_dir_parent', self._meta.model_name)
        assert parent_dir, f'{type(self).__name__} has no output_dir_parent or model_name to build a directory name from'
        return f'{parent_dir}s'  # e.g. archiveresults

    @property
    def output_dir_name(self) -> str:
        """Get the subdirectory name for the filesystem directory that holds this object's data e.g. 'snp_2342353k2jn3j32l4324'"""
        assert self.ABID, f'{type(self).__name__} has no ABID yet, cannot derive its output directory name'
        return str(self.ABID)  # e.g. snp_2342353k2jn3j32l4324

    @property
    def output_dir_str(self) -> str:
        """Get the relative filesystem directory path (as a str) that holds the data for this object e.g. 'snapshots/snp_2342353k2jn3j32l4324'"""
        return f'{self.output_dir_type}/{self.output_dir_name}'  # e.g. snapshots/snp_2342353k2jn3j32l4324

    @property
    def OUTPUT_DIR(self) -> Path:
        """Get the absolute filesystem directory Path that holds the data for this object e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')"""
        # NOTE(review): imported at call time rather than at module top,
        # presumably to avoid a circular import -- confirm before moving it.
        from archivebox import DATA_DIR
        return DATA_DIR / self.output_dir_str  # e.g. /data/snapshots/snp_2342353k2jn3j32l4324

    def write_indexes(self) -> None:
        """Create the output dir if needed, run the migrations, then write every index.

        The order is: mkdir, ``migrate_output_dir()``, merkle index, html
        index, json index, symlinks index.
        """
        print(f'{self}.write_indexes()')
        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        self.migrate_output_dir()
        self.save_merkle_index()
        self.save_html_index()
        self.save_json_index()
        self.save_symlinks_index()

    def migrate_output_dir(self) -> None:
        """Move the output files to the new folder structure if needed"""
        print(f'{self}.migrate_output_dir()')
        self.migrate_from_0_7_2()
        self.migrate_from_0_8_6()
        # ... future migrations here

    def migrate_from_0_7_2(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.7.2 to current version"""
        print(f'{self}.migrate_from_0_7_2()')
        # Placeholder, nothing is moved yet. Planned steps:
        #   move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
        #   update self.output_path = /data/archive/snapshots/<abid>

    def migrate_from_0_8_6(self) -> None:
        """Migrate output_dir generated by ArchiveBox <= 0.8.6 to current version"""
        # ... future migration code here ...
        print(f'{self}.migrate_from_0_8_6()')

    def save_merkle_index(self, **kwargs) -> None:
        """Write the ./.index.merkle file to the output dir"""
        # Placeholder. Planned:
        #   write self.generate_merkle_tree() to self.OUTPUT_DIR / '.index.merkle'
        print(f'{self}.save_merkle_index()')

    def save_html_index(self, **kwargs) -> None:
        """Write the ./index.html file to the output dir"""
        # Placeholder. Planned:
        #   write self.as_html() to self.OUTPUT_DIR / 'index.html'
        print(f'{self}.save_html_index()')

    def save_json_index(self, **kwargs) -> None:
        """Write the ./index.json file to the output dir"""
        print(f'{self}.save_json_index()')
        # Placeholder. Planned:
        #   write self.as_json() to self.OUTPUT_DIR / 'index.json'

    def save_symlinks_index(self, **kwargs) -> None:
        """Create the symlinks that point at this object's output dir

        Accepts ``**kwargs`` so that its signature matches the other
        ``save_*_index`` hooks. They are currently ignored.
        """
        print(f'{self}.save_symlinks_index()')
        # Placeholder. Planned:
        #   ln -s ../../../../self.OUTPUT_DIR data/index/snapshots_by_date/2024-01-01/example.com/<abid>
        #   ln -s ../../../../self.OUTPUT_DIR data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
        #   ln -s self.OUTPUT_DIR data/archive/1453452234234.21445

    def as_json(self) -> dict:
        """Get the object's properties as a dict

        ``status``, ``retry_at`` and ``notes`` are optional and come back as
        ``None`` when the object does not define them.

        NOTE(review): ``id``, ``modified_at`` and ``created_at`` are passed
        through unconverted. If they are UUID / datetime values, the caller
        needs a JSON encoder that handles those types -- confirm.
        """
        return {
            'TYPE': self.TYPE,
            'id': self.id,
            'abid': str(self.ABID),
            'str': str(self),
            'modified_at': self.modified_at,
            'created_at': self.created_at,
            'created_by_id': self.created_by_id,
            'status': getattr(self, 'status', None),
            'retry_at': getattr(self, 'retry_at', None),
            'notes': getattr(self, 'notes', None),
        }

    def as_html(self) -> str:
        """Get the object's properties as a html string

        Placeholder: currently always returns an empty string.
        """
        # Planned: render snapshot_detail.html template with self as context and return html string
        return ''
|
||||
|
||||
|
||||
####################################################
|
||||
|
|
Loading…
Add table
Reference in a new issue