add ModelWithOutputDir base model to manage output directories and index writing

This commit is contained in:
Nick Sweeting 2024-11-18 02:24:10 -08:00
parent c8b830b8dc
commit af21c3428b
No known key found for this signature in database

View file

@ -386,10 +386,115 @@ class ModelWithHealthStats(models.Model):
class ModelWithOutputDir(ABIDModel):
    """Abstract base model for objects that keep their data in a directory on disk.

    Exposes properties that locate the object's output directory underneath
    DATA_DIR, and the hooks used to migrate that directory and (re)write the
    index files inside it.  Most of the migration and index hooks are still
    placeholders that only print a trace line when called.
    """

    class Meta:
        abstract = True

    # Planned fields, not enabled yet:
    # output_dir = models.FilePathField(path=CONSTANTS.DATA_DIR, max_length=200, blank=True, null=True)
    # output_files = models.JSONField(default=dict)

    def save(self, *args, write_indexes=False, **kwargs) -> None:
        """Save the object, optionally refreshing its on-disk indexes afterwards.

        Args:
            *args: Positional arguments forwarded to the parent ``save()``.
            write_indexes: When true, ``write_indexes()`` is called after the
                parent ``save()`` returns.
            **kwargs: Keyword arguments forwarded to the parent ``save()``.
        """
        super().save(*args, **kwargs)
        if not write_indexes:
            return
        self.write_indexes()

    @property
    def output_dir_type(self) -> str:
        """Name of the per-model-type parent directory, e.g. 'archiveresults'.

        Uses the ``output_dir_parent`` attribute when the object has one and
        falls back to the model name otherwise; an 's' is appended either way.
        """
        base_name = getattr(self, 'output_dir_parent', self._meta.model_name)
        assert base_name
        return f'{base_name}s'                            # e.g. archiveresults

    @property
    def output_dir_name(self) -> str:
        """Name of this object's own subdirectory (its ABID), e.g. 'snp_2342353k2jn3j32l4324'."""
        abid = self.ABID
        assert abid
        return str(abid)                                  # e.g. snp_2342353k2jn3j32l4324

    @property
    def output_dir_str(self) -> str:
        """Relative path of this object's output directory, e.g. 'snapshots/snp_2342353k2jn3j32l4324'."""
        path_parts = (self.output_dir_type, self.output_dir_name)
        return '/'.join(path_parts)                       # e.g. snapshots/snp_2342353k2jn3j32l4324

    @property
    def OUTPUT_DIR(self) -> Path:
        """Absolute path of this object's output directory, e.g. Path('/data/snapshots/snp_2342353k2jn3j32l4324')."""
        # imported inside the property rather than at module level
        from archivebox import DATA_DIR

        relative_dir = self.output_dir_str
        return DATA_DIR / relative_dir                    # e.g. /data/snapshots/snp_2342353k2jn3j32l4324

    def write_indexes(self):
        """Ensure the output dir exists, migrate it, then write every index into it.

        The indexes are written in a fixed order: merkle, html, json, symlinks.
        """
        print(f'{self}.write_indexes()')
        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        self.migrate_output_dir()

        index_writers = (
            self.save_merkle_index,
            self.save_html_index,
            self.save_json_index,
            self.save_symlinks_index,
        )
        for write_index in index_writers:
            write_index()

    def migrate_output_dir(self):
        """Run each known output-dir migration in order, oldest version first."""
        print(f'{self}.migrate_output_dir()')
        migrations = (
            self.migrate_from_0_7_2,
            self.migrate_from_0_8_6,
            # ... future migrations here
        )
        for migrate in migrations:
            migrate()

    def migrate_from_0_7_2(self) -> None:
        """Migrate an output dir generated by ArchiveBox <= 0.7.2 to the current layout.

        Placeholder: currently only prints a trace line.
        """
        print(f'{self}.migrate_from_0_7_2()')
        # TODO: move /data/archive/<timestamp> -> /data/archive/snapshots/<abid>
        # TODO: update self.output_path = /data/archive/snapshots/<abid>

    def migrate_from_0_8_6(self) -> None:
        """Migrate an output dir generated by ArchiveBox <= 0.8.6 to the current layout.

        Placeholder: currently only prints a trace line.
        """
        # ... future migration code here ...
        print(f'{self}.migrate_from_0_8_6()')

    def save_merkle_index(self, **kwargs) -> None:
        """Write the ./.index.merkle file to the output dir.

        Placeholder: currently only prints a trace line; ``kwargs`` are unused.
        """
        # TODO: write self.generate_merkle_tree() to self.output_dir / '.index.merkle'
        print(f'{self}.save_merkle_index()')

    def save_html_index(self, **kwargs) -> None:
        """Write the index.html file to the output dir.

        Placeholder: currently only prints a trace line; ``kwargs`` are unused.
        """
        # TODO: write self.as_html() to self.output_dir / 'index.html'
        print(f'{self}.save_html_index()')

    def save_json_index(self, **kwargs) -> None:
        """Write the index.json file to the output dir.

        Placeholder: currently only prints a trace line; ``kwargs`` are unused.
        """
        print(f'{self}.save_json_index()')
        # TODO: write self.as_json() to self.output_dir / 'index.json'

    def save_symlinks_index(self) -> None:
        """Create the symlinks that point at this object's output dir.

        Placeholder: currently only prints a trace line.
        """
        print(f'{self}.save_symlinks_index()')
        # TODO, planned links:
        # ln -s ../../../../self.output_dir data/index/snapshots_by_date/2024-01-01/example.com/<abid>
        # ln -s ../../../../self.output_dir data/index/snapshots_by_domain/example.com/2024-01-01/<abid>
        # ln -s self.output_dir data/archive/1453452234234.21445

    def as_json(self) -> dict:
        """Return the object's core properties as a dict.

        ``status``, ``retry_at`` and ``notes`` are included as ``None`` when the
        object does not define them.  Values other than ``abid`` and ``str`` are
        passed through as-is, without conversion.
        """
        record = {
            'TYPE': self.TYPE,
            'id': self.id,
            'abid': str(self.ABID),
            'str': str(self),
            'modified_at': self.modified_at,
            'created_at': self.created_at,
            'created_by_id': self.created_by_id,
        }
        # these attributes are looked up with a None fallback
        for optional_field in ('status', 'retry_at', 'notes'):
            record[optional_field] = getattr(self, optional_field, None)
        return record

    def as_html(self) -> str:
        """Return the object's properties as an html string.

        Placeholder: currently always returns an empty string.
        """
        # TODO: render snapshot_detail.html template with self as context and return html string
        return ''
####################################################