Merge pull request #607 from ArchiveBox/dev

This commit is contained in:
Nick Sweeting 2021-01-06 20:04:37 +02:00 committed by GitHub
commit eb2eecc24a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 3 deletions

View file

@@ -26,7 +26,7 @@
<hr/>
</div>
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a varitety of formats depending on the configuration and the content it detects.
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a variety of formats depending on the configuration and the content it detects.
Your archive can be managed through the command line with commands like `archivebox add`, through the built-in Web UI `archivebox server`, or via the Python library API (beta). It can ingest bookmarks from a browser or service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time. You can also schedule regular/realtime imports with `archivebox schedule`.

View file

@@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2021-01-05 14:21
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0007_archiveresult'),
]
operations = [
migrations.AlterField(
model_name='archiveresult',
name='cmd_version',
field=models.CharField(blank=True, default=None, max_length=32, null=True),
),
]

View file

@@ -181,7 +181,7 @@ class ArchiveResult(models.Model):
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
cmd = JSONField()
pwd = models.CharField(max_length=256)
cmd_version = models.CharField(max_length=32)
cmd_version = models.CharField(max_length=32, default=None, null=True, blank=True)
output = models.CharField(max_length=512)
start_ts = models.DateTimeField()
end_ts = models.DateTimeField()

View file

@@ -124,7 +124,7 @@ def validate_links(links: Iterable[Link]) -> List[Link]:
timer = TimedProgress(TIMEOUT * 4)
try:
links = archivable_links(links) # remove chrome://, about:, mailto: etc.
links = sorted_links(links) # deterministically sort the links based on timstamp, url
links = sorted_links(links) # deterministically sort the links based on timestamp, url
links = fix_duplicate_links(links) # merge/dedupe duplicate timestamps & urls
finally:
timer.end()