mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
first working django model with archivebox-shell command and sql exporting
This commit is contained in:
parent
ecf95d398a
commit
cdb70c73df
17 changed files with 215 additions and 21 deletions
|
@ -1 +1,3 @@
|
|||
__package__ = 'archivebox'
|
||||
|
||||
from . import core
|
||||
|
|
|
@ -8,9 +8,8 @@ import sys
|
|||
import argparse
|
||||
|
||||
|
||||
from ..legacy.main import list_archive_data, remove_archive_links
|
||||
from ..legacy.util import reject_stdin, to_csv, TimedProgress
|
||||
from ..legacy.config import ANSI
|
||||
from ..legacy.main import remove_archive_links
|
||||
from ..legacy.util import reject_stdin
|
||||
|
||||
|
||||
def main(args=None):
|
||||
|
|
31
archivebox/cli/archivebox_shell.py
Normal file
31
archivebox/cli/archivebox_shell.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
__package__ = 'archivebox.cli'
|
||||
__command__ = 'archivebox shell'
|
||||
__description__ = 'Enter an interactive ArchiveBox Django shell'
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from ..legacy.config import setup_django
|
||||
from ..legacy.util import reject_stdin
|
||||
|
||||
|
||||
def main(args=None):
    """Entry point for the ``archivebox shell`` command.

    ``args`` is the list of command-line arguments to parse; when it is
    ``None`` the arguments are taken from ``sys.argv[1:]``.  The parser
    defines no options of its own beyond the automatic ``--help``.
    """
    if args is None:
        args = sys.argv[1:]

    cli = argparse.ArgumentParser(
        add_help=True,
        description=__description__,
        prog=__command__,
    )
    cli.parse_args(args)
    reject_stdin(__command__)

    # Configure Django first, then hand control over to the ``shell_plus``
    # management command.
    setup_django()
    from django.core.management import call_command

    call_command("shell_plus")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1 @@
|
|||
__package__ = 'archivebox.core'
|
28
archivebox/core/migrations/0001_initial.py
Normal file
28
archivebox/core/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Generated by Django 2.2 on 2019-04-17 06:46
|
||||
|
||||
from django.db import migrations, models
|
||||
import uuid
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial migration: creates the ``Page`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Page',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        primary_key=True,
                        default=uuid.uuid4,
                        editable=False,
                        serialize=False,
                    ),
                ),
                ('url', models.URLField()),
                (
                    'timestamp',
                    models.CharField(max_length=32, null=True, default=None),
                ),
                (
                    'title',
                    models.CharField(max_length=128, null=True, default=None),
                ),
                (
                    'tags',
                    models.CharField(max_length=256, null=True, default=None),
                ),
                ('added', models.DateTimeField(auto_now_add=True)),
                ('bookmarked', models.DateTimeField()),
                ('updated', models.DateTimeField(null=True, default=None)),
            ],
        ),
    ]
|
27
archivebox/core/migrations/0002_auto_20190417_0739.py
Normal file
27
archivebox/core/migrations/0002_auto_20190417_0739.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Generated by Django 2.2 on 2019-04-17 07:39
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Drop ``Page.bookmarked`` and make ``timestamp`` and ``url`` unique."""

    dependencies = [('core', '0001_initial')]

    operations = [
        migrations.RemoveField(model_name='page', name='bookmarked'),
        migrations.AlterField(
            model_name='page',
            name='timestamp',
            field=models.CharField(
                unique=True,
                max_length=32,
                null=True,
                default=None,
            ),
        ),
        migrations.AlterField(
            model_name='page',
            name='url',
            field=models.URLField(unique=True),
        ),
    ]
|
|
@ -1,3 +1,33 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
import uuid
|
||||
|
||||
from django.db import models
|
||||
|
||||
# Create your models here.
|
||||
|
||||
class Page(models.Model):
    """Django model holding one URL entry of the SQL index."""

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)

    url = models.URLField(unique=True)
    timestamp = models.CharField(default=None, null=True, max_length=32, unique=True)

    title = models.CharField(default=None, null=True, max_length=128)
    tags = models.CharField(default=None, null=True, max_length=256)

    added = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(default=None, null=True)
    # bookmarked = models.DateTimeField()

    # Field names copied in by from_json() and emitted by as_json() by default.
    sql_args = ('url', 'timestamp', 'title', 'tags', 'updated')

    @classmethod
    def from_json(cls, info: dict):
        """Build an unsaved instance from ``info``, ignoring keys not in ``sql_args``."""
        accepted = {}
        for field_name, value in info.items():
            if field_name in cls.sql_args:
                accepted[field_name] = value
        return cls(**accepted)

    def as_json(self, *args) -> dict:
        """Return the named attributes as a dict (all of ``sql_args`` if none are given)."""
        wanted = args if args else self.sql_args
        return dict((name, getattr(self, name)) for name in wanted)
|
||||
|
|
|
@ -1,24 +1,22 @@
|
|||
__package__ = 'archivebox.core'
|
||||
|
||||
from ..legacy.config import (
|
||||
TEMPLATES_DIR,
|
||||
DATABASE_FILE,
|
||||
)
|
||||
|
||||
import os
|
||||
|
||||
SECRET_KEY = '---------------- not a valid secret key ! ----------------'
|
||||
DEBUG = True
|
||||
|
||||
|
||||
INSTALLED_APPS = [
|
||||
# 'django.contrib.admin',
|
||||
# 'django.contrib.auth',
|
||||
# 'django.contrib.contenttypes',
|
||||
# 'django.contrib.sessions',
|
||||
# 'django.contrib.messages',
|
||||
# 'django.contrib.staticfiles',
|
||||
'django.contrib.admin',
|
||||
'django.contrib.auth',
|
||||
'django.contrib.contenttypes',
|
||||
'django.contrib.sessions',
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
|
||||
'core',
|
||||
|
||||
'django_extensions',
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
|
@ -35,7 +33,7 @@ ROOT_URLCONF = 'core.urls'
|
|||
TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [TEMPLATES_DIR],
|
||||
'DIRS': ['templates'],
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
|
@ -53,7 +51,7 @@ WSGI_APPLICATION = 'core.wsgi.application'
|
|||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': DATABASE_FILE,
|
||||
'NAME': os.path.join(os.path.abspath(os.curdir), 'database', 'database.sqlite3'),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
__package__ = 'archivebox.legacy'
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import getpass
|
||||
import django
|
||||
import getpass
|
||||
import shutil
|
||||
|
||||
from typing import Optional
|
||||
from subprocess import run, PIPE, DEVNULL
|
||||
|
||||
|
||||
# ******************************************************************************
|
||||
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
|
||||
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
|
||||
|
@ -93,10 +94,11 @@ else:
|
|||
ARCHIVE_DIR_NAME = 'archive'
|
||||
SOURCES_DIR_NAME = 'sources'
|
||||
DATABASE_DIR_NAME = 'database'
|
||||
DATABASE_FILE_NAME = 'database.sqlite3'
|
||||
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
|
||||
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
|
||||
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
|
||||
DATABASE_FILE = os.path.join(DATABASE_DIR, 'database.sqlite3')
|
||||
DATABASE_FILE = os.path.join(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||
|
||||
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
|
||||
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
|
||||
|
@ -221,6 +223,12 @@ def find_chrome_data_dir() -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def setup_django():
    """Configure Django for standalone (non-manage.py) use of the ``core`` app.

    Makes ``PYTHON_DIR`` importable, points ``DJANGO_SETTINGS_MODULE`` at
    ``core.settings`` unless the environment already sets it, and initialises
    Django.

    The function is safe to call repeatedly: ``sys.path`` is extended at most
    once and ``django.setup()`` is skipped when the app registry is already
    populated.
    """
    import django
    from django.apps import apps

    # Avoid growing sys.path by one duplicate entry on every call.
    if PYTHON_DIR not in sys.path:
        sys.path.append(PYTHON_DIR)

    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')

    # Only initialise Django the first time through.
    if not apps.ready:
        django.setup()
|
||||
|
||||
# ******************************************************************************
|
||||
# ************************ Environment & Dependencies **************************
|
||||
# ******************************************************************************
|
||||
|
|
|
@ -6,6 +6,8 @@ from collections import OrderedDict
|
|||
|
||||
from .schema import Link, ArchiveResult
|
||||
from .config import (
|
||||
DATABASE_DIR,
|
||||
DATABASE_FILE_NAME,
|
||||
OUTPUT_DIR,
|
||||
TIMEOUT,
|
||||
URL_BLACKLIST_PTN,
|
||||
|
@ -19,6 +21,10 @@ from .storage.json import (
|
|||
parse_json_link_details,
|
||||
write_json_link_details,
|
||||
)
|
||||
from .storage.sql import (
|
||||
write_sql_main_index,
|
||||
parse_sql_main_index,
|
||||
)
|
||||
from .util import (
|
||||
scheme,
|
||||
enforce_types,
|
||||
|
@ -204,6 +210,14 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
|
|||
|
||||
log_indexing_process_started()
|
||||
|
||||
log_indexing_started(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
||||
try:
|
||||
write_sql_main_index(links)
|
||||
finally:
|
||||
timer.end()
|
||||
log_indexing_finished(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||
|
||||
log_indexing_started(out_dir, 'index.json')
|
||||
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
||||
try:
|
||||
|
@ -228,6 +242,8 @@ def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) ->
|
|||
existing_links: List[Link] = []
|
||||
if out_dir:
|
||||
existing_links = list(parse_json_main_index(out_dir))
|
||||
existing_sql_links = list(parse_sql_main_index())
|
||||
assert set(l.url for l in existing_links) == set(l['url'] for l in existing_sql_links)
|
||||
|
||||
new_links: List[Link] = []
|
||||
if import_path:
|
||||
|
|
|
@ -22,6 +22,7 @@ from .config import (
|
|||
DATABASE_DIR,
|
||||
check_dependencies,
|
||||
check_data_folder,
|
||||
setup_django,
|
||||
)
|
||||
from .logs import (
|
||||
log_archiving_started,
|
||||
|
@ -75,6 +76,11 @@ def init():
|
|||
|
||||
write_main_index([], out_dir=OUTPUT_DIR, finished=True)
|
||||
|
||||
setup_django()
|
||||
from django.core.management import call_command
|
||||
call_command("makemigrations", interactive=False)
|
||||
call_command("migrate", interactive=False)
|
||||
|
||||
stderr('{green}[√] Done.{reset}'.format(**ANSI))
|
||||
|
||||
|
||||
|
|
10
archivebox/legacy/mypy_django.ini
Normal file
10
archivebox/legacy/mypy_django.ini
Normal file
|
@ -0,0 +1,10 @@
|
|||
[mypy_django_plugin]
|
||||
|
||||
# specify settings module to use for django.conf.settings, this setting
|
||||
# could also be specified with DJANGO_SETTINGS_MODULE environment variable
|
||||
# (it also takes priority over config file)
|
||||
django_settings = core.settings
|
||||
|
||||
# if True, all unknown settings in django.conf.settings will fall back to Any,
|
||||
# specify it if your settings are loaded dynamically to avoid false positives
|
||||
ignore_missing_settings = True
|
32
archivebox/legacy/storage/sql.py
Normal file
32
archivebox/legacy/storage/sql.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
__package__ = 'archivebox.legacy.storage'
|
||||
|
||||
from typing import List, Iterator
|
||||
|
||||
from ..schema import Link
|
||||
from ..util import enforce_types
|
||||
from ..config import setup_django
|
||||
|
||||
|
||||
### Main Links Index
|
||||
|
||||
sql_keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||
|
||||
|
||||
@enforce_types
def parse_sql_main_index() -> Iterator[dict]:
    """Lazily yield every ``Page`` row as a plain dict.

    Each yielded dict maps the names in ``sql_keys`` to the row's attribute
    values (the result of ``Page.as_json``); the items are NOT ``Link``
    objects, so callers must use key access such as ``row['url']``.

    Django is configured when this function is called, but the database query
    only runs once the returned generator is iterated.
    """
    setup_django()
    from core.models import Page

    return (
        page.as_json(*sql_keys)
        for page in Page.objects.all()
    )
|
||||
|
||||
@enforce_types
def write_sql_main_index(links: List[Link]) -> None:
    """Insert or update one ``Page`` row per link, matched on its URL.

    Only the attributes named in ``sql_keys`` are copied from each link.  All
    rows are written inside a single database transaction, so a failure part
    way through leaves the existing index untouched instead of half-updated;
    the exception still propagates to the caller.
    """
    setup_django()
    from core.models import Page
    from django.db import transaction

    with transaction.atomic():
        for link in links:
            info = {k: v for k, v in link._asdict().items() if k in sql_keys}
            Page.objects.update_or_create(url=link.url, defaults=info)
|
3
archivebox/mypy.ini
Normal file
3
archivebox/mypy.ini
Normal file
|
@ -0,0 +1,3 @@
|
|||
[mypy]
|
||||
plugins =
|
||||
mypy_django_plugin.main
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
__package__ = 'archivebox'
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
|
|
|
@ -5,6 +5,7 @@ base32-crockford
|
|||
setuptools
|
||||
ipdb
|
||||
mypy
|
||||
django-stubs
|
||||
flake8
|
||||
|
||||
#wpull
|
||||
|
|
3
setup.py
3
setup.py
|
@ -36,9 +36,10 @@ setuptools.setup(
|
|||
packages=setuptools.find_packages(),
|
||||
python_requires='>=3.6',
|
||||
install_requires=[
|
||||
"dataclasses==0.6",
|
||||
"base32-crockford==0.3.0",
|
||||
"django==2.2",
|
||||
"dataclasses==0.6",
|
||||
"django-extensions==2.1.6",
|
||||
],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
|
|
Loading…
Reference in a new issue