first working django model with archivebox-shell command and sql exporting

This commit is contained in:
Nick Sweeting 2019-04-17 03:49:18 -04:00
parent ecf95d398a
commit cdb70c73df
17 changed files with 215 additions and 21 deletions

View file

@ -1 +1,3 @@
__package__ = 'archivebox' __package__ = 'archivebox'
from . import core

View file

@ -8,9 +8,8 @@ import sys
import argparse import argparse
from ..legacy.main import list_archive_data, remove_archive_links from ..legacy.main import remove_archive_links
from ..legacy.util import reject_stdin, to_csv, TimedProgress from ..legacy.util import reject_stdin
from ..legacy.config import ANSI
def main(args=None): def main(args=None):

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox shell'
__description__ = 'Enter an interactive ArchiveBox Django shell'
import sys
import argparse
from ..legacy.config import setup_django
from ..legacy.util import reject_stdin
def main(args=None):
    """Open an interactive Django shell for the ArchiveBox project.

    Args:
        args: Command-line arguments to parse. When None, the arguments
            are taken from ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]

    # The command takes no options of its own; parsing still provides
    # ``--help`` and rejects unknown arguments.
    cli = argparse.ArgumentParser(
        add_help=True,
        description=__description__,
        prog=__command__,
    )
    cli.parse_args(args)
    reject_stdin(__command__)

    # Django must be configured before its management commands are used.
    setup_django()
    from django.core.management import call_command

    call_command("shell_plus")
# Run the command when this file is executed directly as a script.
if __name__ == '__main__':
main()

View file

@ -0,0 +1 @@
__package__ = 'archivebox.core'

View file

@ -0,0 +1,28 @@
# Generated by Django 2.2 on 2019-04-17 06:46
from django.db import migrations, models
import uuid
class Migration(migrations.Migration):
    """Initial migration: creates the ``Page`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Page',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        primary_key=True,
                        default=uuid.uuid4,
                        editable=False,
                        serialize=False,
                    ),
                ),
                (
                    'url',
                    models.URLField(),
                ),
                (
                    'timestamp',
                    models.CharField(max_length=32, null=True, default=None),
                ),
                (
                    'title',
                    models.CharField(max_length=128, null=True, default=None),
                ),
                (
                    'tags',
                    models.CharField(max_length=256, null=True, default=None),
                ),
                (
                    'added',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'bookmarked',
                    models.DateTimeField(),
                ),
                (
                    'updated',
                    models.DateTimeField(null=True, default=None),
                ),
            ],
        ),
    ]

View file

@ -0,0 +1,27 @@
# Generated by Django 2.2 on 2019-04-17 07:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Drop ``Page.bookmarked`` and make ``timestamp`` and ``url`` unique."""

    dependencies = [('core', '0001_initial')]

    operations = [
        migrations.RemoveField(model_name='page', name='bookmarked'),
        migrations.AlterField(
            model_name='page',
            name='timestamp',
            field=models.CharField(
                unique=True,
                max_length=32,
                null=True,
                default=None,
            ),
        ),
        migrations.AlterField(
            model_name='page',
            name='url',
            field=models.URLField(unique=True),
        ),
    ]

View file

@ -1,3 +1,33 @@
__package__ = 'archivebox.core'
import uuid
from django.db import models from django.db import models
# Create your models here.
class Page(models.Model):
    """Database row describing one archived URL."""

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)

    url = models.URLField(unique=True)
    timestamp = models.CharField(max_length=32, unique=True, null=True, default=None)

    title = models.CharField(max_length=128, default=None, null=True)
    tags = models.CharField(max_length=256, default=None, null=True)

    added = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(default=None, null=True)
    # Field currently disabled:
    # bookmarked = models.DateTimeField()

    # Names of the fields accepted by from_json() and emitted by as_json()
    # when no explicit keys are requested.
    sql_args = ('url', 'timestamp', 'title', 'tags', 'updated')

    @classmethod
    def from_json(cls, info: dict):
        """Build an unsaved Page from ``info``, ignoring keys not in ``sql_args``."""
        accepted = {}
        for name, value in info.items():
            if name in cls.sql_args:
                accepted[name] = value
        return cls(**accepted)

    def as_json(self, *args) -> dict:
        """Return a dict of the requested attributes.

        Args:
            *args: Attribute names to include. When none are given, the
                names in ``sql_args`` are used.
        """
        wanted = args if args else self.sql_args
        exported = {}
        for name in wanted:
            exported[name] = getattr(self, name)
        return exported

View file

@ -1,24 +1,22 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
from ..legacy.config import ( import os
TEMPLATES_DIR,
DATABASE_FILE,
)
SECRET_KEY = '---------------- not a valid secret key ! ----------------' SECRET_KEY = '---------------- not a valid secret key ! ----------------'
DEBUG = True DEBUG = True
INSTALLED_APPS = [ INSTALLED_APPS = [
# 'django.contrib.admin', 'django.contrib.admin',
# 'django.contrib.auth', 'django.contrib.auth',
# 'django.contrib.contenttypes', 'django.contrib.contenttypes',
# 'django.contrib.sessions', 'django.contrib.sessions',
# 'django.contrib.messages', 'django.contrib.messages',
# 'django.contrib.staticfiles', 'django.contrib.staticfiles',
'core', 'core',
'django_extensions',
] ]
MIDDLEWARE = [ MIDDLEWARE = [
@ -35,7 +33,7 @@ ROOT_URLCONF = 'core.urls'
TEMPLATES = [ TEMPLATES = [
{ {
'BACKEND': 'django.template.backends.django.DjangoTemplates', 'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [TEMPLATES_DIR], 'DIRS': ['templates'],
'APP_DIRS': True, 'APP_DIRS': True,
'OPTIONS': { 'OPTIONS': {
'context_processors': [ 'context_processors': [
@ -53,7 +51,7 @@ WSGI_APPLICATION = 'core.wsgi.application'
DATABASES = { DATABASES = {
'default': { 'default': {
'ENGINE': 'django.db.backends.sqlite3', 'ENGINE': 'django.db.backends.sqlite3',
'NAME': DATABASE_FILE, 'NAME': os.path.join(os.path.abspath(os.curdir), 'database', 'database.sqlite3'),
} }
} }

View file

@ -1,14 +1,15 @@
__package__ = 'archivebox.legacy'
import os import os
import re import re
import sys import sys
import getpass
import django import django
import getpass
import shutil import shutil
from typing import Optional from typing import Optional
from subprocess import run, PIPE, DEVNULL from subprocess import run, PIPE, DEVNULL
# ****************************************************************************** # ******************************************************************************
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration # Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
# Use the 'env' command to pass config options to ArchiveBox. e.g.: # Use the 'env' command to pass config options to ArchiveBox. e.g.:
@ -93,10 +94,11 @@ else:
ARCHIVE_DIR_NAME = 'archive' ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources' SOURCES_DIR_NAME = 'sources'
DATABASE_DIR_NAME = 'database' DATABASE_DIR_NAME = 'database'
DATABASE_FILE_NAME = 'database.sqlite3'
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME) ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME) SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME) DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
DATABASE_FILE = os.path.join(DATABASE_DIR, 'database.sqlite3') DATABASE_FILE = os.path.join(DATABASE_DIR, DATABASE_FILE_NAME)
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox') PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy') LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
@ -221,6 +223,12 @@ def find_chrome_data_dir() -> Optional[str]:
return None return None
def setup_django():
    """Configure Django so the project's apps and models can be imported.

    Makes PYTHON_DIR importable, selects 'core.settings' as the settings
    module unless DJANGO_SETTINGS_MODULE is already set in the environment,
    and then calls ``django.setup()``.
    """
    import django

    # This helper may be called more than once in the same process; only
    # add the directory the first time so sys.path does not accumulate
    # duplicate entries.
    if PYTHON_DIR not in sys.path:
        sys.path.append(PYTHON_DIR)

    # setdefault() leaves an externally provided settings module untouched.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    django.setup()
# ****************************************************************************** # ******************************************************************************
# ************************ Environment & Dependencies ************************** # ************************ Environment & Dependencies **************************
# ****************************************************************************** # ******************************************************************************

View file

@ -6,6 +6,8 @@ from collections import OrderedDict
from .schema import Link, ArchiveResult from .schema import Link, ArchiveResult
from .config import ( from .config import (
DATABASE_DIR,
DATABASE_FILE_NAME,
OUTPUT_DIR, OUTPUT_DIR,
TIMEOUT, TIMEOUT,
URL_BLACKLIST_PTN, URL_BLACKLIST_PTN,
@ -19,6 +21,10 @@ from .storage.json import (
parse_json_link_details, parse_json_link_details,
write_json_link_details, write_json_link_details,
) )
from .storage.sql import (
write_sql_main_index,
parse_sql_main_index,
)
from .util import ( from .util import (
scheme, scheme,
enforce_types, enforce_types,
@ -204,6 +210,14 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
log_indexing_process_started() log_indexing_process_started()
log_indexing_started(DATABASE_DIR, DATABASE_FILE_NAME)
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
try:
write_sql_main_index(links)
finally:
timer.end()
log_indexing_finished(DATABASE_DIR, DATABASE_FILE_NAME)
log_indexing_started(out_dir, 'index.json') log_indexing_started(out_dir, 'index.json')
timer = TimedProgress(TIMEOUT * 2, prefix=' ') timer = TimedProgress(TIMEOUT * 2, prefix=' ')
try: try:
@ -228,6 +242,8 @@ def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) ->
existing_links: List[Link] = [] existing_links: List[Link] = []
if out_dir: if out_dir:
existing_links = list(parse_json_main_index(out_dir)) existing_links = list(parse_json_main_index(out_dir))
existing_sql_links = list(parse_sql_main_index())
assert set(l.url for l in existing_links) == set(l['url'] for l in existing_sql_links)
new_links: List[Link] = [] new_links: List[Link] = []
if import_path: if import_path:

View file

@ -22,6 +22,7 @@ from .config import (
DATABASE_DIR, DATABASE_DIR,
check_dependencies, check_dependencies,
check_data_folder, check_data_folder,
setup_django,
) )
from .logs import ( from .logs import (
log_archiving_started, log_archiving_started,
@ -75,6 +76,11 @@ def init():
write_main_index([], out_dir=OUTPUT_DIR, finished=True) write_main_index([], out_dir=OUTPUT_DIR, finished=True)
setup_django()
from django.core.management import call_command
call_command("makemigrations", interactive=False)
call_command("migrate", interactive=False)
stderr('{green}[√] Done.{reset}'.format(**ANSI)) stderr('{green}[√] Done.{reset}'.format(**ANSI))

View file

@ -0,0 +1,10 @@
[mypy_django_plugin]
# specify settings module to use for django.conf.settings, this setting
# could also be specified with DJANGO_SETTINGS_MODULE environment variable
# (it also takes priority over config file)
django_settings = core.settings
# if True, all unknown settings in django.conf.settings will fall back to Any,
# specify it if your settings are loaded dynamically to avoid false positives
ignore_missing_settings = True

View file

@ -0,0 +1,32 @@
__package__ = 'archivebox.legacy.storage'
from typing import List, Iterator
from ..schema import Link
from ..util import enforce_types
from ..config import setup_django
### Main Links Index
# Names of the link fields that are written to, and read back from, the SQL index.
sql_keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@enforce_types
def parse_sql_main_index() -> Iterator[dict]:
    """Yield every row of the SQL main index as a plain dict.

    Returns:
        A lazy generator producing one dict per ``Page`` row, containing
        the attributes named in ``sql_keys``. The items are dicts (the
        result of ``Page.as_json``), not ``Link`` objects.
    """
    # Django has to be configured before the models module can be imported.
    setup_django()
    from core.models import Page

    return (
        page.as_json(*sql_keys)
        for page in Page.objects.all()
    )
@enforce_types
def write_sql_main_index(links: List[Link]) -> None:
    """Create or update one ``Page`` row per link, matched on the link's URL.

    Args:
        links: Links whose fields named in ``sql_keys`` are stored.
    """
    # Django has to be configured before the models module can be imported.
    setup_django()
    from core.models import Page

    for link in links:
        link_fields = link._asdict()
        defaults = {
            name: link_fields[name]
            for name in link_fields
            if name in sql_keys
        }
        Page.objects.update_or_create(url=link.url, defaults=defaults)

3
archivebox/mypy.ini Normal file
View file

@ -0,0 +1,3 @@
[mypy]
plugins =
mypy_django_plugin.main

View file

@ -2,6 +2,7 @@
__package__ = 'archivebox' __package__ = 'archivebox'
import os import os
import sys import sys
import shutil import shutil

View file

@ -5,6 +5,7 @@ base32-crockford
setuptools setuptools
ipdb ipdb
mypy mypy
django-stubs
flake8 flake8
#wpull #wpull

View file

@ -36,9 +36,10 @@ setuptools.setup(
packages=setuptools.find_packages(), packages=setuptools.find_packages(),
python_requires='>=3.6', python_requires='>=3.6',
install_requires=[ install_requires=[
"dataclasses==0.6",
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
"django==2.2", "django==2.2",
"dataclasses==0.6", "django-extensions==2.1.6",
], ],
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [