config and attr access improvements

This commit is contained in:
Nick Sweeting 2024-08-20 18:31:21 -07:00
parent 4ae186dfca
commit 0285aa52a0
No known key found for this signature in database
15 changed files with 203 additions and 187 deletions

View file

@ -115,7 +115,7 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str:
if isinstance(uri, bytes): if isinstance(uri, bytes):
uri_str: str = uri.decode() uri_str: str = uri.decode()
else: else:
uri_str = uri uri_str = str(uri)
# only hash the domain part of URLs # only hash the domain part of URLs
if '://' in uri_str: if '://' in uri_str:

View file

@ -15,6 +15,7 @@ from charidfield import CharIDField # type: ignore[import-untyped]
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
from django.utils import timezone
from django.db.utils import OperationalError from django.db.utils import OperationalError
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
@ -115,7 +116,8 @@ class ABIDModel(models.Model):
raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})') raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')
if not ts: if not ts:
ts = datetime.utcfromtimestamp(0) # default to unix epoch with 00:00:00 UTC
ts = datetime.fromtimestamp(0, timezone.utc) # equivalent to: ts = datetime.utcfromtimestamp(0)
print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat()) print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())
if not uri: if not uri:
@ -146,7 +148,13 @@ class ABIDModel(models.Model):
""" """
ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE')
""" """
abid = None
# if object is not yet saved to DB, always generate fresh ABID from values
if self._state.adding:
return self.generate_abid()
# otherwise DB is single source of truth, load ABID from existing db pk
abid: ABID | None = None
try: try:
abid = abid or ABID.parse(self.pk) abid = abid or ABID.parse(self.pk)
except Exception: except Exception:
@ -158,12 +166,7 @@ class ABIDModel(models.Model):
pass pass
try: try:
abid = abid or ABID.parse(self.uuid) abid = abid or ABID.parse(cast(str, self.abid))
except Exception:
pass
try:
abid = abid or ABID.parse(self.abid)
except Exception: except Exception:
pass pass

View file

@ -1,6 +1,6 @@
__package__ = 'archivebox.api' __package__ = 'archivebox.api'
from typing import Optional from typing import Optional, cast
from django.http import HttpRequest from django.http import HttpRequest
from django.contrib.auth import login from django.contrib.auth import login
@ -18,12 +18,13 @@ def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[Abs
submitted_empty_form = token in ('string', '', None) submitted_empty_form = token in ('string', '', None)
if submitted_empty_form: if submitted_empty_form:
assert request is not None, 'No request provided for API key authentication'
user = request.user # see if user is authed via django session and use that as the default user = request.user # see if user is authed via django session and use that as the default
else: else:
try: try:
token = APIToken.objects.get(token=token) token = APIToken.objects.get(token=token)
if token.is_valid(): if token.is_valid():
user = token.user user = token.created_by
except APIToken.DoesNotExist: except APIToken.DoesNotExist:
pass pass
@ -38,6 +39,7 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None)) submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
if submitted_empty_form: if submitted_empty_form:
assert request is not None, 'No request provided for API key authentication'
user = request.user # see if user is authed via django session and use that as the default user = request.user # see if user is authed via django session and use that as the default
else: else:
user = authenticate( user = authenticate(
@ -47,8 +49,9 @@ def auth_using_password(username, password, request: Optional[HttpRequest]=None)
if not user: if not user:
print('[❌] Failed to authenticate API user using API Key:', request) print('[❌] Failed to authenticate API user using API Key:', request)
user = None
return user return cast(AbstractBaseUser | None, user)
### Base Auth Types ### Base Auth Types

View file

@ -12,7 +12,8 @@ from signal_webhooks.models import WebhookBase
from django_stubs_ext.db.models import TypedModelMeta from django_stubs_ext.db.models import TypedModelMeta
from abid_utils.models import ABIDModel, ABIDField from abid_utils.models import ABIDModel, ABIDField, get_or_create_system_user_pk
def generate_secret_token() -> str: def generate_secret_token() -> str:
@ -32,15 +33,13 @@ class APIToken(ABIDModel):
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
uuid = models.UUIDField(blank=True, null=True, editable=False, unique=True)
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
expires = models.DateTimeField(null=True, blank=True)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
expires = models.DateTimeField(null=True, blank=True)
class Meta(TypedModelMeta): class Meta(TypedModelMeta):
verbose_name = "API Key" verbose_name = "API Key"
@ -50,7 +49,7 @@ class APIToken(ABIDModel):
return self.token return self.token
def __repr__(self) -> str: def __repr__(self) -> str:
return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>' return f'<APIToken user={self.created_by.username} token=************{self.token[-4:]}>'
def __json__(self) -> dict: def __json__(self) -> dict:
return { return {
@ -63,10 +62,6 @@ class APIToken(ABIDModel):
"expires": self.expires_as_iso8601, "expires": self.expires_as_iso8601,
} }
@property
def ulid(self):
return self.get_abid().ulid
@property @property
def expires_as_iso8601(self): def expires_as_iso8601(self):
"""Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none.""" """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
@ -100,10 +95,15 @@ class OutboundWebhook(ABIDModel, WebhookBase):
abid_subtype_src = 'self.ref' abid_subtype_src = 'self.ref'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
id = models.UUIDField(blank=True, null=True, unique=True, editable=True) id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True)
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk)
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
# More fields here: WebhookBase...
WebhookBase._meta.get_field('name').help_text = ( WebhookBase._meta.get_field('name').help_text = (
'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).') 'Give your webhook a descriptive name (e.g. Notify ACME Slack channel of any new ArchiveResults).')
WebhookBase._meta.get_field('signal').help_text = ( WebhookBase._meta.get_field('signal').help_text = (

View file

@ -309,9 +309,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# snapshot = Snapshot.objects.create(**payload.dict()) # snapshot = Snapshot.objects.create(**payload.dict())
# return snapshot # return snapshot
# #
# @router.put("/snapshot/{snapshot_uuid}", response=SnapshotSchema) # @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_uuid: str, payload: SnapshotSchema): # def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid) # snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
# #
# for attr, value in payload.dict().items(): # for attr, value in payload.dict().items():
# setattr(snapshot, attr, value) # setattr(snapshot, attr, value)
@ -319,9 +319,9 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# #
# return snapshot # return snapshot
# #
# @router.delete("/snapshot/{snapshot_uuid}") # @router.delete("/snapshot/{snapshot_id}")
# def delete_snapshot(request, snapshot_uuid: str): # def delete_snapshot(request, snapshot_id: str):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_uuid) # snapshot = get_object_or_404(Snapshot, uuid=snapshot_id)
# snapshot.delete() # snapshot.delete()
# return {"success": True} # return {"success": True}

View file

@ -44,6 +44,7 @@ from collections import defaultdict
import importlib.metadata import importlib.metadata
from .config_stubs import ( from .config_stubs import (
AttrDict,
SimpleConfigValueDict, SimpleConfigValueDict,
ConfigValue, ConfigValue,
ConfigDict, ConfigDict,
@ -379,6 +380,29 @@ ALLOWED_IN_OUTPUT_DIR = {
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
# Hard-coded module constants re-exposed in the same {'default': callable} shape
# used by the config schema entries, so they can be merged into
# DYNAMIC_CONFIG_SCHEMA via **CONSTANTS and loaded like any other config value.
# Each callable receives the config mapping `c` (unused here) and returns the
# module-level constant of the same name; the lookup happens when the lambda is
# called, not when this dict is built.
CONSTANTS = {
"PACKAGE_DIR_NAME": {'default': lambda c: PACKAGE_DIR_NAME},
"TEMPLATES_DIR_NAME": {'default': lambda c: TEMPLATES_DIR_NAME},
"ARCHIVE_DIR_NAME": {'default': lambda c: ARCHIVE_DIR_NAME},
"SOURCES_DIR_NAME": {'default': lambda c: SOURCES_DIR_NAME},
"LOGS_DIR_NAME": {'default': lambda c: LOGS_DIR_NAME},
"CACHE_DIR_NAME": {'default': lambda c: CACHE_DIR_NAME},
"PERSONAS_DIR_NAME": {'default': lambda c: PERSONAS_DIR_NAME},
"CRONTABS_DIR_NAME": {'default': lambda c: CRONTABS_DIR_NAME},
"SQL_INDEX_FILENAME": {'default': lambda c: SQL_INDEX_FILENAME},
"JSON_INDEX_FILENAME": {'default': lambda c: JSON_INDEX_FILENAME},
"HTML_INDEX_FILENAME": {'default': lambda c: HTML_INDEX_FILENAME},
"ROBOTS_TXT_FILENAME": {'default': lambda c: ROBOTS_TXT_FILENAME},
"FAVICON_FILENAME": {'default': lambda c: FAVICON_FILENAME},
"CONFIG_FILENAME": {'default': lambda c: CONFIG_FILENAME},
"DEFAULT_CLI_COLORS": {'default': lambda c: DEFAULT_CLI_COLORS},
# NOTE: DYNAMIC_CONFIG_SCHEMA spreads **CONSTANTS first and then defines its own
# 'ANSI' entry (dependent on USE_COLOR), so that later entry replaces this one there.
"ANSI": {'default': lambda c: ANSI},
"COLOR_DICT": {'default': lambda c: COLOR_DICT},
"STATICFILE_EXTENSIONS": {'default': lambda c: STATICFILE_EXTENSIONS},
"ALLOWED_IN_OUTPUT_DIR": {'default': lambda c: ALLOWED_IN_OUTPUT_DIR},
"ALLOWDENYLIST_REGEX_FLAGS": {'default': lambda c: ALLOWDENYLIST_REGEX_FLAGS},
}
############################## Version Config ################################## ############################## Version Config ##################################
def get_system_user() -> str: def get_system_user() -> str:
@ -498,9 +522,13 @@ def can_upgrade(config):
############################## Derived Config ################################## ############################## Derived Config ##################################
# These are derived/computed values calculated *after* all user-provided config values are ingested # These are derived/computed values calculated *after* all user-provided config values are ingested
# they appear in `archivebox config` output and are intended to be read-only for the user # they appear in `archivebox config` output and are intended to be read-only for the user
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = { DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
**CONSTANTS,
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns}, 'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
'USER': {'default': lambda c: get_system_user()}, 'USER': {'default': lambda c: get_system_user()},
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}}, 'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@ -678,28 +706,29 @@ def load_config_val(key: str,
raise Exception('Config values can only be str, bool, int, or json') raise Exception('Config values can only be str, bool, int, or json')
def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]: def load_config_file(out_dir: str | None=None) -> Optional[ConfigDict]:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve() out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
assert out_dir and out_dir.is_dir()
config_path = Path(out_dir) / CONFIG_FILENAME config_path = Path(out_dir) / CONFIG_FILENAME
if config_path.exists(): if config_path.exists():
config_file = ConfigParser() config_file = ConfigParser()
config_file.optionxform = str config_file.optionxform = str
config_file.read(config_path) config_file.read(config_path)
# flatten into one namespace # flatten into one namespace
config_file_vars = { config_file_vars = ConfigDict({
key.upper(): val key.upper(): val
for section, options in config_file.items() for section, options in config_file.items()
for key, val in options.items() for key, val in options.items()
} })
# print('[i] Loaded config file', os.path.abspath(config_path)) # print('[i] Loaded config file', os.path.abspath(config_path))
# print(config_file_vars) # print(config_file_vars)
return config_file_vars return config_file_vars
return None return None
def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict: def write_config_file(config: Dict[str, str], out_dir: str | None=None) -> ConfigDict:
"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf""" """load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
from .system import atomic_write from .system import atomic_write
@ -740,7 +769,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
existing_config = dict(config_file[section]) existing_config = dict(config_file[section])
else: else:
existing_config = {} existing_config = {}
config_file[section] = {**existing_config, key: val} config_file[section] = ConfigDict({**existing_config, key: val})
# always make sure there's a SECRET_KEY defined for Django # always make sure there's a SECRET_KEY defined for Django
existing_secret_key = None existing_secret_key = None
@ -815,7 +844,7 @@ def load_config(defaults: ConfigDefaultDict,
# raise # raise
raise SystemExit(2) raise SystemExit(2)
return extended_config return AttrDict(extended_config)
def parse_version_string(version: str) -> Tuple[int, int, int]: def parse_version_string(version: str) -> Tuple[int, int, int]:
@ -1198,14 +1227,14 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
def load_all_config(): def load_all_config():
CONFIG: ConfigDict = {} CONFIG: ConfigDict = ConfigDict()
for section_name, section_config in CONFIG_SCHEMA.items(): for section_name, section_config in CONFIG_SCHEMA.items():
CONFIG = load_config(section_config, CONFIG) CONFIG = load_config(section_config, CONFIG)
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG) return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
# add all final config values in CONFIG to globals in this file # add all final config values in CONFIG to globals in this file
CONFIG = load_all_config() CONFIG: ConfigDict = load_all_config()
globals().update(CONFIG) globals().update(CONFIG)
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ... # this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...

View file

@ -9,11 +9,15 @@ SimpleConfigValueDict = Dict[str, SimpleConfigValue]
SimpleConfigValueGetter = Callable[[], SimpleConfigValue] SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter] ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
class AttrDict(dict):
    """Dictionary whose entries can also be read and written as attributes.

    Accepts exactly the same constructor arguments as ``dict``. After
    construction ``d.KEY`` and ``d['KEY']`` refer to the same entry, and
    assigning through either form updates the other.

    Caveat: because instance attributes take precedence over ordinary
    methods, storing a key named like a dict method (e.g. ``'items'``)
    shadows that method on attribute access for this instance.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Use the dict itself as the instance attribute namespace so that
        # attribute access and item access share a single storage.
        self.__dict__ = self
class BaseConfig(TypedDict): class BaseConfig(TypedDict):
pass pass
class ConfigDict(BaseConfig, total=False): class ConfigDict(BaseConfig, AttrDict, total=False):
""" """
# Regenerate by pasting this quine into `archivebox shell` 🥚 # Regenerate by pasting this quine into `archivebox shell` 🥚
from archivebox.config import ConfigDict, CONFIG_DEFAULTS from archivebox.config import ConfigDict, CONFIG_DEFAULTS
@ -28,6 +32,7 @@ class ConfigDict(BaseConfig, total=False):
print(f' {key}: {Type.__name__}') print(f' {key}: {Type.__name__}')
print() print()
""" """
IS_TTY: bool IS_TTY: bool
USE_COLOR: bool USE_COLOR: bool
SHOW_PROGRESS: bool SHOW_PROGRESS: bool

View file

@ -7,6 +7,7 @@ from io import StringIO
from pathlib import Path from pathlib import Path
from contextlib import redirect_stdout from contextlib import redirect_stdout
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Dict, Any
from django.contrib import admin from django.contrib import admin
from django.db.models import Count, Q from django.db.models import Count, Q
@ -16,10 +17,12 @@ from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.conf import settings
from django import forms from django import forms
from signal_webhooks.admin import WebhookAdmin, get_webhook_model from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model
# from plugantic.admin import CustomPlugin # from plugantic.admin import CustomPlugin
from ..util import htmldecode, urldecode, ansi_to_html from ..util import htmldecode, urldecode, ansi_to_html
@ -34,16 +37,11 @@ from index.html import snapshot_icons
from logging_util import printable_filesize from logging_util import printable_filesize
from main import add, remove from main import add, remove
from extractors import archive_links from extractors import archive_links
from config import (
OUTPUT_DIR,
SNAPSHOTS_PER_PAGE,
VERSION,
VERSIONS_AVAILABLE,
CAN_UPGRADE
)
GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE} CONFIG = settings.CONFIG
GLOBAL_CONTEXT = {'VERSION': CONFIG.VERSION, 'VERSIONS_AVAILABLE': CONFIG.VERSIONS_AVAILABLE, 'CAN_UPGRADE': CONFIG.CAN_UPGRADE}
# Admin URLs # Admin URLs
# /admin/ # /admin/
@ -74,7 +72,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
return redirect(f'/admin/login/?next={request.path}') return redirect(f'/admin/login/?next={request.path}')
request.current_app = self.name request.current_app = self.name
context = { context: Dict[str, Any] = {
**self.each_context(request), **self.each_context(request),
'title': 'Add URLs', 'title': 'Add URLs',
} }
@ -92,7 +90,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
"urls": url, "urls": url,
"depth": depth, "depth": depth,
"update_all": False, "update_all": False,
"out_dir": OUTPUT_DIR, "out_dir": CONFIG.OUTPUT_DIR,
} }
add_stdout = StringIO() add_stdout = StringIO()
with redirect_stdout(add_stdout): with redirect_stdout(add_stdout):
@ -101,7 +99,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
context.update({ context.update({
"stdout": ansi_to_html(add_stdout.getvalue().strip()), "stdout": ansi_to_html(add_stdout.getvalue().strip()),
"form": AddLinkForm() "form": AddLinkForm(),
}) })
else: else:
context["form"] = form context["form"] = form
@ -118,12 +116,14 @@ archivebox_admin.disable_action('delete_selected')
# archivebox_admin.register(CustomPlugin) # archivebox_admin.register(CustomPlugin)
# patch admin with methods to add data views (implemented by admin_data_views package) # patch admin with methods to add data views (implemented by admin_data_views package)
# https://github.com/MrThearMan/django-admin-data-views
# https://mrthearman.github.io/django-admin-data-views/setup/
############### Additional sections are defined in settings.ADMIN_DATA_VIEWS ######### ############### Additional sections are defined in settings.ADMIN_DATA_VIEWS #########
from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls from admin_data_views.admin import get_app_list, admin_data_index_view, get_admin_data_urls, get_urls
archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin) archivebox_admin.get_app_list = get_app_list.__get__(archivebox_admin, ArchiveBoxAdmin)
archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) archivebox_admin.admin_data_index_view = admin_data_index_view.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) archivebox_admin.get_admin_data_urls = get_admin_data_urls.__get__(archivebox_admin, ArchiveBoxAdmin) # type: ignore
archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin) archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archivebox_admin, ArchiveBoxAdmin)
@ -146,7 +146,7 @@ class ArchiveResultInline(admin.TabularInline):
class TagInline(admin.TabularInline): class TagInline(admin.TabularInline):
model = Tag.snapshot_set.through model = Tag.snapshot_set.through # type: ignore
# fk_name = 'snapshot' # fk_name = 'snapshot'
fields = ('id', 'tag') fields = ('id', 'tag')
extra = 1 extra = 1
@ -241,7 +241,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots'] actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
autocomplete_fields = ['tags'] autocomplete_fields = ['tags']
inlines = [TagInline, ArchiveResultInline] inlines = [TagInline, ArchiveResultInline]
list_per_page = SNAPSHOTS_PER_PAGE list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
action_form = SnapshotActionForm action_form = SnapshotActionForm
@ -433,7 +433,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
# Monkey patch here plus core_tags.py # Monkey patch here plus core_tags.py
self.change_list_template = 'private_index_grid.html' self.change_list_template = 'private_index_grid.html'
self.list_per_page = SNAPSHOTS_PER_PAGE self.list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
self.list_max_show_all = self.list_per_page self.list_max_show_all = self.list_per_page
# Call monkey patched view # Call monkey patched view
@ -458,7 +458,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([ archive_links([
snapshot.as_link() snapshot.as_link()
for snapshot in queryset for snapshot in queryset
], out_dir=OUTPUT_DIR) ], out_dir=CONFIG.OUTPUT_DIR)
@admin.action( @admin.action(
description="⬇️ Title" description="⬇️ Title"
@ -467,7 +467,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([ archive_links([
snapshot.as_link() snapshot.as_link()
for snapshot in queryset for snapshot in queryset
], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR) ], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
@admin.action( @admin.action(
description="Re-Snapshot" description="Re-Snapshot"
@ -485,13 +485,13 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
archive_links([ archive_links([
snapshot.as_link() snapshot.as_link()
for snapshot in queryset for snapshot in queryset
], overwrite=True, out_dir=OUTPUT_DIR) ], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
@admin.action( @admin.action(
description="Delete" description="Delete"
) )
def delete_snapshots(self, request, queryset): def delete_snapshots(self, request, queryset):
remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR) remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
@admin.action( @admin.action(
@ -578,7 +578,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version') list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
ordering = ['-start_ts'] ordering = ['-start_ts']
list_per_page = SNAPSHOTS_PER_PAGE list_per_page = CONFIG.SNAPSHOTS_PER_PAGE
@admin.display( @admin.display(
description='Snapshot Info' description='Snapshot Info'
@ -620,7 +620,7 @@ class ArchiveResultAdmin(admin.ModelAdmin):
) )
def output_summary(self, result): def output_summary(self, result):
snapshot_dir = Path(OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1] snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
output_str = format_html( output_str = format_html(
'<pre style="display: inline-block">{}</pre><br/>', '<pre style="display: inline-block">{}</pre><br/>',
result.output, result.output,

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
from typing import Optional, List, Dict from typing import Optional, List, Dict, Iterable
from django_stubs_ext.db.models import TypedModelMeta from django_stubs_ext.db.models import TypedModelMeta
import json import json
@ -17,10 +17,10 @@ from django.utils.text import slugify
from django.core.cache import cache from django.core.cache import cache
from django.urls import reverse, reverse_lazy from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField from django.db.models import Case, When, Value, IntegerField
from django.conf import settings
from abid_utils.models import ABIDModel, ABIDField from abid_utils.models import ABIDModel, ABIDField
from ..config import ARCHIVE_DIR, ARCHIVE_DIR_NAME
from ..system import get_dir_size from ..system import get_dir_size
from ..util import parse_date, base_url from ..util import parse_date, base_url
from ..index.schema import Link from ..index.schema import Link
@ -72,6 +72,7 @@ class Tag(ABIDModel):
slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False) slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
# slug is autoset on save from name, never set it manually # slug is autoset on save from name, never set it manually
snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta): class Meta(TypedModelMeta):
verbose_name = "Tag" verbose_name = "Tag"
@ -154,6 +155,8 @@ class Snapshot(ABIDModel):
keys = ('url', 'timestamp', 'title', 'tags', 'updated') keys = ('url', 'timestamp', 'title', 'tags', 'updated')
archiveresult_set: models.Manager['ArchiveResult']
@property @property
def uuid(self): def uuid(self):
return self.id return self.id
@ -246,11 +249,11 @@ class Snapshot(ABIDModel):
@cached_property @cached_property
def link_dir(self): def link_dir(self):
return str(ARCHIVE_DIR / self.timestamp) return str(settings.CONFIG.ARCHIVE_DIR / self.timestamp)
@cached_property @cached_property
def archive_path(self): def archive_path(self):
return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp) return '{}/{}'.format(settings.CONFIG.ARCHIVE_DIR_NAME, self.timestamp)
@cached_property @cached_property
def archive_size(self): def archive_size(self):
@ -284,7 +287,7 @@ class Snapshot(ABIDModel):
@cached_property @cached_property
def status_code(self) -> Optional[str]: def status_code(self) -> Optional[str]:
return self.headers and self.headers.get('Status-Code') return self.headers.get('Status-Code') if self.headers else None
@cached_property @cached_property
def history(self) -> dict: def history(self) -> dict:
@ -322,7 +325,7 @@ class Snapshot(ABIDModel):
return None return None
def save_tags(self, tags: List[str]=()) -> None: def save_tags(self, tags: Iterable[str]=()) -> None:
tags_id = [] tags_id = []
for tag in tags: for tag in tags:
if tag.strip(): if tag.strip():
@ -334,17 +337,17 @@ class Snapshot(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True) -> Path: # def get_storage_dir(self, create=True, symlink=True) -> Path:
# date_str = self.added.strftime('%Y%m%d') # date_str = self.added.strftime('%Y%m%d')
# domain_str = domain(self.url) # domain_str = domain(self.url)
# abs_storage_dir = Path(ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid) # abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
# if create and not abs_storage_dir.is_dir(): # if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True) # abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink: # if symlink:
# LINK_PATHS = [ # LINK_PATHS = [
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid), # # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_id' / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_date' / date_str / domain_str / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'snapshots_by_domain' / domain_str / date_str / str(self.ulid),
# ] # ]
# for link_path in LINK_PATHS: # for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True) # link_path.parent.mkdir(parents=True, exist_ok=True)
@ -439,8 +442,8 @@ class ArchiveResult(ABIDModel):
should be used for user-facing iframe embeds of this result should be used for user-facing iframe embeds of this result
""" """
if hasattr(self.extractor_module, 'get_embed_path'): if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
return self.extractor_module.get_embed_path(self) return get_embed_path_func(self)
return self.extractor_module.get_output_path() return self.extractor_module.get_output_path()
@ -455,18 +458,18 @@ class ArchiveResult(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True): # def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.added.strftime('%Y%m%d') # date_str = self.snapshot.added.strftime('%Y%m%d')
# domain_str = domain(self.snapshot.url) # domain_str = domain(self.snapshot.url)
# abs_storage_dir = Path(ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid) # abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)
# if create and not abs_storage_dir.is_dir(): # if create and not abs_storage_dir.is_dir():
# abs_storage_dir.mkdir(parents=True, exist_ok=True) # abs_storage_dir.mkdir(parents=True, exist_ok=True)
# if symlink: # if symlink:
# LINK_PATHS = [ # LINK_PATHS = [
# Path(ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'all_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid), # # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_id' / str(self.ulid),
# # Path(ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid), # # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_date' / date_str / domain_str / self.extractor / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_domain' / domain_str / date_str / self.extractor / str(self.ulid),
# Path(ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid), # Path(settings.CONFIG.ARCHIVE_DIR).parent / 'index' / 'results_by_type' / self.extractor / date_str / domain_str / str(self.ulid),
# ] # ]
# for link_path in LINK_PATHS: # for link_path in LINK_PATHS:
# link_path.parent.mkdir(parents=True, exist_ok=True) # link_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -9,32 +9,9 @@ import tempfile
from pathlib import Path from pathlib import Path
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from ..config import ( from ..config import CONFIG
CONFIG, from ..config_stubs import AttrDict
DEBUG, assert isinstance(CONFIG, AttrDict)
SECRET_KEY,
ALLOWED_HOSTS,
PACKAGE_DIR,
TEMPLATES_DIR_NAME,
CUSTOM_TEMPLATES_DIR,
SQL_INDEX_FILENAME,
OUTPUT_DIR,
ARCHIVE_DIR,
LOGS_DIR,
CACHE_DIR,
TIMEZONE,
LDAP,
LDAP_SERVER_URI,
LDAP_BIND_DN,
LDAP_BIND_PASSWORD,
LDAP_USER_BASE,
LDAP_USER_FILTER,
LDAP_USERNAME_ATTR,
LDAP_FIRSTNAME_ATTR,
LDAP_LASTNAME_ATTR,
LDAP_EMAIL_ATTR,
)
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
@ -53,12 +30,12 @@ LOGOUT_REDIRECT_URL = os.environ.get('LOGOUT_REDIRECT_URL', '/')
PASSWORD_RESET_URL = '/accounts/password_reset/' PASSWORD_RESET_URL = '/accounts/password_reset/'
APPEND_SLASH = True APPEND_SLASH = True
DEBUG = DEBUG or ('--debug' in sys.argv) DEBUG = CONFIG.DEBUG or ('--debug' in sys.argv)
# add plugins folders to system path, and load plugins in installed_apps # add plugins folders to system path, and load plugins in installed_apps
BUILTIN_PLUGINS_DIR = PACKAGE_DIR / 'plugins' BUILTIN_PLUGINS_DIR = CONFIG.PACKAGE_DIR / 'plugins'
USER_PLUGINS_DIR = OUTPUT_DIR / 'plugins' USER_PLUGINS_DIR = CONFIG.OUTPUT_DIR / 'plugins'
sys.path.insert(0, str(BUILTIN_PLUGINS_DIR)) sys.path.insert(0, str(BUILTIN_PLUGINS_DIR))
sys.path.insert(0, str(USER_PLUGINS_DIR)) sys.path.insert(0, str(USER_PLUGINS_DIR))
@ -127,7 +104,7 @@ AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.ModelBackend', 'django.contrib.auth.backends.ModelBackend',
] ]
if LDAP: if CONFIG.LDAP:
try: try:
import ldap import ldap
from django_auth_ldap.config import LDAPSearch from django_auth_ldap.config import LDAPSearch
@ -138,23 +115,23 @@ if LDAP:
global AUTH_LDAP_USER_SEARCH global AUTH_LDAP_USER_SEARCH
global AUTH_LDAP_USER_ATTR_MAP global AUTH_LDAP_USER_ATTR_MAP
AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI AUTH_LDAP_SERVER_URI = CONFIG.LDAP_SERVER_URI
AUTH_LDAP_BIND_DN = LDAP_BIND_DN AUTH_LDAP_BIND_DN = CONFIG.LDAP_BIND_DN
AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD AUTH_LDAP_BIND_PASSWORD = CONFIG.LDAP_BIND_PASSWORD
assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True' assert AUTH_LDAP_SERVER_URI and CONFIG.LDAP_USERNAME_ATTR and CONFIG.LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
AUTH_LDAP_USER_SEARCH = LDAPSearch( AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_USER_BASE, CONFIG.LDAP_USER_BASE,
ldap.SCOPE_SUBTREE, ldap.SCOPE_SUBTREE,
'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')', '(&(' + CONFIG.LDAP_USERNAME_ATTR + '=%(user)s)' + CONFIG.LDAP_USER_FILTER + ')',
) )
AUTH_LDAP_USER_ATTR_MAP = { AUTH_LDAP_USER_ATTR_MAP = {
'username': LDAP_USERNAME_ATTR, 'username': CONFIG.LDAP_USERNAME_ATTR,
'first_name': LDAP_FIRSTNAME_ATTR, 'first_name': CONFIG.LDAP_FIRSTNAME_ATTR,
'last_name': LDAP_LASTNAME_ATTR, 'last_name': CONFIG.LDAP_LASTNAME_ATTR,
'email': LDAP_EMAIL_ATTR, 'email': CONFIG.LDAP_EMAIL_ATTR,
} }
AUTHENTICATION_BACKENDS = [ AUTHENTICATION_BACKENDS = [
@ -206,6 +183,15 @@ if DEBUG_TOOLBAR:
] ]
MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware'] MIDDLEWARE = [*MIDDLEWARE, 'debug_toolbar.middleware.DebugToolbarMiddleware']
if DEBUG:
from django_autotyping.typing import AutotypingSettingsDict
INSTALLED_APPS += ['django_autotyping']
AUTOTYPING: AutotypingSettingsDict = {
"STUBS_GENERATION": {
"LOCAL_STUBS_DIR": Path(CONFIG.PACKAGE_DIR) / "typings",
}
}
# https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar) # https://github.com/bensi94/Django-Requests-Tracker (improved version of django-debug-toolbar)
# Must delete archivebox/templates/admin to use because it relies on some things we override # Must delete archivebox/templates/admin to use because it relies on some things we override
@ -224,15 +210,15 @@ if DEBUG_REQUESTS_TRACKER:
STATIC_URL = '/static/' STATIC_URL = '/static/'
STATICFILES_DIRS = [ STATICFILES_DIRS = [
*([str(CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_DIR else []), *([str(CONFIG.CUSTOM_TEMPLATES_DIR / 'static')] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'static'), str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'static'),
] ]
TEMPLATE_DIRS = [ TEMPLATE_DIRS = [
*([str(CUSTOM_TEMPLATES_DIR)] if CUSTOM_TEMPLATES_DIR else []), *([str(CONFIG.CUSTOM_TEMPLATES_DIR)] if CONFIG.CUSTOM_TEMPLATES_DIR else []),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'core'), str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'core'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME / 'admin'), str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME / 'admin'),
str(Path(PACKAGE_DIR) / TEMPLATES_DIR_NAME), str(Path(CONFIG.PACKAGE_DIR) / CONFIG.TEMPLATES_DIR_NAME),
] ]
TEMPLATES = [ TEMPLATES = [
@ -258,10 +244,10 @@ TEMPLATES = [
CACHE_DB_FILENAME = 'cache.sqlite3' CACHE_DB_FILENAME = 'cache.sqlite3'
CACHE_DB_PATH = CACHE_DIR / CACHE_DB_FILENAME CACHE_DB_PATH = CONFIG.CACHE_DIR / CACHE_DB_FILENAME
CACHE_DB_TABLE = 'django_cache' CACHE_DB_TABLE = 'django_cache'
DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME DATABASE_FILE = Path(CONFIG.OUTPUT_DIR) / CONFIG.SQL_INDEX_FILENAME
DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE)) DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", str(DATABASE_FILE))
DATABASES = { DATABASES = {
@ -272,7 +258,7 @@ DATABASES = {
'timeout': 60, 'timeout': 60,
'check_same_thread': False, 'check_same_thread': False,
}, },
'TIME_ZONE': TIMEZONE, 'TIME_ZONE': CONFIG.TIMEZONE,
# DB setup is sometimes modified at runtime by setup_django() in config.py # DB setup is sometimes modified at runtime by setup_django() in config.py
}, },
# 'cache': { # 'cache': {
@ -282,7 +268,7 @@ DATABASES = {
# 'timeout': 60, # 'timeout': 60,
# 'check_same_thread': False, # 'check_same_thread': False,
# }, # },
# 'TIME_ZONE': TIMEZONE, # 'TIME_ZONE': CONFIG.TIMEZONE,
# }, # },
} }
MIGRATION_MODULES = {'signal_webhooks': None} MIGRATION_MODULES = {'signal_webhooks': None}
@ -312,7 +298,7 @@ STORAGES = {
"BACKEND": "django.core.files.storage.FileSystemStorage", "BACKEND": "django.core.files.storage.FileSystemStorage",
"OPTIONS": { "OPTIONS": {
"base_url": "/archive/", "base_url": "/archive/",
"location": ARCHIVE_DIR, "location": CONFIG.ARCHIVE_DIR,
}, },
}, },
# "personas": { # "personas": {
@ -328,9 +314,9 @@ STORAGES = {
### Security Settings ### Security Settings
################################################################################ ################################################################################
SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_') SECRET_KEY = CONFIG.SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789_')
ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') ALLOWED_HOSTS = CONFIG.ALLOWED_HOSTS.split(',')
SECURE_BROWSER_XSS_FILTER = True SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True SECURE_CONTENT_TYPE_NOSNIFF = True
@ -361,7 +347,7 @@ SHELL_PLUS_PRINT_SQL = False
IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner'] IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell' IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
if IS_SHELL: if IS_SHELL:
os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py') os.environ['PYTHONSTARTUP'] = str(Path(CONFIG.PACKAGE_DIR) / 'core' / 'welcome_message.py')
################################################################################ ################################################################################
@ -373,10 +359,10 @@ USE_I18N = True
USE_TZ = True USE_TZ = True
DATETIME_FORMAT = 'Y-m-d g:iA' DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA' SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
TIME_ZONE = TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent TIME_ZONE = CONFIG.TIMEZONE # django convention is TIME_ZONE, archivebox config uses TIMEZONE, they are equivalent
from django.conf.locale.en import formats as en_formats from django.conf.locale.en import formats as en_formats # type: ignore
en_formats.DATETIME_FORMAT = DATETIME_FORMAT en_formats.DATETIME_FORMAT = DATETIME_FORMAT
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
@ -410,8 +396,8 @@ class NoisyRequestsFilter(logging.Filter):
return 1 return 1
if LOGS_DIR.exists(): if CONFIG.LOGS_DIR.exists():
ERROR_LOG = (LOGS_DIR / 'errors.log') ERROR_LOG = (CONFIG.LOGS_DIR / 'errors.log')
else: else:
# historically too many edge cases here around creating log dir w/ correct permissions early on # historically too many edge cases here around creating log dir w/ correct permissions early on
# if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr # if there's an issue on startup, we trash the log and let user figure it out via stdout/stderr

View file

@ -46,7 +46,7 @@ urlpatterns = [
# path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django # path('jet_api/', include('jet_django.urls')), Enable to use https://www.jetadmin.io/integrations/django
path('index.html', RedirectView.as_view(url='/')), path('index.html', RedirectView.as_view(url='/')),
path('index.json', static.serve, {'document_root': settings.OUTPUT_DIR, 'path': 'index.json'}), path('index.json', static.serve, {'document_root': settings.CONFIG.OUTPUT_DIR, 'path': 'index.json'}),
path('', HomepageView.as_view(), name='Home'), path('', HomepageView.as_view(), name='Home'),
] ]
urlpatterns += staticfiles_urlpatterns() urlpatterns += staticfiles_urlpatterns()

View file

@ -11,27 +11,18 @@ from ..util import (
domain, domain,
dedupe, dedupe,
) )
from ..config import ( from ..config import CONFIG
TIMEOUT,
SAVE_FAVICON,
FAVICON_PROVIDER,
CURL_BINARY,
CURL_ARGS,
CURL_EXTRA_ARGS,
CURL_VERSION,
CHECK_SSL_VALIDITY,
CURL_USER_AGENT,
)
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
@enforce_types @enforce_types
def should_save_favicon(link: Link, out_dir: Optional[str]=None, overwrite: Optional[bool]=False) -> bool: def should_save_favicon(link: Link, out_dir: str | Path | None=None, overwrite: bool=False) -> bool:
out_dir = out_dir or Path(link.link_dir) assert link.link_dir
out_dir = Path(out_dir or link.link_dir)
if not overwrite and (out_dir / 'favicon.ico').exists(): if not overwrite and (out_dir / 'favicon.ico').exists():
return False return False
return SAVE_FAVICON return CONFIG.SAVE_FAVICON
@enforce_types @enforce_types
def get_output_path(): def get_output_path():
@ -39,24 +30,26 @@ def get_output_path():
@enforce_types @enforce_types
def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: def save_favicon(link: Link, out_dir: str | Path | None=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download site favicon from google's favicon api""" """download site favicon from google's favicon api"""
out_dir = out_dir or link.link_dir out_dir = Path(out_dir or link.link_dir)
assert out_dir.exists()
output: ArchiveOutput = 'favicon.ico' output: ArchiveOutput = 'favicon.ico'
# later options take precedence # later options take precedence
options = [ options = [
*CURL_ARGS, *CONFIG.CURL_ARGS,
*CURL_EXTRA_ARGS, *CONFIG.CURL_EXTRA_ARGS,
'--max-time', str(timeout), '--max-time', str(timeout),
'--output', str(output), '--output', str(output),
*(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []), *(['--user-agent', '{}'.format(CONFIG.CURL_USER_AGENT)] if CONFIG.CURL_USER_AGENT else []),
*([] if CHECK_SSL_VALIDITY else ['--insecure']), *([] if CONFIG.CHECK_SSL_VALIDITY else ['--insecure']),
] ]
cmd = [ cmd = [
CURL_BINARY, CONFIG.CURL_BINARY,
*dedupe(options), *dedupe(options),
FAVICON_PROVIDER.format(domain(link.url)), CONFIG.FAVICON_PROVIDER.format(domain(link.url)),
] ]
status = 'failed' status = 'failed'
timer = TimedProgress(timeout, prefix=' ') timer = TimedProgress(timeout, prefix=' ')
@ -72,7 +65,7 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
return ArchiveResult( return ArchiveResult(
cmd=cmd, cmd=cmd,
pwd=str(out_dir), pwd=str(out_dir),
cmd_version=CURL_VERSION, cmd_version=CONFIG.CURL_VERSION,
output=output, output=output,
status=status, status=status,
**timer.stats, **timer.stats,

View file

@ -14,15 +14,7 @@ from ..util import (
without_query, without_query,
without_fragment, without_fragment,
) )
from ..config import ( from ..config import CONFIG
TIMEOUT,
SAVE_GIT,
GIT_BINARY,
GIT_ARGS,
GIT_VERSION,
GIT_DOMAINS,
CHECK_SSL_VALIDITY
)
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
@ -50,17 +42,17 @@ def should_save_git(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
return False return False
is_clonable_url = ( is_clonable_url = (
(domain(link.url) in GIT_DOMAINS) (domain(link.url) in CONFIG.GIT_DOMAINS)
or (extension(link.url) == 'git') or (extension(link.url) == 'git')
) )
if not is_clonable_url: if not is_clonable_url:
return False return False
return SAVE_GIT return CONFIG.SAVE_GIT
@enforce_types @enforce_types
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult: def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=CONFIG.TIMEOUT) -> ArchiveResult:
"""download full site using git""" """download full site using git"""
out_dir = out_dir or Path(link.link_dir) out_dir = out_dir or Path(link.link_dir)
@ -68,10 +60,10 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
output_path = out_dir / output output_path = out_dir / output
output_path.mkdir(exist_ok=True) output_path.mkdir(exist_ok=True)
cmd = [ cmd = [
GIT_BINARY, CONFIG.GIT_BINARY,
'clone', 'clone',
*GIT_ARGS, *CONFIG.GIT_ARGS,
*([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']), *([] if CONFIG.CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
without_query(without_fragment(link.url)), without_query(without_fragment(link.url)),
] ]
status = 'succeeded' status = 'succeeded'
@ -96,7 +88,7 @@ def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
return ArchiveResult( return ArchiveResult(
cmd=cmd, cmd=cmd,
pwd=str(out_dir), pwd=str(out_dir),
cmd_version=GIT_VERSION, cmd_version=CONFIG.GIT_VERSION,
output=output, output=output,
status=status, status=status,
**timer.stats, **timer.stats,

View file

@ -192,7 +192,7 @@ class Link:
if extended: if extended:
info.update({ info.update({
'snapshot_id': self.snapshot_id, 'snapshot_id': self.snapshot_id,
'snapshot_uuid': self.snapshot_uuid, 'snapshot_old_id': self.snapshot_old_id,
'snapshot_abid': self.snapshot_abid, 'snapshot_abid': self.snapshot_abid,
'link_dir': self.link_dir, 'link_dir': self.link_dir,
@ -266,15 +266,15 @@ class Link:
@cached_property @cached_property
def snapshot(self): def snapshot(self):
from core.models import Snapshot from core.models import Snapshot
return Snapshot.objects.only('id').get(url=self.url) return Snapshot.objects.only('id', 'old_id', 'abid').get(url=self.url)
@cached_property @cached_property
def snapshot_id(self): def snapshot_id(self):
return str(self.snapshot.pk) return str(self.snapshot.pk)
@cached_property @cached_property
def snapshot_uuid(self): def snapshot_old_id(self):
return str(self.snapshot.id) return str(self.snapshot.old_id)
@cached_property @cached_property
def snapshot_abid(self): def snapshot_abid(self):

View file

@ -7,7 +7,9 @@ if __name__ == '__main__':
# versions of ./manage.py commands whenever possible. When that's not possible # versions of ./manage.py commands whenever possible. When that's not possible
# (e.g. makemigrations), you can comment out this check temporarily # (e.g. makemigrations), you can comment out this check temporarily
if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv or 'squashmigrations' in sys.argv): allowed_commands = ['makemigrations', 'migrate', 'startapp','squashmigrations', 'generate_stubs']
if not any(cmd in sys.argv for cmd in allowed_commands):
print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):") print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):")
print() print()
print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:') print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:')