diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py index f74493fc..7aecb592 100644 --- a/archivebox/abid_utils/admin.py +++ b/archivebox/abid_utils/admin.py @@ -9,11 +9,13 @@ from django.utils.html import format_html from django.utils.safestring import mark_safe from django.shortcuts import redirect -from .abid import ABID +from django_object_actions import DjangoObjectActions, action + from api.auth import get_or_create_api_token from ..util import parse_date +from .abid import ABID def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color_same: str | None=None, color_diff: str | None=None): """highlight each character in red that differs with the char at the same index in compare_val""" @@ -39,22 +41,26 @@ def get_abid_info(self, obj, request=None): try: #abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅' - fresh_abid = ABID(**obj.ABID_FRESH_HASHES) + fresh_values = obj.ABID_FRESH_VALUES + fresh_hashes = obj.ABID_FRESH_HASHES + fresh_diffs = obj.ABID_FRESH_DIFFS + fresh_abid = ABID(**fresh_hashes) + fresh_abid_diff = f'❌ !=   .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)}' if str(fresh_abid) != str(obj.ABID) else '✅' fresh_uuid_diff = f'❌ !=   .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅' id_pk_diff = f'❌ != .pk: {highlight_diff(obj.pk, obj.id)}' if str(obj.pk) != str(obj.id) else '✅' - fresh_ts = parse_date(obj.ABID_FRESH_VALUES['ts']) or None - ts_diff = f'❌ != {highlight_diff( obj.ABID_FRESH_HASHES["ts"], obj.ABID.ts)}' if obj.ABID_FRESH_HASHES["ts"] != obj.ABID.ts else '✅' + fresh_ts = parse_date(fresh_values['ts']) or None + ts_diff = f'❌ != {highlight_diff( fresh_hashes["ts"], obj.ABID.ts)}' if fresh_hashes["ts"] != obj.ABID.ts else '✅' - derived_uri = obj.ABID_FRESH_HASHES['uri'] + derived_uri = fresh_hashes['uri'] uri_diff = f'❌ != {highlight_diff(derived_uri, obj.ABID.uri)}' if derived_uri != obj.ABID.uri else '✅' - derived_subtype = obj.ABID_FRESH_HASHES['subtype'] + derived_subtype = fresh_hashes['subtype'] subtype_diff = f'❌ != {highlight_diff(derived_subtype, obj.ABID.subtype)}' if derived_subtype != obj.ABID.subtype else '✅' - derived_rand = obj.ABID_FRESH_HASHES['rand'] + derived_rand = fresh_hashes['rand'] rand_diff = f'❌ != {highlight_diff(derived_rand, obj.ABID.rand)}' if derived_rand != obj.ABID.rand else '✅' return format_html( @@ -72,7 +78,7 @@ def get_abid_info(self, obj, request=None):     SUBTYPE:       {}           {}                           {} {}: {}
    RAND:             {}       {}                 {} {}: {}

- {} {} + {} {} {} ''', obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url, @@ -81,23 +87,27 @@ def get_abid_info(self, obj, request=None): highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff), # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff), # str(fresh_abid), mark_safe(fresh_abid_diff), - highlight_diff(obj.ABID.ts, obj.ABID_FRESH_HASHES['ts']), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, fresh_ts and fresh_ts.isoformat(), - highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.ABID_FRESH_VALUES['uri']), - highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.ABID_FRESH_VALUES['subtype']), - highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.ABID_FRESH_VALUES['rand'])[-7:], - f'Some values the ABID depends on have changed since the ABID was issued:' if obj.ABID_FRESH_DIFFS else '', - ", ".join(diff['abid_src'] for diff in obj.ABID_FRESH_DIFFS.values()), + highlight_diff(obj.ABID.ts, fresh_hashes['ts']), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, fresh_ts and fresh_ts.isoformat(), + highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(fresh_values['uri']), + highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(fresh_values['subtype']), + highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(fresh_values['rand'])[-7:], + 'Some values the ABID depends on have changed since the ABID was issued:' if fresh_diffs else '', + ", ".join(diff['abid_src'] for diff in fresh_diffs.values()), + '(clicking "Regenerate ABID" in the upper right will assign a new ABID, breaking any external references to the old ABID)' if fresh_diffs else '', ) except Exception as e: # import ipdb; ipdb.set_trace() return str(e) -class ABIDModelAdmin(admin.ModelAdmin): +class ABIDModelAdmin(DjangoObjectActions, admin.ModelAdmin): list_display = ('created_at', 'created_by', 'abid') sort_fields = ('created_at', 'created_by', 'abid') readonly_fields = ('created_at', 'modified_at', 'abid_info') # fields = [*readonly_fields] + + change_actions = ("regenerate_abid",) + # changelist_actions = ("regenerate_abid",) def _get_obj_does_not_exist_redirect(self, request, opts, object_id): try: @@ -120,11 +130,17 @@ class ABIDModelAdmin(admin.ModelAdmin): form = super().get_form(request, obj, **kwargs) if 'created_by' in form.base_fields: form.base_fields['created_by'].initial = request.user + + if obj: + if obj.ABID_FRESH_DIFFS: + messages.warning(request, "The ABID is not in sync with the object! See the API Identifiers section below for more info...") + return form def get_formset(self, request, formset=None, obj=None, **kwargs): formset = super().get_formset(request, formset, obj, **kwargs) formset.form.base_fields['created_at'].disabled = True + return formset def save_model(self, request, obj, form, change): @@ -143,3 +159,16 @@ class ABIDModelAdmin(admin.ModelAdmin): @admin.display(description='API Identifiers') def abid_info(self, obj): return get_abid_info(self, obj, request=self.request) + + @action(label="Regenerate ABID", description="Re-Generate the ABID based on fresh values") + def regenerate_abid(self, request, obj): + old_abid = str(obj.abid) + obj.abid = obj.issue_new_abid(overwrite=True) + obj.save() + obj.refresh_from_db() + new_abid = str(obj.abid) + + if new_abid != old_abid: + messages.warning(request, f"The object's ABID has been updated! {old_abid} -> {new_abid} (any external references to the old ABID will need to be updated manually)") + else: + messages.success(request, "The ABID was not regenerated, it is already up-to-date with the object.") diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index 00a0df39..66e2f72f 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -2,12 +2,10 @@ This file provides the Django ABIDField and ABIDModel base model to inherit from. """ -from typing import Any, Dict, Union, List, Set, NamedTuple, cast -from ulid import ULID -from uuid import uuid4, UUID -from typeid import TypeID # type: ignore[import-untyped] -from datetime import datetime, timedelta +from typing import Any, Dict, Union, List, Set, cast + +from uuid import uuid4 from functools import partial from charidfield import CharIDField # type: ignore[import-untyped] @@ -30,7 +28,6 @@ from .abid import ( DEFAULT_ABID_URI_SALT, abid_part_from_prefix, abid_hashes_from_values, - abid_from_values, ts_from_abid, abid_part_from_ts, ) @@ -119,6 +116,7 @@ class ABIDModel(models.Model): # otherwise if updating, make sure none of the field changes would invalidate existing ABID abid_diffs = self.ABID_FRESH_DIFFS if abid_diffs: + # change has invalidated the existing ABID, raise a nice ValidationError pointing out which fields caused the issue keys_changed = ', '.join(diff['abid_src'] for diff in abid_diffs.values()) full_summary = ( @@ -142,16 +140,15 @@ class ABIDModel(models.Model): NON_FIELD_ERRORS: ValidationError(full_summary), }) - should_ovewrite_abid = self.abid_drift_allowed if (abid_drift_allowed is None) else abid_drift_allowed - if should_ovewrite_abid: - print(f'\n#### DANGER: Changing ABID of existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed}), this will break any references to its previous ABID!') + allowed_to_invalidate_abid = self.abid_drift_allowed if (abid_drift_allowed is None) else abid_drift_allowed + if allowed_to_invalidate_abid: + print(f'\n#### WARNING: Change allowed despite it invalidating the ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})!', self.abid) print(change_error) - self._previous_abid = self.abid - self.abid = str(self.issue_new_abid(force_new=True)) - print(f'#### DANGER: OVERWROTE OLD ABID. NEW ABID=', self.abid) + print('--------------------------------------------------------------------------------------------------') else: - print(f'\n#### WARNING: ABID of existing record is outdated and has not been updated ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})') + print(f'\n#### ERROR: Change blocked because it would invalidate ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})', self.abid) print(change_error) + print('--------------------------------------------------------------------------------------------------') raise change_error def save(self, *args: Any, abid_drift_allowed: bool | None=None, **kwargs: Any) -> None: @@ -230,11 +227,11 @@ class ABIDModel(models.Model): if getattr(existing_abid, key) != new_hash } - def issue_new_abid(self, force_new=False) -> ABID: + def issue_new_abid(self, overwrite=False) -> ABID: """ Issue a new ABID based on the current object's properties, can only be called once on new objects (before they are saved to DB). """ - if not force_new: + if not overwrite: assert self._state.adding, 'Can only issue new ABID when model._state.adding is True' assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})' diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 7a975b38..89e6f7c9 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -131,7 +131,7 @@ class Snapshot(ABIDModel): abid_uri_src = 'self.url' abid_subtype_src = '"01"' abid_rand_src = 'self.id' - abid_drift_allowed = False + abid_drift_allowed = True id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 02ec7d56..738be0f0 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -6,9 +6,11 @@ import re import logging import inspect import tempfile -from typing import Any, Dict +from typing import Dict from pathlib import Path + +import django from django.utils.crypto import get_random_string from ..config import CONFIG @@ -89,8 +91,9 @@ INSTALLED_APPS = [ 'django.contrib.admin', # 3rd-party apps from PyPI - 'django_jsonform', # handles rendering Pydantic models to Django HTML widgets/forms - 'signal_webhooks', # handles REST API outbound webhooks + 'django_jsonform', # handles rendering Pydantic models to Django HTML widgets/forms https://github.com/bhch/django-jsonform + 'signal_webhooks', # handles REST API outbound webhooks https://github.com/MrThearMan/django-signal-webhooks + 'django_object_actions', # provides easy Django Admin action buttons on change views https://github.com/crccheck/django-object-actions # our own apps 'abid_utils', # handles ABID ID creation, handling, and models @@ -384,15 +387,11 @@ class NoisyRequestsFilter(logging.Filter): return True -def add_extra_logging_attrs(record): - record.username = '' - try: - record.username = record.request.user.username - except AttributeError: - record.username = "Anonymous" - if hasattr(record, 'request'): - import ipdb; ipdb.set_trace() - return True + +class CustomOutboundWebhookLogFormatter(logging.Formatter): + def format(self, record): + result = super().format(record) + return result.replace('HTTP Request: ', 'OutboundWebhook: ') ERROR_LOG = tempfile.NamedTemporaryFile().name @@ -416,21 +415,13 @@ LOGGING = { "disable_existing_loggers": False, "formatters": { "rich": { - "datefmt": "[%X]", + "datefmt": "[%Y-%m-%d %H:%M:%S]", # "format": "{asctime} {levelname} {module} {name} {message} {username}", - # "format": "%(message)s (user=%(username)s", + "format": "%(name)s %(message)s", }, - "verbose": { - "style": "{", - }, - "simple": { - "format": "{name} {message}", - "style": "{", - }, - "django.server": { - "()": "django.utils.log.ServerFormatter", - # "format": "{message} (user={username})", - "style": "{", + "outbound_webhooks": { + "()": CustomOutboundWebhookLogFormatter, + "datefmt": "[%Y-%m-%d %H:%M:%S]", }, }, "filters": { @@ -443,10 +434,6 @@ LOGGING = { "require_debug_true": { "()": "django.utils.log.RequireDebugTrue", }, - # "add_extra_logging_attrs": { - # "()": "django.utils.log.CallbackFilter", - # "callback": add_extra_logging_attrs, - # }, }, "handlers": { # "console": { @@ -455,7 +442,7 @@ LOGGING = { # "class": "logging.StreamHandler", # 'filters': ['noisyrequestsfilter', 'add_extra_logging_attrs'], # }, - "console": { + "default": { "class": "rich.logging.RichHandler", "formatter": "rich", "level": "DEBUG", @@ -463,19 +450,25 @@ LOGGING = { "rich_tracebacks": True, "filters": ["noisyrequestsfilter"], "tracebacks_suppress": [ + django, pydantic, - django.template, ], }, "logfile": { - "level": "ERROR", + "level": "INFO", "class": "logging.handlers.RotatingFileHandler", "filename": ERROR_LOG, "maxBytes": 1024 * 1024 * 25, # 25 MB "backupCount": 10, - "formatter": "verbose", + "formatter": "rich", "filters": ["noisyrequestsfilter"], }, + "outbound_webhooks": { + "class": "rich.logging.RichHandler", + "markup": False, + "rich_tracebacks": True, + "formatter": "outbound_webhooks", + }, # "mail_admins": { # "level": "ERROR", # "filters": ["require_debug_false"], @@ -486,29 +479,35 @@ LOGGING = { }, }, "root": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "INFO", - "formatter": "verbose", + "formatter": "rich", }, "loggers": { "api": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "DEBUG", }, "checks": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "DEBUG", }, "core": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "DEBUG", }, "builtin_plugins": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "DEBUG", }, + "httpx": { + "handlers": ["outbound_webhooks"], + "level": "INFO", + "formatter": "outbound_webhooks", + "propagate": False, + }, "django": { - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "INFO", "filters": ["noisyrequestsfilter"], }, @@ -518,29 +517,27 @@ LOGGING = { "level": "ERROR", }, "django.channels.server": { + # see archivebox.monkey_patches.ModifiedAccessLogGenerator for dedicated daphne server logging settings "propagate": False, - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "INFO", "filters": ["noisyrequestsfilter"], - "formatter": "django.server", }, "django.server": { # logs all requests (2xx, 3xx, 4xx) "propagate": False, - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "INFO", "filters": ["noisyrequestsfilter"], - "formatter": "django.server", }, "django.request": { # only logs 4xx and 5xx errors "propagate": False, - "handlers": ["console", "logfile"], + "handlers": ["default", "logfile"], "level": "INFO", "filters": ["noisyrequestsfilter"], - "formatter": "django.server", }, "django.db.backends": { "propagate": False, - "handlers": ["console"], + "handlers": ["default"], "level": LOG_LEVEL_DATABASE, }, }, diff --git a/archivebox/monkey_patches.py b/archivebox/monkey_patches.py index 8f19f915..599cff00 100644 --- a/archivebox/monkey_patches.py +++ b/archivebox/monkey_patches.py @@ -21,3 +21,32 @@ timezone.utc = datetime.timezone.utc from rich.traceback import install install(show_locals=True) + + +from daphne import access + +class ModifiedAccessLogGenerator(access.AccessLogGenerator): + """Clutge workaround until daphne uses the Python logging framework. https://github.com/django/daphne/pull/473/files""" + + def write_entry(self, host, date, request, status=None, length=None, ident=None, user=None): + + # Ignore noisy requests to staticfiles / favicons / etc. + if 'GET /static/' in request: + return + if 'GET /admin/jsi18n/' in request: + return + if request.endswith("/favicon.ico") or request.endswith("/robots.txt") or request.endswith("/screenshot.png"): + return + + # clean up the log format to mostly match the same format as django.conf.settings.LOGGING rich formats + self.stream.write( + "[%s] HTTP %s (%s) %s\n" + % ( + date.strftime("%Y-%m-%d %H:%M:%S"), + request, + status or "-", + "localhost" if host.startswith("127.") else host.split(":")[0], + ) + ) + +access.AccessLogGenerator.write_entry = ModifiedAccessLogGenerator.write_entry diff --git a/pdm.lock b/pdm.lock index 82dd26c1..9d4ca81b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "ldap", "sonic"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:f940c4c0a330b7b0bcff68a006b29ea3b1292ad6aadd3cfc909de0622f2963ac" +content_hash = "sha256:61d53c8fbfcdaaf18e04d7aab12887caf9260b803db7e5b66a22e37b88824c55" [[metadata.targets]] requires_python = "==3.10.*" @@ -456,6 +456,18 @@ files = [ {file = "django_ninja-1.3.0.tar.gz", hash = "sha256:5b320e2dc0f41a6032bfa7e1ebc33559ae1e911a426f0c6be6674a50b20819be"}, ] +[[package]] +name = "django-object-actions" +version = "4.2.0" +requires_python = ">=3.7,<4.0" +summary = "A Django app for adding object tools for models in the admin" +groups = ["default"] +marker = "python_version == \"3.10\"" +files = [ + {file = "django_object_actions-4.2.0-py3-none-any.whl", hash = "sha256:ae0df9984c68a4f42f219a391b71fa0630fe44a2983b39b8064378ebddcff30c"}, + {file = "django_object_actions-4.2.0.tar.gz", hash = "sha256:e24befedf01b6fcdccbb03c33c0e2c855fd1a88f352a66dc7e2170ba31e80128"}, +] + [[package]] name = "django-pydantic-field" version = "0.3.10" diff --git a/pyproject.toml b/pyproject.toml index 4667e61f..60b41060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "base32-crockford==0.3.0", "rich>=13.8.0", "channels[daphne]>=4.1.0", + "django-object-actions>=4.2.0", ] homepage = "https://github.com/ArchiveBox/ArchiveBox"