diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py index 062e372f..bd97b60d 100644 --- a/archivebox/abid_utils/admin.py +++ b/archivebox/abid_utils/admin.py @@ -1,39 +1,88 @@ +__package__ = 'archivebox.abid_utils' from django.contrib import admin from datetime import datetime from django.utils.html import format_html +from django.utils.safestring import mark_safe + +from abid_utils.abid import abid_part_from_ts, abid_part_from_uri, abid_part_from_rand, abid_part_from_subtype from api.auth import get_or_create_api_token +from ..util import parse_date + +def highlight_diff(display_val, compare_val): + """highlight each character in red that differs with the char at the same index in compare_val""" + + display_val = str(display_val) + compare_val = str(compare_val) + + diff_chars = mark_safe('').join( + format_html('{}', display_val[i]) + if display_val[i] != compare_val[i] else + format_html('{}', display_val[i]) + for i in range(len(display_val)) + ) + return diff_chars + def get_abid_info(self, obj, request=None): try: + abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅' + + fresh_abid = obj.generate_abid() + fresh_abid_diff = f' !=   .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅' + fresh_uuid_diff = f' !=   .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅' + + id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅' + id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅' + id_pk_diff = f' != .pk: {highlight_diff(obj.pk, obj.id)} ❌' if str(obj.pk) != str(obj.id) else ' == .pk ✅' + + source_ts_val = parse_date(obj.abid_values['ts']) or None + derived_ts = abid_part_from_ts(source_ts_val) if source_ts_val else None + ts_diff = f'!= {highlight_diff(derived_ts, obj.ABID.ts)} ❌' if derived_ts != obj.ABID.ts else '✅' + + derived_uri = abid_part_from_uri(obj.abid_values['uri']) + uri_diff = f'!= {highlight_diff(derived_uri, obj.ABID.uri)} ❌' if derived_uri != obj.ABID.uri else '✅' + + derived_subtype = abid_part_from_subtype(obj.abid_values['subtype']) + subtype_diff = f'!= {highlight_diff(derived_subtype, obj.ABID.subtype)} ❌' if derived_subtype != obj.ABID.subtype else '✅' + + derived_rand = abid_part_from_rand(obj.abid_values['rand']) + rand_diff = f'!= {highlight_diff(derived_rand, obj.ABID.rand)} ❌' if derived_rand != obj.ABID.rand else '✅' + + # any_abid_discrepancies = any( + # '❌' in diff or '!=' in diff + # for diff in (abid_diff, fresh_abid_diff, id_abid_diff, id_pk_diff, ts_diff, uri_diff, subtype_diff, rand_diff) + # ) + # total_diff = f' != .generate_abid() -> {fresh_abid} ❌' if any_abid_discrepancies else '✅' + return format_html( # URL Hash: {}
''' {}     📖 API DOCS

-     .abid:                   {}
-     .abid.uuid:           {}
-     .id:                       {}
+     .abid:                   {}                 {}
+     .abid.uuid:           {}     {}
+     .id:                       {}     {}

-     TS:                  {}   {}        {}: {}
-     URI:                 {}     {}           {}: {} -   SALT:   {}
-     SUBTYPE:       {}           {}                           {}: {}
-     RAND:             {}       {}                 {}: {} +     TS:                  {}   {}        {} {}: {}
+     URI:                 {}     {}           {} {}: {}
+     SUBTYPE:       {}           {}                           {} {}: {}
+     RAND:             {}       {}                 {} {}: {}

    .old_id:                {}
''', obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url, - str(obj.abid), - str(obj.ABID.uuid), - str(obj.id), - obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'], - obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']), - obj.ABID.uri_salt, - obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']), - obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:], + str(obj.abid), mark_safe(fresh_abid_diff), + str(obj.ABID.uuid), mark_safe(fresh_uuid_diff), + str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff), + # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff), + # str(fresh_abid), mark_safe(fresh_abid_diff), + obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(), + obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']), + obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']), + obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:], str(getattr(obj, 'old_id', '')), ) except Exception as e: diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index 6d933993..7bdd89a8 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -89,17 +89,25 @@ class ABIDModel(models.Model): abstract = True def save(self, *args: Any, **kwargs: Any) -> None: - if self._state.adding or not self.created: - self.created = timezone.now() + self.created = self.created or timezone.now() - # when first creating a row, self.ABID is the source of truth - # overwrite default prefilled self.id & self.abid with generated self.ABID value - if self._state.adding or not self.id: + assert all(val for val in self.abid_values.values()), f'All ABID src values must be set: {self.abid_values}' + + if self._state.adding: self.id = self.ABID.uuid - if self._state.adding or not self.abid: self.abid = str(self.ABID) + else: + assert self.id, 'id must be set when object exists in DB' + if not self.abid: + self.abid = str(self.ABID) + # assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}' + + # fresh_abid = self.generate_abid() + # if str(fresh_abid) != str(self.abid): + # self.abid = str(fresh_abid) + + return super().save(*args, **kwargs) - super().save(*args, **kwargs) assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}' assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}' assert str(self.uuid) == str(self.ABID.uuid), f'self.uuid ({self.uuid}) does not match .ABID.uuid ({self.ABID.uuid})' diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 885e18ed..61323a84 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -18,6 +18,7 @@ from django.utils.html import format_html from django.utils.safestring import mark_safe from django.shortcuts import render, redirect from django.contrib.auth import get_user_model +from django.contrib.auth.admin import UserAdmin from django.core.paginator import Paginator from django.core.exceptions import ValidationError from django.conf import settings @@ -112,8 +113,84 @@ class ArchiveBoxAdmin(admin.AdminSite): return render(template_name='add.html', request=request, context=context) +class CustomUserAdmin(UserAdmin): + sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined'] + list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined'] + readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set') + fieldsets = [*UserAdmin.fieldsets, ('Data', {'fields': readonly_fields})] + + @admin.display(description='Snapshots') + def snapshot_set(self, obj): + total_count = obj.snapshot_set.count() + return mark_safe('
'.join( + format_html( + '[{}] 📅 {} {}', + snap.pk, + snap.abid, + snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', + snap.url[:64], + ) + for snap in obj.snapshot_set.order_by('-modified')[:10] + ) + f'
{total_count} total records...') + + @admin.display(description='Archive Result Logs') + def archiveresult_set(self, obj): + total_count = obj.archiveresult_set.count() + return mark_safe('
'.join( + format_html( + '
[{}] 📅 {} 📄 {} {}', + result.pk, + result.abid, + result.snapshot.updated.strftime('%Y-%m-%d %H:%M') if result.snapshot.updated else 'pending...', + result.extractor, + result.snapshot.url[:64], + ) + for result in obj.archiveresult_set.order_by('-modified')[:10] + ) + f'
{total_count} total records...') + + @admin.display(description='Tags') + def tag_set(self, obj): + total_count = obj.tag_set.count() + return mark_safe(', '.join( + format_html( + '{}', + tag.pk, + tag.name, + ) + for tag in obj.tag_set.order_by('-modified')[:10] + ) + f'
{total_count} total records...') + + @admin.display(description='API Tokens') + def apitoken_set(self, obj): + total_count = obj.apitoken_set.count() + return mark_safe('
'.join( + format_html( + '
[{}] {} (expires {})', + apitoken.pk, + apitoken.abid, + apitoken.token_redacted[:64], + apitoken.expires, + ) + for apitoken in obj.apitoken_set.order_by('-modified')[:10] + ) + f'
{total_count} total records...') + + @admin.display(description='API Outbound Webhooks') + def outboundwebhook_set(self, obj): + total_count = obj.outboundwebhook_set.count() + return mark_safe('
'.join( + format_html( + '
[{}] {} -> {}', + outboundwebhook.pk, + outboundwebhook.abid, + outboundwebhook.referenced_model, + outboundwebhook.endpoint, + ) + for outboundwebhook in obj.outboundwebhook_set.order_by('-modified')[:10] + ) + f'
{total_count} total records...') + + archivebox_admin = ArchiveBoxAdmin() -archivebox_admin.register(get_user_model()) +archivebox_admin.register(get_user_model(), CustomUserAdmin) archivebox_admin.disable_action('delete_selected') # archivebox_admin.register(CustomPlugin) @@ -576,8 +653,9 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): @admin.register(Tag, site=archivebox_admin) class TagAdmin(ABIDModelAdmin): list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots') + list_filter = ('created', 'created_by') sort_fields = ('name', 'slug', 'abid', 'created_by', 'created') - readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots') + readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'snapshots') search_fields = ('abid', 'name', 'slug') fields = ('name', 'created_by', *readonly_fields) actions = ['delete_selected'] @@ -603,7 +681,7 @@ class TagAdmin(ABIDModelAdmin): snap.url[:64], ) for snap in tag.snapshot_set.order_by('-updated')[:10] - ) + (f'
and {total_count-10} more...' if tag.snapshot_set.count() > 10 else '')) + ) + (f'
{total_count} total snapshots...')) @admin.register(ArchiveResult, site=archivebox_admin) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index a362bdae..20c70797 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -20,7 +20,7 @@ from django.urls import reverse, reverse_lazy from django.db.models import Case, When, Value, IntegerField from django.conf import settings -from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField +from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, get_or_create_system_user_pk from ..system import get_dir_size from ..util import parse_date, base_url @@ -142,22 +142,30 @@ class Snapshot(ABIDModel): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True) abid = ABIDField(prefix=abid_prefix) - url = models.URLField(unique=True, db_index=True) - timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False) - - title = models.CharField(max_length=512, null=True, blank=True, db_index=True) - - tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag')) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='snapshot_set') + created = AutoDateTimeField(default=timezone.now, db_index=True) + modified = models.DateTimeField(auto_now=True) + # legacy ts fields added = AutoDateTimeField(default=timezone.now, db_index=True) updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True) + url = models.URLField(unique=True, db_index=True) + timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False) + tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag')) + title = models.CharField(max_length=512, null=True, blank=True, db_index=True) + keys = ('url', 'timestamp', 'title', 'tags', 'updated') archiveresult_set: models.Manager['ArchiveResult'] objects = SnapshotManager() + def save(self, *args, **kwargs): + # make sure self.added is seeded with a value before calculating ABID using it + if self._state.adding or not self.added: + self.added = self.added or timezone.now() + return super().save(*args, **kwargs) def __repr__(self) -> str: title = (self.title_stripped or '-')[:64] @@ -440,6 +448,10 @@ class ArchiveResult(ABIDModel): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) + created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='archiveresult_set') + created = AutoDateTimeField(default=timezone.now, db_index=True) + modified = models.DateTimeField(auto_now=True) + snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id') extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)