diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py
index 062e372f..bd97b60d 100644
--- a/archivebox/abid_utils/admin.py
+++ b/archivebox/abid_utils/admin.py
@@ -1,39 +1,88 @@
+__package__ = 'archivebox.abid_utils'
from django.contrib import admin
from datetime import datetime
from django.utils.html import format_html
+from django.utils.safestring import mark_safe
+
+from abid_utils.abid import abid_part_from_ts, abid_part_from_uri, abid_part_from_rand, abid_part_from_subtype
from api.auth import get_or_create_api_token
+from ..util import parse_date
+
+def highlight_diff(display_val, compare_val):
+    """
+    Render display_val as HTML, colouring in red every character that differs
+    from the character at the same index in compare_val.
+
+    Both arguments are coerced with str() before comparing. Characters of
+    display_val that lie beyond the end of compare_val have no counterpart and
+    are treated as differing (indexing compare_val there used to raise
+    IndexError).
+
+    Returns the concatenation of the per-character format_html() fragments, so
+    every character is HTML-escaped individually.
+    """
+
+    display_val = str(display_val)
+    compare_val = str(compare_val)
+
+    diff_chars = mark_safe('').join(
+        # differing (or unmatched) characters get the red wrapper ...
+        format_html('<span style="color: red">{}</span>', char)
+        if idx >= len(compare_val) or char != compare_val[idx] else
+        # ... matching characters are emitted escaped but unstyled
+        format_html('{}', char)
+        for idx, char in enumerate(display_val)
+    )
+    return diff_chars
+
def get_abid_info(self, obj, request=None):
try:
+ abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
+
+ fresh_abid = obj.generate_abid()
+ fresh_abid_diff = f' != .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅'
+ fresh_uuid_diff = f' != .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅'
+
+ id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅'
+ id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
+ id_pk_diff = f' != .pk: {highlight_diff(obj.pk, obj.id)} ❌' if str(obj.pk) != str(obj.id) else ' == .pk ✅'
+
+ source_ts_val = parse_date(obj.abid_values['ts']) or None
+ derived_ts = abid_part_from_ts(source_ts_val) if source_ts_val else None
+ ts_diff = f'!= {highlight_diff(derived_ts, obj.ABID.ts)} ❌' if derived_ts != obj.ABID.ts else '✅'
+
+ derived_uri = abid_part_from_uri(obj.abid_values['uri'])
+ uri_diff = f'!= {highlight_diff(derived_uri, obj.ABID.uri)} ❌' if derived_uri != obj.ABID.uri else '✅'
+
+ derived_subtype = abid_part_from_subtype(obj.abid_values['subtype'])
+ subtype_diff = f'!= {highlight_diff(derived_subtype, obj.ABID.subtype)} ❌' if derived_subtype != obj.ABID.subtype else '✅'
+
+ derived_rand = abid_part_from_rand(obj.abid_values['rand'])
+ rand_diff = f'!= {highlight_diff(derived_rand, obj.ABID.rand)} ❌' if derived_rand != obj.ABID.rand else '✅'
+
+ # any_abid_discrepancies = any(
+ # '❌' in diff or '!=' in diff
+ # for diff in (abid_diff, fresh_abid_diff, id_abid_diff, id_pk_diff, ts_diff, uri_diff, subtype_diff, rand_diff)
+ # )
+ # total_diff = f' != .generate_abid() -> {fresh_abid} ❌' if any_abid_discrepancies else '✅'
+
return format_html(
# URL Hash: {}
'''
{} 📖 API DOCS
- .abid: {}
- .abid.uuid: {}
- .id: {}
+ .abid: {}
{}
+ .abid.uuid: {}
{}
+ .id: {}
{}
- TS: {} {}
{}: {}
- URI: {} {}
{}: {}
- SALT: {}
- SUBTYPE: {} {}
{}: {}
- RAND: {} {}
{}: {}
+ TS: {} {}
{}
{}: {}
+ URI: {} {}
{}
{}: {}
+ SUBTYPE: {} {}
{}
{}: {}
+ RAND: {} {}
{}
{}: {}
.old_id: {}
''',
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
- str(obj.abid),
- str(obj.ABID.uuid),
- str(obj.id),
- obj.ABID.ts, str(obj.ABID.uuid)[0:14], obj.abid_ts_src, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'],
- obj.ABID.uri, str(obj.ABID.uuid)[14:26], obj.abid_uri_src, str(obj.abid_values['uri']),
- obj.ABID.uri_salt,
- obj.ABID.subtype, str(obj.ABID.uuid)[26:28], obj.abid_subtype_src, str(obj.abid_values['subtype']),
- obj.ABID.rand, str(obj.ABID.uuid)[28:36], obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
+ str(obj.abid), mark_safe(fresh_abid_diff),
+ str(obj.ABID.uuid), mark_safe(fresh_uuid_diff),
+ str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff),
+ # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
+ # str(fresh_abid), mark_safe(fresh_abid_diff),
+ obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(),
+ obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']),
+ obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']),
+ obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:],
str(getattr(obj, 'old_id', '')),
)
except Exception as e:
diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py
index 6d933993..7bdd89a8 100644
--- a/archivebox/abid_utils/models.py
+++ b/archivebox/abid_utils/models.py
@@ -89,17 +89,25 @@ class ABIDModel(models.Model):
abstract = True
def save(self, *args: Any, **kwargs: Any) -> None:
+ # Fill in created / id / abid where needed, then delegate to the parent save().
- if self._state.adding or not self.created:
- self.created = timezone.now()
+ # keep an existing created timestamp; only fall back to now() when it is unset
+ self.created = self.created or timezone.now()
- # when first creating a row, self.ABID is the source of truth
- # overwrite default prefilled self.id & self.abid with generated self.ABID value
- if self._state.adding or not self.id:
+ # every ABID source value must be truthy before self.ABID is read below
+ # NOTE(review): assert statements are skipped when Python runs with -O
+ assert all(val for val in self.abid_values.values()), f'All ABID src values must be set: {self.abid_values}'
+
+ # new rows: both id and abid are taken from self.ABID
+ if self._state.adding:
self.id = self.ABID.uuid
- if self._state.adding or not self.abid:
self.abid = str(self.ABID)
+ # existing rows: id is left untouched and abid is only filled in when empty
+ else:
+ assert self.id, 'id must be set when object exists in DB'
+ if not self.abid:
+ self.abid = str(self.ABID)
+ # assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}'
+
+ # fresh_abid = self.generate_abid()
+ # if str(fresh_abid) != str(self.abid):
+ # self.abid = str(fresh_abid)
+
+ # NOTE(review): the asserts that follow this return appear to be unreachable now - confirm that disabling them is intentional
+ return super().save(*args, **kwargs)
- super().save(*args, **kwargs)
assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}'
assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}'
assert str(self.uuid) == str(self.ABID.uuid), f'self.uuid ({self.uuid}) does not match .ABID.uuid ({self.ABID.uuid})'
diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 885e18ed..61323a84 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -18,6 +18,7 @@ from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
+from django.contrib.auth.admin import UserAdmin
from django.core.paginator import Paginator
from django.core.exceptions import ValidationError
from django.conf import settings
@@ -112,8 +113,84 @@ class ArchiveBoxAdmin(admin.AdminSite):
return render(template_name='add.html', request=request, context=context)
+class CustomUserAdmin(UserAdmin):
+ # User admin that appends a read-only 'Data' fieldset listing records related to the user.
+ # NOTE(review): sort_fields is not a stock Django ModelAdmin option - presumably consumed elsewhere in this project; confirm
+ sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
+ list_display = ['username', 'id', 'email', 'is_superuser', 'last_login', 'date_joined']
+ # each name below is a method on this class that renders one related-record listing
+ readonly_fields = ('snapshot_set', 'archiveresult_set', 'tag_set', 'apitoken_set', 'outboundwebhook_set')
+ # keep the stock UserAdmin fieldsets and add the listings as an extra 'Data' section
+ fieldsets = [*UserAdmin.fieldsets, ('Data', {'fields': readonly_fields})]
+
+ # Up to 10 of the user's snapshots (ordered by -modified), followed by the total count.
+ # Each row is built with format_html() before the joined result is passed to mark_safe().
+ @admin.display(description='Snapshots')
+ def snapshot_set(self, obj):
+ total_count = obj.snapshot_set.count()
+ return mark_safe('
'.join(
+ format_html(
+ '[{}]
📅 {} {}',
+ snap.pk,
+ snap.abid,
+ snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...',
+ snap.url[:64],
+ )
+ for snap in obj.snapshot_set.order_by('-modified')[:10]
+ ) + f'
{total_count} total records...')
+
+ # Up to 10 of the user's archive results (ordered by -modified), followed by the total count.
+ # The date and URL shown come from each result's parent snapshot.
+ @admin.display(description='Archive Result Logs')
+ def archiveresult_set(self, obj):
+ total_count = obj.archiveresult_set.count()
+ return mark_safe('
'.join(
+ format_html(
+ '[{}]
📅 {} 📄 {} {}',
+ result.pk,
+ result.abid,
+ result.snapshot.updated.strftime('%Y-%m-%d %H:%M') if result.snapshot.updated else 'pending...',
+ result.extractor,
+ result.snapshot.url[:64],
+ )
+ for result in obj.archiveresult_set.order_by('-modified')[:10]
+ ) + f'
{total_count} total records...')
+
+ # Up to 10 of the user's tags (ordered by -modified), comma-separated, followed by the total count.
+ @admin.display(description='Tags')
+ def tag_set(self, obj):
+ total_count = obj.tag_set.count()
+ return mark_safe(', '.join(
+ format_html(
+ '{}
',
+ tag.pk,
+ tag.name,
+ )
+ for tag in obj.tag_set.order_by('-modified')[:10]
+ ) + f'
{total_count} total records...')
+
+ # Up to 10 of the user's API tokens (ordered by -modified), followed by the total count.
+ # Only the token_redacted value is rendered, truncated to 64 characters.
+ @admin.display(description='API Tokens')
+ def apitoken_set(self, obj):
+ total_count = obj.apitoken_set.count()
+ return mark_safe('
'.join(
+ format_html(
+ '[{}]
{} (expires {})',
+ apitoken.pk,
+ apitoken.abid,
+ apitoken.token_redacted[:64],
+ apitoken.expires,
+ )
+ for apitoken in obj.apitoken_set.order_by('-modified')[:10]
+ ) + f'
{total_count} total records...')
+
+ # Up to 10 of the user's outbound webhooks (ordered by -modified), followed by the total count.
+ @admin.display(description='API Outbound Webhooks')
+ def outboundwebhook_set(self, obj):
+ total_count = obj.outboundwebhook_set.count()
+ return mark_safe('
'.join(
+ format_html(
+ '[{}]
{} -> {}',
+ outboundwebhook.pk,
+ outboundwebhook.abid,
+ outboundwebhook.referenced_model,
+ outboundwebhook.endpoint,
+ )
+ for outboundwebhook in obj.outboundwebhook_set.order_by('-modified')[:10]
+ ) + f'
{total_count} total records...')
+
+
archivebox_admin = ArchiveBoxAdmin()
-archivebox_admin.register(get_user_model())
+archivebox_admin.register(get_user_model(), CustomUserAdmin)
archivebox_admin.disable_action('delete_selected')
# archivebox_admin.register(CustomPlugin)
@@ -576,8 +653,9 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
@admin.register(Tag, site=archivebox_admin)
class TagAdmin(ABIDModelAdmin):
list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
+ list_filter = ('created', 'created_by')
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created')
- readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots')
+ readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'snapshots')
search_fields = ('abid', 'name', 'slug')
fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected']
@@ -603,7 +681,7 @@ class TagAdmin(ABIDModelAdmin):
snap.url[:64],
)
for snap in tag.snapshot_set.order_by('-updated')[:10]
- ) + (f'
and {total_count-10} more...' if tag.snapshot_set.count() > 10 else ''))
+ ) + (f'
{total_count} total snapshots...'))
@admin.register(ArchiveResult, site=archivebox_admin)
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index a362bdae..20c70797 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -20,7 +20,7 @@ from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField
from django.conf import settings
-from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
+from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, get_or_create_system_user_pk
from ..system import get_dir_size
from ..util import parse_date, base_url
@@ -142,22 +142,30 @@ class Snapshot(ABIDModel):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True)
abid = ABIDField(prefix=abid_prefix)
- url = models.URLField(unique=True, db_index=True)
- timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
-
- title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
-
- tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
+ created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='snapshot_set')
+ created = AutoDateTimeField(default=timezone.now, db_index=True)
+ modified = models.DateTimeField(auto_now=True)
+ # legacy timestamp fields (kept alongside the newer created/modified fields above)
added = AutoDateTimeField(default=timezone.now, db_index=True)
updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True)
+ url = models.URLField(unique=True, db_index=True)
+ timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
+ tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
+ title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
+
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
archiveresult_set: models.Manager['ArchiveResult']
objects = SnapshotManager()
+ def save(self, *args, **kwargs):
+ # seed self.added before delegating to the parent save(), since the ABID is calculated using it; a value that is already set is kept as-is
+ if self._state.adding or not self.added:
+ self.added = self.added or timezone.now()
+ return super().save(*args, **kwargs)
def __repr__(self) -> str:
title = (self.title_stripped or '-')[:64]
@@ -440,6 +448,10 @@ class ArchiveResult(ABIDModel):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
+ created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, related_name='archiveresult_set')
+ created = AutoDateTimeField(default=timezone.now, db_index=True)
+ modified = models.DateTimeField(auto_now=True)
+
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id')
extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32)