rename datetime fields to _at, massively improve ABID generation safety and determinism

This commit is contained in:
Nick Sweeting 2024-09-04 23:42:36 -07:00
parent 68a39b7392
commit cbf2a8fdc3
No known key found for this signature in database
25 changed files with 408 additions and 335 deletions

View file

@ -208,6 +208,7 @@ def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str |
'uri': abid_part_from_uri(uri, salt=salt), 'uri': abid_part_from_uri(uri, salt=salt),
'subtype': abid_part_from_subtype(subtype), 'subtype': abid_part_from_subtype(subtype),
'rand': abid_part_from_rand(rand), 'rand': abid_part_from_rand(rand),
# 'salt': don't add this, salt combined with uri above to form a single hash
} }
@enforce_types @enforce_types

View file

@ -1,58 +1,61 @@
__package__ = 'archivebox.abid_utils' __package__ = 'archivebox.abid_utils'
from django.contrib import admin
from typing import Any
from datetime import datetime from datetime import datetime
from django.contrib import admin, messages
from django.core.exceptions import ValidationError
from django.utils.html import format_html from django.utils.html import format_html
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from django.shortcuts import redirect
from abid_utils.abid import abid_part_from_ts, abid_part_from_uri, abid_part_from_rand, abid_part_from_subtype from abid_utils.abid import ABID, abid_part_from_ts, abid_part_from_uri, abid_part_from_rand, abid_part_from_subtype
from api.auth import get_or_create_api_token from api.auth import get_or_create_api_token
from ..util import parse_date from ..util import parse_date
def highlight_diff(display_val, compare_val): def highlight_diff(display_val: Any, compare_val: Any, invert: bool=False, color_same: str | None=None, color_diff: str | None=None):
"""highlight each character in red that differs with the char at the same index in compare_val""" """highlight each character in red that differs with the char at the same index in compare_val"""
display_val = str(display_val) display_val = str(display_val)
compare_val = str(compare_val) compare_val = str(compare_val)
if len(compare_val) < len(display_val):
compare_val += ' ' * (len(display_val) - len(compare_val))
similar_color, highlighted_color = color_same or 'inherit', color_diff or 'red'
if invert:
similar_color, highlighted_color = color_same or 'green', color_diff or 'inherit'
return mark_safe(''.join( return mark_safe(''.join(
format_html('<span style="color: red;">{}</span>', display_val[i]) format_html('<span style="color: {};">{}</span>', highlighted_color, display_val[i])
if display_val[i] != compare_val[i] else if display_val[i] != compare_val[i] else
format_html('<span display="color: black">{}</span>', display_val[i]) format_html('<span style="color: {};">{}</span>', similar_color, display_val[i])
for i in range(len(display_val)) for i in range(len(display_val))
)) ))
def get_abid_info(self, obj, request=None): def get_abid_info(self, obj, request=None):
try: try:
abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)}' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅' #abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅'
fresh_abid = obj.ABID fresh_abid = ABID(**obj.ABID_FRESH_HASHES)
fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)}' if str(fresh_abid) != str(obj.ABID) else '' fresh_abid_diff = f' != &nbsp; .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)}' if str(fresh_abid) != str(obj.ABID) else ''
fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '' fresh_uuid_diff = f' != &nbsp; .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)}' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else ''
id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅' id_pk_diff = f'❌ != .pk: {highlight_diff(obj.pk, obj.id)}' if str(obj.pk) != str(obj.id) else ''
id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)}' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅'
id_pk_diff = f' != .pk: {highlight_diff(obj.pk, obj.id)}' if str(obj.pk) != str(obj.id) else ' == .pk ✅'
fresh_ts = parse_date(obj.ABID_FRESH_VALUES['ts']) or None fresh_ts = parse_date(obj.ABID_FRESH_VALUES['ts']) or None
derived_ts = abid_part_from_ts(fresh_ts) if fresh_ts else None ts_diff = f'❌ != {highlight_diff( obj.ABID_FRESH_HASHES["ts"], obj.ABID.ts)}' if obj.ABID_FRESH_HASHES["ts"] != obj.ABID.ts else ''
ts_diff = f'!= {highlight_diff(derived_ts, obj.ABID.ts)}' if derived_ts != obj.ABID.ts else ''
derived_uri = abid_part_from_uri(obj.ABID_FRESH_VALUES['uri']) derived_uri = obj.ABID_FRESH_HASHES['uri']
uri_diff = f'!= {highlight_diff(derived_uri, obj.ABID.uri)}' if derived_uri != obj.ABID.uri else '' uri_diff = f'!= {highlight_diff(derived_uri, obj.ABID.uri)}' if derived_uri != obj.ABID.uri else ''
derived_subtype = abid_part_from_subtype(obj.ABID_FRESH_VALUES['subtype']) derived_subtype = obj.ABID_FRESH_HASHES['subtype']
subtype_diff = f'!= {highlight_diff(derived_subtype, obj.ABID.subtype)}' if derived_subtype != obj.ABID.subtype else '' subtype_diff = f'!= {highlight_diff(derived_subtype, obj.ABID.subtype)}' if derived_subtype != obj.ABID.subtype else ''
derived_rand = abid_part_from_rand(obj.ABID_FRESH_VALUES['rand']) derived_rand = obj.ABID_FRESH_HASHES['rand']
rand_diff = f'!= {highlight_diff(derived_rand, obj.ABID.rand)}' if derived_rand != obj.ABID.rand else '' rand_diff = f'❌ != {highlight_diff(derived_rand, obj.ABID.rand)}' if derived_rand != obj.ABID.rand else ''
# any_abid_discrepancies = any(
# '❌' in diff or '!=' in diff
# for diff in (abid_diff, fresh_abid_diff, id_abid_diff, id_pk_diff, ts_diff, uri_diff, subtype_diff, rand_diff)
# )
# total_diff = f' != .generate_abid() -> {fresh_abid} ❌' if any_abid_discrepancies else '✅'
return format_html( return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/> # URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
@ -69,30 +72,34 @@ def get_abid_info(self, obj, request=None):
&nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code><br/> &nbsp; &nbsp; SUBTYPE: &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code><br/>
&nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code></code> &nbsp; &nbsp; RAND: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b style="user-select: all">{}</b> &nbsp; &nbsp; &nbsp; {}</code> &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; <code style="font-size: 10px;"><b>{}</b></code> {}: <code style="user-select: all">{}</code></code>
<br/><hr/> <br/><hr/>
<span style="color: #f375a0">{}</span> <code style="color: red"><b>{}</b></code>
</div> </div>
''', ''',
obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url, obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url,
highlight_diff(obj.id, obj.ABID.uuid), mark_safe(id_pk_diff + id_abid_diff), highlight_diff(obj.id, obj.ABID.uuid, invert=True), mark_safe(id_pk_diff),
highlight_diff(obj.ABID.uuid, obj.id), mark_safe(fresh_uuid_diff), highlight_diff(obj.ABID.uuid, obj.id, invert=True), mark_safe(fresh_uuid_diff),
highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff), highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff),
# str(fresh_abid.uuid), mark_safe(fresh_uuid_diff), # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff),
# str(fresh_abid), mark_safe(fresh_abid_diff), # str(fresh_abid), mark_safe(fresh_abid_diff),
highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, fresh_ts and fresh_ts.isoformat(), highlight_diff(obj.ABID.ts, obj.ABID_FRESH_HASHES['ts']), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, fresh_ts and fresh_ts.isoformat(),
highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.ABID_FRESH_VALUES['uri']), highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.ABID_FRESH_VALUES['uri']),
highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.ABID_FRESH_VALUES['subtype']), highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.ABID_FRESH_VALUES['subtype']),
highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.ABID_FRESH_VALUES['rand'])[-7:], highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.ABID_FRESH_VALUES['rand'])[-7:],
f'Some values the ABID depends on have changed since the ABID was issued:' if obj.ABID_FRESH_DIFFS else '',
", ".join(diff['abid_src'] for diff in obj.ABID_FRESH_DIFFS.values()),
) )
except Exception as e: except Exception as e:
# import ipdb; ipdb.set_trace()
return str(e) return str(e)
class ABIDModelAdmin(admin.ModelAdmin): class ABIDModelAdmin(admin.ModelAdmin):
list_display = ('created', 'created_by', 'abid', '__str__') list_display = ('created_at', 'created_by', 'abid', '__str__')
sort_fields = ('created', 'created_by', 'abid', '__str__') sort_fields = ('created_at', 'created_by', 'abid', '__str__')
readonly_fields = ('created', 'modified', '__str__', 'API') readonly_fields = ('created_at', 'modified_at', '__str__', 'abid_info')
@admin.display(description='API Identifiers') @admin.display(description='API Identifiers')
def API(self, obj): def abid_info(self, obj):
return get_abid_info(self, obj, request=self.request) return get_abid_info(self, obj, request=self.request)
def queryset(self, request): def queryset(self, request):

View file

@ -11,7 +11,7 @@ from datetime import datetime, timedelta
from functools import partial from functools import partial
from charidfield import CharIDField # type: ignore[import-untyped] from charidfield import CharIDField # type: ignore[import-untyped]
from django.conf import settings from django.core.exceptions import ValidationError
from django.db import models from django.db import models
from django.utils import timezone from django.utils import timezone
from django.db.utils import OperationalError from django.db.utils import OperationalError
@ -59,7 +59,7 @@ def get_or_create_system_user_pk(username='system'):
return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0] return User.objects.filter(is_superuser=True).values_list('pk', flat=True)[0]
# otherwise, create a dedicated "system" user # otherwise, create a dedicated "system" user
user, created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''}) user, _was_created = User.objects.get_or_create(username=username, is_staff=True, is_superuser=True, defaults={'email': '', 'password': ''})
return user.pk return user.pk
@ -68,69 +68,166 @@ class AutoDateTimeField(models.DateTimeField):
# return timezone.now() # return timezone.now()
pass pass
class ABIDError(Exception):
pass
class ABIDFieldsCannotBeChanged(ValidationError, ABIDError):
"""
Properties used as unique identifiers (to generate ABID) cannot be edited after an object is created.
Create a new object instead with your desired changes (and it will be issued a new ABID).
"""
def __init__(self, ABID_FRESH_DIFFS, obj):
self.ABID_FRESH_DIFFS = ABID_FRESH_DIFFS
self.obj = obj
def __str__(self):
keys_changed = ', '.join(diff['abid_src'] for diff in self.ABID_FRESH_DIFFS.values())
return (
f"This {self.obj.__class__.__name__}(abid={str(self.obj.ABID)}) was assigned a fixed, unique ID (ABID) based on its contents when it was created. " +
f'\nThe following changes cannot be made because they would alter the ABID:' +
'\n ' + "\n ".join(f' - {diff["summary"]}' for diff in self.ABID_FRESH_DIFFS.values()) +
f"\nYou must reduce your changes to not affect these fields, or create a new {self.obj.__class__.__name__} object instead."
)
class ABIDModel(models.Model): class ABIDModel(models.Model):
""" """
Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface. Abstract Base Model for other models to depend on. Provides ArchiveBox ID (ABID) interface.
""" """
abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_' abid_prefix: str = DEFAULT_ABID_PREFIX # e.g. 'tag_'
abid_ts_src = 'self.created' # e.g. 'self.created' abid_ts_src = 'self.created_at' # e.g. 'self.created_at'
abid_uri_src = 'None' # e.g. 'self.uri' abid_uri_src = 'None' # e.g. 'self.uri' (MUST BE SET)
abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor' abid_subtype_src = 'self.__class__.__name__' # e.g. 'self.extractor'
abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id' abid_rand_src = 'self.id' # e.g. 'self.uuid' or 'self.id'
abid_salt: str = DEFAULT_ABID_URI_SALT abid_salt: str = DEFAULT_ABID_URI_SALT # combined with self.uri to anonymize hashes on a per-install basis (the default is shared globally by all users, which means everyone around the world hashes ABC -> 123 the same way, making it easy to share ABIDs across installs and see if they are for the same URI. Change this if you don't want your hashes to be guessable / in the same hash space as all other users)
abid_drift_allowed: bool = False # set to True to allow abid_field values to change after a fixed ABID has been issued (NOT RECOMMENDED: means values can drift out of sync from original ABID)
# id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') # id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
# abid = ABIDField(prefix=abid_prefix) # abid = ABIDField(prefix=abid_prefix)
# created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) # created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
# created = AutoDateTimeField(default=None, null=False, db_index=True) # created_at = AutoDateTimeField(default=None, null=False, db_index=True)
# modified = models.DateTimeField(auto_now=True) # modified_at = models.DateTimeField(auto_now=True)
class Meta(TypedModelMeta): class Meta(TypedModelMeta):
abstract = True abstract = True
def save(self, *args: Any, **kwargs: Any) -> None: def __init__(self, *args: Any, **kwargs: Any) -> None:
"""Overridden __init__ method ensures we have a stable creation timestamp that fields can use within initialization code pre-saving to DB."""
super().__init__(*args, **kwargs)
# pre-compute a stable timestamp of the obj init time (with abid.ts precision limit applied) for use when object is first created,
# some other fields depend on a timestamp at creation time, and it's nice to have one common timestamp they can all share.
# Used as an alternative to auto_now_add=True + auto_now=True which can produce two different times & requires saving to DB to get the TS.
# (ordinarily fields cant depend on other fields until the obj is saved to db and recalled)
self._init_timestamp = ts_from_abid(abid_part_from_ts(timezone.now()))
def save(self, *args: Any, abid_drift_allowed: bool | None=None, **kwargs: Any) -> None:
"""Overridden save method ensures a new ABID is generated when a new object is first saved."""
if self._state.adding: if self._state.adding:
self.pk = self.id = self.id or uuid4() # only runs once when a new object is first saved to the DB
self.created = ts_from_abid(abid_part_from_ts(timezone.now())) # cut off precision to match precision of TS component # sets self.id, self.pk, self.created_by, self.created_at, self.modified_at
self.modified = self.created
self.created_by = self.created_by or get_or_create_system_user_pk()
self.abid = str(self.issue_new_abid()) self.abid = str(self.issue_new_abid())
else:
# otherwise if updating, make sure none of the field changes would invalidate existing ABID
if self.ABID_FRESH_DIFFS:
ovewrite_abid = self.abid_drift_allowed if (abid_drift_allowed is None) else abid_drift_allowed
change_error = ABIDFieldsCannotBeChanged(self.ABID_FRESH_DIFFS, obj=self)
if ovewrite_abid:
print(f'#### DANGER: Changing ABID of existing record ({self.__class__.__name__}.abid_drift_allowed={abid_drift_allowed}), this will break any references to its previous ABID!')
print(change_error)
self.abid = str(self.issue_new_abid(force_new=True))
print(f'#### DANGER: OVERWROTE OLD ABID. NEW ABID=', self.abid)
else:
raise change_error
return super().save(*args, **kwargs) return super().save(*args, **kwargs)
# assert str(self.id) == str(self.ABID.uuid), f'self.id {self.id} does not match self.ABID {self.ABID.uuid}' @property
# assert str(self.abid) == str(self.ABID), f'self.abid {self.id} does not match self.ABID {self.ABID.uuid}' def ABID_SOURCES(self) -> Dict[str, str]:
# assert str(self.uuid) == str(self.ABID.uuid), f'self.uuid ({self.uuid}) does not match .ABID.uuid ({self.ABID.uuid})' """Get the dict of source expressions (as strings) that each ABID component value is read from."""
assert self.abid_prefix
return {
'prefix': 'self.abid_prefix', # defined as static class vars at build time
'ts': self.abid_ts_src,
'uri': self.abid_uri_src,
'subtype': self.abid_subtype_src,
'rand': self.abid_rand_src,
'salt': 'self.abid_salt', # defined as static class vars at build time
}
@property @property
def ABID_FRESH_VALUES(self) -> Dict[str, Any]: def ABID_FRESH_VALUES(self) -> Dict[str, Any]:
assert self.abid_ts_src != 'None' """Get the dict of fresh ABID component values based on the live object's properties."""
assert self.abid_uri_src != 'None' abid_sources = self.ABID_SOURCES
assert self.abid_rand_src != 'None' assert all(src != 'None' for src in abid_sources.values())
assert self.abid_subtype_src != 'None'
return { return {
'prefix': self.abid_prefix, 'prefix': eval(abid_sources['prefix']),
'ts': eval(self.abid_ts_src), 'ts': eval(abid_sources['ts']),
'uri': eval(self.abid_uri_src), 'uri': eval(abid_sources['uri']),
'subtype': eval(self.abid_subtype_src), 'subtype': eval(abid_sources['subtype']),
'rand': eval(self.abid_rand_src), 'rand': eval(abid_sources['rand']),
'salt': self.abid_salt, 'salt': eval(abid_sources['salt']),
} }
@property @property
def ABID_FRESH_HASHES(self) -> Dict[str, str]: def ABID_FRESH_HASHES(self) -> Dict[str, str]:
return abid_hashes_from_values(**self.ABID_FRESH_VALUES) """Get the dict of fresh ABID component hashes based on the live object's properties."""
abid_values = self.ABID_FRESH_VALUES
assert all(val for val in abid_values.values())
return abid_hashes_from_values(
prefix=abid_values['prefix'],
ts=abid_values['ts'],
uri=abid_values['uri'],
subtype=abid_values['subtype'],
rand=abid_values['rand'],
salt=abid_values['salt'],
)
def issue_new_abid(self): @property
def ABID_FRESH_DIFFS(self) -> Dict[str, Dict[str, Any]]:
"""Get the dict of discrepancies between the existing saved ABID and a new fresh ABID computed based on the live object."""
existing_abid = self.ABID
existing_values = {} if self._state.adding else self.__class__.objects.get(pk=self.pk).ABID_FRESH_VALUES
abid_sources = self.ABID_SOURCES
fresh_values = self.ABID_FRESH_VALUES
fresh_hashes = self.ABID_FRESH_HASHES
return {
key: {
'model': self.__class__.__name__,
'pk': self.pk,
'abid_src': abid_sources[key],
'abid_section': key,
'old_val': existing_values.get(key, None),
'old_hash': getattr(existing_abid, key),
'new_val': fresh_values[key],
'new_hash': new_hash,
'summary': f'{abid_sources[key]}= "{existing_values.get(key, None)}" -> "{fresh_values[key]}" (would alter {self.__class__.__name__.lower()}.ABID.{key}={getattr(existing_abid, key)} to {new_hash})',
}
for key, new_hash in fresh_hashes.items()
if getattr(existing_abid, key) != new_hash
}
def issue_new_abid(self, force_new=False) -> ABID:
"""
Issue a new ABID based on the current object's properties, can only be called once on new objects (before they are saved to DB).
"""
if not force_new:
assert self.abid is None, f'Can only issue new ABID for new objects that dont already have one {self.abid}' assert self.abid is None, f'Can only issue new ABID for new objects that dont already have one {self.abid}'
assert self._state.adding, 'Can only issue new ABID when model._state.adding is True' assert self._state.adding, 'Can only issue new ABID when model._state.adding is True'
assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})' assert eval(self.abid_uri_src), f'Can only issue new ABID if self.abid_uri_src is defined ({self.abid_uri_src}={eval(self.abid_uri_src)})'
# Setup Field defaults to be ready for ABID generation
self.abid = None self.abid = None
self.pk = self.id = self.id or uuid4() self.id = self.id or uuid4()
self.created = ts_from_abid(abid_part_from_ts(timezone.now())) # cut off precision to match precision of TS component self.pk = self.id
self.created_at = self.created_at or self._init_timestamp # cut off precision to match precision of TS component
self.modified_at = self.modified_at or self.created_at
self.created_by = self.created_by or get_or_create_system_user_pk()
# Compute fresh ABID values & hashes based on object's live properties
abid_fresh_values = self.ABID_FRESH_VALUES abid_fresh_values = self.ABID_FRESH_VALUES
assert all(abid_fresh_values.values()), f'All ABID_FRESH_VALUES must be set {abid_fresh_values}' assert all(abid_fresh_values.values()), f'All ABID_FRESH_VALUES must be set {abid_fresh_values}'
abid_fresh_hashes = self.ABID_FRESH_HASHES abid_fresh_hashes = self.ABID_FRESH_HASHES
@ -140,64 +237,63 @@ class ABIDModel(models.Model):
assert new_abid.ulid and new_abid.uuid and new_abid.typeid, f'Failed to calculate {abid_fresh_values["prefix"]}_ABID for {self.__class__.__name__}' assert new_abid.ulid and new_abid.uuid and new_abid.typeid, f'Failed to calculate {abid_fresh_values["prefix"]}_ABID for {self.__class__.__name__}'
# store stable ABID on local fields, overwrite them because we are adding a new entry and existing defaults haven't touched the DB yet
self.abid = str(new_abid)
assert str(self.ABID.uuid) == str(new_abid.uuid)
return new_abid return new_abid
@property @property
def ABID(self) -> ABID: def ABID(self) -> ABID:
""" """
aka get_or_generate_abid -> ULIDParts(timestamp='01HX9FPYTR', url='E4A5CCD9', subtype='00', randomness='ZYEBQE') Get the object's existing ABID (from self.abid if it's already saved to DB, otherwise generated fresh)
e.g. -> ABID(ts='01HX9FPYTR', uri='E4A5CCD9', subtype='00', rand='ZYEBQE')
""" """
if not self.abid: if self.abid:
pre_save_abid = self.issue_new_abid()
self.abid = str(pre_save_abid)
return pre_save_abid
return ABID.parse(cast(str, self.abid)) return ABID.parse(cast(str, self.abid))
@property return self.issue_new_abid()
def ULID(self) -> ULID:
"""
Get a ulid.ULID representation of the object's ABID.
"""
return self.ABID.ulid
@property # These are all example helpers to make it easy to access alternate formats of the ABID.*, only add them if you actually need them
def UUID(self) -> UUID: # @property
""" # def UUID(self) -> UUID:
Get a uuid.UUID (v4) representation of the object's ABID. # """
""" # Get a uuid.UUID (v4) representation of the object's ABID.
return self.ABID.uuid # """
# return self.ABID.uuid
@property # @property
def uuid(self) -> str: # def uuid(self) -> str:
""" # """
Get a str uuid.UUID (v4) representation of the object's ABID. # Get a str uuid.UUID (v4) representation of the object's ABID.
""" # """
return str(self.ABID.uuid) # return str(self.ABID.uuid)
@property # @property
def TypeID(self) -> TypeID: # def ULID(self) -> ULID:
""" # """
Get a typeid.TypeID (stripe-style) representation of the object's ABID. # Get a ulid.ULID representation of the object's ABID.
""" # """
return self.ABID.typeid # return self.ABID.ulid
@property # @property
def abid_uri(self) -> str: # def TypeID(self) -> TypeID:
return eval(self.abid_uri_src) # """
# Get a typeid.TypeID (stripe-style) representation of the object's ABID.
# """
# return self.ABID.typeid
@property @property
def api_url(self) -> str: def api_url(self) -> str:
# /api/v1/core/any/{abid} """
Compute the REST API URL to access this object.
e.g. /api/v1/core/snapshot/snp_01BJQMF54D093DXEAWZ6JYRP
"""
return reverse_lazy('api-1:get_any', args=[self.abid]) return reverse_lazy('api-1:get_any', args=[self.abid])
@property @property
def api_docs_url(self) -> str: def api_docs_url(self) -> str:
"""
Compute the REST API Documentation URL to learn about accessing this object.
e.g. /api/v1/docs#/Core%20Models/api_v1_core_get_snapshots
"""
return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}' return f'/api/v1/docs#/{self._meta.app_label.title()}%20Models/api_v1_{self._meta.app_label}_get_{self._meta.db_table}'
@ -311,7 +407,7 @@ def find_obj_from_abid_rand(rand: Union[ABID, str], model=None) -> List[ABIDMode
) )
for obj in qs: for obj in qs:
if abid in (str(obj.ABID_FRESH), str(obj.id), str(obj.abid)): if abid in (str(obj.ABID), str(obj.id), str(obj.pk), str(obj.abid)):
# found exact match, no need to keep iterating # found exact match, no need to keep iterating
return [obj] return [obj]
partial_matches.append(obj) partial_matches.append(obj)

View file

@ -27,7 +27,7 @@ class APIToken(ABIDModel):
""" """
# ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand> # ABID: apt_<created_ts>_<token_hash>_<user_id_hash>_<uuid_rand>
abid_prefix = 'apt_' abid_prefix = 'apt_'
abid_ts_src = 'self.created' abid_ts_src = 'self.created_at'
abid_uri_src = 'self.token' abid_uri_src = 'self.token'
abid_subtype_src = 'self.created_by_id' abid_subtype_src = 'self.created_by_id'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
@ -36,8 +36,8 @@ class APIToken(ABIDModel):
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
created = AutoDateTimeField(default=None, null=False, db_index=True) created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
token = models.CharField(max_length=32, default=generate_secret_token, unique=True) token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
expires = models.DateTimeField(null=True, blank=True) expires = models.DateTimeField(null=True, blank=True)
@ -59,7 +59,7 @@ class APIToken(ABIDModel):
"abid": str(self.ABID), "abid": str(self.ABID),
"created_by_id": str(self.created_by_id), "created_by_id": str(self.created_by_id),
"token": self.token, "token": self.token,
"created": self.created.isoformat(), "created_at": self.created_at.isoformat(),
"expires": self.expires_as_iso8601, "expires": self.expires_as_iso8601,
} }
@ -95,7 +95,7 @@ class OutboundWebhook(ABIDModel, WebhookBase):
settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook' settings.SIGNAL_WEBHOOKS_CUSTOM_MODEL = 'api.models.OutboundWebhook'
""" """
abid_prefix = 'whk_' abid_prefix = 'whk_'
abid_ts_src = 'self.created' abid_ts_src = 'self.created_at'
abid_uri_src = 'self.endpoint' abid_uri_src = 'self.endpoint'
abid_subtype_src = 'self.ref' abid_subtype_src = 'self.ref'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
@ -104,8 +104,8 @@ class OutboundWebhook(ABIDModel, WebhookBase):
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False)
created = AutoDateTimeField(default=None, null=False, db_index=True) created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
# More fields here: WebhookBase... # More fields here: WebhookBase...

View file

@ -94,7 +94,7 @@ class ListCommandSchema(Schema):
status: Optional[StatusChoices] = StatusChoices.indexed status: Optional[StatusChoices] = StatusChoices.indexed
after: Optional[float] = 0 after: Optional[float] = 0
before: Optional[float] = 999999999999999 before: Optional[float] = 999999999999999
sort: str = 'added' sort: str = 'bookmarked_at'
as_json: bool = True as_json: bool = True
as_html: bool = False as_html: bool = False
as_csv: str | bool = 'timestamp,url' as_csv: str | bool = 'timestamp,url'

View file

@ -60,22 +60,17 @@ class CustomPagination(PaginationBase):
### ArchiveResult ######################################################################### ### ArchiveResult #########################################################################
class ArchiveResultSchema(Schema): class MinimalArchiveResultSchema(Schema):
TYPE: str = 'core.models.ArchiveResult' TYPE: str = 'core.models.ArchiveResult'
id: UUID id: UUID
abid: str abid: str
modified: datetime modified_at: datetime
created: datetime created_at: datetime
created_by_id: str created_by_id: str
created_by_username: str created_by_username: str
snapshot_abid: str
snapshot_timestamp: str
snapshot_url: str
snapshot_tags: str
extractor: str extractor: str
cmd_version: Optional[str] cmd_version: Optional[str]
cmd: List[str] cmd: List[str]
@ -92,20 +87,12 @@ class ArchiveResultSchema(Schema):
User = get_user_model() User = get_user_model()
return User.objects.get(id=obj.created_by_id).username return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_pk(obj):
return str(obj.pk)
@staticmethod
def resolve_uuid(obj):
return str(obj.uuid)
@staticmethod @staticmethod
def resolve_abid(obj): def resolve_abid(obj):
return str(obj.ABID) return str(obj.ABID)
@staticmethod @staticmethod
def resolve_created(obj): def resolve_created_at(obj):
return obj.start_ts return obj.start_ts
@staticmethod @staticmethod
@ -116,13 +103,28 @@ class ArchiveResultSchema(Schema):
def resolve_snapshot_url(obj): def resolve_snapshot_url(obj):
return obj.snapshot.url return obj.snapshot.url
@staticmethod
def resolve_snapshot_id(obj):
return str(obj.snapshot_id)
@staticmethod @staticmethod
def resolve_snapshot_abid(obj): def resolve_snapshot_abid(obj):
return str(obj.snapshot.ABID) return str(obj.snapshot.ABID)
@staticmethod @staticmethod
def resolve_snapshot_tags(obj): def resolve_snapshot_tags(obj):
return obj.snapshot.tags_str() return sorted(tag.name for tag in obj.snapshot.tags.all())
class ArchiveResultSchema(MinimalArchiveResultSchema):
TYPE: str = 'core.models.ArchiveResult'
# ... Extends MinimalArchiveResultSchema fields ...
snapshot_id: UUID
snapshot_abid: str
snapshot_timestamp: str
snapshot_url: str
snapshot_tags: List[str]
class ArchiveResultFilterSchema(FilterSchema): class ArchiveResultFilterSchema(FilterSchema):
@ -140,9 +142,9 @@ class ArchiveResultFilterSchema(FilterSchema):
pwd: Optional[str] = Field(None, q='pwd__icontains') pwd: Optional[str] = Field(None, q='pwd__icontains')
cmd_version: Optional[str] = Field(None, q='cmd_version') cmd_version: Optional[str] = Field(None, q='cmd_version')
created: Optional[datetime] = Field(None, q='updated') created_at: Optional[datetime] = Field(None, q='created_at')
created__gte: Optional[datetime] = Field(None, q='updated__gte') created_at__gte: Optional[datetime] = Field(None, q='created_at__gte')
created__lt: Optional[datetime] = Field(None, q='updated__lt') created_at__lt: Optional[datetime] = Field(None, q='created_at__lt')
@router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult") @router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult")
@ -194,23 +196,25 @@ class SnapshotSchema(Schema):
id: UUID id: UUID
abid: str abid: str
modified: datetime
created: datetime
created_by_id: str created_by_id: str
created_by_username: str created_by_username: str
created_at: datetime
modified_at: datetime
bookmarked_at: datetime
downloaded_at: Optional[datetime]
url: str url: str
tags: str tags: List[str]
title: Optional[str] title: Optional[str]
timestamp: str timestamp: str
archive_path: str archive_path: str
bookmarked: datetime # url_for_admin: str
added: datetime # url_for_view: str
updated: Optional[datetime]
num_archiveresults: int num_archiveresults: int
archiveresults: List[ArchiveResultSchema] archiveresults: List[MinimalArchiveResultSchema]
@staticmethod @staticmethod
def resolve_created_by_id(obj): def resolve_created_by_id(obj):
@ -221,21 +225,21 @@ class SnapshotSchema(Schema):
User = get_user_model() User = get_user_model()
return User.objects.get(id=obj.created_by_id).username return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_pk(obj):
return str(obj.pk)
@staticmethod
def resolve_uuid(obj):
return str(obj.uuid)
@staticmethod @staticmethod
def resolve_abid(obj): def resolve_abid(obj):
return str(obj.ABID) return str(obj.ABID)
@staticmethod @staticmethod
def resolve_tags(obj): def resolve_tags(obj):
return obj.tags_str() return sorted(tag.name for tag in obj.tags.all())
# @staticmethod
# def resolve_url_for_admin(obj):
# return f"/admin/core/snapshot/{obj.id}/change/"
# @staticmethod
# def resolve_url_for_view(obj):
# return f"/{obj.archive_path}"
@staticmethod @staticmethod
def resolve_num_archiveresults(obj, context): def resolve_num_archiveresults(obj, context):
@ -255,12 +259,12 @@ class SnapshotFilterSchema(FilterSchema):
created_by_id: str = Field(None, q='created_by_id') created_by_id: str = Field(None, q='created_by_id')
created_by_username: str = Field(None, q='created_by__username__icontains') created_by_username: str = Field(None, q='created_by__username__icontains')
created__gte: datetime = Field(None, q='created__gte') created_at__gte: datetime = Field(None, q='created_at__gte')
created__lt: datetime = Field(None, q='created__lt') created_at__lt: datetime = Field(None, q='created_at__lt')
created: datetime = Field(None, q='created') created_at: datetime = Field(None, q='created_at')
modified: datetime = Field(None, q='modified') modified_at: datetime = Field(None, q='modified_at')
modified__gte: datetime = Field(None, q='modified__gte') modified_at__gte: datetime = Field(None, q='modified_at__gte')
modified__lt: datetime = Field(None, q='modified__lt') modified_at__lt: datetime = Field(None, q='modified_at__lt')
search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith']) search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'timestamp__startswith'])
url: Optional[str] = Field(None, q='url') url: Optional[str] = Field(None, q='url')
@ -268,8 +272,8 @@ class SnapshotFilterSchema(FilterSchema):
title: Optional[str] = Field(None, q='title__icontains') title: Optional[str] = Field(None, q='title__icontains')
timestamp: Optional[str] = Field(None, q='timestamp__startswith') timestamp: Optional[str] = Field(None, q='timestamp__startswith')
added__gte: Optional[datetime] = Field(None, q='added__gte') bookmarked_at__gte: Optional[datetime] = Field(None, q='bookmarked_at__gte')
added__lt: Optional[datetime] = Field(None, q='added__lt') bookmarked_at__lt: Optional[datetime] = Field(None, q='bookmarked_at__lt')
@ -285,7 +289,7 @@ def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_arch
@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot") @router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot")
def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
"""Get a specific Snapshot by abid, uuid, or pk.""" """Get a specific Snapshot by abid or id."""
request.with_archiveresults = with_archiveresults request.with_archiveresults = with_archiveresults
snapshot = None snapshot = None
try: try:
@ -311,7 +315,7 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# #
# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema) # @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema): # def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_id) # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
# #
# for attr, value in payload.dict().items(): # for attr, value in payload.dict().items():
# setattr(snapshot, attr, value) # setattr(snapshot, attr, value)
@ -321,7 +325,7 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
# #
# @router.delete("/snapshot/{snapshot_id}") # @router.delete("/snapshot/{snapshot_id}")
# def delete_snapshot(request, snapshot_id: str): # def delete_snapshot(request, snapshot_id: str):
# snapshot = get_object_or_404(Snapshot, uuid=snapshot_id) # snapshot = get_object_or_404(Snapshot, id=snapshot_id)
# snapshot.delete() # snapshot.delete()
# return {"success": True} # return {"success": True}
@ -336,8 +340,8 @@ class TagSchema(Schema):
id: UUID id: UUID
abid: str abid: str
modified: datetime modified_at: datetime
created: datetime created_at: datetime
created_by_id: str created_by_id: str
created_by_username: str created_by_username: str

View file

@ -127,10 +127,10 @@ class CustomUserAdmin(UserAdmin):
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}', '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> {}',
snap.pk, snap.pk,
snap.abid, snap.abid,
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64], snap.url[:64],
) )
for snap in obj.snapshot_set.order_by('-modified')[:10] for snap in obj.snapshot_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>') ) + f'<br/><a href="/admin/core/snapshot/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
@admin.display(description='Archive Result Logs') @admin.display(description='Archive Result Logs')
@ -141,11 +141,11 @@ class CustomUserAdmin(UserAdmin):
'<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}', '<code><a href="/admin/core/archiveresult/{}/change"><b>[{}]</b></a></code> <b>📅 {}</b> <b>📄 {}</b> {}',
result.pk, result.pk,
result.abid, result.abid,
result.snapshot.updated.strftime('%Y-%m-%d %H:%M') if result.snapshot.updated else 'pending...', result.snapshot.downloaded_at.strftime('%Y-%m-%d %H:%M') if result.snapshot.downloaded_at else 'pending...',
result.extractor, result.extractor,
result.snapshot.url[:64], result.snapshot.url[:64],
) )
for result in obj.archiveresult_set.order_by('-modified')[:10] for result in obj.archiveresult_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>') ) + f'<br/><a href="/admin/core/archiveresult/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
@admin.display(description='Tags') @admin.display(description='Tags')
@ -157,7 +157,7 @@ class CustomUserAdmin(UserAdmin):
tag.pk, tag.pk,
tag.name, tag.name,
) )
for tag in obj.tag_set.order_by('-modified')[:10] for tag in obj.tag_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>') ) + f'<br/><a href="/admin/core/tag/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
@admin.display(description='API Tokens') @admin.display(description='API Tokens')
@ -171,7 +171,7 @@ class CustomUserAdmin(UserAdmin):
apitoken.token_redacted[:64], apitoken.token_redacted[:64],
apitoken.expires, apitoken.expires,
) )
for apitoken in obj.apitoken_set.order_by('-modified')[:10] for apitoken in obj.apitoken_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>') ) + f'<br/><a href="/admin/api/apitoken/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
@admin.display(description='API Outbound Webhooks') @admin.display(description='API Outbound Webhooks')
@ -185,7 +185,7 @@ class CustomUserAdmin(UserAdmin):
outboundwebhook.referenced_model, outboundwebhook.referenced_model,
outboundwebhook.endpoint, outboundwebhook.endpoint,
) )
for outboundwebhook in obj.outboundwebhook_set.order_by('-modified')[:10] for outboundwebhook in obj.outboundwebhook_set.order_by('-modified_at')[:10]
) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>') ) + f'<br/><a href="/admin/api/outboundwebhook/?created_by__id__exact={obj.pk}">{total_count} total records...<a>')
@ -351,13 +351,13 @@ class SnapshotActionForm(ActionForm):
@admin.register(Snapshot, site=archivebox_admin) @admin.register(Snapshot, site=archivebox_admin)
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('created', 'title_str', 'files', 'size', 'url_str') list_display = ('created_at', 'title_str', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created') sort_fields = ('title_str', 'url_str', 'created_at')
readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'created', 'created', 'updated', 'modified', 'API', 'link_dir') readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name') search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
list_filter = ('created', 'updated', 'archiveresult__status', 'created_by', 'tags__name') list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'created_by', 'title',*readonly_fields) fields = ('url', 'title', 'created_by', 'bookmarked_at', *readonly_fields)
ordering = ['-created'] ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots'] actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline] inlines = [TagInline, ArchiveResultInline]
list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000) list_per_page = min(max(5, CONFIG.SNAPSHOTS_PER_PAGE), 5000)
@ -377,30 +377,6 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}') self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}')
return super().changelist_view(request, GLOBAL_CONTEXT) return super().changelist_view(request, GLOBAL_CONTEXT)
def change_view(self, request, object_id, form_url="", extra_context=None):
self.request = request
snapshot = None
try:
snapshot = snapshot or Snapshot.objects.get(id=object_id)
except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError):
pass
try:
snapshot = snapshot or Snapshot.objects.get(abid=Snapshot.abid_prefix + object_id.split('_', 1)[-1])
except (Snapshot.DoesNotExist, ValidationError):
pass
if snapshot:
object_id = str(snapshot.id)
return super().change_view(
request,
object_id,
form_url,
extra_context=extra_context,
)
def get_urls(self): def get_urls(self):
urls = super().get_urls() urls = super().get_urls()
@ -416,8 +392,20 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
# self.request = request # self.request = request
# return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult')) # return super().get_queryset(request).prefetch_related('archiveresult_set').distinct() # .annotate(archiveresult_count=Count('archiveresult'))
def tag_list(self, obj): @admin.action(
return ', '.join(tag.name for tag in obj.tags.all()) description="Imported Timestamp"
)
def imported_timestamp(self, obj):
    """Render the snapshot's bookmarked date followed by its raw timestamp string.

    Listed in SnapshotAdmin.readonly_fields, so the returned markup is shown
    as a read-only value on the Snapshot admin page.
    """
    # Read the renamed bookmarked_at field, in line with the other *_at
    # datetime renames applied throughout this admin module.
    # NOTE(review): relies on self.request having been set by an earlier view
    # method on this admin instance -- confirm it is always populated here.
    context = RequestContext(self.request, {
        'bookmarked_date': obj.bookmarked_at,
        'timestamp': obj.timestamp,
    })
    # Both values pass through the template engine before mark_safe() is applied.
    html = Template("""{{bookmarked_date}} (<code>{{timestamp}}</code>)""")
    return mark_safe(html.render(context))
# pretty_time = obj.bookmarked.strftime('%Y-%m-%d %H:%M:%S')
# return f'{pretty_time} ({obj.timestamp})'
# TODO: figure out a different way to do this, you can't nest forms so this doesn't work # TODO: figure out a different way to do this, you can't nest forms so this doesn't work
# def action(self, obj): # def action(self, obj):
@ -647,14 +635,14 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
@admin.register(Tag, site=archivebox_admin) @admin.register(Tag, site=archivebox_admin)
class TagAdmin(ABIDModelAdmin): class TagAdmin(ABIDModelAdmin):
list_display = ('created', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots') list_display = ('created_at', 'created_by', 'abid', 'name', 'num_snapshots', 'snapshots')
list_filter = ('created', 'created_by') list_filter = ('created_at', 'created_by')
sort_fields = ('name', 'slug', 'abid', 'created_by', 'created') sort_fields = ('name', 'slug', 'abid', 'created_by', 'created_at')
readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'snapshots') readonly_fields = ('slug', 'abid', 'created_at', 'modified_at', 'abid_info', 'snapshots')
search_fields = ('abid', 'name', 'slug') search_fields = ('abid', 'name', 'slug')
fields = ('name', 'created_by', *readonly_fields) fields = ('name', 'created_by', *readonly_fields)
actions = ['delete_selected'] actions = ['delete_selected']
ordering = ['-created'] ordering = ['-created_at']
paginator = AccelleratedPaginator paginator = AccelleratedPaginator
@ -672,10 +660,10 @@ class TagAdmin(ABIDModelAdmin):
format_html( format_html(
'<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}', '<code><a href="/admin/core/snapshot/{}/change"><b>[{}]</b></a></code> {}',
snap.pk, snap.pk,
snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', snap.downloaded_at.strftime('%Y-%m-%d %H:%M') if snap.downloaded_at else 'pending...',
snap.url[:64], snap.url[:64],
) )
for snap in tag.snapshot_set.order_by('-updated')[:10] for snap in tag.snapshot_set.order_by('-downloaded_at')[:10]
) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...<a>')) ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={tag.id}">{total_count} total snapshots...<a>'))
@ -683,7 +671,7 @@ class TagAdmin(ABIDModelAdmin):
class ArchiveResultAdmin(ABIDModelAdmin): class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str') list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status') sort_fields = ('start_ts', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created', 'modified', 'API', 'output_summary') readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields) fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
autocomplete_fields = ['snapshot'] autocomplete_fields = ['snapshot']
@ -706,7 +694,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
'<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>', '<a href="/archive/{}/index.html"><b><code>[{}]</code></b> &nbsp; {} &nbsp; {}</a><br/>',
result.snapshot.timestamp, result.snapshot.timestamp,
result.snapshot.abid, result.snapshot.abid,
result.snapshot.added.strftime('%Y-%m-%d %H:%M'), result.snapshot.bookmarked_at.strftime('%Y-%m-%d %H:%M'),
result.snapshot.url[:128], result.snapshot.url[:128],
) )
@ -765,18 +753,18 @@ class ArchiveResultAdmin(ABIDModelAdmin):
@admin.register(APIToken, site=archivebox_admin) @admin.register(APIToken, site=archivebox_admin)
class APITokenAdmin(ABIDModelAdmin): class APITokenAdmin(ABIDModelAdmin):
list_display = ('created', 'abid', 'created_by', 'token_redacted', 'expires') list_display = ('created_at', 'abid', 'created_by', 'token_redacted', 'expires')
sort_fields = ('abid', 'created', 'created_by', 'expires') sort_fields = ('abid', 'created_at', 'created_by', 'expires')
readonly_fields = ('created', 'modified', 'API') readonly_fields = ('created_at', 'modified_at', 'abid_info')
search_fields = ('id', 'abid', 'created_by__username', 'token') search_fields = ('id', 'abid', 'created_by__username', 'token')
fields = ('created_by', 'token', 'expires', *readonly_fields) fields = ('created_by', 'token', 'expires', *readonly_fields)
list_filter = ('created_by',) list_filter = ('created_by',)
ordering = ['-created'] ordering = ['-created_at']
list_per_page = 100 list_per_page = 100
@admin.register(get_webhook_model(), site=archivebox_admin) @admin.register(get_webhook_model(), site=archivebox_admin)
class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin): class CustomWebhookAdmin(WebhookAdmin, ABIDModelAdmin):
list_display = ('created', 'created_by', 'abid', *WebhookAdmin.list_display) list_display = ('created_at', 'created_by', 'abid', *WebhookAdmin.list_display)
sort_fields = ('created', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error') sort_fields = ('created_at', 'created_by', 'abid', 'referenced_model', 'endpoint', 'last_success', 'last_error')
readonly_fields = ('created', 'modified', 'API', *WebhookAdmin.readonly_fields) readonly_fields = ('created_at', 'modified_at', 'abid_info', *WebhookAdmin.readonly_fields)

View file

@ -1,9 +1,13 @@
# Generated by Django 5.0.6 on 2024-08-18 05:20 # Generated by Django 5.0.6 on 2024-08-18 05:20
import core.models import core.models
import random
from django.db import migrations, models from django.db import migrations, models
def rand_int_id():
    """Return a random non-negative integer drawn from 32 random bits.

    Used as the default for the ArchiveResult ``id`` column altered by this
    migration; defined locally instead of being looked up on ``core.models``.
    """
    id_bits = 32
    return random.getrandbits(id_bits)
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
@ -14,6 +18,6 @@ class Migration(migrations.Migration):
migrations.AlterField( migrations.AlterField(
model_name='archiveresult', model_name='archiveresult',
name='id', name='id',
field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='ID'), field=models.BigIntegerField(default=rand_int_id, primary_key=True, serialize=False, verbose_name='ID'),
), ),
] ]

View file

@ -2,11 +2,16 @@
import core.models import core.models
import uuid import uuid
import random
from django.db import migrations, models from django.db import migrations, models
from abid_utils.abid import ABID from abid_utils.abid import ABID
def rand_int_id():
    """Return a random non-negative integer drawn from 32 random bits.

    Used as the default for the ArchiveResult ``old_id`` column altered by
    this migration; defined locally instead of being looked up on ``core.models``.
    """
    id_bits = 32
    return random.getrandbits(id_bits)
def update_archiveresult_ids(apps, schema_editor): def update_archiveresult_ids(apps, schema_editor):
ArchiveResult = apps.get_model("core", "ArchiveResult") ArchiveResult = apps.get_model("core", "ArchiveResult")
num_total = ArchiveResult.objects.all().count() num_total = ArchiveResult.objects.all().count()
@ -30,7 +35,7 @@ class Migration(migrations.Migration):
migrations.AlterField( migrations.AlterField(
model_name='archiveresult', model_name='archiveresult',
name='old_id', name='old_id',
field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, verbose_name='ID'), field=models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='ID'),
), ),
migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop), migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop),
migrations.AlterField( migrations.AlterField(

View file

@ -2,9 +2,13 @@
import core.models import core.models
import uuid import uuid
import random
from django.db import migrations, models from django.db import migrations, models
def rand_int_id():
    """Return a random non-negative integer drawn from 32 random bits.

    Used as the default for the ArchiveResult ``old_id`` column altered by
    this migration; defined locally instead of being looked up on ``core.models``.
    """
    id_bits = 32
    return random.getrandbits(id_bits)
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
@ -20,6 +24,6 @@ class Migration(migrations.Migration):
migrations.AlterField( migrations.AlterField(
model_name='archiveresult', model_name='archiveresult',
name='old_id', name='old_id',
field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, verbose_name='Old ID'), field=models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID'),
), ),
] ]

View file

@ -1,9 +1,12 @@
# Generated by Django 5.0.6 on 2024-08-20 03:30 # Generated by Django 5.0.6 on 2024-08-20 03:30
import core.models import random
from django.db import migrations, models from django.db import migrations, models
def rand_int_id():
    """Return a random non-negative integer drawn from 32 random bits.

    Used as the default for the Tag ``old_id`` column altered by this
    migration; defined locally instead of being looked up on ``core.models``.
    """
    id_bits = 32
    return random.getrandbits(id_bits)
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
@ -14,6 +17,6 @@ class Migration(migrations.Migration):
migrations.AlterField( migrations.AlterField(
model_name='tag', model_name='tag',
name='old_id', name='old_id',
field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='Old ID'), field=models.BigIntegerField(default=rand_int_id, primary_key=True, serialize=False, verbose_name='Old ID'),
), ),
] ]

View file

@ -3,8 +3,11 @@
import core.models import core.models
import django.db.models.deletion import django.db.models.deletion
import uuid import uuid
import random
from django.db import migrations, models from django.db import migrations, models
def rand_int_id():
    """Return a random non-negative integer drawn from 32 random bits.

    Used as the default for the Tag ``old_id`` column altered by this
    migration; defined locally instead of being looked up on ``core.models``.
    """
    id_bits = 32
    return random.getrandbits(id_bits)
class Migration(migrations.Migration): class Migration(migrations.Migration):
@ -26,6 +29,6 @@ class Migration(migrations.Migration):
migrations.AlterField( migrations.AlterField(
model_name='tag', model_name='tag',
name='old_id', name='old_id',
field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, unique=True, verbose_name='Old ID'), field=models.BigIntegerField(default=rand_int_id, serialize=False, unique=True, verbose_name='Old ID'),
), ),
] ]

View file

@ -5,10 +5,7 @@ from typing import Optional, List, Dict, Iterable
from django_stubs_ext.db.models import TypedModelMeta from django_stubs_ext.db.models import TypedModelMeta
import json import json
import random
import uuid
from uuid import uuid4
from pathlib import Path from pathlib import Path
from django.db import models from django.db import models
@ -18,9 +15,10 @@ from django.utils.text import slugify
from django.core.cache import cache from django.core.cache import cache
from django.urls import reverse, reverse_lazy from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin
from django.conf import settings from django.conf import settings
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, get_or_create_system_user_pk from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from ..system import get_dir_size from ..system import get_dir_size
from ..util import parse_date, base_url from ..util import parse_date, base_url
@ -29,13 +27,10 @@ from ..index.html import snapshot_icons
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
def rand_int_id():
return random.getrandbits(32)
# class BaseModel(models.Model): # class BaseModel(models.Model):
# # TODO: migrate all models to a shared base class with all our standard fields and helpers: # # TODO: migrate all models to a shared base class with all our standard fields and helpers:
# # ulid/created/modified/owner/is_deleted/as_json/from_json/etc. # # ulid/created_at/modified_at/created_by/is_deleted/as_json/from_json/etc.
# # # #
# # id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # # id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID')
# # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True) # # ulid = models.CharField(max_length=26, null=True, blank=True, db_index=True, unique=True)
@ -51,17 +46,18 @@ class Tag(ABIDModel):
Based on django-taggit model + ABID base. Based on django-taggit model + ABID base.
""" """
abid_prefix = 'tag_' abid_prefix = 'tag_'
abid_ts_src = 'self.created' abid_ts_src = 'self.created_at'
abid_uri_src = 'self.slug' abid_uri_src = 'self.slug'
abid_subtype_src = '"03"' abid_subtype_src = '"03"'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
abid_drift_allowed = True
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='tag_set') created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='tag_set')
created = AutoDateTimeField(default=None, null=False, db_index=True) created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
name = models.CharField(unique=True, blank=False, max_length=100) name = models.CharField(unique=True, blank=False, max_length=100)
slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False) slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False)
@ -131,33 +127,41 @@ class SnapshotManager(models.Manager):
class Snapshot(ABIDModel): class Snapshot(ABIDModel):
abid_prefix = 'snp_' abid_prefix = 'snp_'
abid_ts_src = 'self.created' abid_ts_src = 'self.created_at'
abid_uri_src = 'self.url' abid_uri_src = 'self.url'
abid_subtype_src = '"01"' abid_subtype_src = '"01"'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
abid_drift_allowed = False
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='snapshot_set') created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='snapshot_set')
created = AutoDateTimeField(default=None, null=False, db_index=True) created_at = AutoDateTimeField(default=None, null=False, db_index=True) # loaded from self._init_timestamp
modified = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
# legacy ts fields # legacy ts fields
added = AutoDateTimeField(default=None, null=False, editable=True, db_index=True) bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True)
updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True) downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)
url = models.URLField(unique=True, db_index=True) url = models.URLField(unique=True, db_index=True)
timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False) timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag')) tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
title = models.CharField(max_length=512, null=True, blank=True, db_index=True) title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
keys = ('url', 'timestamp', 'title', 'tags', 'updated') keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at')
archiveresult_set: models.Manager['ArchiveResult'] archiveresult_set: models.Manager['ArchiveResult']
objects = SnapshotManager() objects = SnapshotManager()
def save(self, *args, **kwargs):
    """Save the snapshot, filling in bookmarked_at first when it is empty.

    An empty bookmarked_at is replaced with created_at, or with
    self._init_timestamp when created_at is also empty, before the parent
    class's save() is called with the same arguments.
    """
    needs_bookmark_date = not self.bookmarked_at
    if needs_bookmark_date:
        fallback_ts = self.created_at or self._init_timestamp
        self.bookmarked_at = fallback_ts
    super().save(*args, **kwargs)
def __repr__(self) -> str: def __repr__(self) -> str:
title = (self.title_stripped or '-')[:64] title = (self.title_stripped or '-')[:64]
return f'[{self.timestamp}] {self.url[:64]} ({title})' return f'[{self.timestamp}] {self.url[:64]} ({title})'
@ -185,9 +189,10 @@ class Snapshot(ABIDModel):
from ..index import load_link_details from ..index import load_link_details
return load_link_details(self.as_link()) return load_link_details(self.as_link())
@admin.display(description='Tags')
def tags_str(self, nocache=True) -> str | None: def tags_str(self, nocache=True) -> str | None:
calc_tags_str = lambda: ','.join(sorted(tag.name for tag in self.tags.all())) calc_tags_str = lambda: ','.join(sorted(tag.name for tag in self.tags.all()))
cache_key = f'{self.pk}-{(self.updated or self.added).timestamp()}-tags' cache_key = f'{self.pk}-{(self.downloaded_at or self.bookmarked_at).timestamp()}-tags'
if hasattr(self, '_prefetched_objects_cache') and 'tags' in self._prefetched_objects_cache: if hasattr(self, '_prefetched_objects_cache') and 'tags' in self._prefetched_objects_cache:
# tags are pre-fetched already, use them directly (best because db is always freshest) # tags are pre-fetched already, use them directly (best because db is always freshest)
@ -255,7 +260,7 @@ class Snapshot(ABIDModel):
@cached_property @cached_property
def archive_size(self): def archive_size(self):
cache_key = f'{str(self.pk)[:12]}-{(self.updated or self.added).timestamp()}-size' cache_key = f'{str(self.pk)[:12]}-{(self.downloaded_at or self.bookmarked_at).timestamp()}-size'
def calc_dir_size(): def calc_dir_size():
try: try:
@ -274,7 +279,7 @@ class Snapshot(ABIDModel):
for result in self.archiveresult_set.all() for result in self.archiveresult_set.all()
if result.extractor == 'screenshot' and result.status =='succeeded' and result.output if result.extractor == 'screenshot' and result.status =='succeeded' and result.output
), ),
key=lambda result: result.created, key=lambda result: result.created_at,
) or [None])[-1] ) or [None])[-1]
else: else:
result = self.archiveresult_set.filter( result = self.archiveresult_set.filter(
@ -359,7 +364,7 @@ class Snapshot(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True) -> Path: # def get_storage_dir(self, create=True, symlink=True) -> Path:
# date_str = self.added.strftime('%Y%m%d') # date_str = self.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.url) # domain_str = domain(self.url)
# abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid) # abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'snapshots' / date_str / domain_str / str(self.ulid)
@ -407,10 +412,11 @@ class ArchiveResultManager(models.Manager):
class ArchiveResult(ABIDModel): class ArchiveResult(ABIDModel):
abid_prefix = 'res_' abid_prefix = 'res_'
abid_ts_src = 'self.snapshot.added' abid_ts_src = 'self.snapshot.created_at'
abid_uri_src = 'self.snapshot.url' abid_uri_src = 'self.snapshot.url'
abid_subtype_src = 'self.extractor' abid_subtype_src = 'self.extractor'
abid_rand_src = 'self.id' abid_rand_src = 'self.id'
abid_drift_allowed = True
EXTRACTOR_CHOICES = ( EXTRACTOR_CHOICES = (
('htmltotext', 'htmltotext'), ('htmltotext', 'htmltotext'),
@ -438,8 +444,8 @@ class ArchiveResult(ABIDModel):
abid = ABIDField(prefix=abid_prefix) abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='archiveresult_set') created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='archiveresult_set')
created = AutoDateTimeField(default=None, null=False, db_index=True) created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified = models.DateTimeField(auto_now=True) modified_at = models.DateTimeField(auto_now=True)
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id') snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id')
@ -460,6 +466,7 @@ class ArchiveResult(ABIDModel):
def __str__(self):
    """Return the extractor name as the display string for this result."""
    # richer alternative format, kept disabled as in the original:
    # return f'[{self.abid}] 📅 {self.start_ts.strftime("%Y-%m-%d %H:%M")} 📄 {self.extractor} {self.snapshot.url}'
    extractor_name = self.extractor
    return extractor_name
@cached_property @cached_property
@ -503,7 +510,7 @@ class ArchiveResult(ABIDModel):
# def get_storage_dir(self, create=True, symlink=True): # def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.added.strftime('%Y%m%d') # date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
# domain_str = domain(self.snapshot.url) # domain_str = domain(self.snapshot.url)
# abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid) # abs_storage_dir = Path(settings.CONFIG.ARCHIVE_DIR) / 'results' / date_str / domain_str / self.extractor / str(self.ulid)

View file

@ -211,7 +211,7 @@ class SnapshotView(View):
format_html( format_html(
( (
'<center><br/><br/><br/>' '<center><br/><br/><br/>'
'No Snapshot directories match the given timestamp or UUID: <code>{}</code><br/><br/>' 'No Snapshot directories match the given timestamp/ID/ABID: <code>{}</code><br/><br/>'
'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>' 'You can <a href="/add/" target="_top">add a new Snapshot</a>, or return to the <a href="/" target="_top">Main Index</a>'
'</center>' '</center>'
), ),
@ -225,18 +225,18 @@ class SnapshotView(View):
snapshot_hrefs = mark_safe('<br/>').join( snapshot_hrefs = mark_safe('<br/>').join(
format_html( format_html(
'{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>', '{} <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
snap.added.strftime('%Y-%m-%d %H:%M:%S'), snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
snap.timestamp, snap.timestamp,
snap.timestamp, snap.timestamp,
snap.url, snap.url,
snap.title_stripped[:64] or '', snap.title_stripped[:64] or '',
) )
for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added') for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
) )
return HttpResponse( return HttpResponse(
format_html( format_html(
( (
'Multiple Snapshots match the given timestamp/UUID <code>{}</code><br/><pre>' 'Multiple Snapshots match the given timestamp/ID/ABID <code>{}</code><br/><pre>'
), ),
slug, slug,
) + snapshot_hrefs + format_html( ) + snapshot_hrefs + format_html(
@ -257,12 +257,12 @@ class SnapshotView(View):
( (
'<center><br/><br/><br/>' '<center><br/><br/><br/>'
f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a>: <a href="{snapshot.url}" target="_blank" rel="noreferrer">{snapshot.url}</a><br/>' f'Snapshot <a href="/archive/{snapshot.timestamp}/index.html" target="_top"><b><code>[{snapshot.timestamp}]</code></b></a>: <a href="{snapshot.url}" target="_blank" rel="noreferrer">{snapshot.url}</a><br/>'
f'was queued on {str(snapshot.added).split(".")[0]}, ' f'was queued on {str(snapshot.bookmarked_at).split(".")[0]}, '
f'but no files have been saved yet in:<br/><b><a href="/archive/{snapshot.timestamp}/" target="_top"><code>{snapshot.timestamp}</code></a><code>/' f'but no files have been saved yet in:<br/><b><a href="/archive/{snapshot.timestamp}/" target="_top"><code>{snapshot.timestamp}</code></a><code>/'
'{}' '{}'
f'</code></b><br/><br/>' f'</code></b><br/><br/>'
'It\'s possible {} ' 'It\'s possible {} '
f'during the last capture on {str(snapshot.added).split(".")[0]},<br/>or that the archiving process has not completed yet.<br/>' f'during the last capture on {str(snapshot.bookmarked_at).split(".")[0]},<br/>or that the archiving process has not completed yet.<br/>'
f'<pre><code># run this cmd to finish/retry archiving this Snapshot</code><br/>' f'<pre><code># run this cmd to finish/retry archiving this Snapshot</code><br/>'
f'<code style="user-select: all; color: #333">archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>' f'<code style="user-select: all; color: #333">archivebox update -t timestamp {snapshot.timestamp}</code></pre><br/><br/>'
'<div class="text-align: left; width: 100%; max-width: 400px">' '<div class="text-align: left; width: 100%; max-width: 400px">'
@ -270,7 +270,7 @@ class SnapshotView(View):
f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>' f'- list all the <a href="/archive/{snapshot.timestamp}/" target="_top">Snapshot files <code>.*</code></a><br/>'
f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>' f'- view the <a href="/archive/{snapshot.timestamp}/index.html" target="_top">Snapshot <code>./index.html</code></a><br/>'
f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>' f'- go to the <a href="/admin/core/snapshot/{snapshot.pk}/change/" target="_top">Snapshot admin</a> to edit<br/>'
f'- go to the <a href="/admin/core/snapshot/?uuid__startswith={snapshot.uuid}" target="_top">Snapshot actions</a> to re-archive<br/>' f'- go to the <a href="/admin/core/snapshot/?id__exact={snapshot.id}" target="_top">Snapshot actions</a> to re-archive<br/>'
'- or return to <a href="/" target="_top">the main index...</a></div>' '- or return to <a href="/" target="_top">the main index...</a></div>'
'</center>' '</center>'
), ),
@ -343,7 +343,7 @@ class SnapshotView(View):
snapshot_hrefs = mark_safe('<br/>').join( snapshot_hrefs = mark_safe('<br/>').join(
format_html( format_html(
'{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>', '{} <code style="font-size: 0.8em">{}</code> <a href="/archive/{}/index.html"><b><code>{}</code></b></a> {} <b>{}</b>',
snap.added.strftime('%Y-%m-%d %H:%M:%S'), snap.bookmarked_at.strftime('%Y-%m-%d %H:%M:%S'),
snap.abid, snap.abid,
snap.timestamp, snap.timestamp,
snap.timestamp, snap.timestamp,
@ -353,7 +353,7 @@ class SnapshotView(View):
for snap in Snapshot.objects.filter( for snap in Snapshot.objects.filter(
Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path)) Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path))
| Q(abid__icontains=path) | Q(id__icontains=path) | Q(abid__icontains=path) | Q(id__icontains=path)
).only('url', 'timestamp', 'title', 'added').order_by('-added') ).only('url', 'timestamp', 'title', 'bookmarked_at').order_by('-bookmarked_at')
) )
return HttpResponse( return HttpResponse(
format_html( format_html(
@ -376,7 +376,7 @@ class PublicIndexView(ListView):
template_name = 'public_index.html' template_name = 'public_index.html'
model = Snapshot model = Snapshot
paginate_by = SNAPSHOTS_PER_PAGE paginate_by = SNAPSHOTS_PER_PAGE
ordering = ['-added'] ordering = ['-bookmarked_at', '-created_at']
def get_context_data(self, **kwargs): def get_context_data(self, **kwargs):
return { return {

View file

@ -134,7 +134,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
link = load_link_details(link, out_dir=out_dir) link = load_link_details(link, out_dir=out_dir)
write_link_details(link, out_dir=out_dir, skip_sql_index=False) write_link_details(link, out_dir=out_dir, skip_sql_index=False)
log_link_archiving_started(link, str(out_dir), is_new) log_link_archiving_started(link, str(out_dir), is_new)
link = link.overwrite(updated=datetime.now(timezone.utc)) link = link.overwrite(downloaded_at=datetime.now(timezone.utc))
stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
start_ts = datetime.now(timezone.utc) start_ts = datetime.now(timezone.utc)
@ -157,11 +157,11 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status, created_by_id=snapshot.created_by_id) output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status, created_by_id=snapshot.created_by_id)
# bump the updated time on the main Snapshot here, this is critical # bump the downloaded_at time on the main Snapshot here, this is critical
# to be able to cache summaries of the ArchiveResults for a given # to be able to cache summaries of the ArchiveResults for a given
# snapshot without having to load all the results from the DB each time. # snapshot without having to load all the results from the DB each time.
# (we use {Snapshot.pk}-{Snapshot.updated} as the cache key and assume # (we use {Snapshot.pk}-{Snapshot.downloaded_at} as the cache key and assume
# ArchiveResults are unchanged as long as the updated timestamp is unchanged) # ArchiveResults are unchanged as long as the downloaded_at timestamp is unchanged)
snapshot.save() snapshot.save()
else: else:
# print('{black} X {}{reset}'.format(method_name, **ANSI)) # print('{black} X {}{reset}'.format(method_name, **ANSI))

View file

@ -245,7 +245,7 @@ def wget_output_path(link: Link, nocache: bool=False) -> Optional[str]:
# https://example.com/abc/test/?v=zzVa_tX1OiI # https://example.com/abc/test/?v=zzVa_tX1OiI
# > example.com/abc/test/index.html@v=zzVa_tX1OiI.html # > example.com/abc/test/index.html@v=zzVa_tX1OiI.html
cache_key = f'{link.url_hash}:{link.timestamp}-{link.updated and link.updated.timestamp()}-wget-output-path' cache_key = f'{link.url_hash}:{link.timestamp}-{link.downloaded_at and link.downloaded_at.timestamp()}-wget-output-path'
if not nocache: if not nocache:
from django.core.cache import cache from django.core.cache import cache

View file

@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
def snapshot_icons(snapshot) -> str: def snapshot_icons(snapshot) -> str:
cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}' cache_key = f'result_icons:{snapshot.pk}:{(snapshot.downloaded_at or snapshot.modified_at or snapshot.created_at or snapshot.bookmarked_at).timestamp()}'
def calc_snapshot_icons(): def calc_snapshot_icons():
from core.models import ArchiveResult from core.models import ArchiveResult

View file

@ -132,7 +132,7 @@ class Link:
tags: Optional[str] tags: Optional[str]
sources: List[str] sources: List[str]
history: Dict[str, List[ArchiveResult]] = field(default_factory=lambda: {}) history: Dict[str, List[ArchiveResult]] = field(default_factory=lambda: {})
updated: Optional[datetime] = None downloaded_at: Optional[datetime] = None
schema: str = 'Link' schema: str = 'Link'
def __str__(self) -> str: def __str__(self) -> str:
@ -164,7 +164,7 @@ class Link:
assert isinstance(self.timestamp, str) and self.timestamp assert isinstance(self.timestamp, str) and self.timestamp
assert self.timestamp.replace('.', '').isdigit() assert self.timestamp.replace('.', '').isdigit()
assert isinstance(self.url, str) and '://' in self.url assert isinstance(self.url, str) and '://' in self.url
assert self.updated is None or isinstance(self.updated, datetime) assert self.downloaded_at is None or isinstance(self.downloaded_at, datetime)
assert self.title is None or (isinstance(self.title, str) and self.title) assert self.title is None or (isinstance(self.title, str) and self.title)
assert self.tags is None or isinstance(self.tags, str) assert self.tags is None or isinstance(self.tags, str)
assert isinstance(self.sources, list) assert isinstance(self.sources, list)
@ -184,7 +184,7 @@ class Link:
'url': self.url, 'url': self.url,
'title': self.title or None, 'title': self.title or None,
'timestamp': self.timestamp, 'timestamp': self.timestamp,
'updated': self.updated or None, 'downloaded_at': self.downloaded_at or None,
'tags': self.tags or None, 'tags': self.tags or None,
'sources': self.sources or [], 'sources': self.sources or [],
'history': self.history or {}, 'history': self.history or {},
@ -210,7 +210,7 @@ class Link:
'icons': None, # only used to render static index in index/html.py, remove if no longer needed there 'icons': None, # only used to render static index in index/html.py, remove if no longer needed there
'bookmarked_date': self.bookmarked_date, 'bookmarked_date': self.bookmarked_date,
'updated_date': self.updated_date, 'downloaded_datestr': self.downloaded_datestr,
'oldest_archive_date': self.oldest_archive_date, 'oldest_archive_date': self.oldest_archive_date,
'newest_archive_date': self.newest_archive_date, 'newest_archive_date': self.newest_archive_date,
@ -236,7 +236,7 @@ class Link:
for key, val in json_info.items() for key, val in json_info.items()
if key in cls.field_names() if key in cls.field_names()
} }
info['updated'] = parse_date(info.get('updated')) info['downloaded_at'] = parse_date(info.get('updated') or info.get('downloaded_at'))
info['sources'] = info.get('sources') or [] info['sources'] = info.get('sources') or []
json_history = info.get('history') or {} json_history = info.get('history') or {}
@ -347,8 +347,8 @@ class Link:
@property
def downloaded_datestr(self) -> Optional[str]:
    """Return downloaded_at formatted by ts_to_date_str, or None when it is unset."""
    if not self.downloaded_at:
        return None
    return ts_to_date_str(self.downloaded_at)
@property @property
def archive_dates(self) -> List[datetime]: def archive_dates(self) -> List[datetime]:

View file

@ -540,9 +540,9 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
last_login = User.objects.order_by('last_login').last() last_login = User.objects.order_by('last_login').last()
if last_login: if last_login:
print(f' Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}') print(f' Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}')
last_updated = Snapshot.objects.order_by('updated').last() last_downloaded = Snapshot.objects.order_by('downloaded_at').last()
if last_updated: if last_downloaded:
print(f' Last changes: {str(last_updated.updated)[:16]}') print(f' Last changes: {str(last_downloaded.downloaded_at)[:16]}')
if not users: if not users:
print() print()
@ -550,13 +550,13 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
print(' archivebox manage createsuperuser') print(' archivebox manage createsuperuser')
print() print()
for snapshot in links.order_by('-updated')[:10]: for snapshot in links.order_by('-downloaded_at')[:10]:
if not snapshot.updated: if not snapshot.downloaded_at:
continue continue
print( print(
ANSI['black'], ANSI['black'],
( (
f' > {str(snapshot.updated)[:16]} ' f' > {str(snapshot.downloaded_at)[:16]} '
f'[{snapshot.num_outputs} {("X", "")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] ' f'[{snapshot.num_outputs} {("X", "")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] '
f'"{snapshot.title}": {snapshot.url}' f'"{snapshot.title}": {snapshot.url}'
)[:TERM_WIDTH()], )[:TERM_WIDTH()],

View file

@ -1,50 +1 @@
__package__ = 'archivebox.plugantic' __package__ = 'archivebox.plugantic'
# import uuid
# from django.db import models
# from typing_extensions import Self
# from django_pydantic_field import SchemaField
# from django.conf import settings
# from abid_utils.models import ABIDModel, ABIDField
# # from .plugins import Plugin as PluginSchema, CORE_PLUGIN
# from .binproviders import BinProvider
# from .binaries import Binary
# from .configs import WgetOptionsConfig
# from .extractors import Extractor
# from .replayers import Replayer
# PLUGINS_ROOT = settings.CONFIG['OUTPUT_DIR'] / 'plugins'
# PLUGINS_ROOT.mkdir(exist_ok=True)
# class CustomPlugin(ABIDModel):
# abid_prefix = 'plg_'
# abid_ts_src = 'self.added'
# abid_uri_src = 'self.name'
# abid_subtype_src = '"09"'
# abid_rand_src = 'self.id'
# id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk
# uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True)
# abid = ABIDField(prefix=abid_prefix)
# name = models.CharField(max_length=64, blank=False, unique=True)
# path = models.FilePathField(path=str(PLUGINS_ROOT), match='*', recursive=True, allow_folders=True, allow_files=False)
# # replayers: list[Replayer] = SchemaField()
# # binaries: list[Replayer] = SchemaField()
# # extractors: list[Replayer] = SchemaField()
# # @classmethod
# # def from_loaded_plugin(cls, plugin: PluginSchema) -> Self:
# # new_obj = cls(
# # schema=plugin,
# # )
# # return new_obj

View file

@ -148,7 +148,7 @@
<div class="card"> <div class="card">
<div class="card-info"> <div class="card-info">
<a href="{% url 'admin:core_snapshot_change' obj.pk %}"> <a href="{% url 'admin:core_snapshot_change' obj.pk %}">
<span class="timestamp">{{obj.added}}</span> <span class="timestamp">{{obj.bookmarked_at}}</span>
</a> </a>
<label> <label>
<span class="num_outputs">📄 &nbsp; {{obj.num_outputs}}</span> &nbsp; &nbsp; <span class="num_outputs">📄 &nbsp; {{obj.num_outputs}}</span> &nbsp; &nbsp;

View file

@ -1,8 +1,8 @@
{% load static tz core_tags %} {% load static tz core_tags %}
<tr> <tr>
<td title="Bookmarked: {{link.bookmarked_date|localtime}} ({{link.timestamp}})" data-sort="{{link.added.timestamp}}"> <td title="Bookmarked: {{link.bookmarked_date|localtime}} ({{link.timestamp}})" data-sort="{{link.bookmarked_at.timestamp}}">
{{ link.added|localtime }} {{ link.bookmarked_at|localtime }}
</td> </td>
<td class="title-col" style="opacity: {% if link.title %}1{% else %}0.3{% endif %}" title="{{link.title|default:'Not yet archived...'}}"> <td class="title-col" style="opacity: {% if link.title %}1{% else %}0.3{% endif %}" title="{{link.title|default:'Not yet archived...'}}">
{% if link.is_archived %} {% if link.is_archived %}

View file

@ -317,7 +317,7 @@
</div> </div>
<div class="info-chunk" title="Date last checked (UTC Timezone)">
<h5>Last Checked</h5> <h5>Last Checked</h5>
{{updated_date}} {{downloaded_datestr}}
</div> </div>
</div> </div>
<div class="col-lg-4"> <div class="col-lg-4">

View file

@ -379,8 +379,8 @@
</small> </small>
</div> </div>
<div class="col-lg-2" style="padding-top: 4px"> <div class="col-lg-2" style="padding-top: 4px">
<a href="/archive/{{url}}" title="Date Added: {{bookmarked_date}} | First Archived: {{oldest_archive_date|default:updated_date}} | Last Checked: {{updated_date}} (UTC)"> <a href="/archive/{{url}}" title="Date Added: {{bookmarked_date}} | First Archived: {{oldest_archive_date|default:downloaded_datestr}} | Last Checked: {{downloaded_datestr}} (UTC)">
{{oldest_archive_date|default:updated_date|default:bookmarked_date}} {{oldest_archive_date|default:downloaded_datestr|default:bookmarked_date}}
</a> </a>
<br/> <br/>
<div class="external-links"> <div class="external-links">

View file

@ -200,7 +200,7 @@ def parse_date(date: Any) -> Optional[datetime]:
date = str(date) date = str(date)
if isinstance(date, str): if isinstance(date, str):
return dateparser(date, settings={'TIMEZONE': 'UTC'}).replace(tzinfo=timezone.utc) return dateparser(date, settings={'TIMEZONE': 'UTC'}).astimezone(timezone.utc)
raise ValueError('Tried to parse invalid date! {}'.format(date)) raise ValueError('Tried to parse invalid date! {}'.format(date))