From d060eaa499fc69e35e64685177b66a9a7cf9e67e Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 4 Sep 2024 00:08:14 -0700 Subject: [PATCH] abid gradual improvements, some regrets --- archivebox/abid_utils/abid.py | 42 ++++++++++++++++++++++------------ archivebox/abid_utils/admin.py | 25 ++++++++++---------- archivebox/core/admin.py | 3 ++- archivebox/index/html.py | 12 +++++++--- 4 files changed, 51 insertions(+), 31 deletions(-) diff --git a/archivebox/abid_utils/abid.py b/archivebox/abid_utils/abid.py index a0e71937..e294e6a5 100644 --- a/archivebox/abid_utils/abid.py +++ b/archivebox/abid_utils/abid.py @@ -1,4 +1,6 @@ -from typing import NamedTuple, Any, Union, Optional +__package__ = 'archivebox.abid_utils' + +from typing import NamedTuple, Any, Union, Optional, Dict import ulid import uuid6 @@ -9,6 +11,7 @@ from uuid import UUID from typeid import TypeID # type: ignore[import-untyped] from datetime import datetime +from ..util import enforce_types ABID_PREFIX_LEN = 4 @@ -108,6 +111,7 @@ class ABID(NamedTuple): #################################################### +@enforce_types def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str: """ 'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25' @@ -130,17 +134,19 @@ def uri_hash(uri: Union[str, bytes], salt: str=DEFAULT_ABID_URI_SALT) -> str: return hashlib.sha256(uri_bytes).hexdigest().upper() -def abid_part_from_prefix(prefix: Optional[str]) -> str: +@enforce_types +def abid_part_from_prefix(prefix: str) -> str: """ 'snp_' """ - if prefix is None: - return 'obj_' + # if prefix is None: + # return 'obj_' prefix = prefix.strip('_').lower() assert len(prefix) == 3 return prefix + '_' +@enforce_types def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str: """ 'E4A5CCD9' # takes first 8 characters of sha256(url) @@ -148,12 +154,14 @@ def abid_part_from_uri(uri: str, salt: str=DEFAULT_ABID_URI_SALT) -> str: uri = str(uri) return uri_hash(uri, salt=salt)[:ABID_URI_LEN] -def abid_part_from_ts(ts: Optional[datetime]) -> str: +@enforce_types +def abid_part_from_ts(ts: datetime) -> str: """ '01HX9FPYTR' # produces 10 character Timestamp section of ulid based on added date """ - return str(ulid.from_timestamp(ts) if ts else ulid.new())[:ABID_TS_LEN] + return str(ulid.from_timestamp(ts))[:ABID_TS_LEN] +@enforce_types def abid_part_from_subtype(subtype: str) -> str: """ Snapshots have 01 type, other objects have other subtypes like wget/media/etc. @@ -165,6 +173,7 @@ def abid_part_from_subtype(subtype: str) -> str: return hashlib.sha256(subtype.encode('utf-8')).hexdigest()[:ABID_SUBTYPE_LEN].upper() +@enforce_types def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str: """ 'ZYEBQE' # takes last 6 characters of randomness from existing legacy uuid db field @@ -186,17 +195,22 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str: return str(rand)[-ABID_RAND_LEN:].upper() -def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID: +@enforce_types +def abid_hashes_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> Dict[str, str]: + return { + 'prefix': abid_part_from_prefix(prefix), + 'ts': abid_part_from_ts(ts), + 'uri': abid_part_from_uri(uri, salt=salt), + 'subtype': abid_part_from_subtype(subtype), + 'rand': abid_part_from_rand(rand), + } + +@enforce_types +def abid_from_values(prefix: str, ts: datetime, uri: str, subtype: str, rand: Union[str, UUID, None, int], salt: str=DEFAULT_ABID_URI_SALT) -> ABID: """ Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src). """ - abid = ABID( - prefix=abid_part_from_prefix(prefix), - ts=abid_part_from_ts(ts), - uri=abid_part_from_uri(uri, salt=salt), - subtype=abid_part_from_subtype(subtype), - rand=abid_part_from_rand(rand), - ) + abid = ABID(**abid_hashes_from_values(prefix, ts, uri, subtype, rand, salt=salt)) assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for ts={ts} uri={uri} subtyp={subtype} rand={rand}' return abid diff --git a/archivebox/abid_utils/admin.py b/archivebox/abid_utils/admin.py index bd97b60d..46adf3f7 100644 --- a/archivebox/abid_utils/admin.py +++ b/archivebox/abid_utils/admin.py @@ -16,21 +16,20 @@ def highlight_diff(display_val, compare_val): display_val = str(display_val) compare_val = str(compare_val) - diff_chars = mark_safe('').join( + return mark_safe(''.join( format_html('{}', display_val[i]) if display_val[i] != compare_val[i] else format_html('{}', display_val[i]) for i in range(len(display_val)) - ) - return diff_chars + )) def get_abid_info(self, obj, request=None): try: abid_diff = f' != obj.ABID: {highlight_diff(obj.ABID, obj.abid)} ❌' if str(obj.ABID) != str(obj.abid) else ' == .ABID ✅' - fresh_abid = obj.generate_abid() - fresh_abid_diff = f' !=   .fresh_abid: {highlight_diff(obj.ABID, fresh_abid)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅' - fresh_uuid_diff = f' !=   .fresh_uuid: {highlight_diff(obj.ABID.uuid, fresh_abid.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅' + fresh_abid = obj.ABID_FRESH + fresh_abid_diff = f' !=   .fresh_abid: {highlight_diff(fresh_abid, obj.ABID)} ❌' if str(fresh_abid) != str(obj.ABID) else '✅' + fresh_uuid_diff = f' !=   .fresh_uuid: {highlight_diff(fresh_abid.uuid, obj.ABID.uuid)} ❌' if str(fresh_abid.uuid) != str(obj.ABID.uuid) else '✅' id_fresh_abid_diff = f' != .fresh_abid ❌' if str(fresh_abid.uuid) != str(obj.id) else ' == .fresh_abid ✅' id_abid_diff = f' != .abid.uuid: {highlight_diff(obj.ABID.uuid, obj.id)} ❌' if str(obj.id) != str(obj.ABID.uuid) else ' == .abid ✅' @@ -74,16 +73,16 @@ def get_abid_info(self, obj, request=None): ''', obj.api_url + (f'?api_key={get_or_create_api_token(request.user)}' if request and request.user else ''), obj.api_url, obj.api_docs_url, - str(obj.abid), mark_safe(fresh_abid_diff), - str(obj.ABID.uuid), mark_safe(fresh_uuid_diff), + highlight_diff(obj.abid, fresh_abid), mark_safe(fresh_abid_diff), + highlight_diff(obj.ABID.uuid, fresh_abid.uuid), mark_safe(fresh_uuid_diff), str(obj.id), mark_safe(id_pk_diff + id_abid_diff + id_fresh_abid_diff), # str(fresh_abid.uuid), mark_safe(fresh_uuid_diff), # str(fresh_abid), mark_safe(fresh_abid_diff), - obj.ABID.ts, str(obj.ABID.uuid)[0:14], mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(), - obj.ABID.uri, str(obj.ABID.uuid)[14:26], mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']), - obj.ABID.subtype, str(obj.ABID.uuid)[26:28], mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']), - obj.ABID.rand, str(obj.ABID.uuid)[28:36], mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:], - str(getattr(obj, 'old_id', '')), + highlight_diff(obj.ABID.ts, derived_ts), highlight_diff(str(obj.ABID.uuid)[0:14], str(fresh_abid.uuid)[0:14]), mark_safe(ts_diff), obj.abid_ts_src, source_ts_val and source_ts_val.isoformat(), + highlight_diff(obj.ABID.uri, derived_uri), highlight_diff(str(obj.ABID.uuid)[14:26], str(fresh_abid.uuid)[14:26]), mark_safe(uri_diff), obj.abid_uri_src, str(obj.abid_values['uri']), + highlight_diff(obj.ABID.subtype, derived_subtype), highlight_diff(str(obj.ABID.uuid)[26:28], str(fresh_abid.uuid)[26:28]), mark_safe(subtype_diff), obj.abid_subtype_src, str(obj.abid_values['subtype']), + highlight_diff(obj.ABID.rand, derived_rand), highlight_diff(str(obj.ABID.uuid)[28:36], str(fresh_abid.uuid)[28:36]), mark_safe(rand_diff), obj.abid_rand_src, str(obj.abid_values['rand'])[-7:], + highlight_diff(getattr(obj, 'old_id', ''), obj.pk), ) except Exception as e: return str(e) diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 61323a84..832a9348 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -352,7 +352,7 @@ class SnapshotActionForm(ActionForm): @admin.register(Snapshot, site=archivebox_admin) class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): list_display = ('added', 'title_str', 'files', 'size', 'url_str') - sort_fields = ('title_str', 'url_str', 'added', 'files') + sort_fields = ('title_str', 'url_str', 'added') readonly_fields = ('tags_str', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir') search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name') list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags__name') @@ -510,6 +510,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): # ordering='archiveresult_count', ) def files(self, obj): + # return '-' return snapshot_icons(obj) diff --git a/archivebox/index/html.py b/archivebox/index/html.py index 2e5d18bc..504385b2 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -118,7 +118,7 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str: def snapshot_icons(snapshot) -> str: - cache_key = f'{snapshot.pk}-{(snapshot.updated or snapshot.added).timestamp()}-snapshot-icons' + cache_key = f'result_icons:{snapshot.pk}:{(snapshot.modified or snapshot.created or snapshot.added).timestamp()}' def calc_snapshot_icons(): from core.models import ArchiveResult @@ -133,6 +133,7 @@ def snapshot_icons(snapshot) -> str: else: archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False) + # import ipdb; ipdb.set_trace() link = snapshot.as_link() path = link.archive_path canon = link.canonical_outputs() @@ -197,7 +198,12 @@ def snapshot_icons(snapshot) -> str: # print(((end - start).total_seconds()*1000) // 1, 'ms') return result - return cache.get_or_set(cache_key, calc_snapshot_icons) - # return calc_snapshot_icons() + cache_result = cache.get(cache_key) + if cache_result: + return cache_result + + fresh_result = calc_snapshot_icons() + cache.set(cache_key, fresh_result, timeout=60 * 60 * 24) + return fresh_result