Minor snapshot details UI fixes and migration log message improvements

This commit is contained in:
Nick Sweeting 2024-06-03 04:00:18 -07:00
parent 78f0ae469e
commit de489d3c60
No known key found for this signature in database
3 changed files with 18 additions and 13 deletions

View file

@ -47,12 +47,14 @@ def calculate_abid(self):
def copy_snapshot_uuids(apps, schema_editor): def copy_snapshot_uuids(apps, schema_editor):
print(' Copying snapshot.id -> snapshot.uuid...')
Snapshot = apps.get_model("core", "Snapshot") Snapshot = apps.get_model("core", "Snapshot")
for snapshot in Snapshot.objects.all(): for snapshot in Snapshot.objects.all():
snapshot.uuid = snapshot.id snapshot.uuid = snapshot.id
snapshot.save(update_fields=["uuid"]) snapshot.save(update_fields=["uuid"])
def generate_snapshot_abids(apps, schema_editor): def generate_snapshot_abids(apps, schema_editor):
print(' Generating snapshot.abid values...')
Snapshot = apps.get_model("core", "Snapshot") Snapshot = apps.get_model("core", "Snapshot")
for snapshot in Snapshot.objects.all(): for snapshot in Snapshot.objects.all():
snapshot.abid_prefix = 'snp_' snapshot.abid_prefix = 'snp_'
@ -65,6 +67,7 @@ def generate_snapshot_abids(apps, schema_editor):
snapshot.save(update_fields=["abid"]) snapshot.save(update_fields=["abid"])
def generate_archiveresult_abids(apps, schema_editor): def generate_archiveresult_abids(apps, schema_editor):
print(' Generating ArchiveResult.abid values... (may take an hour or longer for large collections...)')
ArchiveResult = apps.get_model("core", "ArchiveResult") ArchiveResult = apps.get_model("core", "ArchiveResult")
Snapshot = apps.get_model("core", "Snapshot") Snapshot = apps.get_model("core", "Snapshot")
for result in ArchiveResult.objects.all(): for result in ArchiveResult.objects.all():

View file

@ -90,7 +90,7 @@ class SnapshotView(View):
archiveresults[result.extractor] = result_info archiveresults[result.extractor] = result_info
existing_files = {result['path'] for result in archiveresults.values()} existing_files = {result['path'] for result in archiveresults.values()}
min_size_threshold = 128 # bytes min_size_threshold = 10_000 # bytes
allowed_extensions = { allowed_extensions = {
'txt', 'txt',
'html', 'html',
@ -108,12 +108,14 @@ class SnapshotView(View):
'md', 'md',
} }
# iterate through all the files in the snapshot dir and add the biggest ones to the result list # iterate through all the files in the snapshot dir and add the biggest ones to the result list
for result_file in Path(snapshot.link_dir).glob('*/*/*'): snap_dir = Path(snapshot.link_dir)
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower() extension = result_file.suffix.lstrip('.').lower()
if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions: if result_file.is_dir() or result_file.name.startswith('.') or extension not in allowed_extensions:
continue continue
if result_file.name in existing_files: if result_file.name in existing_files or result_file.name == 'index.html':
continue continue
file_size = result_file.stat().st_size or 0 file_size = result_file.stat().st_size or 0
@ -121,7 +123,7 @@ class SnapshotView(View):
if file_size > min_size_threshold: if file_size > min_size_threshold:
archiveresults[result_file.name] = { archiveresults[result_file.name] = {
'name': result_file.stem, 'name': result_file.stem,
'path': result_file.relative_to(snapshot.link_dir), 'path': result_file.relative_to(snap_dir),
'ts': ts_to_date_str(result_file.stat().st_mtime or 0), 'ts': ts_to_date_str(result_file.stat().st_mtime or 0),
'size': file_size, 'size': file_size,
} }
@ -140,7 +142,7 @@ class SnapshotView(View):
link_info = link._asdict(extended=True) link_info = link._asdict(extended=True)
try: try:
warc_path = 'warc/' + list(Path(snapshot.link_dir).glob('warc/*.warc.*'))[0].name warc_path = 'warc/' + list(Path(snap_dir).glob('warc/*.warc.*'))[0].name
except IndexError: except IndexError:
warc_path = 'warc/' warc_path = 'warc/'
@ -160,7 +162,7 @@ class SnapshotView(View):
'warc_path': warc_path, 'warc_path': warc_path,
'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS, 'PREVIEW_ORIGINALS': PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name'])), 'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result, 'best_result': best_result,
# 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234', # 'tags_str': 'somealskejrewlkrjwer,werlmwrwlekrjewlkrjwer324m532l,4m32,23m324234',
} }

View file

@ -401,13 +401,13 @@
<div class="col-lg-2"> <div class="col-lg-2">
<div class="card {% if forloop.first %}selected-card{% endif %}"> <div class="card {% if forloop.first %}selected-card{% endif %}">
<div class="card-body"> <div class="card-body">
<a href="{{result.path}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})"> <a href="{{result.path|urlencode}}" target="preview" title="./{{result.path}} (downloaded {{result.ts}})">
<h4>{{result.name}} <small>({{result.size|filesizeformat}})</small></h4> <h4>{{result.name|truncatechars:24}} <small>({{result.size|filesizeformat}})</small></h4>
<!-- <p class="card-text" ><code>./{{result.path|truncatechars:30}}</code></p> --> <!-- <p class="card-text" ><code>./{{result.path|truncatechars:30}}</code></p> -->
</a> </a>
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>--> <!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
</div> </div>
<iframe class="card-img-top" src="{{result.path}}" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe> <iframe class="card-img-top" src="{{result.path|urlencode}}?autoplay=0" allow="autoplay 'none'; fullscreen 'none'; navigation-override 'none'; " sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" scrolling="no" loading="lazy"></iframe>
</div> </div>
</div> </div>
{% endfor %} {% endfor %}
@ -419,7 +419,7 @@
<a href="./" target="preview"> <a href="./" target="preview">
<h4>Headers, JSON, etc.</h4> <h4>Headers, JSON, etc.</h4>
</a> </a>
<!--<a href="{{result.path}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>--> <!--<a href="{{result.path|urlencode}}" target="preview"><h4 class="card-title">{{result.name}}</h4></a>-->
</div> </div>
<iframe class="card-img-top" src="./" sandbox="" scrolling="no" loading="lazy"></iframe> <iframe class="card-img-top" src="./" sandbox="" scrolling="no" loading="lazy"></iframe>
</div> </div>
@ -430,7 +430,7 @@
<iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path}}" name="preview"></iframe> <iframe id="main-frame" sandbox="allow-same-origin allow-top-navigation-by-user-activation allow-scripts allow-forms" class="full-page-iframe" src="{{best_result.path|urlencode}}" name="preview"></iframe>
@ -444,9 +444,9 @@
this.src = this.src + '#toolbar=0' this.src = this.src + '#toolbar=0'
} }
this.onload = function() { this.onload = function() {
if (this.src.endsWith('.pdf')) { if (this.src.includes('.pdf')) {
this.removeAttribute('sandbox') this.removeAttribute('sandbox')
this.src = this.src + '#toolbar=0' this.src = this.src.split('?autoplay=')[0] + '#toolbar=0'
} }
try { try {
// doesn't work if frame origin rules prevent accessing its DOM via JS // doesn't work if frame origin rules prevent accessing its DOM via JS