run all blocking commands in background threads and show nice UI messages as confirmation

This commit is contained in:
Nick Sweeting 2024-09-06 02:54:22 -07:00
parent b56b1cac35
commit 52386d9c16
No known key found for this signature in database
4 changed files with 79 additions and 85 deletions

View file

@ -2,19 +2,15 @@ __package__ = 'archivebox.core'
import os import os
from io import StringIO import threading
from pathlib import Path from pathlib import Path
from contextlib import redirect_stdout
from datetime import datetime, timezone
from typing import Dict, Any
from django.contrib import admin from django.contrib import admin, messages
from django.urls import path, reverse, resolve from django.urls import path, reverse, resolve
from django.utils import timezone from django.utils import timezone
from django.utils.functional import cached_property from django.utils.functional import cached_property
from django.utils.html import format_html from django.utils.html import format_html
from django.utils.safestring import mark_safe from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.auth.admin import UserAdmin from django.contrib.auth.admin import UserAdmin
from django.core.paginator import Paginator from django.core.paginator import Paginator
@ -28,10 +24,9 @@ from signal_webhooks.admin import WebhookAdmin
from signal_webhooks.utils import get_webhook_model from signal_webhooks.utils import get_webhook_model
# from plugantic.admin import CustomPlugin # from plugantic.admin import CustomPlugin
from ..util import htmldecode, urldecode, ansi_to_html from ..util import htmldecode, urldecode
from core.models import Snapshot, ArchiveResult, Tag from core.models import Snapshot, ArchiveResult, Tag
from core.forms import AddLinkForm
from core.mixins import SearchResultsAdminMixin from core.mixins import SearchResultsAdminMixin
from api.models import APIToken from api.models import APIToken
from abid_utils.admin import ABIDModelAdmin from abid_utils.admin import ABIDModelAdmin
@ -65,50 +60,6 @@ class ArchiveBoxAdmin(admin.AdminSite):
site_title = 'Index' site_title = 'Index'
namespace = 'admin' namespace = 'admin'
def get_urls(self):
return [
path('core/snapshot/add/', self.add_view, name='Add'),
] + super().get_urls()
def add_view(self, request):
if not request.user.is_authenticated:
return redirect(f'/admin/login/?next={request.path}')
request.current_app = self.name
context: Dict[str, Any] = {
**self.each_context(request),
'title': 'Add URLs',
}
if request.method == 'GET':
context['form'] = AddLinkForm()
elif request.method == 'POST':
form = AddLinkForm(request.POST)
if form.is_valid():
url = form.cleaned_data["url"]
print(f'[+] Adding URL: {url}')
depth = 0 if form.cleaned_data["depth"] == "0" else 1
input_kwargs = {
"urls": url,
"depth": depth,
"update_all": False,
"out_dir": CONFIG.OUTPUT_DIR,
}
add_stdout = StringIO()
with redirect_stdout(add_stdout):
add(**input_kwargs)
print(add_stdout.getvalue())
context.update({
"stdout": ansi_to_html(add_stdout.getvalue().strip()),
"form": AddLinkForm(),
})
else:
context["form"] = form
return render(template_name='add.html', request=request, context=context)
class CustomUserAdmin(UserAdmin): class CustomUserAdmin(UserAdmin):
sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined'] sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
@ -558,19 +509,37 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
description=" Get Title" description=" Get Title"
) )
def update_titles(self, request, queryset):
    """Admin action: re-fetch the title and favicon for the selected Snapshots.

    Small selections (1 or 2 snapshots) are archived inline so the result is
    ready when the page reloads; larger selections are handed to a daemon
    thread so the admin request returns immediately. Either way a success
    message is attached to the request as confirmation.

    Args:
        request: the current admin HttpRequest (used for the flash message).
        queryset: the Snapshots selected in the changelist.
    """
    links = [snapshot.as_link() for snapshot in queryset]

    # Same arguments for both code paths, so the inline and background runs
    # cannot drift apart (the original passed a tuple in one and a list in the other).
    archive_kwargs = {
        "overwrite": True,
        "methods": ('title', 'favicon'),
        "out_dir": CONFIG.OUTPUT_DIR,
    }

    if len(links) < 3:
        # run synchronously if there are only 1 or 2 links
        archive_links(links, **archive_kwargs)
        messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
    else:
        # otherwise run in a bg thread; daemon=True replaces the deprecated
        # Thread.setDaemon() and lets the server exit without waiting on it
        bg_thread = threading.Thread(
            target=archive_links,
            args=(links,),
            kwargs=archive_kwargs,
            daemon=True,
        )
        bg_thread.start()
        messages.success(request, f"Title and favicon are updating in the background for {len(links)} URLs. (refresh in a few minutes to see results)")
@admin.action( @admin.action(
description="⬇️ Get Missing" description="⬇️ Get Missing"
) )
def update_snapshots(self, request, queryset):
    """Admin action: run archive_links (overwrite=False) on the selected Snapshots in the background.

    The work is started on a daemon thread so the admin request is not
    blocked; a success message is attached to the request as confirmation.

    Args:
        request: the current admin HttpRequest (used for the flash message).
        queryset: the Snapshots selected in the changelist.
    """
    links = [snapshot.as_link() for snapshot in queryset]

    # daemon=True replaces the deprecated Thread.setDaemon() call
    bg_thread = threading.Thread(
        target=archive_links,
        args=(links,),
        kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR},
        daemon=True,
    )
    bg_thread.start()

    messages.success(
        request,
        f"Re-trying any previously failed methods for {len(links)} URLs in the background. (refresh in a few minutes to see results)",
    )
@admin.action( @admin.action(
@ -578,24 +547,44 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
) )
def resnapshot_snapshot(self, request, queryset):
    """Admin action: create a fresh snapshot of each selected Snapshot's URL.

    For every selected snapshot, a new URL is built from the original URL
    (minus any existing #fragment) plus a `#<ISO timestamp>` fragment, and
    `add()` is started for it on its own daemon thread with the snapshot's
    tags. One success message summarising the batch is attached afterwards.

    Args:
        request: the current admin HttpRequest (used for the flash message).
        queryset: the Snapshots selected in the changelist.
    """
    for snapshot in queryset:
        timestamp = timezone.now().isoformat('T', 'seconds')
        # the timestamp fragment makes the URL distinct from the existing snapshot's URL
        new_url = snapshot.url.split('#')[0] + f'#{timestamp}'

        # daemon=True replaces the deprecated Thread.setDaemon() call
        bg_thread = threading.Thread(
            target=add,
            args=(new_url,),
            kwargs={'tag': snapshot.tags_str()},
            daemon=True,
        )
        bg_thread.start()

    # queryset.count() already returns an int; the original wrapped it in
    # len(), which raised TypeError after the threads had been started.
    messages.success(
        request,
        f"Creating new fresh snapshots for {queryset.count()} URLs in the background. (refresh in a few minutes to see results)",
    )
@admin.action( @admin.action(
description="♲ Redo" description="♲ Redo"
) )
def overwrite_snapshots(self, request, queryset):
    """Admin action: run archive_links (overwrite=True) on the selected Snapshots in the background.

    The work is started on a daemon thread so the admin request is not
    blocked; a success message is attached to the request as confirmation.

    Args:
        request: the current admin HttpRequest (used for the flash message).
        queryset: the Snapshots selected in the changelist.
    """
    links = [snapshot.as_link() for snapshot in queryset]

    # daemon=True replaces the deprecated Thread.setDaemon() call
    bg_thread = threading.Thread(
        target=archive_links,
        args=(links,),
        kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR},
        daemon=True,
    )
    bg_thread.start()

    messages.success(
        request,
        f"Clearing all previous results and re-downloading {len(links)} URLs in the background. (refresh in a few minutes to see results)",
    )
@admin.action( @admin.action(
description="☠️ Delete" description="☠️ Delete"
) )
def delete_snapshots(self, request, queryset):
    """Admin action: delete the selected Snapshots and confirm with a message.

    Calls `remove()` with yes=True and delete=True on the selection, then
    attaches a success message reporting how many snapshots were selected.

    Args:
        request: the current admin HttpRequest (used for the flash message).
        queryset: the Snapshots selected in the changelist.
    """
    # Take the count BEFORE removing: a count evaluated after the rows are
    # gone may come back as 0. (The original also wrapped count() in len(),
    # which raised TypeError because count() returns an int.)
    num_snapshots = queryset.count()

    remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)

    messages.success(
        request,
        f"Succesfully deleted {num_snapshots} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed.",
    )
@admin.action( @admin.action(
@ -606,6 +595,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
print('[+] Adding tags', tags, 'to Snapshots', queryset) print('[+] Adding tags', tags, 'to Snapshots', queryset)
for obj in queryset: for obj in queryset:
obj.tags.add(*tags) obj.tags.add(*tags)
messages.success(
request,
f"Added {len(tags)} tags to {len(queryset.count())} Snapshots.",
)
@admin.action( @admin.action(
@ -616,10 +609,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
print('[-] Removing tags', tags, 'to Snapshots', queryset) print('[-] Removing tags', tags, 'to Snapshots', queryset)
for obj in queryset: for obj in queryset:
obj.tags.remove(*tags) obj.tags.remove(*tags)
messages.success(
request,
f"Removed {len(tags)} tags from {len(queryset.count())} Snapshots.",
)
# @admin.register(SnapshotTag, site=archivebox_admin) # @admin.register(SnapshotTag, site=archivebox_admin)

View file

@ -2,17 +2,17 @@ __package__ = 'archivebox.core'
from typing import Callable from typing import Callable
from io import StringIO import threading
from pathlib import Path from pathlib import Path
from contextlib import redirect_stdout
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
from django.http import HttpRequest, HttpResponse, Http404 from django.http import HttpRequest, HttpResponse, Http404
from django.utils.html import format_html, mark_safe from django.utils.html import format_html, mark_safe
from django.views import View, static from django.views import View
from django.views.generic.list import ListView from django.views.generic.list import ListView
from django.views.generic import FormView from django.views.generic import FormView
from django.db.models import Q from django.db.models import Q
from django.contrib import messages
from django.contrib.auth.mixins import UserPassesTestMixin from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator from django.utils.decorators import method_decorator
@ -477,18 +477,19 @@ class AddView(UserPassesTestMixin, FormView):
} }
if extractors: if extractors:
input_kwargs.update({"extractors": extractors}) input_kwargs.update({"extractors": extractors})
add_stdout = StringIO()
with redirect_stdout(add_stdout):
add(**input_kwargs)
print(add_stdout.getvalue())
context = self.get_context_data() bg_thread = threading.Thread(target=add, kwargs=input_kwargs)
bg_thread.setDaemon(True)
bg_thread.start()
context.update({ rough_url_count = url.count('://')
"stdout": ansi_to_html(add_stdout.getvalue().strip()),
"form": AddLinkForm() messages.success(
}) self.request,
return render(template_name=self.template_name, request=self.request, context=context) f"Adding {rough_url_count} URLs in the background. (refresh in a few minutes to see results)",
)
return redirect("/admin/core/snapshot/")
class HealthCheckView(View): class HealthCheckView(View):

View file

@ -178,7 +178,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
ts ts
) + "\n" + str(e) + "\n")) ) + "\n" + str(e) + "\n"))
#f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n") #f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
# print(f' ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command) # print(f' ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command)
raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format( raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
method_name, method_name,

View file

@ -1,7 +1,7 @@
{% load i18n static %} {% load i18n static %}
<div id="user-tools"> <div id="user-tools">
<a href="{% url 'admin:Add' %}">Add </a> &nbsp; &nbsp; <a href="{% url 'add' %}">Add </a> &nbsp; &nbsp;
<a href="{% url 'Home' %}">Snapshots</a> | <a href="{% url 'Home' %}">Snapshots</a> |
<a href="/admin/core/tag/">Tags</a> | <a href="/admin/core/tag/">Tags</a> |
<a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp; <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;