diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py
index 208d7e61..df625e89 100644
--- a/archivebox/core/admin.py
+++ b/archivebox/core/admin.py
@@ -2,19 +2,15 @@ __package__ = 'archivebox.core'
 
 import os
 
-from io import StringIO
+import threading
 from pathlib import Path
-from contextlib import redirect_stdout
-from datetime import datetime, timezone
-from typing import Dict, Any
 
-from django.contrib import admin
+from django.contrib import admin, messages
 from django.urls import path, reverse, resolve
 from django.utils import timezone
 from django.utils.functional import cached_property
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
-from django.shortcuts import render, redirect
 from django.contrib.auth import get_user_model
 from django.contrib.auth.admin import UserAdmin
 from django.core.paginator import Paginator
@@ -28,10 +24,9 @@ from signal_webhooks.admin import WebhookAdmin
 from signal_webhooks.utils import get_webhook_model
 # from plugantic.admin import CustomPlugin
 
-from ..util import htmldecode, urldecode, ansi_to_html
+from ..util import htmldecode, urldecode
 
 from core.models import Snapshot, ArchiveResult, Tag
-from core.forms import AddLinkForm
 from core.mixins import SearchResultsAdminMixin
 from api.models import APIToken
 from abid_utils.admin import ABIDModelAdmin
@@ -65,50 +60,6 @@ class ArchiveBoxAdmin(admin.AdminSite):
     site_title = 'Index'
     namespace = 'admin'
 
-    def get_urls(self):
-        return [
-            path('core/snapshot/add/', self.add_view, name='Add'),
-        ] + super().get_urls()
-
-    def add_view(self, request):
-        if not request.user.is_authenticated:
-            return redirect(f'/admin/login/?next={request.path}')
-
-        request.current_app = self.name
-        context: Dict[str, Any] = {
-            **self.each_context(request),
-            'title': 'Add URLs',
-        }
-
-        if request.method == 'GET':
-            context['form'] = AddLinkForm()
-
-        elif request.method == 'POST':
-            form = AddLinkForm(request.POST)
-            if form.is_valid():
-                url = form.cleaned_data["url"]
-                print(f'[+] Adding URL: {url}')
-                depth = 0 if form.cleaned_data["depth"] == "0" else 1
-                input_kwargs = {
-                    "urls": url,
-                    "depth": depth,
-                    "update_all": False,
-                    "out_dir": CONFIG.OUTPUT_DIR,
-                }
-                add_stdout = StringIO()
-                with redirect_stdout(add_stdout):
-                    add(**input_kwargs)
-                print(add_stdout.getvalue())
-
-                context.update({
-                    "stdout": ansi_to_html(add_stdout.getvalue().strip()),
-                    "form": AddLinkForm(),
-                })
-            else:
-                context["form"] = form
-
-        return render(template_name='add.html', request=request, context=context)
-
 
 class CustomUserAdmin(UserAdmin):
     sort_fields = ['id', 'email', 'username', 'is_superuser', 'last_login', 'date_joined']
@@ -558,19 +509,37 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         description="ℹ️ Get Title"
     )
     def update_titles(self, request, queryset):
-        archive_links([
-            snapshot.as_link()
-            for snapshot in queryset
-        ], overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
+        links = [snapshot.as_link() for snapshot in queryset]
+        if len(links) < 3:
+            # run synchronously if there are only 1 or 2 links
+            archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
+            messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
+        else:
+            # otherwise run in a bg thread
+            bg_thread = threading.Thread(
+                target=archive_links,
+                args=(links,),
+                kwargs={"overwrite": True, "methods": ['title', 'favicon'], "out_dir": CONFIG.OUTPUT_DIR},
+            )
+            bg_thread.daemon = True
+            bg_thread.start()
+            messages.success(request, f"Title and favicon are updating in the background for {len(links)} URLs. (refresh in a few minutes to see results)")
 
     @admin.action(
         description="⬇️ Get Missing"
     )
     def update_snapshots(self, request, queryset):
-        archive_links([
-            snapshot.as_link()
-            for snapshot in queryset
-        ], out_dir=CONFIG.OUTPUT_DIR)
+        links = [snapshot.as_link() for snapshot in queryset]
+        bg_thread = threading.Thread(
+            target=archive_links,
+            args=(links,),
+            kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR},
+        )
+        bg_thread.daemon = True
+        bg_thread.start()
+        messages.success(
+            request, f"Re-trying any previously failed methods for {len(links)} URLs in the background. (refresh in a few minutes to see results)"
+        )
 
 
     @admin.action(
@@ -578,24 +547,46 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
     )
     def resnapshot_snapshot(self, request, queryset):
         for snapshot in queryset:
-            timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
+            timestamp = timezone.now().isoformat('T', 'seconds')
             new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
-            add(new_url, tag=snapshot.tags_str())
+
+            bg_thread = threading.Thread(target=add, args=(new_url,), kwargs={'tag': snapshot.tags_str()})
+            bg_thread.daemon = True
+            bg_thread.start()
+
+        messages.success(
+            request,
+            f"Creating new fresh snapshots for {queryset.count()} URLs in the background. (refresh in a few minutes to see results)",
+        )
 
     @admin.action(
         description="♲ Redo"
     )
     def overwrite_snapshots(self, request, queryset):
-        archive_links([
-            snapshot.as_link()
-            for snapshot in queryset
-        ], overwrite=True, out_dir=CONFIG.OUTPUT_DIR)
+        links = [snapshot.as_link() for snapshot in queryset]
+        bg_thread = threading.Thread(
+            target=archive_links,
+            args=(links,),
+            kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR},
+        )
+        bg_thread.daemon = True
+        bg_thread.start()
+        messages.success(
+            request,
+            f"Clearing all previous results and re-downloading {len(links)} URLs in the background. (refresh in a few minutes to see results)",
+        )
 
     @admin.action(
         description="☠️ Delete"
     )
     def delete_snapshots(self, request, queryset):
+        # count before deleting: a fresh COUNT query after remove() may return 0
+        num_deleted = queryset.count()
         remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
+        messages.success(
+            request,
+            f"Successfully deleted {num_deleted} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed.",
+        )
 
 
     @admin.action(
@@ -606,6 +597,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         print('[+] Adding tags', tags, 'to Snapshots', queryset)
         for obj in queryset:
             obj.tags.add(*tags)
+        messages.success(
+            request,
+            f"Added {len(tags)} tags to {queryset.count()} Snapshots.",
+        )
 
 
     @admin.action(
@@ -616,10 +611,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
         print('[-] Removing tags', tags, 'to Snapshots', queryset)
         for obj in queryset:
             obj.tags.remove(*tags)
-
-
-
-
+        messages.success(
+            request,
+            f"Removed {len(tags)} tags from {queryset.count()} Snapshots.",
+        )
 
 
 # @admin.register(SnapshotTag, site=archivebox_admin)
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index ec084e99..260a6f70 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -2,17 +2,17 @@ __package__ = 'archivebox.core'
 
 from typing import Callable
 
-from io import StringIO
+import threading
 from pathlib import Path
-from contextlib import redirect_stdout
 
 from django.shortcuts import render, redirect
 from django.http import HttpRequest, HttpResponse, Http404
 from django.utils.html import format_html, mark_safe
-from django.views import View, static
+from django.views import View
 from django.views.generic.list import ListView
 from django.views.generic import FormView
 from django.db.models import Q
+from django.contrib import messages
 from django.contrib.auth.mixins import UserPassesTestMixin
 from django.views.decorators.csrf import csrf_exempt
 from django.utils.decorators import method_decorator
@@ -477,18 +477,19 @@ class AddView(UserPassesTestMixin, FormView):
         }
         if extractors:
             input_kwargs.update({"extractors": extractors})
-        add_stdout = StringIO()
-        with redirect_stdout(add_stdout):
-            add(**input_kwargs)
-            print(add_stdout.getvalue())
 
-        context = self.get_context_data()
+        bg_thread = threading.Thread(target=add, kwargs=input_kwargs)
+        bg_thread.daemon = True
+        bg_thread.start()
 
-        context.update({
-            "stdout": ansi_to_html(add_stdout.getvalue().strip()),
-            "form": AddLinkForm()
-        })
-        return render(template_name=self.template_name, request=self.request, context=context)
+        rough_url_count = url.count('://')
+
+        messages.success(
+            self.request,
+            f"Adding {rough_url_count} URLs in the background. (refresh in a few minutes to see results)",
+        )
+
+        return redirect("/admin/core/snapshot/")
 
 
 class HealthCheckView(View):
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index 1432b271..c373dbdf 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -178,7 +178,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                         ts
                     ) + "\n" + str(e) + "\n"))
                     #f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
-                
+
                 # print(f' ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command)
                 raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
                     method_name,
diff --git a/archivebox/templates/core/navigation.html b/archivebox/templates/core/navigation.html
index 7dea3d96..e909c362 100644
--- a/archivebox/templates/core/navigation.html
+++ b/archivebox/templates/core/navigation.html
@@ -1,7 +1,7 @@
 {% load i18n static %}
- Add ➕     + Add ➕     Snapshots | Tags | Log