From b9e17fa0d11ed1fc6d7c211e59a8b9db187a12fe Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 25 Jun 2020 23:32:01 -0400 Subject: [PATCH] rename archivebox-info to archivebox-status --- ...rchivebox_info.py => archivebox_status.py} | 10 +-- archivebox/config/__init__.py | 8 +-- archivebox/core/models.py | 18 +++-- archivebox/index/schema.py | 9 +++ archivebox/main.py | 68 ++++++++++++------- 5 files changed, 76 insertions(+), 37 deletions(-) rename archivebox/cli/{archivebox_info.py => archivebox_status.py} (77%) diff --git a/archivebox/cli/archivebox_info.py b/archivebox/cli/archivebox_status.py similarity index 77% rename from archivebox/cli/archivebox_info.py rename to archivebox/cli/archivebox_status.py index 814690b8..ac9b56d8 100644 --- a/archivebox/cli/archivebox_info.py +++ b/archivebox/cli/archivebox_status.py @@ -1,30 +1,30 @@ #!/usr/bin/env python3 __package__ = 'archivebox.cli' -__command__ = 'archivebox info' +__command__ = 'archivebox status' import sys import argparse from typing import Optional, List, IO -from ..main import info, docstring +from ..main import status, docstring from ..config import OUTPUT_DIR from .logging import SmartFormatter, reject_stdin -@docstring(info.__doc__) +@docstring(status.__doc__) def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None: parser = argparse.ArgumentParser( prog=__command__, - description=info.__doc__, + description=status.__doc__, add_help=True, formatter_class=SmartFormatter, ) parser.parse_args(args or ()) reject_stdin(__command__, stdin) - info(out_dir=pwd or OUTPUT_DIR) + status(out_dir=pwd or OUTPUT_DIR) if __name__ == '__main__': diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index df2b01c8..f357e9d2 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -785,14 +785,14 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) -> json_index_exists = os.path.exists(os.path.join(output_dir, JSON_INDEX_FILENAME)) if not json_index_exists: - stderr('[X] No archive main index was found in current directory.', color='red') - stderr(f' {output_dir}') + stderr('[X] No archivebox index found in the current directory.', color='red') + stderr(f' {output_dir}', color='lightyellow') stderr() - stderr(' Are you running archivebox in the right folder?') + stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI'])) stderr(' cd path/to/your/archive/folder') stderr(' archivebox [command]') stderr() - stderr(' To create a new archive collection or import existing data in this folder, run:') + stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI'])) stderr(' archivebox init') raise SystemExit(2) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index f343fcbc..2c0c9e37 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -12,13 +12,13 @@ class Snapshot(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) url = models.URLField(unique=True) - timestamp = models.CharField(max_length=32, null=True, default=None) + timestamp = models.CharField(max_length=32, null=True, default=None, db_index=True) - title = models.CharField(max_length=128, null=True, default=None) - tags = models.CharField(max_length=256, null=True, default=None) + title = models.CharField(max_length=128, null=True, default=None, db_index=True) + tags = models.CharField(max_length=256, null=True, default=None, db_index=True) - added = models.DateTimeField(auto_now_add=True) - updated = models.DateTimeField(null=True, default=None) + added = models.DateTimeField(auto_now_add=True, db_index=True) + updated = models.DateTimeField(null=True, default=None, db_index=True) # bookmarked = models.DateTimeField() keys = ('url', 'timestamp', 'title', 'tags', 'updated') @@ -68,3 +68,11 @@ class Snapshot(models.Model): @property def link_dir(self): return self.as_link().link_dir + + @property + def archive_path(self): + return self.as_link().archive_path + + @property + def archive_size(self): + return self.as_link().archive_size diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index a8f50373..637e0589 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -9,6 +9,8 @@ from typing import List, Dict, Any, Optional, Union from dataclasses import dataclass, asdict, field, fields +from ..system import get_dir_size + class ArchiveError(Exception): def __init__(self, message, hints=None): super().__init__(message) @@ -227,6 +229,13 @@ class Link: from ..config import ARCHIVE_DIR_NAME return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp) + @property + def archive_size(self) -> float: + try: + return get_dir_size(self.archive_path)[0] + except Exception: + return 0 + ### URL Helpers @property def url_hash(self): diff --git a/archivebox/main.py b/archivebox/main.py index 68e7e8ba..7b03e5b0 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -89,6 +89,7 @@ from .config import ( get_real_name, ) from .cli.logging import ( + TERM_WIDTH, TimedProgress, log_archiving_started, log_archiving_paused, @@ -161,7 +162,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None: {lightred}Example Use:{reset} mkdir my-archive; cd my-archive/ archivebox init - archivebox info + archivebox status archivebox add https://example.com/some/page archivebox add --depth=1 ~/Downloads/bookmarks_export.html @@ -364,7 +365,7 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None: print(' X ' + '\n X '.join(f'{folder} {link}' for folder, link in invalid_folders.items())) print() print(' {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI)) - print(' archivebox info') + print(' archivebox status') print(' archivebox list --status=invalid') @@ -387,16 +388,20 @@ def init(force: bool=False, out_dir: str=OUTPUT_DIR) -> None: @enforce_types -def info(out_dir: str=OUTPUT_DIR) -> None: +def status(out_dir: str=OUTPUT_DIR) -> None: """Print out some info and statistics about the archive collection""" check_data_folder(out_dir=out_dir) - print('{green}[*] Scanning archive collection main index...{reset}'.format(**ANSI)) - print(f' {out_dir}/*') + from core.models import Snapshot + from django.contrib.auth import get_user_model + User = get_user_model() + + print('{green}[*] Scanning archive main index...{reset}'.format(**ANSI)) + print(ANSI['lightyellow'], f' {out_dir}/*', ANSI['reset']) num_bytes, num_dirs, num_files = get_dir_size(out_dir, recursive=False, pattern='index.') size = printable_filesize(num_bytes) - print(f' Size: {size} across {num_files} files') + print(f' Index size: {size} across {num_files} files') print() links = list(load_main_index(out_dir=out_dir)) @@ -404,33 +409,23 @@ def info(out_dir: str=OUTPUT_DIR) -> None: num_sql_links = sum(1 for link in parse_sql_main_index(out_dir=out_dir)) num_html_links = sum(1 for url in parse_html_main_index(out_dir=out_dir)) num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir)) - users = get_admins().values_list('username', flat=True) print(f' > JSON Main Index: {num_json_links} links'.ljust(36), f'(found in {JSON_INDEX_FILENAME})') print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})') print(f' > HTML Main Index: {num_html_links} links'.ljust(36), f'(found in {HTML_INDEX_FILENAME})') print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)') - print(f' > Admin: {len(users)} users {", ".join(users)}'.ljust(36), f'(found in {SQL_INDEX_FILENAME})') - if num_html_links != len(links) or num_sql_links != len(links): print() print(' {lightred}Hint:{reset} You can fix index count differences automatically by running:'.format(**ANSI)) print(' archivebox init') - if not users: - print() - print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI)) - print(' archivebox manage createsuperuser') - print() - print('{green}[*] Scanning archive collection link data directories...{reset}'.format(**ANSI)) - print(f' {ARCHIVE_DIR}/*') - + print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI)) + print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset']) num_bytes, num_dirs, num_files = get_dir_size(ARCHIVE_DIR) size = printable_filesize(num_bytes) print(f' Size: {size} across {num_files} files in {num_dirs} directories') - print() - + print(ANSI['black']) num_indexed = len(get_indexed_folders(links, out_dir=out_dir)) num_archived = len(get_archived_folders(links, out_dir=out_dir)) num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir)) @@ -454,23 +449,50 @@ def info(out_dir: str=OUTPUT_DIR) -> None: print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})') print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})') print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})') - + + print(ANSI['reset']) + if num_indexed: - print() print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI)) print(' archivebox list --status= (e.g. indexed, corrupted, archived, etc.)') if orphaned: - print() print(' {lightred}Hint:{reset} To automatically import orphaned data directories into the main index, run:'.format(**ANSI)) print(' archivebox init') if num_invalid: - print() print(' {lightred}Hint:{reset} You may need to manually remove or fix some invalid data directories, afterwards make sure to run:'.format(**ANSI)) print(' archivebox init') print() + print('{green}[*] Scanning recent archive changes and user logins:{reset}'.format(**ANSI)) + print(ANSI['lightyellow'], f' {LOGS_DIR}/*', ANSI['reset']) + users = get_admins().values_list('username', flat=True) + print(f' UI users {len(users)}: {", ".join(users)}') + last_login = User.objects.order_by('last_login').last() + print(f' Last UI login: {last_login.username} @ {str(last_login.last_login)[:16]}') + last_updated = Snapshot.objects.order_by('updated').last() + print(f' Last changed: {str(last_updated.updated)[:16]}') + + if not users: + print() + print(' {lightred}Hint:{reset} You can create an admin user by running:'.format(**ANSI)) + print(' archivebox manage createsuperuser') + + print() + for snapshot in Snapshot.objects.order_by('-updated')[:10]: + if not snapshot.updated: + continue + print( + ANSI['black'], + ( + f' > {str(snapshot.updated)[:16]} ' + f'[{snapshot.num_outputs} {("X", "√")[snapshot.is_archived]} {printable_filesize(snapshot.archive_size)}] ' + f'"{snapshot.title}": {snapshot.url}' + )[:TERM_WIDTH()], + ANSI['reset'], + ) + print(ANSI['black'], ' ...', ANSI['reset']) @enforce_types