From 6a6ae7468e90c8b22d0dd1fcc4514f51501ed4d6 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 25 Apr 2024 21:36:11 -0700 Subject: [PATCH] fix lint errors --- archivebox/api/v1_auth.py | 1 - archivebox/api/v1_cli.py | 2 +- archivebox/api/v1_core.py | 2 +- archivebox/core/auth.py | 2 -- archivebox/core/auth_ldap.py | 2 -- archivebox/logging_util.py | 4 ++-- archivebox/parsers/__init__.py | 2 -- archivebox/parsers/generic_json.py | 18 +++++------------- archivebox/parsers/generic_jsonl.py | 2 -- archivebox/parsers/generic_txt.py | 2 -- archivebox/util.py | 12 ++++++------ bin/lint.sh | 2 +- 12 files changed, 16 insertions(+), 35 deletions(-) diff --git a/archivebox/api/v1_auth.py b/archivebox/api/v1_auth.py index 4a631137..4cc0f4fa 100644 --- a/archivebox/api/v1_auth.py +++ b/archivebox/api/v1_auth.py @@ -2,7 +2,6 @@ __package__ = 'archivebox.api' from typing import Optional -from django.contrib.auth import authenticate from ninja import Router, Schema from api.models import APIToken diff --git a/archivebox/api/v1_cli.py b/archivebox/api/v1_cli.py index 6e737464..adb3be86 100644 --- a/archivebox/api/v1_cli.py +++ b/archivebox/api/v1_cli.py @@ -157,7 +157,7 @@ def cli_update(request, args: UpdateCommandSchema): @router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]') -def cli_add(request, args: ScheduleCommandSchema): +def cli_schedule(request, args: ScheduleCommandSchema): result = schedule( import_path=args.import_path, add=args.add, diff --git a/archivebox/api/v1_core.py b/archivebox/api/v1_core.py index 452614f6..f6144ace 100644 --- a/archivebox/api/v1_core.py +++ b/archivebox/api/v1_core.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.api' from uuid import UUID -from typing import List, Optional, Union +from typing import List, Optional from datetime import datetime from django.shortcuts import get_object_or_404 diff --git a/archivebox/core/auth.py b/archivebox/core/auth.py index e5bf896d..048f029c 100644 --- a/archivebox/core/auth.py +++ b/archivebox/core/auth.py @@ -1,7 +1,5 @@ __package__ = 'archivebox.core' -import os -from django.conf import settings from ..config import ( LDAP diff --git a/archivebox/core/auth_ldap.py b/archivebox/core/auth_ldap.py index 9057683c..b5e2877e 100644 --- a/archivebox/core/auth_ldap.py +++ b/archivebox/core/auth_ldap.py @@ -1,10 +1,8 @@ -from django.conf import settings from ..config import ( LDAP_CREATE_SUPERUSER ) def create_user(sender, user=None, ldap_user=None, **kwargs): - if not user.id and LDAP_CREATE_SUPERUSER: user.is_superuser = True diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index a7ff95b7..de7c4474 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -494,12 +494,12 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool): if delete: file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()] print( - f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' + + f' {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' f' ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)' ) else: print( - ' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' + + ' Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' ' (Pass --delete if you also want to permanently delete the data folders)' ) diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index 694ecc79..99cd690d 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -7,7 +7,6 @@ For examples of supported import formats see tests/. __package__ = 'archivebox.parsers' -import re from io import StringIO from typing import IO, Tuple, List, Optional @@ -28,7 +27,6 @@ from ..util import ( htmldecode, download_url, enforce_types, - find_all_urls, ) from ..index.schema import Link from ..logging_util import TimedProgress, log_source_saved diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 8b64f55e..082203fb 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -72,21 +72,13 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: json_file.seek(0) - try: - links = json.load(json_file) - if type(links) != list: - raise Exception('JSON parser expects list of objects, maybe this is JSONL?') - except json.decoder.JSONDecodeError: - # sometimes the first line is a comment or other junk, so try without - json_file.seek(0) - first_line = json_file.readline() - #print(' > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '') - links = json.load(json_file) - # we may fail again, which means we really don't know what to do - + links = json.load(json_file) + if type(links) != list: + raise Exception('JSON parser expects list of objects, maybe this is JSONL?') + for link in links: if link: - yield jsonObjectToLink(link,json_file.name) + yield jsonObjectToLink(link, json_file.name) KEY = 'json' NAME = 'Generic JSON' diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py index 8ee94b28..d7dceb63 100644 --- a/archivebox/parsers/generic_jsonl.py +++ b/archivebox/parsers/generic_jsonl.py @@ -3,11 +3,9 @@ __package__ = 'archivebox.parsers' import json from typing import IO, Iterable -from datetime import datetime, timezone from ..index.schema import Link from ..util import ( - htmldecode, enforce_types, ) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index 561514e0..6511f44f 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -1,8 +1,6 @@ __package__ = 'archivebox.parsers' __description__ = 'Plain Text' -import re - from typing import IO, Iterable from datetime import datetime, timezone from pathlib import Path diff --git a/archivebox/util.py b/archivebox/util.py index d1b4daf8..e8ed8517 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -62,12 +62,12 @@ COLOR_REGEX = re.compile(r'\[(?P\d+)(;(?P\d+)(;(?P\d+))?)?m # https://mathiasbynens.be/demo/url-regex URL_REGEX = re.compile( - r'(?=(' + - r'http[s]?://' + # start matching from allowed schemes - r'(?:[a-zA-Z]|[0-9]' + # followed by allowed alphanum characters - r'|[-_$@.&+!*\(\),]' + # or allowed symbols (keep hyphen first to match literal hyphen) - r'|[^\u0000-\u007F])+' + # or allowed unicode bytes - r'[^\]\[<>"\'\s]+' + # stop parsing at these symbols + r'(?=(' + r'http[s]?://' # start matching from allowed schemes + r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters + r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen) + r'|[^\u0000-\u007F])+' # or allowed unicode bytes + r'[^\]\[<>"\'\s]+' # stop parsing at these symbols r'))', re.IGNORECASE | re.UNICODE, ) diff --git a/bin/lint.sh b/bin/lint.sh index bd8beef8..6797b6d3 100755 --- a/bin/lint.sh +++ b/bin/lint.sh @@ -15,7 +15,7 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" source "$DIR/.venv/bin/activate" echo "[*] Running flake8..." -cd archivebox +cd "$DIR/archivebox" flake8 . && echo "√ No errors found." echo