From f383648ffc80e64bfa399efc5e1b7766fe7de3dd Mon Sep 17 00:00:00 2001 From: JDC Date: Thu, 19 Nov 2020 17:33:53 -0500 Subject: [PATCH] Use a generator for snapshot flush from index --- archivebox/main.py | 2 +- archivebox/search/__init__.py | 7 +++---- archivebox/search/backends/sonic.py | 6 +++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/archivebox/main.py b/archivebox/main.py index 504cd670..7d13a5c4 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -666,7 +666,7 @@ def remove(filter_str: Optional[str]=None, to_remove = snapshots.count() remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir) - flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)]) + flush_search_index(snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))) all_snapshots = load_main_index(out_dir=out_dir) log_removal_finished(all_snapshots.count(), to_remove) diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 93245bda..59bb6fe5 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List, Union, Generator from pathlib import Path from importlib import import_module @@ -39,7 +39,7 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: backend.index(snapshot_id=str(snap.id), texts=texts) @enforce_types -def query_search_index(text: str) -> List: +def query_search_index(text: str) -> List[str]: if search_backend_enabled(): backend = import_backend() return backend.search(text) @@ -47,9 +47,8 @@ def query_search_index(text: str) -> List: return [] @enforce_types -def flush_search_index(snapshot_ids: List[str]): +def flush_search_index(snapshot_ids: Generator[str, None, None]): if not indexing_enabled() or not snapshot_ids: return backend = import_backend() backend.flush(snapshot_ids) - \ No newline at end of file diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py index 8fd93ae8..7dc4d5b0 100644 --- a/archivebox/search/backends/sonic.py +++ b/archivebox/search/backends/sonic.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Generator from sonic import IngestClient, SearchClient @@ -13,13 +13,13 @@ def index(snapshot_id: str, texts: List[str]): ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text)) @enforce_types -def search(text: str) -> List: +def search(text: str) -> List[str]: with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl: snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text) return snap_ids @enforce_types -def flush(snapshot_ids: List[str]): +def flush(snapshot_ids: Generator[str, None, None]): with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl: for id in snapshot_ids: ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))