add extractors arg to oneshot command and bump version to v0.5.1

This commit is contained in:
Nick Sweeting 2020-12-11 15:48:46 +02:00
parent a194bb6301
commit 9fa70b3452
4 changed files with 15 additions and 6 deletions

View file

@@ -89,8 +89,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
index_only=command.index_only,
overwrite=command.overwrite,
init=command.init,
out_dir=pwd or OUTPUT_DIR,
extractors=command.extract,
out_dir=pwd or OUTPUT_DIR,
)

View file

@@ -36,6 +36,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
' ~/Desktop/sites_list.csv\n'
)
)
parser.add_argument(
"--extract",
type=str,
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
This does not take precedence over the configuration",
default=""
)
parser.add_argument(
'--out-dir',
type=str,
@@ -55,6 +62,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
oneshot(
url=stdin_url or url,
out_dir=Path(command.out_dir).resolve(),
extractors=command.extract,
)

View file

@@ -511,7 +511,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
@enforce_types
def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
"""
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
You can run this to archive single pages without needing to create a whole collection with archivebox init.
@@ -523,7 +523,8 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
color='red'
)
raise SystemExit(2)
methods = ignore_methods(['title'])
methods = extractors.split(",") if extractors else ignore_methods(['title'])
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
return oneshot_link
@@ -534,8 +535,8 @@ def add(urls: Union[str, List[str]],
index_only: bool=False,
overwrite: bool=False,
init: bool=False,
out_dir: Path=OUTPUT_DIR,
extractors: str="") -> List[Link]:
extractors: str="",
out_dir: Path=OUTPUT_DIR) -> List[Link]:
"""Add a new URL or list of URLs to your archive"""
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'

View file

@@ -1,6 +1,6 @@
{
"name": "archivebox",
"version": "0.5.0",
"version": "0.5.1",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"license": "MIT",