diff --git a/setup.py b/setup.py
index 049528fb..12002580 100755
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,9 @@ setuptools.setup(
             "sphinx-rtd-theme",
             "recommonmark",
         ],
+        "test": [
+            "pytest",
+        ],
         # 'redis': ['redis', 'django-redis'],
         # 'pywb': ['pywb', 'redis'],
     },
diff --git a/tests/test_init.py b/tests/test_init.py
new file mode 100644
index 00000000..b870a599
--- /dev/null
+++ b/tests/test_init.py
@@ -0,0 +1,53 @@
+# archivebox init
+# archivebox add
+
+import os
+import subprocess
+from pathlib import Path
+import json
+
+import pytest
+
+
+@pytest.fixture
+def process(tmp_path):
+    """Run `archivebox init` inside tmp_path and return the completed process."""
+    # cwd= scopes the directory change to the child process; os.chdir() would
+    # leave the whole test session in tmp_path after this fixture finished.
+    return subprocess.run(['archivebox', 'init'], cwd=tmp_path, capture_output=True)
+
+
+def test_init(tmp_path, process):
+    """The first init in an empty folder reports a new collection."""
+    assert "Initializing a new ArchiveBox collection in this folder..." in process.stdout.decode("utf-8")
+
+
+def test_update(tmp_path, process):
+    """Running init again in the same folder reports an update."""
+    update_process = subprocess.run(['archivebox', 'init'], cwd=tmp_path, capture_output=True)
+    assert "Updating existing ArchiveBox collection in this folder" in update_process.stdout.decode("utf-8")
+
+
+def test_add_link(tmp_path, process):
+    """Adding a URL creates a snapshot folder and indexes its title.
+
+    NOTE(review): archives http://example.com, so this presumably needs network
+    access and depends on the title that page currently serves.
+    """
+    add_process = subprocess.run(['archivebox', 'add', 'http://example.com'], cwd=tmp_path, capture_output=True)
+    archived_items = sorted(tmp_path.glob('archive/*'))
+    # Surface the command's stderr instead of a bare IndexError when nothing was archived.
+    assert archived_items, add_process.stderr.decode("utf-8", errors="replace")
+    archived_item_path = archived_items[0]
+
+    assert "index.json" in [x.name for x in archived_item_path.iterdir()]
+
+    # The expected title contains a non-ASCII dash, so decode explicitly as UTF-8
+    # rather than with the platform's default encoding.
+    with open(archived_item_path / "index.json", "r", encoding="utf-8") as f:
+        output_json = json.load(f)
+    assert "IANA — IANA-managed Reserved Domains" == output_json['history']['title'][0]['output']
+
+    with open(tmp_path / "index.html", "r", encoding="utf-8") as f:
+        output_html = f.read()
+    assert "IANA — IANA-managed Reserved Domains" in output_html
diff --git a/tests/test_util.py b/tests/test_util.py
new file mode 100644
index 00000000..19ed31c0
--- /dev/null
+++ b/tests/test_util.py
@@ -0,0 +1,21 @@
+#@enforce_types
+#def download_url(url: str, timeout: int=None) -> str:
+#    """Download the contents of a remote url and return the text"""
+#    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
+#    timeout = timeout or TIMEOUT
+#    response = requests.get(
+#        url,
+#        headers={'User-Agent': WGET_USER_AGENT},
+#        verify=CHECK_SSL_VALIDITY,
+#        timeout=timeout,
+#    )
+#    if response.headers.get('Content-Type') == 'application/rss+xml':
+#        # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py
+#        _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
+#        _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P<xmlcharset>[\w-]+)')
+#        _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE)
+#        _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE)
+#        match = _BODY_ENCODING_STR_RE.search(response.text[:1024])
+#        if match:
+#            response.encoding = match.group('xmlcharset')
+#    return response.text
\ No newline at end of file