From 96b1e4a8ec1eb64c979c185b912ef6d60b25074f Mon Sep 17 00:00:00 2001
From: Nick Sweeting <git@sweeting.me>
Date: Mon, 13 Jul 2020 11:22:58 -0400
Subject: [PATCH] accept local paths as valid link URLs when parsing

---
 archivebox/parsers/generic_txt.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py
index cc3653a0..61d1973f 100644
--- a/archivebox/parsers/generic_txt.py
+++ b/archivebox/parsers/generic_txt.py
@@ -5,6 +5,7 @@ import re
 
 from typing import IO, Iterable
 from datetime import datetime
+from pathlib import Path
 
 from ..index.schema import Link
 from ..util import (
@@ -13,14 +14,28 @@ from ..util import (
     URL_REGEX
 )
 
+
 @enforce_types
 def parse_generic_txt_export(text_file: IO[str]) -> Iterable[Link]:
     """Parse raw links from each line in a text file"""
 
     text_file.seek(0)
     for line in text_file.readlines():
-        urls = re.findall(URL_REGEX, line) if line.strip() else ()
-        for url in urls:                                                # type: ignore
+        if not line.strip():
+            continue
+
+        # if the line is a local file path that resolves, then we can archive it
+        if Path(line).exists():
+            yield Link(
+                url=line,
+                timestamp=str(datetime.now().timestamp()),
+                title=None,
+                tags=None,
+                sources=[text_file.name],
+            )
+
+        # in every case, also look for anything that looks like a URL in the line
+        for url in re.findall(URL_REGEX, line):
             yield Link(
                 url=htmldecode(url),
                 timestamp=str(datetime.now().timestamp()),