diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index c033ab28..99d11a1b 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -233,6 +233,10 @@ _test_url_strs = {
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
'http://example7.com': 1,
+ 'https://': 0,
+ 'https://[test]': 0,
+ 'http://"test"': 0,
+ 'http://\'test\'': 0,
'[https://example8.com/what/is/this.php?what=1]': 1,
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
'https://example10.com#and-thing=2 "': 1,
diff --git a/archivebox/util.py b/archivebox/util.py
index daf3025e..cfa7d931 100644
--- a/archivebox/util.py
+++ b/archivebox/util.py
@@ -59,7 +59,7 @@ URL_REGEX = re.compile(
r'(?=('
r'http[s]?://' # start matching from allowed schemes
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
- r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols
+ r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen)
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols
r'))',