mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2024-09-19 23:49:07 -04:00
Merge pull request #911 from hannah98/dev
This commit is contained in:
commit
663918a372
3 changed files with 10 additions and 3 deletions
|
@@ -79,6 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||||
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
|
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
|
||||||
'URL_WHITELIST': {'type': str, 'default': None},
|
'URL_WHITELIST': {'type': str, 'default': None},
|
||||||
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
|
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
|
||||||
|
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
|
||||||
},
|
},
|
||||||
|
|
||||||
'SERVER_CONFIG': {
|
'SERVER_CONFIG': {
|
||||||
|
|
|
@@ -98,6 +98,7 @@ class ConfigDict(BaseConfig, total=False):
|
||||||
WGET_ARGS: List[str]
|
WGET_ARGS: List[str]
|
||||||
CURL_ARGS: List[str]
|
CURL_ARGS: List[str]
|
||||||
GIT_ARGS: List[str]
|
GIT_ARGS: List[str]
|
||||||
|
TAG_SEPARATOR_PATTERN: str
|
||||||
|
|
||||||
|
|
||||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||||
|
|
|
@@ -1,5 +1,7 @@
|
||||||
__package__ = 'archivebox.index'
|
__package__ = 'archivebox.index'
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Tuple, Iterator
|
from typing import List, Tuple, Iterator
|
||||||
|
@@ -8,7 +10,10 @@ from django.db import transaction
|
||||||
|
|
||||||
from .schema import Link
|
from .schema import Link
|
||||||
from ..util import enforce_types, parse_date
|
from ..util import enforce_types, parse_date
|
||||||
from ..config import OUTPUT_DIR
|
from ..config import (
|
||||||
|
OUTPUT_DIR,
|
||||||
|
TAG_SEPARATOR_PATTERN,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
### Main Links Index
|
### Main Links Index
|
||||||
|
@@ -35,7 +40,7 @@ def write_link_to_sql_index(link: Link):
|
||||||
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
|
||||||
|
|
||||||
tag_list = list(dict.fromkeys(
|
tag_list = list(dict.fromkeys(
|
||||||
tag.strip() for tag in (link.tags or '').split(',')
|
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
|
||||||
))
|
))
|
||||||
info.pop('tags')
|
info.pop('tags')
|
||||||
|
|
||||||
|
@@ -107,7 +112,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
snap.title = link.title
|
snap.title = link.title
|
||||||
|
|
||||||
tag_list = list(dict.fromkeys(
|
tag_list = list(dict.fromkeys(
|
||||||
tag.strip() for tag in (link.tags or '').split(',')
|
tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
|
||||||
))
|
))
|
||||||
|
|
||||||
snap.save()
|
snap.save()
|
||||||
|
|
Loading…
Reference in a new issue