Merge pull request #104 from nodh/fix_issue_103

Enable importing files from wallabag
This commit is contained in:
Nick Sweeting 2018-10-08 12:52:16 -04:00 committed by GitHub
commit b882db5828
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,3 +1,5 @@
# coding: utf-8
""" """
Everything related to parsing links from bookmark services. Everything related to parsing links from bookmark services.
@ -84,8 +86,7 @@ def parse_pocket_export(html_file):
yield info yield info
def parse_json_export(json_file): def parse_json_export(json_file):
"""Parse JSON-format bookmarks export files (produced by pinboard.in/export/)""" """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)"""
json_file.seek(0) json_file.seek(0)
json_content = json.load(json_file) json_content = json.load(json_file)
for line in json_content: for line in json_content:
@ -97,15 +98,25 @@ def parse_json_export(json_file):
timestamp = str(erg['timestamp']/10000000) # chrome/ff histories use a very precise timestamp timestamp = str(erg['timestamp']/10000000) # chrome/ff histories use a very precise timestamp
elif erg.get('time'): elif erg.get('time'):
timestamp = str(datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ').timestamp()) timestamp = str(datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ').timestamp())
elif erg.get('created_at'):
timestamp = str(datetime.strptime(erg['created_at'], '%Y-%m-%dT%H:%M:%S%z').timestamp())
else: else:
timestamp = str(datetime.now().timestamp()) timestamp = str(datetime.now().timestamp())
if erg.get('href'):
url = erg['href']
else:
url = erg['url']
if erg.get('description'):
title = (erg.get('description') or '').replace(' — Readability', '')
else:
title = erg['title']
info = { info = {
'url': erg['href'], 'url': url,
'domain': domain(erg['href']), 'domain': domain(url),
'base_url': base_url(erg['href']), 'base_url': base_url(url),
'timestamp': timestamp, 'timestamp': timestamp,
'tags': erg.get('tags') or '', 'tags': erg.get('tags') or '',
'title': (erg.get('description') or '').replace(' — Readability', ''), 'title': title,
'sources': [json_file.name], 'sources': [json_file.name],
} }
info['type'] = get_link_type(info) info['type'] = get_link_type(info)