feat: Add support for singlefile in docker

This commit is contained in:
Cristian 2020-08-03 13:19:47 -05:00
parent 5b6eb5e4ad
commit 06d0e9de6c
3 changed files with 35 additions and 16 deletions

View file

@ -10,8 +10,8 @@
FROM python:3.8-slim-buster
LABEL name="archivebox" \
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
description="All-in-one personal internet archiving container"
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
description="All-in-one personal internet archiving container"
ENV TZ=UTC \
LANGUAGE=en_US:en \
@ -22,28 +22,41 @@ ENV TZ=UTC \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
CODE_PATH=/app \
VENV_PATH=/venv \
DATA_PATH=/data
DATA_PATH=/data \
EXTRA_PATH=/extra
# First install CLI utils and base deps, then Chrome + Fonts
# First install CLI utils and base deps, then Chrome + Fonts + nodejs
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
dumb-init jq git wget curl youtube-dl ffmpeg \
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
dumb-init jq git wget curl youtube-dl ffmpeg \
&& curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
&& echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
&& curl -sL https://deb.nodesource.com/setup_14.x | bash - \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
google-chrome-stable \
fontconfig \
fonts-ipafont-gothic \
fonts-wqy-zenhei \
fonts-thai-tlwg \
fonts-kacst \
fonts-symbola \
fonts-noto \
fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
google-chrome-stable \
fontconfig \
fonts-ipafont-gothic \
fonts-wqy-zenhei \
fonts-thai-tlwg \
fonts-kacst \
fonts-symbola \
fonts-noto \
fonts-freefont-ttf \
nodejs \
unzip \
&& rm -rf /var/lib/apt/lists/*
# Download the SingleFile CLI source archive into $EXTRA_PATH, install its
# production npm dependencies, and symlink the launcher script to
# /bin/single-file so archivebox can find it.
# The zip archive is deleted in the same RUN that downloads it so it never
# persists in an image layer.
# NOTE(review): this fetches the moving `master` branch, so rebuilds are not
# reproducible -- consider pinning a release tag or commit archive.
# NOTE(review): npm output is discarded; a failing install still aborts the
# build via its exit code, but the reason will not appear in the build log.
WORKDIR "$EXTRA_PATH"
RUN wget -qO SingleFile.zip https://github.com/gildas-lormeau/SingleFile/archive/master.zip \
    && unzip -q SingleFile.zip \
    && rm SingleFile.zip \
    && npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
    && chmod +x SingleFile-master/cli/single-file \
    && ln -s "$EXTRA_PATH/SingleFile-master/cli/single-file" "/bin/single-file"
# Run everything from here on out as non-privileged user
RUN groupadd --system archivebox \

View file

@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors'
from pathlib import Path
from typing import Optional
import json
from ..index.schema import Link, ArchiveResult, ArchiveError
from ..system import run, chmod_file
from ..util import (
enforce_types,
chrome_args
)
from ..config import (
TIMEOUT,
@ -34,10 +36,13 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
out_dir = out_dir or link.link_dir
output = str(Path(out_dir).absolute() / "singlefile.html")
browser_args = chrome_args(TIMEOUT=0)
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
cmd = [
SINGLEFILE_BINARY,
'--browser-executable-path={}'.format(CHROME_BINARY),
'--browser-args="{}"'.format(json.dumps(browser_args[1:])),
link.url,
output
]

View file

@ -518,6 +518,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
@enforce_types
def printable_dependency_version(name: str, dependency: Dict) -> str:
version = None
if dependency['enabled']:
if dependency['is_valid']:
color, symbol, note, version = 'green', '', 'valid', ''