Move some utils and constants to rip module

Signed-off-by: nathom <nathanthomas707@gmail.com>
This commit is contained in:
nathom 2021-07-03 15:49:55 -07:00
parent e2483ca90c
commit ec5afef1b3
5 changed files with 95 additions and 79 deletions

29
rip/constants.py Normal file
View file

@ -0,0 +1,29 @@
import click
import re
import os
from pathlib import Path
APPNAME = "streamrip"
APP_DIR = click.get_app_dir(APPNAME)
HOME = Path.home()
LOG_DIR = CACHE_DIR = CONFIG_DIR = APP_DIR
CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")
DB_PATH = os.path.join(LOG_DIR, "downloads.db")
FAILED_DB_PATH = os.path.join(LOG_DIR, "failed_downloads.db")
DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
URL_REGEX = re.compile(
r"https?://(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:/"
r"(album|artist|track|playlist|video|label))|(?:\/[-\w]+?))+\/([-\w]+)"
)
SOUNDCLOUD_URL_REGEX = re.compile(r"https://soundcloud.com/[-\w:/]+")
SOUNDCLOUD_CLIENT_ID = re.compile("a3e059563d7fd3372b49b37f00a00bcf")
LASTFM_URL_REGEX = re.compile(r"https://www.last.fm/user/\w+/playlists/\w+")
QOBUZ_INTERPRETER_URL_REGEX = re.compile(
r"https?://www\.qobuz\.com/\w\w-\w\w/interpreter/[-\w]+/[-\w]+"
)
DEEZER_DYNAMIC_LINK_REGEX = re.compile(r"https://deezer\.page\.link/\w+")
YOUTUBE_URL_REGEX = re.compile(r"https://www\.youtube\.com/watch\?v=[-\w]+")

View file

@ -5,6 +5,7 @@ import html
import logging
import os
import re
from getpass import getpass
from hashlib import md5
from string import Formatter
@ -32,16 +33,17 @@ from streamrip.clients import (
TidalClient,
)
from .config import Config
from streamrip.constants import (
from streamrip.constants import MEDIA_TYPES
from .constants import (
URL_REGEX,
SOUNDCLOUD_URL_REGEX,
LASTFM_URL_REGEX,
QOBUZ_INTERPRETER_URL_REGEX,
YOUTUBE_URL_REGEX,
DEEZER_DYNAMIC_LINK_REGEX,
CONFIG_PATH,
DB_PATH,
DEEZER_DYNAMIC_LINK_REGEX,
LASTFM_URL_REGEX,
MEDIA_TYPES,
QOBUZ_INTERPRETER_URL_REGEX,
SOUNDCLOUD_URL_REGEX,
URL_REGEX,
YOUTUBE_URL_REGEX,
FAILED_DB_PATH,
)
from . import db
from streamrip.exceptions import (
@ -51,11 +53,11 @@ from streamrip.exceptions import (
NoResultsFound,
ParsingError,
)
from streamrip.utils import extract_deezer_dynamic_link, extract_interpreter_url
from .utils import extract_deezer_dynamic_link, extract_interpreter_url
logger = logging.getLogger("streamrip")
# ---------------- Constants ------------------ #
Media = Union[
Type[Album],
Type[Playlist],
@ -72,6 +74,7 @@ MEDIA_CLASS: Dict[str, Media] = {
"label": Label,
"video": Video,
}
# ---------------------------------------------- #
class MusicDL(list):
@ -86,13 +89,6 @@ class MusicDL(list):
:param config:
:type config: Optional[Config]
"""
self.url_parse = re.compile(URL_REGEX)
self.soundcloud_url_parse = re.compile(SOUNDCLOUD_URL_REGEX)
self.lastfm_url_parse = re.compile(LASTFM_URL_REGEX)
self.interpreter_url_parse = re.compile(QOBUZ_INTERPRETER_URL_REGEX)
self.youtube_url_parse = re.compile(YOUTUBE_URL_REGEX)
self.deezer_dynamic_url_parse = re.compile(DEEZER_DYNAMIC_LINK_REGEX)
self.config: Config
if config is None:
self.config = Config(CONFIG_PATH)
@ -136,7 +132,7 @@ class MusicDL(list):
# youtube is handled by youtube-dl, so much of the
# processing is not necessary
youtube_urls = self.youtube_url_parse.findall(url)
youtube_urls = YOUTUBE_URL_REGEX.findall(url)
if youtube_urls != []:
self.extend(YoutubeVideo(u) for u in youtube_urls)
@ -333,7 +329,7 @@ class MusicDL(list):
"""
parsed: List[Tuple[str, str, str]] = []
interpreter_urls = self.interpreter_url_parse.findall(url)
interpreter_urls = QOBUZ_INTERPRETER_URL_REGEX.findall(url)
if interpreter_urls:
click.secho(
"Extracting IDs from Qobuz interpreter urls. Use urls "
@ -344,9 +340,9 @@ class MusicDL(list):
("qobuz", "artist", extract_interpreter_url(u))
for u in interpreter_urls
)
url = self.interpreter_url_parse.sub("", url)
url = QOBUZ_INTERPRETER_URL_REGEX.sub("", url)
dynamic_urls = self.deezer_dynamic_url_parse.findall(url)
dynamic_urls = DEEZER_DYNAMIC_LINK_REGEX.findall(url)
if dynamic_urls:
click.secho(
"Extracting IDs from Deezer dynamic link. Use urls "
@ -358,8 +354,8 @@ class MusicDL(list):
("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls
)
parsed.extend(self.url_parse.findall(url)) # Qobuz, Tidal, Dezer
soundcloud_urls = self.soundcloud_url_parse.findall(url)
parsed.extend(URL_REGEX.findall(url)) # Qobuz, Tidal, Dezer
soundcloud_urls = URL_REGEX.findall(url)
soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls]
parsed.extend(
@ -392,7 +388,7 @@ class MusicDL(list):
# For testing:
# https://www.last.fm/user/nathan3895/playlists/12058911
user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+")
lastfm_urls = self.lastfm_url_parse.findall(urls)
lastfm_urls = LASTFM_URL_REGEX.findall(urls)
try:
lastfm_source = self.config.session["lastfm"]["source"]
lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"]

46
rip/utils.py Normal file
View file

@ -0,0 +1,46 @@
from streamrip.utils import gen_threadsafe_session
from streamrip.constants import AGENT
from typing import Tuple
import re
interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")
def extract_interpreter_url(url: str) -> str:
"""Extract artist ID from a Qobuz interpreter url.
:param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
:type url: str
:rtype: str
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = interpreter_artist_regex.search(r.text)
if match:
return match.group(1)
raise Exception(
"Unable to extract artist id from interpreter url. Use a "
"url that contains an artist id."
)
deezer_id_link_regex = re.compile(
r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
)
def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
"""Extract a deezer url that includes an ID from a deezer.page.link url.
:param url:
:type url: str
:rtype: Tuple[str, str]
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = deezer_id_link_regex.search(r.text)
if match:
return match.group(1), match.group(2)
raise Exception("Unable to extract Deezer dynamic link.")

View file

@ -1,22 +1,7 @@
"""Constants that are kept in one place."""
import os
from pathlib import Path
import click
import mutagen.id3 as id3
APPNAME = "streamrip"
CACHE_DIR = click.get_app_dir(APPNAME)
CONFIG_DIR = click.get_app_dir(APPNAME)
CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")
LOG_DIR = click.get_app_dir(APPNAME)
DB_PATH = os.path.join(LOG_DIR, "downloads.db")
HOME = Path.home()
DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"
TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"

View file

@ -325,46 +325,6 @@ def decho(message, fg=None):
interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")
def extract_interpreter_url(url: str) -> str:
"""Extract artist ID from a Qobuz interpreter url.
:param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
:type url: str
:rtype: str
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = interpreter_artist_regex.search(r.text)
if match:
return match.group(1)
raise Exception(
"Unable to extract artist id from interpreter url. Use a "
"url that contains an artist id."
)
deezer_id_link_regex = re.compile(
r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
)
def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
"""Extract a deezer url that includes an ID from a deezer.page.link url.
:param url:
:type url: str
:rtype: Tuple[str, str]
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = deezer_id_link_regex.search(r.text)
if match:
return match.group(1), match.group(2)
raise Exception("Unable to extract Deezer dynamic link.")
def get_container(quality: int, source: str) -> str:
"""Get the file container given the quality.