From ec5afef1b3abcd9d4498b070df853b0ce133f703 Mon Sep 17 00:00:00 2001 From: nathom Date: Sat, 3 Jul 2021 15:49:55 -0700 Subject: [PATCH] Move some utils and constants to rip module Signed-off-by: nathom --- rip/constants.py | 29 ++++++++++++++++++++++++++ rip/core.py | 44 ++++++++++++++++++---------------------- rip/utils.py | 46 ++++++++++++++++++++++++++++++++++++++++++ streamrip/constants.py | 15 -------------- streamrip/utils.py | 40 ------------------------------------ 5 files changed, 95 insertions(+), 79 deletions(-) create mode 100644 rip/constants.py create mode 100644 rip/utils.py diff --git a/rip/constants.py b/rip/constants.py new file mode 100644 index 0000000..ee73712 --- /dev/null +++ b/rip/constants.py @@ -0,0 +1,29 @@ +import click +import re +import os +from pathlib import Path + +APPNAME = "streamrip" +APP_DIR = click.get_app_dir(APPNAME) +HOME = Path.home() + +LOG_DIR = CACHE_DIR = CONFIG_DIR = APP_DIR + +CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml") +DB_PATH = os.path.join(LOG_DIR, "downloads.db") +FAILED_DB_PATH = os.path.join(LOG_DIR, "failed_downloads.db") + +DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads") + +URL_REGEX = re.compile( + r"https?://(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:/" + r"(album|artist|track|playlist|video|label))|(?:\/[-\w]+?))+\/([-\w]+)" +) +SOUNDCLOUD_URL_REGEX = re.compile(r"https://soundcloud.com/[-\w:/]+") +SOUNDCLOUD_CLIENT_ID = re.compile("a3e059563d7fd3372b49b37f00a00bcf") +LASTFM_URL_REGEX = re.compile(r"https://www.last.fm/user/\w+/playlists/\w+") +QOBUZ_INTERPRETER_URL_REGEX = re.compile( + r"https?://www\.qobuz\.com/\w\w-\w\w/interpreter/[-\w]+/[-\w]+" +) +DEEZER_DYNAMIC_LINK_REGEX = re.compile(r"https://deezer\.page\.link/\w+") +YOUTUBE_URL_REGEX = re.compile(r"https://www\.youtube\.com/watch\?v=[-\w]+") diff --git a/rip/core.py b/rip/core.py index de5e601..f496503 100644 --- a/rip/core.py +++ b/rip/core.py @@ -5,6 +5,7 @@ import html import logging import os import re + from getpass import getpass from hashlib import md5 from string import Formatter @@ -32,16 +33,17 @@ from streamrip.clients import ( TidalClient, ) from .config import Config -from streamrip.constants import ( +from streamrip.constants import MEDIA_TYPES +from .constants import ( + URL_REGEX, + SOUNDCLOUD_URL_REGEX, + LASTFM_URL_REGEX, + QOBUZ_INTERPRETER_URL_REGEX, + YOUTUBE_URL_REGEX, + DEEZER_DYNAMIC_LINK_REGEX, CONFIG_PATH, DB_PATH, - DEEZER_DYNAMIC_LINK_REGEX, - LASTFM_URL_REGEX, - MEDIA_TYPES, - QOBUZ_INTERPRETER_URL_REGEX, - SOUNDCLOUD_URL_REGEX, - URL_REGEX, - YOUTUBE_URL_REGEX, + FAILED_DB_PATH, ) from . import db from streamrip.exceptions import ( @@ -51,11 +53,11 @@ from streamrip.exceptions import ( NoResultsFound, ParsingError, ) -from streamrip.utils import extract_deezer_dynamic_link, extract_interpreter_url +from .utils import extract_deezer_dynamic_link, extract_interpreter_url logger = logging.getLogger("streamrip") - +# ---------------- Constants ------------------ # Media = Union[ Type[Album], Type[Playlist], @@ -72,6 +74,7 @@ MEDIA_CLASS: Dict[str, Media] = { "label": Label, "video": Video, } +# ---------------------------------------------- # class MusicDL(list): @@ -86,13 +89,6 @@ class MusicDL(list): :param config: :type config: Optional[Config] """ - self.url_parse = re.compile(URL_REGEX) - self.soundcloud_url_parse = re.compile(SOUNDCLOUD_URL_REGEX) - self.lastfm_url_parse = re.compile(LASTFM_URL_REGEX) - self.interpreter_url_parse = re.compile(QOBUZ_INTERPRETER_URL_REGEX) - self.youtube_url_parse = re.compile(YOUTUBE_URL_REGEX) - self.deezer_dynamic_url_parse = re.compile(DEEZER_DYNAMIC_LINK_REGEX) - self.config: Config if config is None: self.config = Config(CONFIG_PATH) @@ -136,7 +132,7 @@ class MusicDL(list): # youtube is handled by youtube-dl, so much of the # processing is not necessary - youtube_urls = self.youtube_url_parse.findall(url) + youtube_urls = YOUTUBE_URL_REGEX.findall(url) if youtube_urls != []: self.extend(YoutubeVideo(u) for u in youtube_urls) @@ -333,7 +329,7 @@ class MusicDL(list): """ parsed: List[Tuple[str, str, str]] = [] - interpreter_urls = self.interpreter_url_parse.findall(url) + interpreter_urls = QOBUZ_INTERPRETER_URL_REGEX.findall(url) if interpreter_urls: click.secho( "Extracting IDs from Qobuz interpreter urls. Use urls " @@ -344,9 +340,9 @@ class MusicDL(list): ("qobuz", "artist", extract_interpreter_url(u)) for u in interpreter_urls ) - url = self.interpreter_url_parse.sub("", url) + url = QOBUZ_INTERPRETER_URL_REGEX.sub("", url) - dynamic_urls = self.deezer_dynamic_url_parse.findall(url) + dynamic_urls = DEEZER_DYNAMIC_LINK_REGEX.findall(url) if dynamic_urls: click.secho( "Extracting IDs from Deezer dynamic link. Use urls " @@ -358,8 +354,8 @@ class MusicDL(list): ("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls ) - parsed.extend(self.url_parse.findall(url)) # Qobuz, Tidal, Dezer - soundcloud_urls = self.soundcloud_url_parse.findall(url) + parsed.extend(URL_REGEX.findall(url)) # Qobuz, Tidal, Dezer + soundcloud_urls = URL_REGEX.findall(url) soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] parsed.extend( @@ -392,7 +388,7 @@ class MusicDL(list): # For testing: # https://www.last.fm/user/nathan3895/playlists/12058911 user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+") - lastfm_urls = self.lastfm_url_parse.findall(urls) + lastfm_urls = LASTFM_URL_REGEX.findall(urls) try: lastfm_source = self.config.session["lastfm"]["source"] lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"] diff --git a/rip/utils.py b/rip/utils.py new file mode 100644 index 0000000..ac91776 --- /dev/null +++ b/rip/utils.py @@ -0,0 +1,46 @@ +from streamrip.utils import gen_threadsafe_session +from streamrip.constants import AGENT +from typing import Tuple +import re + +interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'") + + +def extract_interpreter_url(url: str) -> str: + """Extract artist ID from a Qobuz interpreter url. + + :param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums" + :type url: str + :rtype: str + """ + session = gen_threadsafe_session({"User-Agent": AGENT}) + r = session.get(url) + match = interpreter_artist_regex.search(r.text) + if match: + return match.group(1) + + raise Exception( + "Unable to extract artist id from interpreter url. Use a " + "url that contains an artist id." + ) + + +deezer_id_link_regex = re.compile( + r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)" +) + + +def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]: + """Extract a deezer url that includes an ID from a deezer.page.link url. + + :param url: + :type url: str + :rtype: Tuple[str, str] + """ + session = gen_threadsafe_session({"User-Agent": AGENT}) + r = session.get(url) + match = deezer_id_link_regex.search(r.text) + if match: + return match.group(1), match.group(2) + + raise Exception("Unable to extract Deezer dynamic link.") diff --git a/streamrip/constants.py b/streamrip/constants.py index c97818e..1e417a9 100644 --- a/streamrip/constants.py +++ b/streamrip/constants.py @@ -1,22 +1,7 @@ """Constants that are kept in one place.""" -import os -from pathlib import Path - -import click import mutagen.id3 as id3 -APPNAME = "streamrip" - -CACHE_DIR = click.get_app_dir(APPNAME) -CONFIG_DIR = click.get_app_dir(APPNAME) -CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml") -LOG_DIR = click.get_app_dir(APPNAME) -DB_PATH = os.path.join(LOG_DIR, "downloads.db") - -HOME = Path.home() -DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads") - AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0" TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg" diff --git a/streamrip/utils.py b/streamrip/utils.py index 615f5e1..f5cb588 100644 --- a/streamrip/utils.py +++ b/streamrip/utils.py @@ -325,46 +325,6 @@ def decho(message, fg=None): interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'") -def extract_interpreter_url(url: str) -> str: - """Extract artist ID from a Qobuz interpreter url. - - :param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums" - :type url: str - :rtype: str - """ - session = gen_threadsafe_session({"User-Agent": AGENT}) - r = session.get(url) - match = interpreter_artist_regex.search(r.text) - if match: - return match.group(1) - - raise Exception( - "Unable to extract artist id from interpreter url. Use a " - "url that contains an artist id." - ) - - -deezer_id_link_regex = re.compile( - r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)" -) - - -def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]: - """Extract a deezer url that includes an ID from a deezer.page.link url. - - :param url: - :type url: str - :rtype: Tuple[str, str] - """ - session = gen_threadsafe_session({"User-Agent": AGENT}) - r = session.get(url) - match = deezer_id_link_regex.search(r.text) - if match: - return match.group(1), match.group(2) - - raise Exception("Unable to extract Deezer dynamic link.") - - def get_container(quality: int, source: str) -> str: """Get the file container given the quality.