Move some utils and constants to rip module

Signed-off-by: nathom <nathanthomas707@gmail.com>
2024-09-19 19:28:46 -04:00 · 2021-07-03 15:49:55 -07:00 · 2021-07-03 15:49:55 -07:00 · ec5afef1b3
commit ec5afef1b3
parent e2483ca90c
5 changed files with 95 additions and 79 deletions
--- a/rip/constants.py
+++ b/rip/constants.py
@ -0,0 +1,29 @@
+import click
+import re
+import os
+from pathlib import Path
+
+APPNAME = "streamrip"
+APP_DIR = click.get_app_dir(APPNAME)
+HOME = Path.home()
+
+LOG_DIR = CACHE_DIR = CONFIG_DIR = APP_DIR
+
+CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")
+DB_PATH = os.path.join(LOG_DIR, "downloads.db")
+FAILED_DB_PATH = os.path.join(LOG_DIR, "failed_downloads.db")
+
+DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
+
+URL_REGEX = re.compile(
+    r"https?://(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:/"
+    r"(album|artist|track|playlist|video|label))|(?:\/[-\w]+?))+\/([-\w]+)"
+)
+SOUNDCLOUD_URL_REGEX = re.compile(r"https://soundcloud\.com/[-\w:/]+")
+SOUNDCLOUD_CLIENT_ID = "a3e059563d7fd3372b49b37f00a00bcf"  # literal ID string, not a pattern
+LASTFM_URL_REGEX = re.compile(r"https://www\.last\.fm/user/\w+/playlists/\w+")
+QOBUZ_INTERPRETER_URL_REGEX = re.compile(
+    r"https?://www\.qobuz\.com/\w\w-\w\w/interpreter/[-\w]+/[-\w]+"
+)
+DEEZER_DYNAMIC_LINK_REGEX = re.compile(r"https://deezer\.page\.link/\w+")
+YOUTUBE_URL_REGEX = re.compile(r"https://www\.youtube\.com/watch\?v=[-\w]+")
--- a/rip/core.py
+++ b/rip/core.py
@ -5,6 +5,7 @@ import html
 import logging
 import os
 import re
+
 from getpass import getpass
 from hashlib import md5
 from string import Formatter
@ -32,16 +33,17 @@ from streamrip.clients import (
    TidalClient,
 )
 from .config import Config
-from streamrip.constants import (
+from streamrip.constants import MEDIA_TYPES
+from .constants import (
+    URL_REGEX,
+    SOUNDCLOUD_URL_REGEX,
+    LASTFM_URL_REGEX,
+    QOBUZ_INTERPRETER_URL_REGEX,
+    YOUTUBE_URL_REGEX,
+    DEEZER_DYNAMIC_LINK_REGEX,
    CONFIG_PATH,
    DB_PATH,
-    DEEZER_DYNAMIC_LINK_REGEX,
-    LASTFM_URL_REGEX,
-    MEDIA_TYPES,
-    QOBUZ_INTERPRETER_URL_REGEX,
-    SOUNDCLOUD_URL_REGEX,
-    URL_REGEX,
-    YOUTUBE_URL_REGEX,
+    FAILED_DB_PATH,
 )
 from . import db
 from streamrip.exceptions import (
@ -51,11 +53,11 @@ from streamrip.exceptions import (
    NoResultsFound,
    ParsingError,
 )
-from streamrip.utils import extract_deezer_dynamic_link, extract_interpreter_url
+from .utils import extract_deezer_dynamic_link, extract_interpreter_url

 logger = logging.getLogger("streamrip")

-
+# ---------------- Constants ------------------ #
 Media = Union[
    Type[Album],
    Type[Playlist],
@ -72,6 +74,7 @@ MEDIA_CLASS: Dict[str, Media] = {
    "label": Label,
    "video": Video,
 }
+# ---------------------------------------------- #


 class MusicDL(list):
@ -86,13 +89,6 @@ class MusicDL(list):
        :param config:
        :type config: Optional[Config]
        """
-        self.url_parse = re.compile(URL_REGEX)
-        self.soundcloud_url_parse = re.compile(SOUNDCLOUD_URL_REGEX)
-        self.lastfm_url_parse = re.compile(LASTFM_URL_REGEX)
-        self.interpreter_url_parse = re.compile(QOBUZ_INTERPRETER_URL_REGEX)
-        self.youtube_url_parse = re.compile(YOUTUBE_URL_REGEX)
-        self.deezer_dynamic_url_parse = re.compile(DEEZER_DYNAMIC_LINK_REGEX)
-
        self.config: Config
        if config is None:
            self.config = Config(CONFIG_PATH)
@ -136,7 +132,7 @@ class MusicDL(list):

        # youtube is handled by youtube-dl, so much of the
        # processing is not necessary
-        youtube_urls = self.youtube_url_parse.findall(url)
+        youtube_urls = YOUTUBE_URL_REGEX.findall(url)
        if youtube_urls != []:
            self.extend(YoutubeVideo(u) for u in youtube_urls)

@ -333,7 +329,7 @@ class MusicDL(list):
        """
        parsed: List[Tuple[str, str, str]] = []

-        interpreter_urls = self.interpreter_url_parse.findall(url)
+        interpreter_urls = QOBUZ_INTERPRETER_URL_REGEX.findall(url)
        if interpreter_urls:
            click.secho(
                "Extracting IDs from Qobuz interpreter urls. Use urls "
@ -344,9 +340,9 @@ class MusicDL(list):
                ("qobuz", "artist", extract_interpreter_url(u))
                for u in interpreter_urls
            )
-            url = self.interpreter_url_parse.sub("", url)
+            url = QOBUZ_INTERPRETER_URL_REGEX.sub("", url)

-        dynamic_urls = self.deezer_dynamic_url_parse.findall(url)
+        dynamic_urls = DEEZER_DYNAMIC_LINK_REGEX.findall(url)
        if dynamic_urls:
            click.secho(
                "Extracting IDs from Deezer dynamic link. Use urls "
@ -358,8 +354,8 @@ class MusicDL(list):
                ("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls
            )

-        parsed.extend(self.url_parse.findall(url))  # Qobuz, Tidal, Dezer
-        soundcloud_urls = self.soundcloud_url_parse.findall(url)
+        parsed.extend(URL_REGEX.findall(url))  # Qobuz, Tidal, Deezer
+        soundcloud_urls = SOUNDCLOUD_URL_REGEX.findall(url)
        soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls]

        parsed.extend(
@ -392,7 +388,7 @@ class MusicDL(list):
        # For testing:
        # https://www.last.fm/user/nathan3895/playlists/12058911
        user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+")
-        lastfm_urls = self.lastfm_url_parse.findall(urls)
+        lastfm_urls = LASTFM_URL_REGEX.findall(urls)
        try:
            lastfm_source = self.config.session["lastfm"]["source"]
            lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"]
--- a/rip/utils.py
+++ b/rip/utils.py
@ -0,0 +1,46 @@
+from streamrip.utils import gen_threadsafe_session
+from streamrip.constants import AGENT
+from typing import Tuple
+import re
+
+interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")
+
+
+def extract_interpreter_url(url: str) -> str:
+    """Extract artist ID from a Qobuz interpreter url.
+
+    :param url: Url of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"; it is fetched and the response text is searched for the ID
+    :type url: str
+    :rtype: str
+    """
+    session = gen_threadsafe_session({"User-Agent": AGENT})
+    r = session.get(url)
+    match = interpreter_artist_regex.search(r.text)
+    if match:
+        return match.group(1)
+
+    raise Exception(
+        "Unable to extract artist id from interpreter url. Use a "
+        "url that contains an artist id."
+    )
+
+
+deezer_id_link_regex = re.compile(
+    r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
+)
+
+
+def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
+    """Extract a deezer url that includes an ID from a deezer.page.link url.
+
+    :param url: The dynamic link; it is fetched and the response text is searched for a www.deezer.com album/artist/playlist/track link
+    :type url: str
+    :rtype: Tuple[str, str]
+    """
+    session = gen_threadsafe_session({"User-Agent": AGENT})
+    r = session.get(url)
+    match = deezer_id_link_regex.search(r.text)
+    if match:
+        return match.group(1), match.group(2)
+
+    raise Exception("Unable to extract Deezer dynamic link.")
--- a/streamrip/constants.py
+++ b/streamrip/constants.py
@ -1,22 +1,7 @@
 """Constants that are kept in one place."""

-import os
-from pathlib import Path
-
-import click
 import mutagen.id3 as id3

-APPNAME = "streamrip"
-
-CACHE_DIR = click.get_app_dir(APPNAME)
-CONFIG_DIR = click.get_app_dir(APPNAME)
-CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")
-LOG_DIR = click.get_app_dir(APPNAME)
-DB_PATH = os.path.join(LOG_DIR, "downloads.db")
-
-HOME = Path.home()
-DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
-
 AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"

 TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"
--- a/streamrip/utils.py
+++ b/streamrip/utils.py
@ -325,46 +325,6 @@ def decho(message, fg=None):
 interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")


-def extract_interpreter_url(url: str) -> str:
-    """Extract artist ID from a Qobuz interpreter url.
-
-    :param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
-    :type url: str
-    :rtype: str
-    """
-    session = gen_threadsafe_session({"User-Agent": AGENT})
-    r = session.get(url)
-    match = interpreter_artist_regex.search(r.text)
-    if match:
-        return match.group(1)
-
-    raise Exception(
-        "Unable to extract artist id from interpreter url. Use a "
-        "url that contains an artist id."
-    )
-
-
-deezer_id_link_regex = re.compile(
-    r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
-)
-
-
-def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
-    """Extract a deezer url that includes an ID from a deezer.page.link url.
-
-    :param url:
-    :type url: str
-    :rtype: Tuple[str, str]
-    """
-    session = gen_threadsafe_session({"User-Agent": AGENT})
-    r = session.get(url)
-    match = deezer_id_link_regex.search(r.text)
-    if match:
-        return match.group(1), match.group(2)
-
-    raise Exception("Unable to extract Deezer dynamic link.")
-
-
 def get_container(quality: int, source: str) -> str:
    """Get the file container given the quality.