Move some utils and constants to rip module

Signed-off-by: nathom <nathanthomas707@gmail.com>
This commit is contained in:
nathom 2021-07-03 15:49:55 -07:00
parent e2483ca90c
commit ec5afef1b3
5 changed files with 95 additions and 79 deletions

29
rip/constants.py Normal file
View file

@ -0,0 +1,29 @@
import click
import re
import os
from pathlib import Path
# Name handed to click to locate the per-user application directory.
APPNAME = "streamrip"

HOME = Path.home()
APP_DIR = click.get_app_dir(APPNAME)

# Config, cache and log files all share the single application directory.
CONFIG_DIR = APP_DIR
CACHE_DIR = APP_DIR
LOG_DIR = APP_DIR

CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")

# Both database files are kept inside LOG_DIR.
FAILED_DB_PATH = os.path.join(LOG_DIR, "failed_downloads.db")
DB_PATH = os.path.join(LOG_DIR, "downloads.db")

# Downloads folder located directly under the user's home directory.
DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
# Matches Qobuz, Tidal and Deezer urls.
# Capture groups: (source, media type, item id).
URL_REGEX = re.compile(
    r"https?://(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:/"
    r"(album|artist|track|playlist|video|label))|(?:\/[-\w]+?))+\/([-\w]+)"
)

# The host-name dots are escaped so that they only match a literal ".",
# consistent with the other patterns in this module.
SOUNDCLOUD_URL_REGEX = re.compile(r"https://soundcloud\.com/[-\w:/]+")

# NOTE(review): this looks like a plain client id string rather than a
# pattern, yet it is wrapped in re.compile. The type is left unchanged here so
# existing importers keep working -- confirm against consumers and consider
# storing it as a str.
SOUNDCLOUD_CLIENT_ID = re.compile("a3e059563d7fd3372b49b37f00a00bcf")

LASTFM_URL_REGEX = re.compile(r"https://www\.last\.fm/user/\w+/playlists/\w+")

# Qobuz "interpreter" urls contain a locale segment (two word characters, a
# dash, two word characters) followed by two path segments.
QOBUZ_INTERPRETER_URL_REGEX = re.compile(
    r"https?://www\.qobuz\.com/\w\w-\w\w/interpreter/[-\w]+/[-\w]+"
)

DEEZER_DYNAMIC_LINK_REGEX = re.compile(r"https://deezer\.page\.link/\w+")

YOUTUBE_URL_REGEX = re.compile(r"https://www\.youtube\.com/watch\?v=[-\w]+")

View file

@ -5,6 +5,7 @@ import html
import logging import logging
import os import os
import re import re
from getpass import getpass from getpass import getpass
from hashlib import md5 from hashlib import md5
from string import Formatter from string import Formatter
@ -32,16 +33,17 @@ from streamrip.clients import (
TidalClient, TidalClient,
) )
from .config import Config from .config import Config
from streamrip.constants import ( from streamrip.constants import MEDIA_TYPES
from .constants import (
URL_REGEX,
SOUNDCLOUD_URL_REGEX,
LASTFM_URL_REGEX,
QOBUZ_INTERPRETER_URL_REGEX,
YOUTUBE_URL_REGEX,
DEEZER_DYNAMIC_LINK_REGEX,
CONFIG_PATH, CONFIG_PATH,
DB_PATH, DB_PATH,
DEEZER_DYNAMIC_LINK_REGEX, FAILED_DB_PATH,
LASTFM_URL_REGEX,
MEDIA_TYPES,
QOBUZ_INTERPRETER_URL_REGEX,
SOUNDCLOUD_URL_REGEX,
URL_REGEX,
YOUTUBE_URL_REGEX,
) )
from . import db from . import db
from streamrip.exceptions import ( from streamrip.exceptions import (
@ -51,11 +53,11 @@ from streamrip.exceptions import (
NoResultsFound, NoResultsFound,
ParsingError, ParsingError,
) )
from streamrip.utils import extract_deezer_dynamic_link, extract_interpreter_url from .utils import extract_deezer_dynamic_link, extract_interpreter_url
logger = logging.getLogger("streamrip") logger = logging.getLogger("streamrip")
# ---------------- Constants ------------------ #
Media = Union[ Media = Union[
Type[Album], Type[Album],
Type[Playlist], Type[Playlist],
@ -72,6 +74,7 @@ MEDIA_CLASS: Dict[str, Media] = {
"label": Label, "label": Label,
"video": Video, "video": Video,
} }
# ---------------------------------------------- #
class MusicDL(list): class MusicDL(list):
@ -86,13 +89,6 @@ class MusicDL(list):
:param config: :param config:
:type config: Optional[Config] :type config: Optional[Config]
""" """
self.url_parse = re.compile(URL_REGEX)
self.soundcloud_url_parse = re.compile(SOUNDCLOUD_URL_REGEX)
self.lastfm_url_parse = re.compile(LASTFM_URL_REGEX)
self.interpreter_url_parse = re.compile(QOBUZ_INTERPRETER_URL_REGEX)
self.youtube_url_parse = re.compile(YOUTUBE_URL_REGEX)
self.deezer_dynamic_url_parse = re.compile(DEEZER_DYNAMIC_LINK_REGEX)
self.config: Config self.config: Config
if config is None: if config is None:
self.config = Config(CONFIG_PATH) self.config = Config(CONFIG_PATH)
@ -136,7 +132,7 @@ class MusicDL(list):
# youtube is handled by youtube-dl, so much of the # youtube is handled by youtube-dl, so much of the
# processing is not necessary # processing is not necessary
youtube_urls = self.youtube_url_parse.findall(url) youtube_urls = YOUTUBE_URL_REGEX.findall(url)
if youtube_urls != []: if youtube_urls != []:
self.extend(YoutubeVideo(u) for u in youtube_urls) self.extend(YoutubeVideo(u) for u in youtube_urls)
@ -333,7 +329,7 @@ class MusicDL(list):
""" """
parsed: List[Tuple[str, str, str]] = [] parsed: List[Tuple[str, str, str]] = []
interpreter_urls = self.interpreter_url_parse.findall(url) interpreter_urls = QOBUZ_INTERPRETER_URL_REGEX.findall(url)
if interpreter_urls: if interpreter_urls:
click.secho( click.secho(
"Extracting IDs from Qobuz interpreter urls. Use urls " "Extracting IDs from Qobuz interpreter urls. Use urls "
@ -344,9 +340,9 @@ class MusicDL(list):
("qobuz", "artist", extract_interpreter_url(u)) ("qobuz", "artist", extract_interpreter_url(u))
for u in interpreter_urls for u in interpreter_urls
) )
url = self.interpreter_url_parse.sub("", url) url = QOBUZ_INTERPRETER_URL_REGEX.sub("", url)
dynamic_urls = self.deezer_dynamic_url_parse.findall(url) dynamic_urls = DEEZER_DYNAMIC_LINK_REGEX.findall(url)
if dynamic_urls: if dynamic_urls:
click.secho( click.secho(
"Extracting IDs from Deezer dynamic link. Use urls " "Extracting IDs from Deezer dynamic link. Use urls "
@ -358,8 +354,8 @@ class MusicDL(list):
("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls ("deezer", *extract_deezer_dynamic_link(url)) for url in dynamic_urls
) )
parsed.extend(self.url_parse.findall(url)) # Qobuz, Tidal, Dezer parsed.extend(URL_REGEX.findall(url)) # Qobuz, Tidal, Dezer
soundcloud_urls = self.soundcloud_url_parse.findall(url) soundcloud_urls = URL_REGEX.findall(url)
soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls] soundcloud_items = [self.clients["soundcloud"].get(u) for u in soundcloud_urls]
parsed.extend( parsed.extend(
@ -392,7 +388,7 @@ class MusicDL(list):
# For testing: # For testing:
# https://www.last.fm/user/nathan3895/playlists/12058911 # https://www.last.fm/user/nathan3895/playlists/12058911
user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+") user_regex = re.compile(r"https://www\.last\.fm/user/([^/]+)/playlists/\d+")
lastfm_urls = self.lastfm_url_parse.findall(urls) lastfm_urls = LASTFM_URL_REGEX.findall(urls)
try: try:
lastfm_source = self.config.session["lastfm"]["source"] lastfm_source = self.config.session["lastfm"]["source"]
lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"] lastfm_fallback_source = self.config.session["lastfm"]["fallback_source"]

46
rip/utils.py Normal file
View file

@ -0,0 +1,46 @@
from streamrip.utils import gen_threadsafe_session
from streamrip.constants import AGENT
from typing import Tuple
import re
# Captures the quoted argument of a getSimilarArtist(...) call in the page.
interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")


def extract_interpreter_url(url: str) -> str:
    """Fetch a Qobuz interpreter page and return the artist ID found in it.

    :param url: A url of the form
        "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
    :type url: str
    :raises Exception: if no artist ID can be found in the page text
    :rtype: str
    """
    page = gen_threadsafe_session({"User-Agent": AGENT}).get(url)
    found = interpreter_artist_regex.search(page.text)
    if found is None:
        raise Exception(
            "Unable to extract artist id from interpreter url. Use a "
            "url that contains an artist id."
        )

    return found.group(1)
# Captures (media type, numeric id) from a full deezer.com url.
deezer_id_link_regex = re.compile(
    r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
)


def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
    """Resolve a deezer.page.link url into a media type and an item ID.

    The url is fetched and the response text is searched for a deezer.com
    url that contains an ID.

    :param url: The dynamic link to fetch
    :type url: str
    :raises Exception: if no matching deezer.com url is found in the response
    :rtype: Tuple[str, str]
    """
    page = gen_threadsafe_session({"User-Agent": AGENT}).get(url)
    found = deezer_id_link_regex.search(page.text)
    if found is None:
        raise Exception("Unable to extract Deezer dynamic link.")

    media_type = found.group(1)
    item_id = found.group(2)
    return media_type, item_id

View file

@ -1,22 +1,7 @@
"""Constants that are kept in one place.""" """Constants that are kept in one place."""
import os
from pathlib import Path
import click
import mutagen.id3 as id3 import mutagen.id3 as id3
APPNAME = "streamrip"
CACHE_DIR = click.get_app_dir(APPNAME)
CONFIG_DIR = click.get_app_dir(APPNAME)
CONFIG_PATH = os.path.join(CONFIG_DIR, "config.toml")
LOG_DIR = click.get_app_dir(APPNAME)
DB_PATH = os.path.join(LOG_DIR, "downloads.db")
HOME = Path.home()
DOWNLOADS_DIR = os.path.join(HOME, "StreamripDownloads")
AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0" AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0"
TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg" TIDAL_COVER_URL = "https://resources.tidal.com/images/{uuid}/{width}x{height}.jpg"

View file

@ -325,46 +325,6 @@ def decho(message, fg=None):
interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'") interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")
def extract_interpreter_url(url: str) -> str:
"""Extract artist ID from a Qobuz interpreter url.
:param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
:type url: str
:rtype: str
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = interpreter_artist_regex.search(r.text)
if match:
return match.group(1)
raise Exception(
"Unable to extract artist id from interpreter url. Use a "
"url that contains an artist id."
)
deezer_id_link_regex = re.compile(
r"https://www\.deezer\.com/[a-z]{2}/(album|artist|playlist|track)/(\d+)"
)
def extract_deezer_dynamic_link(url: str) -> Tuple[str, str]:
"""Extract a deezer url that includes an ID from a deezer.page.link url.
:param url:
:type url: str
:rtype: Tuple[str, str]
"""
session = gen_threadsafe_session({"User-Agent": AGENT})
r = session.get(url)
match = deezer_id_link_regex.search(r.text)
if match:
return match.group(1), match.group(2)
raise Exception("Unable to extract Deezer dynamic link.")
def get_container(quality: int, source: str) -> str: def get_container(quality: int, source: str) -> str:
"""Get the file container given the quality. """Get the file container given the quality.