From 527b52cae276d2bcf60f5ddc9252a1308137df76 Mon Sep 17 00:00:00 2001 From: Nathan Thomas Date: Sat, 11 May 2024 23:17:41 -0700 Subject: [PATCH] More robust error handling (#678) * Handle no copyright case for tidal * Add default values for get calls * Fix LSP errors * Misc fixes --- pyproject.toml | 16 +++--- streamrip/client/downloadable.py | 82 +++++++++++++++++++++---------- streamrip/client/qobuz.py | 14 +++--- streamrip/media/album.py | 7 ++- streamrip/media/artist.py | 9 +++- streamrip/media/artwork.py | 6 ++- streamrip/media/label.py | 19 +++++-- streamrip/media/playlist.py | 6 ++- streamrip/media/track.py | 47 ++++++++++++++++-- streamrip/metadata/album.py | 39 ++++++++------- streamrip/metadata/playlist.py | 2 +- tests/silence.flac | Bin 28561 -> 29238 bytes 12 files changed, 175 insertions(+), 72 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c70a4ee..63e957a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,12 +10,10 @@ repository = "https://github.com/nathom/streamrip" include = ["src/config.toml"] keywords = ["hi-res", "free", "music", "download"] classifiers = [ - "License :: OSI Approved :: GNU General Public License (GPL)", - "Operating System :: OS Independent", -] -packages = [ - { include = "streamrip" } + "License :: OSI Approved :: GNU General Public License (GPL)", + "Operating System :: OS Independent", ] +packages = [{ include = "streamrip" }] [tool.poetry.scripts] rip = "streamrip.rip:rip" @@ -25,9 +23,9 @@ python = ">=3.10 <4.0" mutagen = "^1.45.1" tomlkit = "^0.7.2" pathvalidate = "^2.4.1" -simple-term-menu = {version = "^1.2.1", platform = 'darwin|linux'} -pick = {version = "^2", platform = 'win32|cygwin'} -windows-curses = {version = "^2.2.0", platform = 'win32|cygwin'} +simple-term-menu = { version = "^1.2.1", platform = 'darwin|linux' } +pick = { version = "^2", platform = 'win32|cygwin' } +windows-curses = { version = "^2.2.0", platform = 'win32|cygwin' } Pillow = ">=9,<11" deezer-py = "1.3.6" pycryptodomex = "^3.10.1" @@ -58,7 +56,7 @@ pytest = "^7.4" [tool.pytest.ini_options] minversion = "6.0" addopts = "-ra -q" -testpaths = [ "tests" ] +testpaths = ["tests"] log_level = "DEBUG" asyncio_mode = 'auto' log_cli = true diff --git a/streamrip/client/downloadable.py b/streamrip/client/downloadable.py index 2685a1b..9eba477 100644 --- a/streamrip/client/downloadable.py +++ b/streamrip/client/downloadable.py @@ -37,13 +37,38 @@ def generate_temp_path(url: str): ) +async def fast_async_download(path, url, headers, callback): + """Synchronous download with yield for every 1MB read. + + Using aiofiles/aiohttp resulted in a yield to the event loop for every 1KB, + which made file downloads CPU-bound. This resulted in a ~10MB max total download + speed. This fixes the issue by only yielding to the event loop for every 1MB read. + """ + chunk_size: int = 2**17 # 131 KB + counter = 0 + yield_every = 8 # 1 MB + with open(path, "wb") as file: # noqa: ASYNC101 + with requests.get( # noqa: ASYNC100 + url, + headers=headers, + allow_redirects=True, + stream=True, + ) as resp: + for chunk in resp.iter_content(chunk_size=chunk_size): + file.write(chunk) + callback(len(chunk)) + if counter % yield_every == 0: + await asyncio.sleep(0) + counter += 1 + + @dataclass(slots=True) class Downloadable(ABC): session: aiohttp.ClientSession url: str extension: str - chunk_size = 2**17 - _size: Optional[int] = None + source: str = "Unknown" + _size_base: Optional[int] = None async def download(self, path: str, callback: Callable[[int], Any]): await self._download(path, callback) @@ -58,6 +83,14 @@ class Downloadable(ABC): self._size = int(content_length) return self._size + @property + def _size(self): + return self._size_base + + @_size.setter + def _size(self, v): + self._size_base = v + @abstractmethod async def _download(self, path: str, callback: Callable[[int], None]): raise NotImplementedError @@ -66,35 +99,31 @@ class Downloadable(ABC): class BasicDownloadable(Downloadable): """Just downloads a URL.""" - def __init__(self, session: aiohttp.ClientSession, url: str, extension: str): + def __init__( + self, + session: aiohttp.ClientSession, + url: str, + extension: str, + source: str | None = None, + ): self.session = session self.url = url self.extension = extension self._size = None + self.source: str = source or "Unknown" async def _download(self, path: str, callback): - # Attempt to fix async performance issues by manually and infrequently - # yielding to event loop selector - counter = 0 - yield_every = 16 - with open(path, "wb") as file: - with requests.get(self.url, allow_redirects=True, stream=True) as resp: - for chunk in resp.iter_content(chunk_size=self.chunk_size): - file.write(chunk) - callback(len(chunk)) - if counter % yield_every == 0: - await asyncio.sleep(0) - counter += 1 + await fast_async_download(path, self.url, self.session.headers, callback) class DeezerDownloadable(Downloadable): is_encrypted = re.compile("/m(?:obile|edia)/") - # chunk_size = 2048 * 3 def __init__(self, session: aiohttp.ClientSession, info: dict): logger.debug("Deezer info for downloadable: %s", info) self.session = session self.url = info["url"] + self.source: str = "deezer" max_quality_available = max( i for i, size in enumerate(info["quality_to_size"]) if size > 0 ) @@ -125,11 +154,9 @@ class DeezerDownloadable(Downloadable): if self.is_encrypted.search(self.url) is None: logger.debug(f"Deezer file at {self.url} not encrypted.") - async with aiofiles.open(path, "wb") as file: - async for chunk in resp.content.iter_chunked(self.chunk_size): - await file.write(chunk) - # typically a bar.update() - callback(len(chunk)) + await fast_async_download( + path, self.url, self.session.headers, callback + ) else: blowfish_key = self._generate_blowfish_key(self.id) logger.debug( @@ -144,10 +171,11 @@ class DeezerDownloadable(Downloadable): buf += data callback(len(data)) + encrypt_chunk_size = 3 * 2048 async with aiofiles.open(path, "wb") as audio: buflen = len(buf) - for i in range(0, buflen, self.chunk_size): - data = buf[i : min(i + self.chunk_size, buflen)] + for i in range(0, buflen, encrypt_chunk_size): + data = buf[i : min(i + encrypt_chunk_size, buflen)] if len(data) >= 2048: decrypted_chunk = ( self._decrypt_chunk(blowfish_key, data[:2048]) @@ -199,6 +227,7 @@ class TidalDownloadable(Downloadable): restrictions, ): self.session = session + self.source = "tidal" codec = codec.lower() if codec in ("flac", "mqa"): self.extension = "flac" @@ -217,7 +246,7 @@ class TidalDownloadable(Downloadable): ) self.url = url self.enc_key = encryption_key - self.downloadable = BasicDownloadable(session, url, self.extension) + self.downloadable = BasicDownloadable(session, url, self.extension, "tidal") async def _download(self, path: str, callback): await self.downloadable._download(path, callback) @@ -276,6 +305,7 @@ class SoundcloudDownloadable(Downloadable): def __init__(self, session, info: dict): self.session = session self.file_type = info["type"] + self.source = "soundcloud" if self.file_type == "mp3": self.extension = "mp3" elif self.file_type == "original": @@ -291,7 +321,9 @@ class SoundcloudDownloadable(Downloadable): await self._download_original(path, callback) async def _download_original(self, path: str, callback): - downloader = BasicDownloadable(self.session, self.url, "flac") + downloader = BasicDownloadable( + self.session, self.url, "flac", source="soundcloud" + ) await downloader.download(path, callback) self.size = downloader.size engine = converter.FLAC(path) diff --git a/streamrip/client/qobuz.py b/streamrip/client/qobuz.py index b9bf152..c181f9d 100644 --- a/streamrip/client/qobuz.py +++ b/streamrip/client/qobuz.py @@ -197,14 +197,14 @@ class QobuzClient(Client): self.logged_in = True - async def get_metadata(self, item_id: str, media_type: str): + async def get_metadata(self, item: str, media_type: str): if media_type == "label": - return await self.get_label(item_id) + return await self.get_label(item) c = self.config.session.qobuz params = { "app_id": c.app_id, - f"{media_type}_id": item_id, + f"{media_type}_id": item, # Do these matter? "limit": 500, "offset": 0, @@ -302,9 +302,9 @@ class QobuzClient(Client): epoint = "playlist/getUserPlaylists" return await self._paginate(epoint, {}, limit=limit) - async def get_downloadable(self, item_id: str, quality: int) -> Downloadable: + async def get_downloadable(self, item: str, quality: int) -> Downloadable: assert self.secret is not None and self.logged_in and 1 <= quality <= 4 - status, resp_json = await self._request_file_url(item_id, quality, self.secret) + status, resp_json = await self._request_file_url(item, quality, self.secret) assert status == 200 stream_url = resp_json.get("url") @@ -319,9 +319,7 @@ class QobuzClient(Client): raise NonStreamableError return BasicDownloadable( - self.session, - stream_url, - "flac" if quality > 1 else "mp3", + self.session, stream_url, "flac" if quality > 1 else "mp3", source="qobuz" ) async def _paginate( diff --git a/streamrip/media/album.py b/streamrip/media/album.py index 7d97346..bdc3dc4 100644 --- a/streamrip/media/album.py +++ b/streamrip/media/album.py @@ -59,7 +59,12 @@ class PendingAlbum(Pending): ) return None - meta = AlbumMetadata.from_album_resp(resp, self.client.source) + try: + meta = AlbumMetadata.from_album_resp(resp, self.client.source) + except Exception as e: + logger.error(f"Error building album metadata for {id=}: {e}") + return None + if meta is None: logger.error( f"Album {self.id} not available to stream on {self.client.source}", diff --git a/streamrip/media/artist.py b/streamrip/media/artist.py index 0093e06..14d30de 100644 --- a/streamrip/media/artist.py +++ b/streamrip/media/artist.py @@ -190,7 +190,14 @@ class PendingArtist(Pending): ) return None - meta = ArtistMetadata.from_resp(resp, self.client.source) + try: + meta = ArtistMetadata.from_resp(resp, self.client.source) + except Exception as e: + logger.error( + f"Error building artist metadata: {e}", + ) + return None + albums = [ PendingAlbum(album_id, self.client, self.config, self.db) for album_id in meta.album_ids() diff --git a/streamrip/media/artwork.py b/streamrip/media/artwork.py index 33a98c5..d18747e 100644 --- a/streamrip/media/artwork.py +++ b/streamrip/media/artwork.py @@ -94,7 +94,11 @@ async def download_artwork( if len(downloadables) == 0: return embed_cover_path, saved_cover_path - await asyncio.gather(*downloadables) + try: + await asyncio.gather(*downloadables) + except Exception as e: + logger.error(f"Error downloading artwork: {e}") + return None, None # Update `covers` to reflect the current download state if save_artwork: diff --git a/streamrip/media/label.py b/streamrip/media/label.py index 7eadef8..5aa0e80 100644 --- a/streamrip/media/label.py +++ b/streamrip/media/label.py @@ -1,6 +1,9 @@ import asyncio +import logging from dataclasses import dataclass +from streamrip.exceptions import NonStreamableError + from ..client import Client from ..config import Config from ..db import Database @@ -8,6 +11,8 @@ from ..metadata import LabelMetadata from .album import PendingAlbum from .media import Media, Pending +logger = logging.getLogger("streamrip") + @dataclass(slots=True) class Label(Media): @@ -57,9 +62,17 @@ class PendingLabel(Pending): config: Config db: Database - async def resolve(self) -> Label: - resp = await self.client.get_metadata(self.id, "label") - meta = LabelMetadata.from_resp(resp, self.client.source) + async def resolve(self) -> Label | None: + try: + resp = await self.client.get_metadata(self.id, "label") + except NonStreamableError as e: + logger.error(f"Error resolving Label: {e}") + return None + try: + meta = LabelMetadata.from_resp(resp, self.client.source) + except Exception as e: + logger.error(f"Error resolving Label: {e}") + return None albums = [ PendingAlbum(album_id, self.client, self.config, self.db) for album_id in meta.album_ids() diff --git a/streamrip/media/playlist.py b/streamrip/media/playlist.py index 2e84a85..858b3c0 100644 --- a/streamrip/media/playlist.py +++ b/streamrip/media/playlist.py @@ -155,7 +155,11 @@ class PendingPlaylist(Pending): ) return None - meta = PlaylistMetadata.from_resp(resp, self.client.source) + try: + meta = PlaylistMetadata.from_resp(resp, self.client.source) + except Exception as e: + logger.error(f"Error creating playlist: {e}") + return None name = meta.name parent = self.config.session.downloads.folder folder = os.path.join(parent, clean_filepath(name)) diff --git a/streamrip/media/track.py b/streamrip/media/track.py index 6e2d88b..32ba531 100644 --- a/streamrip/media/track.py +++ b/streamrip/media/track.py @@ -45,7 +45,32 @@ class Track(Media): await self.downloadable.size(), f"Track {self.meta.tracknumber}", ) as callback: - await self.downloadable.download(self.download_path, callback) + try: + await self.downloadable.download(self.download_path, callback) + retry = False + except Exception as e: + logger.error( + f"Error downloading track '{self.meta.title}', retrying: {e}" + ) + retry = True + + if not retry: + return + + with get_progress_callback( + self.config.session.cli.progress_bars, + await self.downloadable.size(), + f"Track {self.meta.tracknumber} (retry)", + ) as callback: + try: + await self.downloadable.download(self.download_path, callback) + except Exception as e: + logger.error( + f"Persistent error downloading track '{self.meta.title}', skipping: {e}" + ) + self.db.set_failed( + self.downloadable.source, "track", self.meta.info.id + ) async def postprocess(self): if self.is_single: @@ -110,7 +135,12 @@ class PendingTrack(Pending): logger.error(f"Track {self.id} not available for stream on {source}: {e}") return None - meta = TrackMetadata.from_resp(self.album, source, resp) + try: + meta = TrackMetadata.from_resp(self.album, source, resp) + except Exception as e: + logger.error(f"Error building track metadata for {id=}: {e}") + return None + if meta is None: logger.error(f"Track {self.id} not available for stream on {source}") self.db.set_failed(source, "track", self.id) @@ -154,7 +184,12 @@ class PendingSingle(Pending): logger.error(f"Error fetching track {self.id}: {e}") return None # Patch for soundcloud - album = AlbumMetadata.from_track_resp(resp, self.client.source) + try: + album = AlbumMetadata.from_track_resp(resp, self.client.source) + except Exception as e: + logger.error(f"Error building album metadata for track {id=}: {e}") + return None + if album is None: self.db.set_failed(self.client.source, "track", self.id) logger.error( @@ -162,7 +197,11 @@ class PendingSingle(Pending): ) return None - meta = TrackMetadata.from_resp(album, self.client.source, resp) + try: + meta = TrackMetadata.from_resp(album, self.client.source, resp) + except Exception as e: + logger.error(f"Error building track metadata for track {id=}: {e}") + return None if meta is None: self.db.set_failed(self.client.source, "track", self.id) diff --git a/streamrip/metadata/album.py b/streamrip/metadata/album.py index de941ec..dcac729 100644 --- a/streamrip/metadata/album.py +++ b/streamrip/metadata/album.py @@ -5,9 +5,9 @@ import re from dataclasses import dataclass from typing import Optional +from ..filepath_utils import clean_filename from .covers import Covers from .util import get_quality_id, safe_get, typed -from ..filepath_utils import clean_filename PHON_COPYRIGHT = "\u2117" COPYRIGHT = "\u00a9" @@ -64,12 +64,12 @@ class AlbumMetadata: def format_folder_path(self, formatter: str) -> str: # Available keys: "albumartist", "title", "year", "bit_depth", "sampling_rate", - # "id", and "albumcomposer", + # "id", and "albumcomposer", none_str = "Unknown" info: dict[str, str | int | float] = { "albumartist": clean_filename(self.albumartist), - "albumcomposer": clean_filename(self.albumcomposer) or none_str, + "albumcomposer": clean_filename(self.albumcomposer or "") or none_str, "bit_depth": self.info.bit_depth or none_str, "id": self.info.id, "sampling_rate": self.info.sampling_rate or none_str, @@ -77,9 +77,9 @@ class AlbumMetadata: "year": self.year, "container": self.info.container, } - + return formatter.format(**info) - + @classmethod def from_qobuz(cls, resp: dict) -> AlbumMetadata: album = resp.get("title", "Unknown Album") @@ -96,12 +96,12 @@ class AlbumMetadata: else: albumartist = typed(safe_get(resp, "artist", "name"), str) - albumcomposer = typed(safe_get(resp, "composer", "name"), str | None) + albumcomposer = typed(safe_get(resp, "composer", "name", default=""), str) _label = resp.get("label") if isinstance(_label, dict): _label = _label["name"] - label = typed(_label, str | None) - description = typed(resp.get("description") or None, str | None) + label = typed(_label or "", str) + description = typed(resp.get("description", "") or None, str) disctotal = typed( max( track.get("media_number", 1) @@ -115,8 +115,8 @@ class AlbumMetadata: # Non-embedded information cover_urls = Covers.from_qobuz(resp) - bit_depth = typed(resp.get("maximum_bit_depth"), int | None) - sampling_rate = typed(resp.get("maximum_sampling_rate"), int | float | None) + bit_depth = typed(resp.get("maximum_bit_depth", -1), int) + sampling_rate = typed(resp.get("maximum_sampling_rate", -1.0), int | float) quality = get_quality_id(bit_depth, sampling_rate) # Make sure it is non-empty list booklets = typed(resp.get("goodies", None) or None, list | None) @@ -227,14 +227,14 @@ class AlbumMetadata: safe_get(track, "publisher_metadata", "explicit", default=False), bool, ) - genre = typed(track["genre"], str | None) + genre = typed(track.get("genre"), str | None) genres = [genre] if genre is not None else [] artist = typed(safe_get(track, "publisher_metadata", "artist"), str | None) artist = artist or typed(track["user"]["username"], str) albumartist = artist - date = typed(track["created_at"], str) + date = typed(track.get("created_at"), str) year = date[:4] - label = typed(track["label_name"], str | None) + label = typed(track.get("label_name"), str | None) description = typed(track.get("description"), str | None) album_title = typed( safe_get(track, "publisher_metadata", "album_title"), @@ -284,6 +284,7 @@ class AlbumMetadata: """ Args: + ---- resp: API response containing album metadata. Returns: AlbumMetadata instance if the album is streamable, otherwise None. @@ -300,12 +301,12 @@ class AlbumMetadata: # genre not returned by API date = typed(resp.get("releaseDate"), str) year = date[:4] - _copyright = typed(resp.get("copyright"), str) + _copyright = typed(resp.get("copyright", ""), str) artists = typed(resp.get("artists", []), list) albumartist = ", ".join(a["name"] for a in artists) if not albumartist: - albumartist = typed(safe_get(resp, "artist", "name"), str) + albumartist = typed(safe_get(resp, "artist", "name", default=""), str) disctotal = typed(resp.get("numberOfVolumes", 1), int) # label not returned by API @@ -367,7 +368,7 @@ class AlbumMetadata: ) @classmethod - def from_tidal_playlist_track_resp(cls, resp) -> AlbumMetadata | None: + def from_tidal_playlist_track_resp(cls, resp: dict) -> AlbumMetadata | None: album_resp = resp["album"] streamable = resp.get("allowStreaming", False) if not streamable: @@ -383,11 +384,13 @@ class AlbumMetadata: else: year = "Unknown Year" - _copyright = typed(resp.get("copyright"), str) + _copyright = typed(resp.get("copyright", ""), str) artists = typed(resp.get("artists", []), list) albumartist = ", ".join(a["name"] for a in artists) if not albumartist: - albumartist = typed(safe_get(resp, "artist", "name"), str) + albumartist = typed( + safe_get(resp, "artist", "name", default="Unknown Albumbartist"), str + ) disctotal = typed(resp.get("volumeNumber", 1), int) # label not returned by API diff --git a/streamrip/metadata/playlist.py b/streamrip/metadata/playlist.py index d113ba6..58a563c 100644 --- a/streamrip/metadata/playlist.py +++ b/streamrip/metadata/playlist.py @@ -37,7 +37,7 @@ def get_soundcloud_id(resp: dict) -> str: def parse_soundcloud_id(item_id: str) -> tuple[str, str]: info = item_id.split("|") assert len(info) == 2 - return tuple(info) + return (info[0], info[1]) @dataclass(slots=True) diff --git a/tests/silence.flac b/tests/silence.flac index 108c437ac4075cafa1c30b07582879c2fa8f0559..302cef910cb7fa83a3f48ea74359cd7584360228 100644 GIT binary patch delta 19 bcmbPupK;q0#tqSdn>T1KW!fAaXeI{$U3&;m delta 13 Ucmdn?gmL11#tqSdn?lUw04`(&!vFvP