mirror of
https://github.com/nathom/streamrip.git
synced 2024-09-18 18:58:46 -04:00
More robust error handling (#678)
* Handle no copyright case for tidal * Add default values for get calls * Fix LSP errors * Misc fixes
This commit is contained in:
parent
868a8fff99
commit
527b52cae2
12 changed files with 175 additions and 72 deletions
|
@ -13,9 +13,7 @@ classifiers = [
|
|||
"License :: OSI Approved :: GNU General Public License (GPL)",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
packages = [
|
||||
{ include = "streamrip" }
|
||||
]
|
||||
packages = [{ include = "streamrip" }]
|
||||
|
||||
[tool.poetry.scripts]
|
||||
rip = "streamrip.rip:rip"
|
||||
|
|
|
@ -37,13 +37,38 @@ def generate_temp_path(url: str):
|
|||
)
|
||||
|
||||
|
||||
async def fast_async_download(path, url, headers, callback):
|
||||
"""Synchronous download with yield for every 1MB read.
|
||||
|
||||
Using aiofiles/aiohttp resulted in a yield to the event loop for every 1KB,
|
||||
which made file downloads CPU-bound. This resulted in a ~10MB max total download
|
||||
speed. This fixes the issue by only yielding to the event loop for every 1MB read.
|
||||
"""
|
||||
chunk_size: int = 2**17 # 131 KB
|
||||
counter = 0
|
||||
yield_every = 8 # 1 MB
|
||||
with open(path, "wb") as file: # noqa: ASYNC101
|
||||
with requests.get( # noqa: ASYNC100
|
||||
url,
|
||||
headers=headers,
|
||||
allow_redirects=True,
|
||||
stream=True,
|
||||
) as resp:
|
||||
for chunk in resp.iter_content(chunk_size=chunk_size):
|
||||
file.write(chunk)
|
||||
callback(len(chunk))
|
||||
if counter % yield_every == 0:
|
||||
await asyncio.sleep(0)
|
||||
counter += 1
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Downloadable(ABC):
|
||||
session: aiohttp.ClientSession
|
||||
url: str
|
||||
extension: str
|
||||
chunk_size = 2**17
|
||||
_size: Optional[int] = None
|
||||
source: str = "Unknown"
|
||||
_size_base: Optional[int] = None
|
||||
|
||||
async def download(self, path: str, callback: Callable[[int], Any]):
|
||||
await self._download(path, callback)
|
||||
|
@ -58,6 +83,14 @@ class Downloadable(ABC):
|
|||
self._size = int(content_length)
|
||||
return self._size
|
||||
|
||||
@property
|
||||
def _size(self):
|
||||
return self._size_base
|
||||
|
||||
@_size.setter
|
||||
def _size(self, v):
|
||||
self._size_base = v
|
||||
|
||||
@abstractmethod
|
||||
async def _download(self, path: str, callback: Callable[[int], None]):
|
||||
raise NotImplementedError
|
||||
|
@ -66,35 +99,31 @@ class Downloadable(ABC):
|
|||
class BasicDownloadable(Downloadable):
|
||||
"""Just downloads a URL."""
|
||||
|
||||
def __init__(self, session: aiohttp.ClientSession, url: str, extension: str):
|
||||
def __init__(
|
||||
self,
|
||||
session: aiohttp.ClientSession,
|
||||
url: str,
|
||||
extension: str,
|
||||
source: str | None = None,
|
||||
):
|
||||
self.session = session
|
||||
self.url = url
|
||||
self.extension = extension
|
||||
self._size = None
|
||||
self.source: str = source or "Unknown"
|
||||
|
||||
async def _download(self, path: str, callback):
|
||||
# Attempt to fix async performance issues by manually and infrequently
|
||||
# yielding to event loop selector
|
||||
counter = 0
|
||||
yield_every = 16
|
||||
with open(path, "wb") as file:
|
||||
with requests.get(self.url, allow_redirects=True, stream=True) as resp:
|
||||
for chunk in resp.iter_content(chunk_size=self.chunk_size):
|
||||
file.write(chunk)
|
||||
callback(len(chunk))
|
||||
if counter % yield_every == 0:
|
||||
await asyncio.sleep(0)
|
||||
counter += 1
|
||||
await fast_async_download(path, self.url, self.session.headers, callback)
|
||||
|
||||
|
||||
class DeezerDownloadable(Downloadable):
|
||||
is_encrypted = re.compile("/m(?:obile|edia)/")
|
||||
# chunk_size = 2048 * 3
|
||||
|
||||
def __init__(self, session: aiohttp.ClientSession, info: dict):
|
||||
logger.debug("Deezer info for downloadable: %s", info)
|
||||
self.session = session
|
||||
self.url = info["url"]
|
||||
self.source: str = "deezer"
|
||||
max_quality_available = max(
|
||||
i for i, size in enumerate(info["quality_to_size"]) if size > 0
|
||||
)
|
||||
|
@ -125,11 +154,9 @@ class DeezerDownloadable(Downloadable):
|
|||
|
||||
if self.is_encrypted.search(self.url) is None:
|
||||
logger.debug(f"Deezer file at {self.url} not encrypted.")
|
||||
async with aiofiles.open(path, "wb") as file:
|
||||
async for chunk in resp.content.iter_chunked(self.chunk_size):
|
||||
await file.write(chunk)
|
||||
# typically a bar.update()
|
||||
callback(len(chunk))
|
||||
await fast_async_download(
|
||||
path, self.url, self.session.headers, callback
|
||||
)
|
||||
else:
|
||||
blowfish_key = self._generate_blowfish_key(self.id)
|
||||
logger.debug(
|
||||
|
@ -144,10 +171,11 @@ class DeezerDownloadable(Downloadable):
|
|||
buf += data
|
||||
callback(len(data))
|
||||
|
||||
encrypt_chunk_size = 3 * 2048
|
||||
async with aiofiles.open(path, "wb") as audio:
|
||||
buflen = len(buf)
|
||||
for i in range(0, buflen, self.chunk_size):
|
||||
data = buf[i : min(i + self.chunk_size, buflen)]
|
||||
for i in range(0, buflen, encrypt_chunk_size):
|
||||
data = buf[i : min(i + encrypt_chunk_size, buflen)]
|
||||
if len(data) >= 2048:
|
||||
decrypted_chunk = (
|
||||
self._decrypt_chunk(blowfish_key, data[:2048])
|
||||
|
@ -199,6 +227,7 @@ class TidalDownloadable(Downloadable):
|
|||
restrictions,
|
||||
):
|
||||
self.session = session
|
||||
self.source = "tidal"
|
||||
codec = codec.lower()
|
||||
if codec in ("flac", "mqa"):
|
||||
self.extension = "flac"
|
||||
|
@ -217,7 +246,7 @@ class TidalDownloadable(Downloadable):
|
|||
)
|
||||
self.url = url
|
||||
self.enc_key = encryption_key
|
||||
self.downloadable = BasicDownloadable(session, url, self.extension)
|
||||
self.downloadable = BasicDownloadable(session, url, self.extension, "tidal")
|
||||
|
||||
async def _download(self, path: str, callback):
|
||||
await self.downloadable._download(path, callback)
|
||||
|
@ -276,6 +305,7 @@ class SoundcloudDownloadable(Downloadable):
|
|||
def __init__(self, session, info: dict):
|
||||
self.session = session
|
||||
self.file_type = info["type"]
|
||||
self.source = "soundcloud"
|
||||
if self.file_type == "mp3":
|
||||
self.extension = "mp3"
|
||||
elif self.file_type == "original":
|
||||
|
@ -291,7 +321,9 @@ class SoundcloudDownloadable(Downloadable):
|
|||
await self._download_original(path, callback)
|
||||
|
||||
async def _download_original(self, path: str, callback):
|
||||
downloader = BasicDownloadable(self.session, self.url, "flac")
|
||||
downloader = BasicDownloadable(
|
||||
self.session, self.url, "flac", source="soundcloud"
|
||||
)
|
||||
await downloader.download(path, callback)
|
||||
self.size = downloader.size
|
||||
engine = converter.FLAC(path)
|
||||
|
|
|
@ -197,14 +197,14 @@ class QobuzClient(Client):
|
|||
|
||||
self.logged_in = True
|
||||
|
||||
async def get_metadata(self, item_id: str, media_type: str):
|
||||
async def get_metadata(self, item: str, media_type: str):
|
||||
if media_type == "label":
|
||||
return await self.get_label(item_id)
|
||||
return await self.get_label(item)
|
||||
|
||||
c = self.config.session.qobuz
|
||||
params = {
|
||||
"app_id": c.app_id,
|
||||
f"{media_type}_id": item_id,
|
||||
f"{media_type}_id": item,
|
||||
# Do these matter?
|
||||
"limit": 500,
|
||||
"offset": 0,
|
||||
|
@ -302,9 +302,9 @@ class QobuzClient(Client):
|
|||
epoint = "playlist/getUserPlaylists"
|
||||
return await self._paginate(epoint, {}, limit=limit)
|
||||
|
||||
async def get_downloadable(self, item_id: str, quality: int) -> Downloadable:
|
||||
async def get_downloadable(self, item: str, quality: int) -> Downloadable:
|
||||
assert self.secret is not None and self.logged_in and 1 <= quality <= 4
|
||||
status, resp_json = await self._request_file_url(item_id, quality, self.secret)
|
||||
status, resp_json = await self._request_file_url(item, quality, self.secret)
|
||||
assert status == 200
|
||||
stream_url = resp_json.get("url")
|
||||
|
||||
|
@ -319,9 +319,7 @@ class QobuzClient(Client):
|
|||
raise NonStreamableError
|
||||
|
||||
return BasicDownloadable(
|
||||
self.session,
|
||||
stream_url,
|
||||
"flac" if quality > 1 else "mp3",
|
||||
self.session, stream_url, "flac" if quality > 1 else "mp3", source="qobuz"
|
||||
)
|
||||
|
||||
async def _paginate(
|
||||
|
|
|
@ -59,7 +59,12 @@ class PendingAlbum(Pending):
|
|||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
meta = AlbumMetadata.from_album_resp(resp, self.client.source)
|
||||
except Exception as e:
|
||||
logger.error(f"Error building album metadata for {id=}: {e}")
|
||||
return None
|
||||
|
||||
if meta is None:
|
||||
logger.error(
|
||||
f"Album {self.id} not available to stream on {self.client.source}",
|
||||
|
|
|
@ -190,7 +190,14 @@ class PendingArtist(Pending):
|
|||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
meta = ArtistMetadata.from_resp(resp, self.client.source)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error building artist metadata: {e}",
|
||||
)
|
||||
return None
|
||||
|
||||
albums = [
|
||||
PendingAlbum(album_id, self.client, self.config, self.db)
|
||||
for album_id in meta.album_ids()
|
||||
|
|
|
@ -94,7 +94,11 @@ async def download_artwork(
|
|||
if len(downloadables) == 0:
|
||||
return embed_cover_path, saved_cover_path
|
||||
|
||||
try:
|
||||
await asyncio.gather(*downloadables)
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading artwork: {e}")
|
||||
return None, None
|
||||
|
||||
# Update `covers` to reflect the current download state
|
||||
if save_artwork:
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from streamrip.exceptions import NonStreamableError
|
||||
|
||||
from ..client import Client
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
@ -8,6 +11,8 @@ from ..metadata import LabelMetadata
|
|||
from .album import PendingAlbum
|
||||
from .media import Media, Pending
|
||||
|
||||
logger = logging.getLogger("streamrip")
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Label(Media):
|
||||
|
@ -57,9 +62,17 @@ class PendingLabel(Pending):
|
|||
config: Config
|
||||
db: Database
|
||||
|
||||
async def resolve(self) -> Label:
|
||||
async def resolve(self) -> Label | None:
|
||||
try:
|
||||
resp = await self.client.get_metadata(self.id, "label")
|
||||
except NonStreamableError as e:
|
||||
logger.error(f"Error resolving Label: {e}")
|
||||
return None
|
||||
try:
|
||||
meta = LabelMetadata.from_resp(resp, self.client.source)
|
||||
except Exception as e:
|
||||
logger.error(f"Error resolving Label: {e}")
|
||||
return None
|
||||
albums = [
|
||||
PendingAlbum(album_id, self.client, self.config, self.db)
|
||||
for album_id in meta.album_ids()
|
||||
|
|
|
@ -155,7 +155,11 @@ class PendingPlaylist(Pending):
|
|||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
meta = PlaylistMetadata.from_resp(resp, self.client.source)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating playlist: {e}")
|
||||
return None
|
||||
name = meta.name
|
||||
parent = self.config.session.downloads.folder
|
||||
folder = os.path.join(parent, clean_filepath(name))
|
||||
|
|
|
@ -45,7 +45,32 @@ class Track(Media):
|
|||
await self.downloadable.size(),
|
||||
f"Track {self.meta.tracknumber}",
|
||||
) as callback:
|
||||
try:
|
||||
await self.downloadable.download(self.download_path, callback)
|
||||
retry = False
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error downloading track '{self.meta.title}', retrying: {e}"
|
||||
)
|
||||
retry = True
|
||||
|
||||
if not retry:
|
||||
return
|
||||
|
||||
with get_progress_callback(
|
||||
self.config.session.cli.progress_bars,
|
||||
await self.downloadable.size(),
|
||||
f"Track {self.meta.tracknumber} (retry)",
|
||||
) as callback:
|
||||
try:
|
||||
await self.downloadable.download(self.download_path, callback)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Persistent error downloading track '{self.meta.title}', skipping: {e}"
|
||||
)
|
||||
self.db.set_failed(
|
||||
self.downloadable.source, "track", self.meta.info.id
|
||||
)
|
||||
|
||||
async def postprocess(self):
|
||||
if self.is_single:
|
||||
|
@ -110,7 +135,12 @@ class PendingTrack(Pending):
|
|||
logger.error(f"Track {self.id} not available for stream on {source}: {e}")
|
||||
return None
|
||||
|
||||
try:
|
||||
meta = TrackMetadata.from_resp(self.album, source, resp)
|
||||
except Exception as e:
|
||||
logger.error(f"Error building track metadata for {id=}: {e}")
|
||||
return None
|
||||
|
||||
if meta is None:
|
||||
logger.error(f"Track {self.id} not available for stream on {source}")
|
||||
self.db.set_failed(source, "track", self.id)
|
||||
|
@ -154,7 +184,12 @@ class PendingSingle(Pending):
|
|||
logger.error(f"Error fetching track {self.id}: {e}")
|
||||
return None
|
||||
# Patch for soundcloud
|
||||
try:
|
||||
album = AlbumMetadata.from_track_resp(resp, self.client.source)
|
||||
except Exception as e:
|
||||
logger.error(f"Error building album metadata for track {id=}: {e}")
|
||||
return None
|
||||
|
||||
if album is None:
|
||||
self.db.set_failed(self.client.source, "track", self.id)
|
||||
logger.error(
|
||||
|
@ -162,7 +197,11 @@ class PendingSingle(Pending):
|
|||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
meta = TrackMetadata.from_resp(album, self.client.source, resp)
|
||||
except Exception as e:
|
||||
logger.error(f"Error building track metadata for track {id=}: {e}")
|
||||
return None
|
||||
|
||||
if meta is None:
|
||||
self.db.set_failed(self.client.source, "track", self.id)
|
||||
|
|
|
@ -5,9 +5,9 @@ import re
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from ..filepath_utils import clean_filename
|
||||
from .covers import Covers
|
||||
from .util import get_quality_id, safe_get, typed
|
||||
from ..filepath_utils import clean_filename
|
||||
|
||||
PHON_COPYRIGHT = "\u2117"
|
||||
COPYRIGHT = "\u00a9"
|
||||
|
@ -69,7 +69,7 @@ class AlbumMetadata:
|
|||
none_str = "Unknown"
|
||||
info: dict[str, str | int | float] = {
|
||||
"albumartist": clean_filename(self.albumartist),
|
||||
"albumcomposer": clean_filename(self.albumcomposer) or none_str,
|
||||
"albumcomposer": clean_filename(self.albumcomposer or "") or none_str,
|
||||
"bit_depth": self.info.bit_depth or none_str,
|
||||
"id": self.info.id,
|
||||
"sampling_rate": self.info.sampling_rate or none_str,
|
||||
|
@ -96,12 +96,12 @@ class AlbumMetadata:
|
|||
else:
|
||||
albumartist = typed(safe_get(resp, "artist", "name"), str)
|
||||
|
||||
albumcomposer = typed(safe_get(resp, "composer", "name"), str | None)
|
||||
albumcomposer = typed(safe_get(resp, "composer", "name", default=""), str)
|
||||
_label = resp.get("label")
|
||||
if isinstance(_label, dict):
|
||||
_label = _label["name"]
|
||||
label = typed(_label, str | None)
|
||||
description = typed(resp.get("description") or None, str | None)
|
||||
label = typed(_label or "", str)
|
||||
description = typed(resp.get("description", "") or None, str)
|
||||
disctotal = typed(
|
||||
max(
|
||||
track.get("media_number", 1)
|
||||
|
@ -115,8 +115,8 @@ class AlbumMetadata:
|
|||
# Non-embedded information
|
||||
cover_urls = Covers.from_qobuz(resp)
|
||||
|
||||
bit_depth = typed(resp.get("maximum_bit_depth"), int | None)
|
||||
sampling_rate = typed(resp.get("maximum_sampling_rate"), int | float | None)
|
||||
bit_depth = typed(resp.get("maximum_bit_depth", -1), int)
|
||||
sampling_rate = typed(resp.get("maximum_sampling_rate", -1.0), int | float)
|
||||
quality = get_quality_id(bit_depth, sampling_rate)
|
||||
# Make sure it is non-empty list
|
||||
booklets = typed(resp.get("goodies", None) or None, list | None)
|
||||
|
@ -227,14 +227,14 @@ class AlbumMetadata:
|
|||
safe_get(track, "publisher_metadata", "explicit", default=False),
|
||||
bool,
|
||||
)
|
||||
genre = typed(track["genre"], str | None)
|
||||
genre = typed(track.get("genre"), str | None)
|
||||
genres = [genre] if genre is not None else []
|
||||
artist = typed(safe_get(track, "publisher_metadata", "artist"), str | None)
|
||||
artist = artist or typed(track["user"]["username"], str)
|
||||
albumartist = artist
|
||||
date = typed(track["created_at"], str)
|
||||
date = typed(track.get("created_at"), str)
|
||||
year = date[:4]
|
||||
label = typed(track["label_name"], str | None)
|
||||
label = typed(track.get("label_name"), str | None)
|
||||
description = typed(track.get("description"), str | None)
|
||||
album_title = typed(
|
||||
safe_get(track, "publisher_metadata", "album_title"),
|
||||
|
@ -284,6 +284,7 @@ class AlbumMetadata:
|
|||
"""
|
||||
|
||||
Args:
|
||||
----
|
||||
resp: API response containing album metadata.
|
||||
|
||||
Returns: AlbumMetadata instance if the album is streamable, otherwise None.
|
||||
|
@ -300,12 +301,12 @@ class AlbumMetadata:
|
|||
# genre not returned by API
|
||||
date = typed(resp.get("releaseDate"), str)
|
||||
year = date[:4]
|
||||
_copyright = typed(resp.get("copyright"), str)
|
||||
_copyright = typed(resp.get("copyright", ""), str)
|
||||
|
||||
artists = typed(resp.get("artists", []), list)
|
||||
albumartist = ", ".join(a["name"] for a in artists)
|
||||
if not albumartist:
|
||||
albumartist = typed(safe_get(resp, "artist", "name"), str)
|
||||
albumartist = typed(safe_get(resp, "artist", "name", default=""), str)
|
||||
|
||||
disctotal = typed(resp.get("numberOfVolumes", 1), int)
|
||||
# label not returned by API
|
||||
|
@ -367,7 +368,7 @@ class AlbumMetadata:
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def from_tidal_playlist_track_resp(cls, resp) -> AlbumMetadata | None:
|
||||
def from_tidal_playlist_track_resp(cls, resp: dict) -> AlbumMetadata | None:
|
||||
album_resp = resp["album"]
|
||||
streamable = resp.get("allowStreaming", False)
|
||||
if not streamable:
|
||||
|
@ -383,11 +384,13 @@ class AlbumMetadata:
|
|||
else:
|
||||
year = "Unknown Year"
|
||||
|
||||
_copyright = typed(resp.get("copyright"), str)
|
||||
_copyright = typed(resp.get("copyright", ""), str)
|
||||
artists = typed(resp.get("artists", []), list)
|
||||
albumartist = ", ".join(a["name"] for a in artists)
|
||||
if not albumartist:
|
||||
albumartist = typed(safe_get(resp, "artist", "name"), str)
|
||||
albumartist = typed(
|
||||
safe_get(resp, "artist", "name", default="Unknown Albumbartist"), str
|
||||
)
|
||||
|
||||
disctotal = typed(resp.get("volumeNumber", 1), int)
|
||||
# label not returned by API
|
||||
|
|
|
@ -37,7 +37,7 @@ def get_soundcloud_id(resp: dict) -> str:
|
|||
def parse_soundcloud_id(item_id: str) -> tuple[str, str]:
|
||||
info = item_id.split("|")
|
||||
assert len(info) == 2
|
||||
return tuple(info)
|
||||
return (info[0], info[1])
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
|
Binary file not shown.
Loading…
Reference in a new issue