diff --git a/src/__main__.py b/src/__main__.py index 4dc57b1..6d84b9e 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -2,6 +2,7 @@ import logging import uuid +from contextlib import asynccontextmanager from nicegui import app, ui from starlette.requests import Request @@ -67,6 +68,12 @@ def build_main_content(self): .classes("mt-2") ) + self.get_songs = ( + ui.switch("Get individual songs (not just albums/eps)", value=False) + .props("color=#CB69C1") + .classes("mt-2") + ) + # ensure submit handler logs and receives session_id ui.button( "Submit", @@ -75,6 +82,28 @@ def build_main_content(self): "pink-btn w-full h-[50px] font-bold text-lg mt-4" ) + # attach update handler that logs every change once threshold is met + def _extract_value(ev): + # NiceGUI may pass the new value directly or an event-like object + try: + return ev.value # event object + except Exception: + return ev # raw value + + def _on_update(ev): + val = _extract_value(ev) + try: + if isinstance(val, str) and len(val.strip()) >= THRESHOLD: + # call decorated logger and propagate session id + try: + self.log_url_input(value=val, session_id=self.session_id) + except Exception: + logger.exception("Failed to log url input update") + except Exception: + logger.exception("Error handling url input update") + + self.url_input.on("update", _on_update) + @log_event("submit.click") async def handle_click(self, session_id: str | None = None): """Handle a download submission. session_id is passed to logging wrapper. @@ -85,11 +114,18 @@ async def handle_click(self, session_id: str | None = None): """ await process_submission( self.url_input, + self.get_songs.value, self.auto_dl.value, self.settings.audio_format.value, session_id=session_id, ) + @log_event("url_input.update") + def log_url_input(self, value: str, session_id: str | None = None): + """Log every URL input update (no debounce).""" + # return a small payload for structured logging + return {"url": value} + @log_event("page.view") def _log_page_view(session_id: str | None, user_agent: str, created: bool): @@ -144,7 +180,26 @@ def main_page(request: Request, response: Response) -> None: MusicApiApp(session_id=session_id) -app.on_shutdown(stop_logging) +@app.on_event("startup") +async def _startup_event(): + """Startup handler to configure logging and related resources. + + We keep failures from preventing the app from starting by logging exceptions. + """ + try: + setup_logging(LogDatabaseConnector()) + except Exception as e: + logging.getLogger("app").exception(f"Logging setup failed on startup: {e}") + + +@app.on_event("shutdown") +async def _shutdown_event(): + """Shutdown handler to stop logging and perform cleanup.""" + try: + stop_logging() + except Exception as e: + logging.getLogger("app").exception(f"Logging stop failed on shutdown: {e}") + if __name__ in {"__main__", "__mp_main__"}: setup_logging(LogDatabaseConnector()) diff --git a/src/download_handler_base.py b/src/download_handler_base.py index ca0c0af..83f3235 100644 --- a/src/download_handler_base.py +++ b/src/download_handler_base.py @@ -10,7 +10,8 @@ class DownloadHandlerBase(ABC): def download( self, url: str, - auto_download: bool, + get_songs: bool = False, # make optional default False + auto_download: bool = False, *args, session_id: str | None = None, **kwargs, @@ -21,6 +22,8 @@ def download( Args: url: The URL to download from. + get_songs: Whether to extract individual songs and not just eps and + albums when downloading from an artist URL. auto_download: Whether to mark the content for auto-download. *args: Positional arguments for the download method. session_id: Optional session identifier to propagate to downstream calls. diff --git a/src/ui/logic.py b/src/ui/logic.py index 5e90545..0b9973f 100644 --- a/src/ui/logic.py +++ b/src/ui/logic.py @@ -6,9 +6,23 @@ async def process_submission( - url_input, auto_download, audio_format, session_id: str | None = None + url_input, + get_songs, + auto_download, + audio_format, + session_id: str | None = None ): - """Logic for handling the URL submission.""" + """Logic for handling the URL submission. + + Args: + url_input: The input field containing the URL. + get_songs: Whether to fetch individual songs and not just eps and albums + when downloading from an artist URL. + auto_download: Whether to mark the artist for auto-download. + audio_format: The desired audio format for downloads. + session_id: The session ID for logging context. + + """ try: url = str(url_input.value).strip() if not url: @@ -24,6 +38,7 @@ async def process_submission( handler.download( url=url, + get_songs=get_songs, auto_download=auto_download, add_without_download=False, download_format=audio_format, diff --git a/src/youtube_handler/youtube_album_fetcher.py b/src/youtube_handler/youtube_album_fetcher.py index 68b1dc4..1431b98 100644 --- a/src/youtube_handler/youtube_album_fetcher.py +++ b/src/youtube_handler/youtube_album_fetcher.py @@ -12,17 +12,95 @@ class YoutubeAlbumFetcher: """A class to fetch album and song information from YouTube Music.""" + @staticmethod + def _collect_section_results(section: dict[str, Any]) -> list[dict[str, Any]]: + """Collect all items from a section, following continuation tokens when present. + + This handles several shapes returned by ytmusicapi: initial 'results' and + various continuation token structures. For each continuation token it calls + ytmusic.get_continuation(...) and extracts list-like results. + """ + items: list[dict[str, Any]] = [] + if not section: + return items + + # initial results + items.extend( + section.get( + "results", [] + ) if isinstance(section.get("results", []), list) else [] + ) + + continuations = section.get("continuations", []) or [] + if not continuations: + cont_token = None + for key in ("continuation", "nextContinuationData"): + if key in section: + value = section.get(key) + if isinstance(value, str): + cont_token = value + elif isinstance(value, dict): + cont_token = value.get("continuation") or value.get("token") + if cont_token: + continuations = [{"continuation": cont_token}] + break + + for cont in continuations: + token = cont.get( + "continuation" + ) or cont.get("token") or cont.get( + "nextContinuationData", {} + ).get("continuation") + if not token: + continue + try: + next_page = ytmusic.get_continuation(token) + except Exception: + break + + if isinstance(next_page, dict): + if "results" in next_page and isinstance( + next_page.get("results"), list + ): + items.extend(next_page.get("results", [])) + continue + + # nested containers + for container_key in ( + "albums", "singles", "continuationContents", "contents" + ): + container = next_page.get(container_key) + if isinstance(container, dict) and isinstance( + container.get("results" + ), list): + items.extend(container.get("results", [])) + break + if isinstance(container, list): + items.extend(container) + break + else: + # fallback: take first list value we find + for v in next_page.values(): + if isinstance(v, list): + items.extend(v) + break + return items + @staticmethod @log_event("youtube.get_album_ids") - def get_album_ids(artist_url: str, session_id: str | None = None) -> list[str]: - """Fetch album URLs for a given YouTube Music artist channel URL. + def get_album_ids(artist_url: str, + get_songs: bool = False, + session_id: str | None = None) -> list[str]: + """Fetch album URLs from a YouTube Music artist channel URL. Args: - artist_url: The YouTube Music channel URL. - session_id: An optional session ID for logging correlation. + artist_url: The YouTube Music channel URL of the artist. + get_songs: Whether to include songs that are not part of an album or EP. + session_id: Optional session ID for logging. Returns: - A list of YouTube Music playlist URLs for the artist's albums. + A list of YouTube Music playlist URLs for the artist's albums and + optionally EPs/s that are not part of albums, depending on get_songs. """ artist_id = YoutubeAlbumFetcher._get_id_by_url(artist_url) @@ -30,24 +108,26 @@ def get_album_ids(artist_url: str, session_id: str | None = None) -> list[str]: artist_id, session_id=session_id ) album_ids = YoutubeAlbumFetcher._get_albums( - artist_details, session_id=session_id + artist_details, + channel_id=artist_id, + get_songs=get_songs, + session_id=session_id, ) - return [YoutubeAlbumFetcher._get_album_url(id) for id in album_ids] + return [YoutubeAlbumFetcher._get_album_url(aid) for aid in album_ids] @staticmethod def _get_id_by_url(url: str) -> str: - """Extract the YouTube Music artist ID from a channel URL. + """Extract the YouTube channel ID from a given YouTube Music channel URL. Args: url: The YouTube Music channel URL. Returns: - The YouTube Music artist ID. + The extracted YouTube channel ID. """ if not url or "channel/" not in url: raise ValueError(f"Invalid YouTube Music channel URL: {url}") - id_side = url.split("channel/")[1] return id_side.split("/")[0] @@ -56,10 +136,11 @@ def _get_album_url(playlist_id: str) -> str: """Construct a YouTube Music playlist URL from a playlist ID. Args: - playlist_id: The YouTube Music playlist ID. + playlist_id: The YouTube Music playlist ID of the album/EP. Returns: - The full YouTube Music playlist URL. + The full YouTube Music URL for the album/EP playlist. + """ if not playlist_id: raise ValueError("Playlist ID cannot be empty") @@ -68,44 +149,84 @@ def _get_album_url(playlist_id: str) -> str: @staticmethod @log_event("youtube._get_albums") def _get_albums( - artist_details: dict, get_eps: bool = True, session_id: str | None = None + artist_details: dict, + channel_id: str, + get_eps: bool = True, + get_songs: bool = False, + session_id: str | None = None ) -> list[str]: - """Extract album IDs from artist details. + """Fetch album IDs from artist details. Args: - artist_details: A dictionary containing artist details. - get_eps: Whether to include EPs in the album list. - session_id: An optional session ID for logging correlation. + artist_details: The details dictionary of the artist, as returned + by ytmusic.get_artist + channel_id: The YouTube channel ID of the artist, used for API calls + and fallback scanning. + get_eps: Whether to include EPs/singles in the results. + get_songs: Whether to include individual songs that are not part of + albums/EPs in the results. + session_id: Optional session ID for logging. Returns: - A list of album IDs. - - Raises: - ValueError: If no album details or IDs are found. + A list of album playlist IDs and optionally EP playlist IDs and + song video IDs. """ - album_ids = [] - - album_dict = artist_details.get("albums", {}) - if not album_dict: - raise ValueError("No album details found") - albums = album_dict.get("results", []) - - if not albums: + album_ids: list[str] = [] + + albums_list: list[dict[str, Any]] = [] + params = ytmusic.get_artist(channel_id) + try: + albums_list = ytmusic.get_artist_albums( + channel_id, limit=None, params=params, + ) or [] + except Exception: + albums_list = [] + + if not albums_list or len(albums_list) < 50: + album_section = artist_details.get("albums", {}) or {} + collected = YoutubeAlbumFetcher._collect_section_results(album_section) + if collected: + seen = set() + merged: list[dict[str, Any]] = [] + for a in collected + albums_list: + pid = a.get( + "audioPlaylistId" + ) or a.get("browseId") or a.get("playlistId") + if pid and pid not in seen: + seen.add(pid) + merged.append(a) + albums_list = merged + + if not albums_list: raise ValueError("No album details found") - for album in albums: - id = album.get("audioPlaylistId") - if not id: - raise ValueError(f"No album id found for: {album}") - album_ids.append(id) + for album in albums_list: + playlist_id = album.get( + "audioPlaylistId" + ) or album.get("browseId") or album.get("playlistId") + if not playlist_id: + continue + album_ids.append(playlist_id) if get_eps: album_ids.extend( - YoutubeAlbumFetcher.get_eps(artist_details, session_id=session_id) + YoutubeAlbumFetcher.get_eps_and_songs( + artist_details, + channel_id=channel_id, + get_songs=get_songs, + session_id=session_id, + ) ) - return album_ids + seen = set() + deduped: list[str] = [] + for aid in album_ids: + if aid and aid not in seen: + seen.add(aid) + deduped.append(aid) + + return deduped @staticmethod @log_event("youtube._get_artist_details") @@ -115,67 +236,111 @@ def _get_artist_details( """Fetch artist details from YouTube Music by artist ID. Args: - artist_id: The YouTube Music artist ID. - session_id: An optional session ID for logging correlation. + artist_id: The YouTube Music artist ID (channel ID). + session_id: Optional session ID for logging. Returns: - A dictionary containing artist details. + A dictionary containing the artist details as returned by + ytmusic.get_artist. + """ return ytmusic.get_artist(artist_id) @staticmethod @log_event("youtube.get_album_songs") def get_album_songs(playlist_id: str, session_id: str | None = None) -> list[str]: - """Fetch song URLs from a YouTube Music playlist ID. + """Fetch song URLs from a given album/playlist ID. Args: - playlist_id: The YouTube Music playlist ID. - session_id: An optional session ID for logging correlation. + playlist_id: The YouTube Music playlist ID of the album/EP. + session_id: Optional session ID for logging. Returns: - A list of song URLs in the playlist. + A list of YouTube Music song URLs contained in the album/EP. """ playlist = ytmusic.get_playlist(playlist_id, limit=None) - tracks = playlist.get("tracks", []) + tracks = playlist.get("tracks", []) if playlist else [] - songs = [] + songs: list[str] = [] for track in tracks: video_id = track.get("videoId") if video_id: - song_url = ( - f"https://music.youtube.com/watch?v={video_id}&list={playlist_id}" - ) + song_url = ("https://music.youtube.com/" + f"watch?v={video_id}&list={playlist_id}") songs.append(song_url) return songs @staticmethod @log_event("youtube.get_eps") - def get_eps(artist_details: dict, session_id: str | None = None) -> list[str | Any]: - """Fetch EPs for a given artist ID from YouTube Music. + def get_eps_and_songs( + artist_details: dict, + get_songs: bool = False, + channel_id: str = "", + session_id: str | None = None + ) -> list[str]: + """Fetch EPs (and optionally songs) from artist details. Args: - artist_details: A dictionary containing artist details. - session_id: An optional session ID for logging correlation. + artist_details: The details dictionary of the artist, as returned + by ytmusic.get_artist + get_songs: Whether to include individual songs that are not part + of albums/EPs. + channel_id: The YouTube channel ID of the artist, used for fallback + scanning if 'singles' section is not present in artist_details. + session_id: Optional session ID for logging. Returns: - A list of EP playlist IDs. + A list of EP playlist IDs and optionally song video IDs. """ - releases = artist_details.get("singles", {}).get("results", []) - - eps = [] + eps: list[str] = [] + seen_ids = set() + + # Use continuation-aware collector for 'singles' section first + singles_section = artist_details.get("singles", {}) or {} + releases = YoutubeAlbumFetcher._collect_section_results(singles_section) + + # If no singles found in artist_details, scan full albums for singles/EPs + if not releases: + try: + all_albums = ytmusic.get_artist_albums(channel_id, limit=None) or [] + except Exception: + all_albums = [] + releases = [ + a for a in all_albums + if (str(a.get( + "type", "" + )).lower() in ("single", "ep") or a.get("isSingle") is True) + ] for item in releases: - playlist_id = item.get("browseId") - if not playlist_id: + playlist_id = item.get("browseId") or item.get( + "audioPlaylistId" + ) or item.get("playlistId") + if not playlist_id or playlist_id in seen_ids: + continue + + try: + album_details = ytmusic.get_album(playlist_id) + except Exception: continue - album_details = ytmusic.get_album(playlist_id) track_count = len(album_details.get("tracks", [])) - if track_count <= 1: + if track_count <= 1 and not get_songs: continue - eps.append(album_details.get("audioPlaylistId")) + + audio_playlist_id = album_details.get("audioPlaylistId") or playlist_id + if audio_playlist_id: + eps.append(audio_playlist_id) + seen_ids.add(playlist_id) + + if get_songs: + for t in album_details.get("tracks", []): + vid = t.get("videoId") + if vid and vid not in seen_ids: + eps.append(vid) + seen_ids.add(vid) return eps diff --git a/src/youtube_handler/youtube_download_handler.py b/src/youtube_handler/youtube_download_handler.py index 969cb4e..e397631 100644 --- a/src/youtube_handler/youtube_download_handler.py +++ b/src/youtube_handler/youtube_download_handler.py @@ -81,6 +81,7 @@ def __init__(self, db_connector: DatabaseConnector) -> None: def download( self, url: str, + get_songs: bool, auto_download: bool = False, download_format: str = "mp3", quality: str = "Best", @@ -91,6 +92,8 @@ def download( Args: url: The YouTube URL to download from. + get_songs: Whether to include songs that are not part of an album or EP + when downloading from a channel URL. Default is False. auto_download: Whether to mark an artist for auto-download. This is only applicable if the URL is a channel URL. Default is False. download_format: The desired download_format for the download. @@ -109,8 +112,10 @@ def download( self._handle_channel_url( url, auto_download, + get_songs=get_songs, quality=quality, download_format=download_format, + add_without_download=add_without_download, session_id=session_id, ) elif "playlist" in url or "watch?v=" in url: @@ -129,6 +134,7 @@ def _handle_channel_url( self, channel_url: str, auto_download: bool, + get_songs: bool, quality: str, download_format: str, add_without_download: bool = False, @@ -139,6 +145,7 @@ def _handle_channel_url( Args: channel_url: The YouTube channel URL. auto_download: Whether to mark the artist for auto-download. + get_songs: Whether to include songs that are not part of an album or EP quality: The desired quality for the downloads. download_format: The desired download_format for the downloads. add_without_download: If True, will add albums to the database @@ -148,7 +155,9 @@ def _handle_channel_url( """ album_urls = YoutubeAlbumFetcher.get_album_ids( - channel_url, session_id=session_id + channel_url, + get_songs=get_songs, + session_id=session_id ) self.db_connector.add_artist(channel_url, auto_download=auto_download) for album_url in album_urls: diff --git a/tests/youtube_handler/test_youtube_album_fetcher.py b/tests/youtube_handler/test_youtube_album_fetcher.py index 1f4e46e..456cca8 100644 --- a/tests/youtube_handler/test_youtube_album_fetcher.py +++ b/tests/youtube_handler/test_youtube_album_fetcher.py @@ -257,3 +257,19 @@ def _mock_get_album(playlist_id): assert result == expected_eps mock_ytmusic.get_album.assert_any_call("EP_ID_1") mock_ytmusic.get_album.assert_any_call("EP_ID_2") + + +@patch("src.youtube_handler.youtube_album_fetcher.ytmusic") +def test_get_albums_more_than_10(mock_ytmusic): + """Ensure _get_albums returns all albums when the API returns >10 items.""" + # simulate ytmusic returning 15 albums (previously code only used first page/10) + mock_ytmusic.get_artist_albums.return_value = [ + {"audioPlaylistId": f"ALBUM_ID_{i}"} for i in range(15) + ] + + # call _get_albums; rely on ytmusic path so artist_details may be empty + result = YoutubeAlbumFetcher._get_albums({}, get_eps=False) + + assert len(result) == 15 + assert result[0] == "ALBUM_ID_0" + assert result[-1] == "ALBUM_ID_14" diff --git a/tests/youtube_handler/test_youtube_download_handler.py b/tests/youtube_handler/test_youtube_download_handler.py index e814cb0..61c23d7 100644 --- a/tests/youtube_handler/test_youtube_download_handler.py +++ b/tests/youtube_handler/test_youtube_download_handler.py @@ -216,3 +216,73 @@ def test_get_warning_no_warning(mock_me_tube_connector, mock_db_connector): result = handler.get_warning(url) assert result is None + + +@patch("src.youtube_handler.youtube_download_handler.DatabaseConnector") +@patch("src.youtube_handler.youtube_download_handler.MeTubeConnector") +@patch( + "src.youtube_handler.youtube_download_handler." + "YoutubeDownloadHandler._handle_channel_url" +) +def test_download_channel_url_passes_get_songs_flag( + mock_handle_channel_url, + mock_me_tube_connector, + mock_db_connector, +): + """Ensure the get_songs flag is forwarded into _handle_channel_url via download.""" + handler = YoutubeDownloadHandler(db_connector=mock_db_connector) + url = "https://www.youtube.com/channel/CHANNEL_ID" + + handler.download( + url, + get_songs=True, + auto_download=False, + quality="Best", + download_format="mp3", + ) + + mock_handle_channel_url.assert_called_once_with( + url, + False, + get_songs=True, + quality="Best", + download_format="mp3", + add_without_download=False, + session_id=None, + ) + + +@pytest.mark.parametrize("num_albums", [12, 20]) +@patch("src.youtube_handler.youtube_download_handler.DatabaseConnector") +@patch("src.youtube_handler.youtube_download_handler.MeTubeConnector") +@patch("src.youtube_handler.youtube_download_handler.YoutubeAlbumFetcher") +def test_handle_channel_url_many_albums( + mock_youtube_album_fetcher, + mock_me_tube_connector, + mock_db_connector, + num_albums, +): + """When many album URLs are returned, queue_download are called for each.""" + album_urls = [ + f"https://example.com/playlist?list=ALBUM_ID_{i}" for i in range(num_albums) + ] + mock_youtube_album_fetcher.get_album_ids.return_value = album_urls + mock_youtube_album_fetcher.get_album_songs.return_value = [] + + handler = YoutubeDownloadHandler(db_connector=mock_db_connector) + handler._handle_channel_url( + "https://www.example.com/channel/CHANNEL_ID", + auto_download=False, + get_songs=False, + quality="High", + download_format="mp4", + ) + + # queue_download should be called once per album + assert mock_me_tube_connector().queue_download.call_count == num_albums + + # add_album should be called once per album + assert mock_db_connector.add_album.call_count == num_albums + + # album_songs were empty so add_song should not be called + assert mock_db_connector.add_song.call_count == 0