#!/usr/bin/env python3
from logging import Logger, basicConfig, getLogger
from os import environ
from os.path import abspath, basename, dirname, exists, isdir, join, normpath, splitext
from re import IGNORECASE
from re import compile as re_compile
from re import search
from subprocess import CalledProcessError
from subprocess import run as subprocess_run
from typing import Any, Dict, List, Optional, Tuple, Union

from requests import get as requests_get
from requests import post as requests_post


class JimakuDownloader:
    """
    Main class for downloading subtitles from Jimaku using the AniList API.

    This class provides functionality to search for, select, and download
    subtitles for anime media files or directories.
    """

    # API endpoints
    ANILIST_API_URL = "https://graphql.anilist.co"
    JIMAKU_SEARCH_URL = "https://jimaku.cc/api/entries/search"
    JIMAKU_FILES_BASE = "https://jimaku.cc/api/entries"

    def __init__(self, api_token: Optional[str] = None, log_level: str = "INFO"):
        """
        Initialize the JimakuDownloader with API token and logging configuration.

        Parameters
        ----------
        api_token : str, optional
            Jimaku API token for authentication. If None, will try to get from
            JIMAKU_API_TOKEN env var
        log_level : str, default="INFO"
            Logging level to use (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        """
        self.logger = self._setup_logging(log_level)

        # Fall back to the environment when no token is passed explicitly.
        self.api_token = api_token or environ.get("JIMAKU_API_TOKEN", "")
        if not self.api_token:
            self.logger.warning(
                "No API token provided. Will need to be set before downloading."
            )

    def _setup_logging(self, log_level: str) -> Logger:
        """
        Configure logging with the specified level.

        Parameters
        ----------
        log_level : str
            The desired log level (e.g. "INFO", "DEBUG", etc.)

        Returns
        -------
        logger : logging.Logger
            Configured logger instance

        Raises
        ------
        ValueError
            If log_level does not name a valid logging level
        """
        import logging

        numeric_level = getattr(logging, log_level.upper(), None)
        if not isinstance(numeric_level, int):
            raise ValueError(f"Invalid log level: {log_level}")

        basicConfig(
            level=numeric_level,
            format="%(asctime)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        return getLogger(__name__)

    def is_directory_input(self, path: str) -> bool:
        """
        Check if the input path is a directory.

        Parameters
        ----------
        path : str
            Path to check

        Returns
        -------
        bool
            True if the path is a directory, False otherwise
        """
        return isdir(path)

    def parse_filename(self, filename: str) -> Tuple[str, int, int]:
        """
        Extract show title, season, and episode number from the filename.

        Falls back to interactive prompts when the filename does not match
        the ``Title.S01E02`` pattern.

        Parameters
        ----------
        filename : str
            The filename to parse

        Returns
        -------
        tuple
            (title, season, episode) where:
            - title (str): Show title
            - season (int): Season number
            - episode (int): Episode number

        Raises
        ------
        ValueError
            If interactive input for season/episode is not a valid integer
        """
        match = search(r"(.+?)[. _-]+[Ss](\d+)[Ee](\d+)", filename)
        if match:
            title = match.group(1).replace(".", " ").strip()
            season = int(match.group(2))
            episode = int(match.group(3))
            return title, season, episode

        # Could not parse automatically; ask the user instead.
        self.logger.warning("Could not parse filename automatically.")
        title = input("Could not parse media title. Please enter show title: ").strip()
        try:
            season = int(
                input("Enter season number (or 0 if not applicable): ").strip() or "0"
            )
            episode = int(
                input("Enter episode number (or 0 if not applicable): ").strip() or "0"
            )
        except ValueError:
            self.logger.error("Invalid input.")
            raise ValueError("Invalid season or episode number")
        return title, season, episode

    def parse_directory_name(self, dirname: str) -> Tuple[bool, str, int, int]:
        """
        Extract show title from the directory name.

        NOTE: the parameter name ``dirname`` shadows ``os.path.dirname``
        imported at module level; kept for interface compatibility.

        Parameters
        ----------
        dirname : str
            The directory name to parse

        Returns
        -------
        tuple
            (success, title, season, episode) where:
            - success (bool): Whether a title could be extracted
            - title (str): Show title extracted from directory name
            - season (int): Defaults to 1
            - episode (int): Defaults to 0 (indicating all episodes)
        """
        # Clean up the directory name to use as the title
        title = basename(dirname.rstrip("/"))

        # Skip empty titles or obviously non-anime directories
        if not title or title in [".", "..", "/"]:
            self.logger.debug(f"Directory name '{title}' is not usable")
            return False, "", 1, 0

        # Skip common system directories
        common_dirs = [
            "bin",
            "etc",
            "lib",
            "home",
            "usr",
            "var",
            "tmp",
            "opt",
            "media",
            "mnt",
        ]
        if title.lower() in common_dirs:
            self.logger.debug(
                f"Directory name '{title}' is a common system directory, skipping"
            )
            return False, "", 1, 0

        title = title.replace("_", " ").replace(".", " ").strip()

        # Check if the title seems usable (at least 3 characters)
        if len(title) < 3:
            self.logger.debug(
                f"Directory name '{title}' too short, likely not a show title"
            )
            return False, "", 1, 0

        self.logger.debug(f"Parsed title from directory name: {title}")

        # For directories, assume season 1 and episode 0 (indicating all episodes)
        return True, title, 1, 0

    def find_anime_title_in_path(self, path: str) -> Tuple[str, int, int]:
        """
        Recursively search for an anime title in the path, trying parent
        directories if necessary.

        Parameters
        ----------
        path : str
            Starting directory path

        Returns
        -------
        tuple
            (title, season, episode) - anime title and defaults for season
            and episode

        Raises
        ------
        ValueError
            If no suitable directory name is found up to root
        """
        original_path = path
        path = abspath(path)

        while path and path != "/":
            success, title, season, episode = self.parse_directory_name(path)
            if success:
                self.logger.debug(f"Found anime title '{title}' from directory: {path}")
                return title, season, episode

            # Try parent directory
            self.logger.debug(f"No anime title in '{path}', trying parent directory")
            parent_path = dirname(path)

            # Check if we're stuck (parent is same as current)
            if parent_path == path:
                break
            path = parent_path

        # If we get here, we've reached root without finding a suitable title
        self.logger.error(
            f"Could not extract anime title from directory path: {original_path}"
        )
        self.logger.error("Please specify a directory with a recognizable anime name")
        raise ValueError(f"Could not find anime title in path: {original_path}")

    def load_cached_anilist_id(self, directory: str) -> Optional[int]:
        """
        Look for a file named '.anilist.id' in the given directory and return
        the AniList ID.

        Parameters
        ----------
        directory : str
            Path to the directory to search for cache file

        Returns
        -------
        int or None
            The cached AniList ID if found and valid, None otherwise
        """
        cache_path = join(directory, ".anilist.id")
        if exists(cache_path):
            try:
                with open(cache_path, "r", encoding="UTF-8") as f:
                    return int(f.read().strip())
            except Exception:
                # Best-effort cache: a corrupt file is treated as a miss.
                self.logger.warning("Failed to read cached AniList ID.")
                return None
        return None

    def save_anilist_id(self, directory: str, anilist_id: int) -> None:
        """
        Save the AniList ID to a file named '.anilist.id' in the given directory.

        Parameters
        ----------
        directory : str
            Path to the directory where the cache file should be saved
        anilist_id : int
            The AniList ID to cache

        Returns
        -------
        None
        """
        cache_path = join(directory, ".anilist.id")
        try:
            # Use the same encoding as load_cached_anilist_id for consistency.
            with open(cache_path, "w", encoding="UTF-8") as f:
                f.write(str(anilist_id))
        except Exception as e:
            # Caching is best-effort; failure to write is not fatal.
            self.logger.warning(f"Could not save AniList cache file: {e}")

    def query_anilist(self, title: str) -> int:
        """
        Query AniList's GraphQL API for the given title and return its media ID.

        Parameters
        ----------
        title : str
            The anime title to search for

        Returns
        -------
        int
            The AniList media ID for the title

        Raises
        ------
        ValueError
            If no media is found or an error occurs with the API
        """
        query = """
        query ($search: String) {
          Media(search: $search, type: ANIME) {
            id
            title {
              romaji
              english
              native
            }
          }
        }
        """
        variables = {"search": title}

        try:
            self.logger.debug(f"Sending AniList query for title: {title}")
            response = requests_post(
                self.ANILIST_API_URL, json={"query": query, "variables": variables}
            )
            response.raise_for_status()
            data = response.json()
            self.logger.debug(f"AniList response: {data}")

            media = data.get("data", {}).get("Media")
            if media:
                return media.get("id")
            else:
                self.logger.error("AniList: No media found for title.")
                raise ValueError(f"No media found on AniList for title: {title}")
        except Exception as e:
            self.logger.error(f"Error querying AniList: {e}")
            raise ValueError(f"Error querying AniList API: {str(e)}")

    def query_jimaku_entries(self, anilist_id: int) -> List[Dict[str, Any]]:
        """
        Query the Jimaku API to list available subtitle entries.

        Parameters
        ----------
        anilist_id : int
            The AniList ID of the anime

        Returns
        -------
        list
            List of entry dictionaries containing subtitle metadata

        Raises
        ------
        ValueError
            If no entries are found, an error occurs with the API, or no
            API token is configured
        """
        if not self.api_token:
            raise ValueError(
                "API token is required. Set it in the constructor or JIMAKU_API_TOKEN env var."
            )

        params = {"anilist_id": anilist_id}
        headers = {
            "Authorization": f"{self.api_token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        try:
            self.logger.debug(f"Querying Jimaku entries for AniList ID: {anilist_id}")
            response = requests_get(
                self.JIMAKU_SEARCH_URL, params=params, headers=headers
            )
            response.raise_for_status()
            results = response.json()
            self.logger.debug(f"Jimaku search response: {results}")

            if not results:
                self.logger.error("No subtitle entries found on Jimaku for this media.")
                raise ValueError(
                    f"No subtitle entries found for AniList ID: {anilist_id}"
                )
            return results
        except Exception as e:
            self.logger.error(f"Error querying Jimaku API: {e}")
            raise ValueError(f"Error querying Jimaku API: {str(e)}")

    def get_entry_files(self, entry_id: Union[str, int]) -> List[Dict[str, Any]]:
        """
        Retrieve file information for a given entry ID.

        Parameters
        ----------
        entry_id : str or int
            The Jimaku entry ID to retrieve files for

        Returns
        -------
        list
            List of file info dictionaries

        Raises
        ------
        ValueError
            If no files are found, an error occurs with the API, or no
            API token is configured
        """
        if not self.api_token:
            raise ValueError(
                "API token is required. Set it in the constructor or JIMAKU_API_TOKEN env var."
            )

        url = f"{self.JIMAKU_FILES_BASE}/{entry_id}/files"
        headers = {
            "Authorization": f"{self.api_token}",
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        try:
            self.logger.debug(f"Querying files for entry ID: {entry_id}")
            response = requests_get(url, headers=headers)
            response.raise_for_status()
            files = response.json()
            self.logger.debug(f"Entry files response: {files}")

            if not files:
                self.logger.error("No files found for the selected entry.")
                raise ValueError(f"No files found for entry ID: {entry_id}")
            return files
        except Exception as e:
            self.logger.error(f"Error querying files for entry {entry_id}: {e}")
            raise ValueError(f"Error retrieving files: {str(e)}")

    def filter_files_by_episode(
        self, files: List[Dict[str, Any]], target_episode: int
    ) -> List[Dict[str, Any]]:
        """
        Filter subtitle files to only include those matching the target episode.

        Parameters
        ----------
        files : list
            List of file info dictionaries
        target_episode : int
            Episode number to filter by

        Returns
        -------
        list
            Filtered list of file info dictionaries matching the target episode,
            or all files if no matches are found
        """
        filtered_files = []

        # More flexible episode pattern that can detect various formats:
        # - E01, e01, Ep01, EP01, episode01
        # - Just the number: 01, 1
        # - With separators: - 01, _01, .01
        # Using word boundaries to avoid matching random numbers
        episode_patterns = [
            # Standard episode markers
            re_compile(r"[Ee](?:p(?:isode)?)?[ ._-]*(\d+)", IGNORECASE),
            # Just the number with word boundary or separator before it
            re_compile(r"(?:^|\s|[._-])(\d+)(?:\s|$|[._-])", IGNORECASE),
            # Number with hash
            re_compile(r"#(\d+)", IGNORECASE),
        ]

        # Check for keywords that indicate a file covers all episodes
        all_episodes_keywords = ["all", "batch", "complete", "season", "full"]

        for file_info in files:
            filename = file_info.get("name", "").lower()
            matched = False

            # Try each pattern to find episode number
            for pattern in episode_patterns:
                matches = pattern.findall(filename)
                for match in matches:
                    try:
                        file_episode = int(match)
                        if file_episode == target_episode:
                            filtered_files.append(file_info)
                            self.logger.debug(
                                f"Matched episode {target_episode} in: {filename}"
                            )
                            matched = True
                            break
                    except (ValueError, TypeError):
                        continue
                if matched:
                    break

            # If we didn't find a match but it might be a batch file
            if not matched:
                # Check if it seems to be a batch file that would include our episode
                might_include_episode = any(
                    keyword in filename for keyword in all_episodes_keywords
                )
                if might_include_episode:
                    self.logger.debug(
                        f"Might include episode {target_episode} (batch): {filename}"
                    )
                    filtered_files.append(file_info)

        if filtered_files:
            self.logger.info(
                f"Found {len(filtered_files)} files matching episode {target_episode}"
            )
            return filtered_files
        else:
            # If no matches found, return all files to avoid empty selection
            self.logger.warning(
                f"No files specifically matched episode {target_episode}, showing all options"
            )
            return files

    def fzf_menu(
        self, options: List[str], multi: bool = False
    ) -> Union[str, List[str], None]:
        """
        Launch fzf with the provided options for selection.

        Parameters
        ----------
        options : list
            List of strings to present as options
        multi : bool, optional
            Whether to enable multi-select mode (default: False)

        Returns
        -------
        str or list or None
            If multi=False: Selected option string or None if cancelled
            If multi=True: List of selected option strings or empty list if
            cancelled
        """
        try:
            fzf_args = ["fzf", "--height=40%", "--border"]
            if multi:
                fzf_args.append("--multi")
                self.logger.debug("Launching fzf multi-selection menu")
            else:
                self.logger.debug("Launching fzf single selection menu")

            proc = subprocess_run(
                fzf_args,
                input="\n".join(options),
                text=True,
                capture_output=True,
                check=True,
            )
            if multi:
                return [
                    line.strip()
                    for line in proc.stdout.strip().split("\n")
                    if line.strip()
                ]
            else:
                return proc.stdout.strip()
        except CalledProcessError:
            # fzf exits non-zero when the user aborts the selection.
            self.logger.warning("User cancelled fzf selection")
            return [] if multi else None

    def download_file(self, url: str, dest_path: str) -> str:
        """
        Download the file from the given URL and save it to dest_path.

        Parameters
        ----------
        url : str
            URL to download the file from
        dest_path : str
            Path where the file should be saved

        Returns
        -------
        str
            Path where the file was saved

        Raises
        ------
        ValueError
            If an error occurs during download
        """
        try:
            self.logger.debug(f"Downloading file from: {url}")
            response = requests_get(url, stream=True)
            response.raise_for_status()

            # Stream to disk in chunks to avoid loading large files in memory.
            with open(dest_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            self.logger.debug(f"File saved to: {dest_path}")
            return dest_path
        except Exception as e:
            self.logger.error(f"Error downloading subtitle file: {e}")
            raise ValueError(f"Error downloading file: {str(e)}")

    def download_subtitles(
        self, media_path: str, dest_dir: Optional[str] = None, play: bool = False
    ) -> List[str]:
        """
        Download subtitles for the given media path.

        This is the main entry point method that orchestrates the entire
        download process.

        Parameters
        ----------
        media_path : str
            Path to the media file or directory
        dest_dir : str, optional
            Directory to save downloaded subtitles
            (default: same directory as media)
        play : bool, default=False
            Whether to launch MPV with the subtitles after download

        Returns
        -------
        list
            List of paths to downloaded subtitle files

        Raises
        ------
        ValueError
            If media path doesn't exist or other errors occur
        """
        if not exists(media_path):
            raise ValueError(f"Path '{media_path}' does not exist")

        self.logger.info("Starting subtitle search and download process")

        # Check if input is a file or directory
        is_directory = self.is_directory_input(media_path)
        self.logger.info(
            f"Processing {'directory' if is_directory else 'file'}: {media_path}"
        )

        # Set destination directory
        if not dest_dir:
            if is_directory:
                dest_dir = media_path
            else:
                dest_dir = dirname(abspath(media_path))
        self.logger.debug(f"Destination directory: {dest_dir}")

        # Parse media information based on input type
        if is_directory:
            title, season, episode = self.find_anime_title_in_path(media_path)
            media_dir = media_path
            media_file = None  # No specific file for directory input
            self.logger.debug(
                f"Found anime title '{title}' but will save subtitles to: {dest_dir}"
            )
        else:
            base_filename = basename(media_path)
            title, season, episode = self.parse_filename(base_filename)
            media_dir = dirname(abspath(media_path))
            media_file = media_path

        self.logger.info(
            f"Identified show: {title}, Season: {season}, Episode: {episode}"
        )

        # Get AniList ID (either from cache or by querying)
        anilist_id = self.load_cached_anilist_id(media_dir)
        if not anilist_id:
            self.logger.info("Querying AniList for media ID...")
            anilist_id = self.query_anilist(title)
            self.logger.info(f"AniList ID for '{title}' is {anilist_id}")
            self.save_anilist_id(media_dir, anilist_id)
        else:
            self.logger.info(f"Using cached AniList ID: {anilist_id}")

        # Query Jimaku for available subtitle entries
        self.logger.info("Querying Jimaku for subtitle entries...")
        entries = self.query_jimaku_entries(anilist_id)

        # Present entries in fzf for selection
        entry_options = []
        entry_mapping = {}
        for i, entry in enumerate(entries, start=1):
            opt = f"{i}. {entry.get('english_name', 'No Eng Name')} - {entry.get('japanese_name', 'None')}"
            entry_options.append(opt)
            entry_mapping[opt] = entry

        # Sort entry options alphabetically
        entry_options.sort()

        self.logger.info("Select a subtitle entry using fzf:")
        selected_entry_option = self.fzf_menu(
            entry_options, multi=False
        )  # Always single selection for entries
        if not selected_entry_option or selected_entry_option not in entry_mapping:
            raise ValueError("No valid entry selected")

        selected_entry = entry_mapping[selected_entry_option]
        entry_id = selected_entry.get("id")
        if not entry_id:
            raise ValueError("Selected entry does not have a valid ID")

        # Retrieve the files for the selected entry
        self.logger.info(f"Retrieving files for entry ID: {entry_id}")
        files = self.get_entry_files(entry_id)

        # For file input: filter files by episode
        if not is_directory and episode > 0:
            self.logger.info(f"Filtering subtitle files for episode {episode}")
            files = self.filter_files_by_episode(files, episode)

        # Present available subtitle files for selection
        file_options = []
        file_mapping = {}
        for i, file_info in enumerate(files, start=1):
            display = f"{i}. {file_info.get('name', 'Unknown')}"
            file_options.append(display)
            file_mapping[display] = file_info

        # Sort the file options alphabetically for better readability
        file_options.sort()

        # Use multi-select mode only for directory input
        self.logger.info(
            f"Select {'one or more' if is_directory else 'one'} subtitle file(s):"
        )
        selected_files = self.fzf_menu(file_options, multi=is_directory)

        # Handle the different return types based on multi or single selection
        if is_directory:  # multi-select mode
            if not selected_files:
                raise ValueError("No subtitle files selected")
            selected_files_list = selected_files  # already a list
        else:  # single-select mode
            if not selected_files:
                raise ValueError("No subtitle file selected")
            selected_files_list = [
                selected_files
            ]  # convert to list for consistent processing

        # Download each selected subtitle file
        downloaded_files = []
        for opt in selected_files_list:
            file_info = file_mapping.get(opt)
            if not file_info:
                self.logger.warning(f"Could not find mapping for selected file: {opt}")
                continue

            download_url = file_info.get("url")
            if not download_url:
                self.logger.warning(
                    f"File option '{opt}' does not have a download URL. Skipping."
                )
                continue

            # Use provided filename if available; otherwise, default to base
            # video name + suffix.
            filename = file_info.get("name")
            if not filename:
                if is_directory:
                    # No media file to derive a name from; use a generic one.
                    filename = "subtitle.srt"
                else:
                    # Base video name with a subtitle suffix.
                    filename = splitext(basename(media_file))[0] + ".srt"

            dest_path = join(dest_dir, filename)
            self.logger.info(f"Downloading '{opt}' to {dest_path}...")
            self.download_file(download_url, dest_path)
            downloaded_files.append(dest_path)
            self.logger.info(f"Subtitle saved to: {dest_path}")

        # Optionally, launch MPV with the video file and the downloaded subtitles
        if play and not is_directory:
            self.logger.info("Launching MPV with the subtitle files...")
            mpv_cmd = ["mpv", media_file]
            # One --sub-file flag per downloaded subtitle so MPV loads them all.
            for sub_path in downloaded_files:
                mpv_cmd.append(f"--sub-file={sub_path}")
            try:
                self.logger.debug(f"Running command: {' '.join(mpv_cmd)}")
                subprocess_run(mpv_cmd)
            except FileNotFoundError:
                self.logger.error(
                    "MPV not found. Please install MPV and ensure it is in your PATH."
                )
        elif play and is_directory:
            self.logger.warning(
                "Cannot play media with MPV when input is a directory. Skipping playback."
            )

        self.logger.info("Subtitle download process completed successfully")
        return downloaded_files