diff --git a/README.md b/README.md index f326d88..70d9625 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ SubMiner is an Electron overlay that sits on top of mpv. It turns your video pla - **Dictionary lookups** — Yomitan popups on subtitles with hover or full keyboard-driven navigation; hover-aware auto-pause keeps playback in sync - **One-key mining** — Creates Anki cards with sentence, audio, screenshot, and AI-powered translation - **Reading annotations** — N+1 targeting, frequency highlighting, and JLPT underlining while you watch -- **Subtitle tools** — Jimaku downloads, alass/ffsubsync sync, and whisper.cpp transcription for YouTube with optional AI cleanup +- **Subtitle tools** — Jimaku downloads, alass/ffsubsync sync, and YouTube subtitle generation via manual-track reuse plus whisper.cpp fallback with optional AI cleanup - **Texthooker** — Built-in texthooker page and annotated websocket API for external clients - **Immersion tracking** — SQLite-powered stats on watch time and mining activity - **Integrations** — Jellyfin remote playback, AniList episode progress, and AnkiConnect auto-enrichment diff --git a/changes/youtube-single-flow.md b/changes/youtube-single-flow.md new file mode 100644 index 0000000..22d5096 --- /dev/null +++ b/changes/youtube-single-flow.md @@ -0,0 +1,4 @@ +type: changed +area: launcher + +- Removed the YouTube subtitle generation mode switch so YouTube playback always preloads subtitles before mpv starts. diff --git a/config.example.jsonc b/config.example.jsonc index 34654c7..d831e9c 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -5,7 +5,6 @@ * Copy to $XDG_CONFIG_HOME/SubMiner/config.jsonc (or ~/.config/SubMiner/config.jsonc) and edit as needed. */ { - // ========================================== // Overlay Auto-Start // When overlay connects to mpv, automatically show overlay and hide mpv subtitles. @@ -18,7 +17,7 @@ // ========================================== "texthooker": { "launchAtStartup": true, // Launch texthooker server automatically when SubMiner starts. Values: true | false - "openBrowser": true // Open browser setting. Values: true | false + "openBrowser": true, // Open browser setting. Values: true | false }, // Configure texthooker startup launch and browser opening behavior. // ========================================== @@ -28,7 +27,7 @@ // ========================================== "websocket": { "enabled": "auto", // Built-in subtitle websocket server mode. Values: auto | true | false - "port": 6677 // Built-in subtitle websocket server port. + "port": 6677, // Built-in subtitle websocket server port. }, // Built-in WebSocket server broadcasts subtitle text to connected clients. // ========================================== @@ -38,7 +37,7 @@ // ========================================== "annotationWebsocket": { "enabled": true, // Annotated subtitle websocket server enabled state. Values: true | false - "port": 6678 // Annotated subtitle websocket server port. + "port": 6678, // Annotated subtitle websocket server port. }, // Dedicated annotated subtitle websocket for bundled texthooker and token-aware clients. // ========================================== @@ -47,7 +46,7 @@ // Set to debug for full runtime diagnostics. // ========================================== "logging": { - "level": "info" // Minimum log level for runtime logging. Values: debug | info | warn | error + "level": "info", // Minimum log level for runtime logging. Values: debug | info | warn | error }, // Controls logging verbosity. // ========================================== @@ -61,7 +60,7 @@ "mecab": true, // Warm up MeCab tokenizer at startup. Values: true | false "yomitanExtension": true, // Warm up Yomitan extension at startup. Values: true | false "subtitleDictionaries": true, // Warm up subtitle dictionaries at startup. Values: true | false - "jellyfinRemoteSession": true // Warm up Jellyfin remote session at startup. Values: true | false + "jellyfinRemoteSession": true, // Warm up Jellyfin remote session at startup. Values: true | false }, // Background warmup controls for MeCab, Yomitan, dictionaries, and Jellyfin session. // ========================================== @@ -82,7 +81,7 @@ "toggleSecondarySub": "CommandOrControl+Shift+V", // Toggle secondary sub setting. "markAudioCard": "CommandOrControl+Shift+A", // Mark audio card setting. "openRuntimeOptions": "CommandOrControl+Shift+O", // Open runtime options setting. - "openJimaku": "Ctrl+Shift+J" // Open jimaku setting. + "openJimaku": "Ctrl+Shift+J", // Open jimaku setting. }, // Overlay keyboard shortcuts. Set a shortcut to null to disable. // ========================================== @@ -102,7 +101,7 @@ "secondarySub": { "secondarySubLanguages": [], // Secondary sub languages setting. "autoLoadSecondarySub": false, // Auto load secondary sub setting. Values: true | false - "defaultMode": "hover" // Default mode setting. + "defaultMode": "hover", // Default mode setting. }, // Dual subtitle track options. // ========================================== @@ -114,7 +113,7 @@ "alass_path": "", // Alass path setting. "ffsubsync_path": "", // Ffsubsync path setting. "ffmpeg_path": "", // Ffmpeg path setting. - "replace": true // Replace the active subtitle file when sync completes. Values: true | false + "replace": true, // Replace the active subtitle file when sync completes. Values: true | false }, // Subsync engine and executable paths. // ========================================== @@ -122,7 +121,7 @@ // Initial vertical subtitle position from the bottom. // ========================================== "subtitlePosition": { - "yPercent": 10 // Y percent setting. + "yPercent": 10, // Y percent setting. }, // Initial vertical subtitle position from the bottom. // ========================================== @@ -159,7 +158,7 @@ "N2": "#f5a97f", // N2 setting. "N3": "#f9e2af", // N3 setting. "N4": "#a6e3a1", // N4 setting. - "N5": "#8aadf4" // N5 setting. + "N5": "#8aadf4", // N5 setting. }, // Jlpt colors setting. "frequencyDictionary": { "enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false @@ -168,13 +167,7 @@ "mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded "matchMode": "headword", // headword: frequency lookup uses dictionary form. surface: lookup uses subtitle-visible token text. Values: headword | surface "singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`. - "bandedColors": [ - "#ed8796", - "#f5a97f", - "#f9e2af", - "#8bd5ca", - "#8aadf4" - ] // Five colors used for rank bands when mode is `banded` (from most common to least within topX). + "bandedColors": ["#ed8796", "#f5a97f", "#f9e2af", "#8bd5ca", "#8aadf4"], // Five colors used for rank bands when mode is `banded` (from most common to least within topX). }, // Frequency dictionary setting. "secondary": { "fontFamily": "Inter, Noto Sans, Helvetica Neue, sans-serif", // Font family setting. @@ -189,14 +182,27 @@ "backgroundColor": "rgba(20, 22, 34, 0.78)", // Background color setting. "backdropFilter": "blur(6px)", // Backdrop filter setting. "fontWeight": "600", // Font weight setting. - "fontStyle": "normal" // Font style setting. - } // Secondary setting. + "fontStyle": "normal", // Font style setting. + }, // Secondary setting. }, // Primary and secondary subtitle styling. + // ========================================== + // Shared AI Provider + // Canonical OpenAI-compatible provider transport settings shared by Anki and YouTube subtitle fixing. + // ========================================== + "ai": { + "enabled": false, // Enable shared OpenAI-compatible AI provider features. Values: true | false + "apiKey": "", // Static API key for the shared OpenAI-compatible AI provider. + "apiKeyCommand": "", // Shell command used to resolve the shared AI provider API key. + "baseUrl": "https://openrouter.ai/api", // Base URL for the shared OpenAI-compatible AI provider. + "requestTimeoutMs": 15000, // Timeout in milliseconds for shared AI provider requests. + }, // Canonical OpenAI-compatible provider transport settings shared by Anki and YouTube subtitle fixing. + // ========================================== // AnkiConnect Integration // Automatic Anki updates and media generation options. - // Hot-reload: AI translation settings update live while SubMiner is running. + // Hot-reload: ankiConnect.ai.enabled updates live while SubMiner is running. + // Shared AI provider transport settings are read from top-level ai and typically require restart. // Most other AnkiConnect settings still require restart. // ========================================== "ankiConnect": { @@ -207,26 +213,20 @@ "enabled": true, // Enable local AnkiConnect-compatible proxy for push-based auto-enrichment. Values: true | false "host": "127.0.0.1", // Bind host for local AnkiConnect proxy. "port": 8766, // Bind port for local AnkiConnect proxy. - "upstreamUrl": "http://127.0.0.1:8765" // Upstream AnkiConnect URL proxied by local AnkiConnect proxy. + "upstreamUrl": "http://127.0.0.1:8765", // Upstream AnkiConnect URL proxied by local AnkiConnect proxy. }, // Proxy setting. - "tags": [ - "SubMiner" - ], // Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging. + "tags": ["SubMiner"], // Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging. "fields": { "audio": "ExpressionAudio", // Audio setting. "image": "Picture", // Image setting. "sentence": "Sentence", // Sentence setting. "miscInfo": "MiscInfo", // Misc info setting. - "translation": "SelectionText" // Translation setting. + "translation": "SelectionText", // Translation setting. }, // Fields setting. "ai": { - "enabled": false, // Enabled setting. Values: true | false - "alwaysUseAiTranslation": false, // Always use ai translation setting. Values: true | false - "apiKey": "", // Api key setting. - "model": "openai/gpt-4o-mini", // Model setting. - "baseUrl": "https://openrouter.ai/api", // Base url setting. - "targetLanguage": "English", // Target language setting. - "systemPrompt": "You are a translation engine. Return only the translated text with no explanations." // System prompt setting. + "enabled": false, // Enable AI provider usage for Anki translation/enrichment flows. Values: true | false + "model": "", // Optional model override for Anki AI translation/enrichment flows. + "systemPrompt": "", // Optional system prompt override for Anki AI translation/enrichment flows. }, // Ai setting. "media": { "generateAudio": true, // Generate audio setting. Values: true | false @@ -239,7 +239,7 @@ "animatedCrf": 35, // Animated crf setting. "audioPadding": 0.5, // Audio padding setting. "fallbackDuration": 3, // Fallback duration setting. - "maxMediaDuration": 30 // Max media duration setting. + "maxMediaDuration": 30, // Max media duration setting. }, // Media setting. "behavior": { "overwriteAudio": true, // Overwrite audio setting. Values: true | false @@ -247,7 +247,7 @@ "mediaInsertMode": "append", // Media insert mode setting. "highlightWord": true, // Highlight word setting. Values: true | false "notificationType": "osd", // Notification type setting. - "autoUpdateNewCards": true // Automatically update newly added cards. Values: true | false + "autoUpdateNewCards": true, // Automatically update newly added cards. Values: true | false }, // Behavior setting. "nPlusOne": { "highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false @@ -256,20 +256,20 @@ "decks": [], // Decks used for N+1 known-word cache scope. Supports one or more deck names. "minSentenceWords": 3, // Minimum sentence word count required for N+1 targeting (default: 3). "nPlusOne": "#c6a0f6", // Color used for the single N+1 target token highlight. - "knownWord": "#a6da95" // Color used for legacy known-word highlights. + "knownWord": "#a6da95", // Color used for legacy known-word highlights. }, // N plus one setting. "metadata": { - "pattern": "[SubMiner] %f (%t)" // Pattern setting. + "pattern": "[SubMiner] %f (%t)", // Pattern setting. }, // Metadata setting. "isLapis": { "enabled": false, // Enabled setting. Values: true | false - "sentenceCardModel": "Japanese sentences" // Sentence card model setting. + "sentenceCardModel": "Japanese sentences", // Sentence card model setting. }, // Is lapis setting. "isKiku": { "enabled": false, // Enabled setting. Values: true | false "fieldGrouping": "disabled", // Kiku duplicate-card field grouping mode. Values: auto | manual | disabled - "deleteDuplicateInAuto": true // Delete duplicate in auto setting. Values: true | false - } // Is kiku setting. + "deleteDuplicateInAuto": true, // Delete duplicate in auto setting. Values: true | false + }, // Is kiku setting. }, // Automatic Anki updates and media generation options. // ========================================== @@ -279,22 +279,25 @@ "jimaku": { "apiBaseUrl": "https://jimaku.cc", // Api base url setting. "languagePreference": "ja", // Preferred language used in Jimaku search. Values: ja | en | none - "maxEntryResults": 10 // Maximum Jimaku search results returned. + "maxEntryResults": 10, // Maximum Jimaku search results returned. }, // Jimaku API configuration and defaults. // ========================================== // YouTube Subtitle Generation - // Defaults for subminer YouTube subtitle extraction/transcription mode. + // Defaults for SubMiner YouTube subtitle generation. // ========================================== "youtubeSubgen": { - "mode": "automatic", // YouTube subtitle generation mode for the launcher script. Values: automatic | preprocess | off "whisperBin": "", // Path to whisper.cpp CLI used as fallback transcription engine. "whisperModel": "", // Path to whisper model used for fallback transcription. - "primarySubLanguages": [ - "ja", - "jpn" - ] // Comma-separated primary subtitle language priority used by the launcher. - }, // Defaults for subminer YouTube subtitle extraction/transcription mode. + "whisperVadModel": "", // Path to optional whisper VAD model used for subtitle generation. + "whisperThreads": 4, // Thread count passed to whisper.cpp subtitle generation runs. + "fixWithAi": false, // Use shared AI provider to post-process whisper-generated YouTube subtitles. Values: true | false + "ai": { + "model": "", // Optional model override for YouTube subtitle AI post-processing. + "systemPrompt": "", // Optional system prompt override for YouTube subtitle AI post-processing. + }, // Ai setting. + "primarySubLanguages": ["ja", "jpn"], // Comma-separated primary subtitle language priority used by the launcher. + }, // Defaults for SubMiner YouTube subtitle generation. // ========================================== // Anilist @@ -314,9 +317,9 @@ "collapsibleSections": { "description": false, // Open the Description section by default in character dictionary glossary entries. Values: true | false "characterInformation": false, // Open the Character Information section by default in character dictionary glossary entries. Values: true | false - "voicedBy": false // Open the Voiced by section by default in character dictionary glossary entries. Values: true | false - } // Collapsible sections setting. - } // Character dictionary setting. + "voicedBy": false, // Open the Voiced by section by default in character dictionary glossary entries. Values: true | false + }, // Collapsible sections setting. + }, // Character dictionary setting. }, // Anilist API credentials and update behavior. // ========================================== @@ -340,16 +343,8 @@ "pullPictures": false, // Enable Jellyfin poster/icon fetching for launcher menus. Values: true | false "iconCacheDir": "/tmp/subminer-jellyfin-icons", // Directory used by launcher for cached Jellyfin poster icons. "directPlayPreferred": true, // Try direct play before server-managed transcoding when possible. Values: true | false - "directPlayContainers": [ - "mkv", - "mp4", - "webm", - "mov", - "flac", - "mp3", - "aac" - ], // Container allowlist for direct play decisions. - "transcodeVideoCodec": "h264" // Preferred transcode video codec when direct play is unavailable. + "directPlayContainers": ["mkv", "mp4", "webm", "mov", "flac", "mp3", "aac"], // Container allowlist for direct play decisions. + "transcodeVideoCodec": "h264", // Preferred transcode video codec when direct play is unavailable. }, // Optional Jellyfin integration for auth, browsing, and playback launch. // ========================================== @@ -360,7 +355,7 @@ "discordPresence": { "enabled": false, // Enable optional Discord Rich Presence updates. Values: true | false "updateIntervalMs": 3000, // Minimum interval between presence payload updates. - "debounceMs": 750 // Debounce delay used to collapse bursty presence updates. + "debounceMs": 750, // Debounce delay used to collapse bursty presence updates. }, // Optional Discord Rich Presence activity card updates for current playback/study session. // ========================================== @@ -382,7 +377,7 @@ "telemetryDays": 30, // Telemetry retention window in days. "dailyRollupsDays": 365, // Daily rollup retention window in days. "monthlyRollupsDays": 1825, // Monthly rollup retention window in days. - "vacuumIntervalDays": 7 // Minimum days between VACUUM runs. - } // Retention setting. - } // Enable/disable immersion tracking. + "vacuumIntervalDays": 7, // Minimum days between VACUUM runs. + }, // Retention setting. + }, // Enable/disable immersion tracking. } diff --git a/launcher/commands/playback-command.ts b/launcher/commands/playback-command.ts index 565de16..7aa1a96 100644 --- a/launcher/commands/playback-command.ts +++ b/launcher/commands/playback-command.ts @@ -34,12 +34,7 @@ function checkDependencies(args: Args): void { missing.push('yt-dlp'); } - if ( - args.targetKind === 'url' && - isYoutubeTarget(args.target) && - args.youtubeSubgenMode !== 'off' && - !commandExists('ffmpeg') - ) { + if (args.targetKind === 'url' && isYoutubeTarget(args.target) && !commandExists('ffmpeg')) { missing.push('ffmpeg'); } @@ -164,22 +159,28 @@ export async function runPlaybackCommand(context: LauncherCommandContext): Promi const isYoutubeUrl = selectedTarget.kind === 'url' && isYoutubeTarget(selectedTarget.target); let preloadedSubtitles: { primaryPath?: string; secondaryPath?: string } | undefined; - if (isYoutubeUrl && args.youtubeSubgenMode === 'preprocess') { - log('info', args.logLevel, 'YouTube subtitle mode: preprocess'); + if (isYoutubeUrl) { + log('info', args.logLevel, 'YouTube subtitle generation: preload before mpv'); const generated = await generateYoutubeSubtitles(selectedTarget.target, args); preloadedSubtitles = { primaryPath: generated.primaryPath, secondaryPath: generated.secondaryPath, }; + const primaryStatus = generated.primaryPath + ? 'ready' + : generated.primaryNative + ? 'native' + : 'missing'; + const secondaryStatus = generated.secondaryPath + ? 'ready' + : generated.secondaryNative + ? 'native' + : 'missing'; log( 'info', args.logLevel, - `YouTube preprocess result: primary=${generated.primaryPath ? 'ready' : 'missing'}, secondary=${generated.secondaryPath ? 'ready' : 'missing'}`, + `YouTube subtitle result: primary=${primaryStatus}, secondary=${secondaryStatus}`, ); - } else if (isYoutubeUrl && args.youtubeSubgenMode === 'automatic') { - log('info', args.logLevel, 'YouTube subtitle mode: automatic (background)'); - } else if (isYoutubeUrl) { - log('info', args.logLevel, 'YouTube subtitle mode: off'); } const shouldPauseUntilOverlayReady = @@ -201,26 +202,6 @@ export async function runPlaybackCommand(context: LauncherCommandContext): Promi { startPaused: shouldPauseUntilOverlayReady }, ); - if (isYoutubeUrl && args.youtubeSubgenMode === 'automatic') { - void generateYoutubeSubtitles(selectedTarget.target, args, async (lang, subtitlePath) => { - try { - await loadSubtitleIntoMpv(mpvSocketPath, subtitlePath, lang === 'primary', args.logLevel); - } catch (error) { - log( - 'warn', - args.logLevel, - `Generated subtitle ready but failed to load in mpv: ${(error as Error).message}`, - ); - } - }).catch((error) => { - log( - 'warn', - args.logLevel, - `Background subtitle generation failed: ${(error as Error).message}`, - ); - }); - } - const ready = await waitForUnixSocketReady(mpvSocketPath, 10000); const pluginAutoStartEnabled = pluginRuntimeConfig.autoStart; const shouldStartOverlay = args.startOverlay || args.autoStartOverlay; diff --git a/launcher/config-domain-parsers.test.ts b/launcher/config-domain-parsers.test.ts index 14be87d..7c2da07 100644 --- a/launcher/config-domain-parsers.test.ts +++ b/launcher/config-domain-parsers.test.ts @@ -6,10 +6,24 @@ import { parsePluginRuntimeConfigContent } from './config/plugin-runtime-config. test('parseLauncherYoutubeSubgenConfig keeps only valid typed values', () => { const parsed = parseLauncherYoutubeSubgenConfig({ + ai: { + enabled: true, + apiKey: 'shared-key', + baseUrl: 'https://openrouter.ai/api', + model: 'openrouter/shared-model', + systemPrompt: 'Legacy shared prompt.', + requestTimeoutMs: 12000, + }, youtubeSubgen: { - mode: 'preprocess', whisperBin: '/usr/bin/whisper', whisperModel: '/models/base.bin', + whisperVadModel: '/models/vad.bin', + whisperThreads: 6.8, + fixWithAi: true, + ai: { + model: 'openrouter/subgen-model', + systemPrompt: 'Fix subtitles only.', + }, primarySubLanguages: ['ja', 42, 'en'], }, secondarySub: { @@ -24,9 +38,17 @@ test('parseLauncherYoutubeSubgenConfig keeps only valid typed values', () => { }, }); - assert.equal(parsed.mode, 'preprocess'); + assert.equal('mode' in parsed, false); assert.deepEqual(parsed.primarySubLanguages, ['ja', 'en']); assert.deepEqual(parsed.secondarySubLanguages, ['eng', 'deu']); + assert.equal(parsed.whisperVadModel, '/models/vad.bin'); + assert.equal(parsed.whisperThreads, 6); + assert.equal(parsed.fixWithAi, true); + assert.equal(parsed.ai?.enabled, true); + assert.equal(parsed.ai?.apiKey, 'shared-key'); + assert.equal(parsed.ai?.model, 'openrouter/subgen-model'); + assert.equal(parsed.ai?.systemPrompt, 'Fix subtitles only.'); + assert.equal(parsed.ai?.requestTimeoutMs, 12000); assert.equal(parsed.jimakuLanguagePreference, 'ja'); assert.equal(parsed.jimakuMaxEntryResults, 8); }); diff --git a/launcher/config/args-normalizer.ts b/launcher/config/args-normalizer.ts index e307d67..44a34b9 100644 --- a/launcher/config/args-normalizer.ts +++ b/launcher/config/args-normalizer.ts @@ -1,13 +1,7 @@ import fs from 'node:fs'; import path from 'node:path'; import { fail } from '../log.js'; -import type { - Args, - Backend, - LauncherYoutubeSubgenConfig, - LogLevel, - YoutubeSubgenMode, -} from '../types.js'; +import type { Args, Backend, LauncherYoutubeSubgenConfig, LogLevel } from '../types.js'; import { DEFAULT_JIMAKU_API_BASE_URL, DEFAULT_YOUTUBE_PRIMARY_SUB_LANGS, @@ -54,14 +48,6 @@ function parseLogLevel(value: string): LogLevel { fail(`Invalid log level: ${value} (must be debug, info, warn, or error)`); } -function parseYoutubeMode(value: string): YoutubeSubgenMode { - const normalized = value.toLowerCase(); - if (normalized === 'automatic' || normalized === 'preprocess' || normalized === 'off') { - return normalized as YoutubeSubgenMode; - } - fail(`Invalid yt-subgen mode: ${value} (must be automatic, preprocess, or off)`); -} - function parseBackend(value: string): Backend { if (value === 'auto' || value === 'hyprland' || value === 'x11' || value === 'macos') { return value as Backend; @@ -91,13 +77,6 @@ function parseDictionaryTarget(value: string): string { } export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig): Args { - const envMode = (process.env.SUBMINER_YT_SUBGEN_MODE || '').toLowerCase(); - const defaultMode: YoutubeSubgenMode = - envMode === 'preprocess' || envMode === 'off' || envMode === 'automatic' - ? (envMode as YoutubeSubgenMode) - : launcherConfig.mode - ? launcherConfig.mode - : 'automatic'; const configuredSecondaryLangs = uniqueNormalizedLangCodes( launcherConfig.secondarySubLanguages ?? [], ); @@ -120,12 +99,18 @@ export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig): recursive: false, profile: 'subminer', startOverlay: false, - youtubeSubgenMode: defaultMode, whisperBin: process.env.SUBMINER_WHISPER_BIN || launcherConfig.whisperBin || '', whisperModel: process.env.SUBMINER_WHISPER_MODEL || launcherConfig.whisperModel || '', + whisperVadModel: process.env.SUBMINER_WHISPER_VAD_MODEL || launcherConfig.whisperVadModel || '', + whisperThreads: (() => { + const envValue = Number.parseInt(process.env.SUBMINER_WHISPER_THREADS || '', 10); + if (Number.isInteger(envValue) && envValue > 0) return envValue; + return launcherConfig.whisperThreads || 4; + })(), youtubeSubgenOutDir: process.env.SUBMINER_YT_SUBGEN_OUT_DIR || DEFAULT_YOUTUBE_SUBGEN_OUT_DIR, youtubeSubgenAudioFormat: process.env.SUBMINER_YT_SUBGEN_AUDIO_FORMAT || 'm4a', youtubeSubgenKeepTemp: process.env.SUBMINER_YT_SUBGEN_KEEP_TEMP === '1', + youtubeFixWithAi: launcherConfig.fixWithAi === true, jimakuApiKey: process.env.SUBMINER_JIMAKU_API_KEY || '', jimakuApiKeyCommand: process.env.SUBMINER_JIMAKU_API_KEY_COMMAND || '', jimakuApiBaseUrl: process.env.SUBMINER_JIMAKU_API_BASE_URL || DEFAULT_JIMAKU_API_BASE_URL, @@ -152,6 +137,15 @@ export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig): youtubeSecondarySubLangs: secondarySubLangs, youtubeAudioLangs, youtubeWhisperSourceLanguage: inferWhisperLanguage(primarySubLangs, 'ja'), + aiConfig: { + enabled: launcherConfig.ai?.enabled, + apiKey: launcherConfig.ai?.apiKey, + apiKeyCommand: launcherConfig.ai?.apiKeyCommand, + baseUrl: launcherConfig.ai?.baseUrl, + model: launcherConfig.ai?.model, + systemPrompt: launcherConfig.ai?.systemPrompt, + requestTimeoutMs: launcherConfig.ai?.requestTimeoutMs, + }, useTexthooker: true, autoStartOverlay: false, texthookerOnly: false, @@ -242,8 +236,6 @@ export function applyInvocationsToArgs(parsed: Args, invocations: CliInvocations if (invocations.ytInvocation) { if (invocations.ytInvocation.logLevel) parsed.logLevel = parseLogLevel(invocations.ytInvocation.logLevel); - if (invocations.ytInvocation.mode) - parsed.youtubeSubgenMode = parseYoutubeMode(invocations.ytInvocation.mode); if (invocations.ytInvocation.outDir) parsed.youtubeSubgenOutDir = invocations.ytInvocation.outDir; if (invocations.ytInvocation.keepTemp) parsed.youtubeSubgenKeepTemp = true; @@ -251,6 +243,10 @@ export function applyInvocationsToArgs(parsed: Args, invocations: CliInvocations parsed.whisperBin = invocations.ytInvocation.whisperBin; if (invocations.ytInvocation.whisperModel) parsed.whisperModel = invocations.ytInvocation.whisperModel; + if (invocations.ytInvocation.whisperVadModel) + parsed.whisperVadModel = invocations.ytInvocation.whisperVadModel; + if (invocations.ytInvocation.whisperThreads) + parsed.whisperThreads = invocations.ytInvocation.whisperThreads; if (invocations.ytInvocation.ytSubgenAudioFormat) { parsed.youtubeSubgenAudioFormat = invocations.ytInvocation.ytSubgenAudioFormat; } diff --git a/launcher/config/cli-parser-builder.ts b/launcher/config/cli-parser-builder.ts index 312bc38..126d8ef 100644 --- a/launcher/config/cli-parser-builder.ts +++ b/launcher/config/cli-parser-builder.ts @@ -16,11 +16,12 @@ export interface JellyfinInvocation { export interface YtInvocation { target?: string; - mode?: string; outDir?: string; keepTemp?: boolean; whisperBin?: string; whisperModel?: string; + whisperVadModel?: string; + whisperThreads?: number; ytSubgenAudioFormat?: string; logLevel?: string; } @@ -201,21 +202,27 @@ export function parseCliPrograms( .alias('youtube') .description('YouTube workflows') .argument('[target]', 'YouTube URL or ytsearch: query') - .option('-m, --mode ', 'Subtitle generation mode') .option('-o, --out-dir ', 'Subtitle output dir') .option('--keep-temp', 'Keep temp files') .option('--whisper-bin ', 'whisper.cpp CLI path') .option('--whisper-model ', 'whisper model path') + .option('--whisper-vad-model ', 'whisper.cpp VAD model path') + .option('--whisper-threads ', 'whisper.cpp thread count') .option('--yt-subgen-audio-format ', 'Audio extraction format') .option('--log-level ', 'Log level') .action((target: string | undefined, options: Record) => { ytInvocation = { target, - mode: typeof options.mode === 'string' ? options.mode : undefined, outDir: typeof options.outDir === 'string' ? options.outDir : undefined, keepTemp: options.keepTemp === true, whisperBin: typeof options.whisperBin === 'string' ? options.whisperBin : undefined, whisperModel: typeof options.whisperModel === 'string' ? options.whisperModel : undefined, + whisperVadModel: + typeof options.whisperVadModel === 'string' ? options.whisperVadModel : undefined, + whisperThreads: + typeof options.whisperThreads === 'number' && Number.isFinite(options.whisperThreads) + ? Math.floor(options.whisperThreads) + : undefined, ytSubgenAudioFormat: typeof options.ytSubgenAudioFormat === 'string' ? options.ytSubgenAudioFormat : undefined, logLevel: typeof options.logLevel === 'string' ? options.logLevel : undefined, diff --git a/launcher/config/youtube-subgen-config.ts b/launcher/config/youtube-subgen-config.ts index bfe7c34..eb1ecbd 100644 --- a/launcher/config/youtube-subgen-config.ts +++ b/launcher/config/youtube-subgen-config.ts @@ -1,4 +1,5 @@ import type { LauncherYoutubeSubgenConfig } from '../types.js'; +import { mergeAiConfig } from '../../src/ai/config.js'; function asStringArray(value: unknown): string[] | undefined { if (!Array.isArray(value)) return undefined; @@ -21,17 +22,58 @@ export function parseLauncherYoutubeSubgenConfig( const jimakuRaw = root.jimaku; const jimaku = jimakuRaw && typeof jimakuRaw === 'object' ? (jimakuRaw as Record) : null; + const aiRaw = root.ai; + const ai = aiRaw && typeof aiRaw === 'object' ? (aiRaw as Record) : null; + const youtubeAiRaw = youtubeSubgen?.ai; + const youtubeAi = + youtubeAiRaw && typeof youtubeAiRaw === 'object' + ? (youtubeAiRaw as Record) + : null; - const mode = youtubeSubgen?.mode; const jimakuLanguagePreference = jimaku?.languagePreference; const jimakuMaxEntryResults = jimaku?.maxEntryResults; return { - mode: mode === 'automatic' || mode === 'preprocess' || mode === 'off' ? mode : undefined, whisperBin: typeof youtubeSubgen?.whisperBin === 'string' ? youtubeSubgen.whisperBin : undefined, whisperModel: typeof youtubeSubgen?.whisperModel === 'string' ? youtubeSubgen.whisperModel : undefined, + whisperVadModel: + typeof youtubeSubgen?.whisperVadModel === 'string' + ? youtubeSubgen.whisperVadModel + : undefined, + whisperThreads: + typeof youtubeSubgen?.whisperThreads === 'number' && + Number.isFinite(youtubeSubgen.whisperThreads) && + youtubeSubgen.whisperThreads > 0 + ? Math.floor(youtubeSubgen.whisperThreads) + : undefined, + fixWithAi: typeof youtubeSubgen?.fixWithAi === 'boolean' ? youtubeSubgen.fixWithAi : undefined, + ai: mergeAiConfig( + ai + ? { + enabled: typeof ai.enabled === 'boolean' ? ai.enabled : undefined, + apiKey: typeof ai.apiKey === 'string' ? ai.apiKey : undefined, + apiKeyCommand: typeof ai.apiKeyCommand === 'string' ? ai.apiKeyCommand : undefined, + baseUrl: typeof ai.baseUrl === 'string' ? ai.baseUrl : undefined, + model: typeof ai.model === 'string' ? ai.model : undefined, + systemPrompt: typeof ai.systemPrompt === 'string' ? ai.systemPrompt : undefined, + requestTimeoutMs: + typeof ai.requestTimeoutMs === 'number' && + Number.isFinite(ai.requestTimeoutMs) && + ai.requestTimeoutMs > 0 + ? Math.floor(ai.requestTimeoutMs) + : undefined, + } + : undefined, + youtubeAi + ? { + model: typeof youtubeAi.model === 'string' ? youtubeAi.model : undefined, + systemPrompt: + typeof youtubeAi.systemPrompt === 'string' ? youtubeAi.systemPrompt : undefined, + } + : undefined, + ), primarySubLanguages: asStringArray(youtubeSubgen?.primarySubLanguages), secondarySubLanguages: asStringArray(secondarySub?.secondarySubLanguages), jimakuApiKey: typeof jimaku?.apiKey === 'string' ? jimaku.apiKey : undefined, diff --git a/launcher/main.test.ts b/launcher/main.test.ts index a8f0d24..62af91e 100644 --- a/launcher/main.test.ts +++ b/launcher/main.test.ts @@ -162,6 +162,134 @@ test('doctor reports checks and exits non-zero without hard dependencies', () => }); }); +test('youtube command rejects removed --mode option', () => { + withTempDir((root) => { + const homeDir = path.join(root, 'home'); + const xdgConfigHome = path.join(root, 'xdg'); + const appPath = path.join(root, 'fake-subminer.sh'); + fs.writeFileSync(appPath, '#!/bin/sh\nexit 0\n'); + fs.chmodSync(appPath, 0o755); + + const env = { + ...makeTestEnv(homeDir, xdgConfigHome), + SUBMINER_APPIMAGE_PATH: appPath, + }; + const result = runLauncher( + ['youtube', 'https://www.youtube.com/watch?v=test123', '--mode', 'automatic'], + env, + ); + + assert.equal(result.status, 1); + assert.match(result.stderr, /unknown option '--mode'/i); + }); +}); + +test('youtube playback generates subtitles before mpv launch', () => { + withTempDir((root) => { + const homeDir = path.join(root, 'home'); + const xdgConfigHome = path.join(root, 'xdg'); + const binDir = path.join(root, 'bin'); + const appPath = path.join(root, 'fake-subminer.sh'); + const ytdlpLogPath = path.join(root, 'yt-dlp.log'); + const mpvCapturePath = path.join(root, 'mpv-order.txt'); + const mpvArgsPath = path.join(root, 'mpv-args.txt'); + const socketPath = path.join(root, 'mpv.sock'); + + fs.mkdirSync(binDir, { recursive: true }); + fs.mkdirSync(path.join(xdgConfigHome, 'SubMiner'), { recursive: true }); + fs.mkdirSync(path.join(xdgConfigHome, 'mpv', 'script-opts'), { recursive: true }); + fs.writeFileSync( + path.join(xdgConfigHome, 'SubMiner', 'setup-state.json'), + JSON.stringify({ + version: 1, + status: 'completed', + completedAt: '2026-03-08T00:00:00.000Z', + completionSource: 'user', + lastSeenYomitanDictionaryCount: 0, + pluginInstallStatus: 'installed', + pluginInstallPathSummary: null, + }), + ); + fs.writeFileSync( + path.join(xdgConfigHome, 'mpv', 'script-opts', 'subminer.conf'), + `socket_path=${socketPath}\nauto_start=no\nauto_start_visible_overlay=no\nauto_start_pause_until_ready=no\n`, + ); + fs.writeFileSync(appPath, '#!/bin/sh\nexit 0\n'); + fs.chmodSync(appPath, 0o755); + + fs.writeFileSync( + path.join(binDir, 'yt-dlp'), + `#!/bin/sh +set -eu +printf '%s\\n' "$*" >> "$SUBMINER_TEST_YTDLP_LOG" +if printf '%s\\n' "$*" | grep -q -- '--dump-single-json'; then + printf '{"id":"video123"}\\n' + exit 0 +fi +out_dir="" +prev="" +for arg in "$@"; do + if [ "$prev" = "-o" ]; then + out_dir=$(dirname "$arg") + break + fi + prev="$arg" +done +mkdir -p "$out_dir" +printf '1\\n00:00:00,000 --> 00:00:01,000\\nこんにちは\\n' > "$out_dir/video123.ja.srt" +printf '1\\n00:00:00,000 --> 00:00:01,000\\nhello\\n' > "$out_dir/video123.en.srt" +`, + 'utf8', + ); + fs.chmodSync(path.join(binDir, 'yt-dlp'), 0o755); + + fs.writeFileSync(path.join(binDir, 'ffmpeg'), '#!/bin/sh\nexit 0\n', 'utf8'); + fs.chmodSync(path.join(binDir, 'ffmpeg'), 0o755); + + fs.writeFileSync( + path.join(binDir, 'mpv'), + `#!/bin/sh +set -eu +if [ -s "$SUBMINER_TEST_YTDLP_LOG" ]; then + printf 'generated-before-mpv\\n' > "$SUBMINER_TEST_MPV_ORDER" +else + printf 'mpv-before-generation\\n' > "$SUBMINER_TEST_MPV_ORDER" +fi +printf '%s\\n' "$@" > "$SUBMINER_TEST_MPV_ARGS" +socket_path="" +for arg in "$@"; do + case "$arg" in + --input-ipc-server=*) + socket_path="\${arg#--input-ipc-server=}" + ;; + esac +done +bun -e "const net=require('node:net'); const fs=require('node:fs'); const socket=process.argv[1]; try { fs.rmSync(socket,{force:true}); } catch {} const server=net.createServer((conn)=>conn.end()); server.listen(socket,()=>setTimeout(()=>server.close(()=>process.exit(0)),250));" "$socket_path" +`, + 'utf8', + ); + fs.chmodSync(path.join(binDir, 'mpv'), 0o755); + + const env = { + ...makeTestEnv(homeDir, xdgConfigHome), + PATH: `${binDir}${path.delimiter}${process.env.PATH || ''}`, + SUBMINER_APPIMAGE_PATH: appPath, + SUBMINER_TEST_YTDLP_LOG: ytdlpLogPath, + SUBMINER_TEST_MPV_ORDER: mpvCapturePath, + SUBMINER_TEST_MPV_ARGS: mpvArgsPath, + }; + const result = runLauncher(['youtube', 'https://www.youtube.com/watch?v=test123'], env); + + assert.equal(result.status, 0); + assert.equal(fs.readFileSync(mpvCapturePath, 'utf8').trim(), 'generated-before-mpv'); + assert.match( + fs.readFileSync(mpvArgsPath, 'utf8'), + /https:\/\/www\.youtube\.com\/watch\?v=test123/, + ); + assert.match(fs.readFileSync(ytdlpLogPath, 'utf8'), /--dump-single-json/); + }); +}); + test('dictionary command forwards --dictionary and --dictionary-target to app command path', () => { withTempDir((root) => { const homeDir = path.join(root, 'home'); diff --git a/launcher/types.ts b/launcher/types.ts index 9cfa061..88e0fa9 100644 --- a/launcher/types.ts +++ b/launcher/types.ts @@ -42,26 +42,38 @@ export const DEFAULT_MPV_SUBMINER_ARGS = [ ] as const; export type LogLevel = 'debug' | 'info' | 'warn' | 'error'; -export type YoutubeSubgenMode = 'automatic' | 'preprocess' | 'off'; export type Backend = 'auto' | 'hyprland' | 'x11' | 'macos'; export type JimakuLanguagePreference = 'ja' | 'en' | 'none'; +export interface LauncherAiConfig { + enabled?: boolean; + apiKey?: string; + apiKeyCommand?: string; + baseUrl?: string; + model?: string; + systemPrompt?: string; + requestTimeoutMs?: number; +} + export interface Args { backend: Backend; directory: string; recursive: boolean; profile: string; startOverlay: boolean; - youtubeSubgenMode: YoutubeSubgenMode; whisperBin: string; whisperModel: string; + whisperVadModel: string; + whisperThreads: number; youtubeSubgenOutDir: string; youtubeSubgenAudioFormat: string; youtubeSubgenKeepTemp: boolean; + youtubeFixWithAi: boolean; youtubePrimarySubLangs: string[]; youtubeSecondarySubLangs: string[]; youtubeAudioLangs: string[]; youtubeWhisperSourceLanguage: string; + aiConfig: LauncherAiConfig; useTexthooker: boolean; autoStartOverlay: boolean; texthookerOnly: boolean; @@ -96,9 +108,12 @@ export interface Args { } export interface LauncherYoutubeSubgenConfig { - mode?: YoutubeSubgenMode; whisperBin?: string; whisperModel?: string; + whisperVadModel?: string; + whisperThreads?: number; + fixWithAi?: boolean; + ai?: LauncherAiConfig; primarySubLanguages?: string[]; secondarySubLanguages?: string[]; jimakuApiKey?: string; @@ -144,13 +159,15 @@ export interface SubtitleCandidate { lang: 'primary' | 'secondary'; ext: string; size: number; - source: 'manual' | 'auto' | 'whisper' | 'whisper-translate'; + source: 'manual' | 'whisper' | 'whisper-fixed' | 'whisper-translate' | 'whisper-translate-fixed'; } export interface YoutubeSubgenOutputs { basename: string; primaryPath?: string; secondaryPath?: string; + primaryNative?: boolean; + secondaryNative?: boolean; } export interface MpvTrack { diff --git a/src/config/config.test.ts b/src/config/config.test.ts index 612abed..9b42abe 100644 --- a/src/config/config.test.ts +++ b/src/config/config.test.ts @@ -34,6 +34,13 @@ test('loads defaults when config is missing', () => { assert.equal(config.jellyfin.remoteControlAutoConnect, true); assert.equal(config.jellyfin.autoAnnounce, false); assert.equal(config.jellyfin.remoteControlDeviceName, 'SubMiner'); + assert.equal(config.ai.enabled, false); + assert.equal(config.ai.apiKeyCommand, ''); + assert.deepEqual(config.ankiConnect.ai, { + enabled: false, + model: '', + systemPrompt: '', + }); assert.equal(config.startupWarmups.lowPowerMode, false); assert.equal(config.startupWarmups.mecab, true); assert.equal(config.startupWarmups.yomitanExtension, true); @@ -1068,12 +1075,20 @@ test('parses global shortcuts and startup settings', () => { fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ + "ai": { + "enabled": true, + "apiKeyCommand": "pass show subminer/ai", + "model": "openai/gpt-4o-mini" + }, "shortcuts": { "toggleVisibleOverlayGlobal": "Alt+Shift+U", "openJimaku": "Ctrl+Alt+J" }, "youtubeSubgen": { - "primarySubLanguages": ["ja", "jpn", "jp"] + "primarySubLanguages": ["ja", "jpn", "jp"], + "whisperVadModel": "/models/vad.bin", + "whisperThreads": 12, + "fixWithAi": true } }`, 'utf-8', @@ -1081,9 +1096,14 @@ test('parses global shortcuts and startup settings', () => { const service = new ConfigService(dir); const config = service.getConfig(); + assert.equal(config.ai.enabled, true); + assert.equal(config.ai.apiKeyCommand, 'pass show subminer/ai'); assert.equal(config.shortcuts.toggleVisibleOverlayGlobal, 'Alt+Shift+U'); assert.equal(config.shortcuts.openJimaku, 'Ctrl+Alt+J'); assert.deepEqual(config.youtubeSubgen.primarySubLanguages, ['ja', 'jpn', 'jp']); + assert.equal(config.youtubeSubgen.whisperVadModel, '/models/vad.bin'); + assert.equal(config.youtubeSubgen.whisperThreads, 12); + assert.equal(config.youtubeSubgen.fixWithAi, true); }); test('runtime options registry is centralized', () => { @@ -1324,14 +1344,86 @@ test('supports legacy ankiConnect.behavior N+1 settings as fallback', () => { ); }); -test('warns when ankiConnect.openRouter is used and migrates to ai', () => { +test('accepts top-level ai config', () => { + const dir = makeTempDir(); + fs.writeFileSync( + path.join(dir, 'config.jsonc'), + `{ + "ai": { + "enabled": true, + "apiKey": "abc123", + "apiKeyCommand": "pass show subminer/ai", + "baseUrl": "https://openrouter.ai/api", + "model": "openrouter/test-model", + "systemPrompt": "Return only fixed subtitles.", + "requestTimeoutMs": 20000 + } + }`, + 'utf-8', + ); + + const service = new ConfigService(dir); + const config = service.getConfig(); + assert.equal(config.ai.enabled, true); + assert.equal(config.ai.apiKey, 'abc123'); + assert.equal(config.ai.apiKeyCommand, 'pass show subminer/ai'); + assert.equal(config.ai.baseUrl, 'https://openrouter.ai/api'); + assert.equal(config.ai.model, 'openrouter/test-model'); + assert.equal(config.ai.systemPrompt, 'Return only fixed subtitles.'); + assert.equal(config.ai.requestTimeoutMs, 20000); +}); + +test('accepts per-feature ai overrides for anki and youtube subtitle generation', () => { + const dir = makeTempDir(); + fs.writeFileSync( + path.join(dir, 'config.jsonc'), + `{ + "ai": { + "enabled": true, + "apiKeyCommand": "pass show subminer/ai", + "baseUrl": "https://openrouter.ai/api", + "model": "openrouter/shared-model", + "systemPrompt": "Legacy shared prompt." + }, + "ankiConnect": { + "ai": { + "enabled": true, + "model": "openrouter/anki-model", + "systemPrompt": "Translate mined sentence text." + } + }, + "youtubeSubgen": { + "ai": { + "model": "openrouter/subgen-model", + "systemPrompt": "Fix subtitle mistakes only." + } + } + }`, + 'utf-8', + ); + + const service = new ConfigService(dir); + const config = service.getConfig(); + + assert.equal(config.ai.enabled, true); + assert.equal(config.ai.model, 'openrouter/shared-model'); + assert.equal(config.ankiConnect.ai.enabled, true); + assert.equal(config.ankiConnect.ai.model, 'openrouter/anki-model'); + assert.equal(config.ankiConnect.ai.systemPrompt, 'Translate mined sentence text.'); + assert.equal(config.youtubeSubgen.ai.model, 'openrouter/subgen-model'); + assert.equal(config.youtubeSubgen.ai.systemPrompt, 'Fix subtitle mistakes only.'); +}); + +test('warns and falls back when ankiConnect.ai override values are invalid', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "openRouter": { - "model": "openrouter/test-model" + "ai": { + "enabled": "yes", + "model": 123, + "systemPrompt": true } } }`, @@ -1342,13 +1434,10 @@ test('warns when ankiConnect.openRouter is used and migrates to ai', () => { const config = service.getConfig(); const warnings = service.getWarnings(); - assert.equal((config.ankiConnect.ai as Record).model, 'openrouter/test-model'); - assert.ok( - warnings.some( - (warning) => - warning.path === 'ankiConnect.openRouter' && warning.message.includes('ankiConnect.ai'), - ), - ); + assert.deepEqual(config.ankiConnect.ai, DEFAULT_CONFIG.ankiConnect.ai); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.ai.enabled')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.ai.model')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.ai.systemPrompt')); }); test('falls back and warns when legacy ankiConnect migration values are invalid', () => { @@ -1547,6 +1636,7 @@ test('falls back to default when ankiConnect n+1 deck list is invalid', () => { test('template generator includes known keys', () => { const output = generateConfigTemplate(DEFAULT_CONFIG); + assert.match(output, /"ai":/); assert.match(output, /"ankiConnect":/); assert.match(output, /"logging":/); assert.match(output, /"websocket":/); @@ -1577,6 +1667,31 @@ test('template generator includes known keys', () => { output, /"enabled": false,? \/\/ Enable AnkiConnect integration\. Values: true \| false/, ); + assert.match( + output, + /"enabled": false,? \/\/ Enable AI provider usage for Anki translation\/enrichment flows\. Values: true \| false/, + ); + assert.match( + output, + /"model": "",? \/\/ Optional model override for Anki AI translation\/enrichment flows\./, + ); + assert.match( + output, + /"enabled": false,? \/\/ Enable shared OpenAI-compatible AI provider features\. Values: true \| false/, + ); + assert.match( + output, + /"fixWithAi": false,? \/\/ Use shared AI provider to post-process whisper-generated YouTube subtitles\. Values: true \| false/, + ); + assert.match( + output, + /"systemPrompt": "",? \/\/ Optional system prompt override for YouTube subtitle AI post-processing\./, + ); + assert.doesNotMatch(output, /"mode": "automatic"/); + assert.match( + output, + /"whisperThreads": 4,? \/\/ Thread count passed to whisper\.cpp subtitle generation runs\./, + ); assert.match( output, /"launchAtStartup": true,? \/\/ Launch texthooker server automatically when SubMiner starts\. Values: true \| false/, diff --git a/src/config/definitions/defaults-integrations.ts b/src/config/definitions/defaults-integrations.ts index 5765eb2..0df432b 100644 --- a/src/config/definitions/defaults-integrations.ts +++ b/src/config/definitions/defaults-integrations.ts @@ -2,7 +2,7 @@ import { ResolvedConfig } from '../../types'; export const INTEGRATIONS_DEFAULT_CONFIG: Pick< ResolvedConfig, - 'ankiConnect' | 'jimaku' | 'anilist' | 'jellyfin' | 'discordPresence' | 'youtubeSubgen' + 'ankiConnect' | 'jimaku' | 'anilist' | 'jellyfin' | 'discordPresence' | 'ai' | 'youtubeSubgen' > = { ankiConnect: { enabled: false, @@ -24,13 +24,8 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< }, ai: { enabled: false, - alwaysUseAiTranslation: false, - apiKey: '', - model: 'openai/gpt-4o-mini', - baseUrl: 'https://openrouter.ai/api', - targetLanguage: 'English', - systemPrompt: - 'You are a translation engine. Return only the translated text with no explanations.', + model: '', + systemPrompt: '', }, media: { generateAudio: true, @@ -122,10 +117,26 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< updateIntervalMs: 3_000, debounceMs: 750, }, + ai: { + enabled: false, + apiKey: '', + apiKeyCommand: '', + model: 'openai/gpt-4o-mini', + baseUrl: 'https://openrouter.ai/api', + systemPrompt: + 'You are a translation engine. Return only the translated text with no explanations.', + requestTimeoutMs: 15_000, + }, youtubeSubgen: { - mode: 'automatic', whisperBin: '', whisperModel: '', + whisperVadModel: '', + whisperThreads: 4, + fixWithAi: false, + ai: { + model: '', + systemPrompt: '', + }, primarySubLanguages: ['ja', 'jpn'], }, }; diff --git a/src/config/definitions/options-integrations.ts b/src/config/definitions/options-integrations.ts index 1d6f7ad..7e786ee 100644 --- a/src/config/definitions/options-integrations.ts +++ b/src/config/definitions/options-integrations.ts @@ -51,6 +51,24 @@ export function buildIntegrationConfigOptionRegistry( description: 'Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging.', }, + { + path: 'ankiConnect.ai.enabled', + kind: 'boolean', + defaultValue: defaultConfig.ankiConnect.ai.enabled, + description: 'Enable AI provider usage for Anki translation/enrichment flows.', + }, + { + path: 'ankiConnect.ai.model', + kind: 'string', + defaultValue: defaultConfig.ankiConnect.ai.model, + description: 'Optional model override for Anki AI translation/enrichment flows.', + }, + { + path: 'ankiConnect.ai.systemPrompt', + kind: 'string', + defaultValue: defaultConfig.ankiConnect.ai.systemPrompt, + description: 'Optional system prompt override for Anki AI translation/enrichment flows.', + }, { path: 'ankiConnect.behavior.autoUpdateNewCards', kind: 'boolean', @@ -291,11 +309,34 @@ export function buildIntegrationConfigOptionRegistry( description: 'Debounce delay used to collapse bursty presence updates.', }, { - path: 'youtubeSubgen.mode', - kind: 'enum', - enumValues: ['automatic', 'preprocess', 'off'], - defaultValue: defaultConfig.youtubeSubgen.mode, - description: 'YouTube subtitle generation mode for the launcher script.', + path: 'ai.enabled', + kind: 'boolean', + defaultValue: defaultConfig.ai.enabled, + description: 'Enable shared OpenAI-compatible AI provider features.', + }, + { + path: 'ai.apiKey', + kind: 'string', + defaultValue: defaultConfig.ai.apiKey, + description: 'Static API key for the shared OpenAI-compatible AI provider.', + }, + { + path: 'ai.apiKeyCommand', + kind: 'string', + defaultValue: defaultConfig.ai.apiKeyCommand, + description: 'Shell command used to resolve the shared AI provider API key.', + }, + { + path: 'ai.baseUrl', + kind: 'string', + defaultValue: defaultConfig.ai.baseUrl, + description: 'Base URL for the shared OpenAI-compatible AI provider.', + }, + { + path: 'ai.requestTimeoutMs', + kind: 'number', + defaultValue: defaultConfig.ai.requestTimeoutMs, + description: 'Timeout in milliseconds for shared AI provider requests.', }, { path: 'youtubeSubgen.whisperBin', @@ -309,6 +350,36 @@ export function buildIntegrationConfigOptionRegistry( defaultValue: defaultConfig.youtubeSubgen.whisperModel, description: 'Path to whisper model used for fallback transcription.', }, + { + path: 'youtubeSubgen.whisperVadModel', + kind: 'string', + defaultValue: defaultConfig.youtubeSubgen.whisperVadModel, + description: 'Path to optional whisper VAD model used for subtitle generation.', + }, + { + path: 'youtubeSubgen.whisperThreads', + kind: 'number', + defaultValue: defaultConfig.youtubeSubgen.whisperThreads, + description: 'Thread count passed to whisper.cpp subtitle generation runs.', + }, + { + path: 'youtubeSubgen.fixWithAi', + kind: 'boolean', + defaultValue: defaultConfig.youtubeSubgen.fixWithAi, + description: 'Use shared AI provider to post-process whisper-generated YouTube subtitles.', + }, + { + path: 'youtubeSubgen.ai.model', + kind: 'string', + defaultValue: defaultConfig.youtubeSubgen.ai.model, + description: 'Optional model override for YouTube subtitle AI post-processing.', + }, + { + path: 'youtubeSubgen.ai.systemPrompt', + kind: 'string', + defaultValue: defaultConfig.youtubeSubgen.ai.systemPrompt, + description: 'Optional system prompt override for YouTube subtitle AI post-processing.', + }, { path: 'youtubeSubgen.primarySubLanguages', kind: 'string', diff --git a/src/config/definitions/template-sections.ts b/src/config/definitions/template-sections.ts index 9c9608c..9c0dfd9 100644 --- a/src/config/definitions/template-sections.ts +++ b/src/config/definitions/template-sections.ts @@ -91,11 +91,19 @@ const SUBTITLE_TEMPLATE_SECTIONS: ConfigTemplateSection[] = [ ]; const INTEGRATION_TEMPLATE_SECTIONS: ConfigTemplateSection[] = [ + { + title: 'Shared AI Provider', + description: [ + 'Canonical OpenAI-compatible provider transport settings shared by Anki and YouTube subtitle fixing.', + ], + key: 'ai', + }, { title: 'AnkiConnect Integration', description: ['Automatic Anki updates and media generation options.'], notes: [ - 'Hot-reload: AI translation settings update live while SubMiner is running.', + 'Hot-reload: ankiConnect.ai.enabled updates live while SubMiner is running.', + 'Shared AI provider transport settings are read from top-level ai and typically require restart.', 'Most other AnkiConnect settings still require restart.', ], key: 'ankiConnect', @@ -107,7 +115,7 @@ const INTEGRATION_TEMPLATE_SECTIONS: ConfigTemplateSection[] = [ }, { title: 'YouTube Subtitle Generation', - description: ['Defaults for subminer YouTube subtitle extraction/transcription mode.'], + description: ['Defaults for SubMiner YouTube subtitle generation.'], key: 'youtubeSubgen', }, { diff --git a/src/config/resolve/subtitle-domains.ts b/src/config/resolve/subtitle-domains.ts index 32f6219..2db5cb0 100644 --- a/src/config/resolve/subtitle-domains.ts +++ b/src/config/resolve/subtitle-domains.ts @@ -46,18 +46,6 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { } if (isObject(src.youtubeSubgen)) { - const mode = src.youtubeSubgen.mode; - if (mode === 'automatic' || mode === 'preprocess' || mode === 'off') { - resolved.youtubeSubgen.mode = mode; - } else if (mode !== undefined) { - warn( - 'youtubeSubgen.mode', - mode, - resolved.youtubeSubgen.mode, - 'Expected automatic, preprocess, or off.', - ); - } - const whisperBin = asString(src.youtubeSubgen.whisperBin); if (whisperBin !== undefined) { resolved.youtubeSubgen.whisperBin = whisperBin; @@ -82,6 +70,75 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { ); } + const whisperVadModel = asString(src.youtubeSubgen.whisperVadModel); + if (whisperVadModel !== undefined) { + resolved.youtubeSubgen.whisperVadModel = whisperVadModel; + } else if (src.youtubeSubgen.whisperVadModel !== undefined) { + warn( + 'youtubeSubgen.whisperVadModel', + src.youtubeSubgen.whisperVadModel, + resolved.youtubeSubgen.whisperVadModel, + 'Expected string.', + ); + } + + const whisperThreads = asNumber(src.youtubeSubgen.whisperThreads); + if (whisperThreads !== undefined && Number.isInteger(whisperThreads) && whisperThreads > 0) { + resolved.youtubeSubgen.whisperThreads = whisperThreads; + } else if (src.youtubeSubgen.whisperThreads !== undefined) { + warn( + 'youtubeSubgen.whisperThreads', + src.youtubeSubgen.whisperThreads, + resolved.youtubeSubgen.whisperThreads, + 'Expected positive integer.', + ); + } + + const fixWithAi = asBoolean(src.youtubeSubgen.fixWithAi); + if (fixWithAi !== undefined) { + resolved.youtubeSubgen.fixWithAi = fixWithAi; + } else if (src.youtubeSubgen.fixWithAi !== undefined) { + warn( + 'youtubeSubgen.fixWithAi', + src.youtubeSubgen.fixWithAi, + resolved.youtubeSubgen.fixWithAi, + 'Expected boolean.', + ); + } + + if (isObject(src.youtubeSubgen.ai)) { + const aiModel = asString(src.youtubeSubgen.ai.model); + if (aiModel !== undefined) { + resolved.youtubeSubgen.ai.model = aiModel; + } else if (src.youtubeSubgen.ai.model !== undefined) { + warn( + 'youtubeSubgen.ai.model', + src.youtubeSubgen.ai.model, + resolved.youtubeSubgen.ai.model, + 'Expected string.', + ); + } + + const aiSystemPrompt = asString(src.youtubeSubgen.ai.systemPrompt); + if (aiSystemPrompt !== undefined) { + resolved.youtubeSubgen.ai.systemPrompt = aiSystemPrompt; + } else if (src.youtubeSubgen.ai.systemPrompt !== undefined) { + warn( + 'youtubeSubgen.ai.systemPrompt', + src.youtubeSubgen.ai.systemPrompt, + resolved.youtubeSubgen.ai.systemPrompt, + 'Expected string.', + ); + } + } else if (src.youtubeSubgen.ai !== undefined) { + warn( + 'youtubeSubgen.ai', + src.youtubeSubgen.ai, + resolved.youtubeSubgen.ai, + 'Expected object.', + ); + } + if (Array.isArray(src.youtubeSubgen.primarySubLanguages)) { resolved.youtubeSubgen.primarySubLanguages = src.youtubeSubgen.primarySubLanguages.filter( (item): item is string => typeof item === 'string', diff --git a/src/core/services/overlay-runtime-init.test.ts b/src/core/services/overlay-runtime-init.test.ts index ec487de..b51ac41 100644 --- a/src/core/services/overlay-runtime-init.test.ts +++ b/src/core/services/overlay-runtime-init.test.ts @@ -108,3 +108,49 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled assert.equal(startedIntegrations, 1); assert.equal(setIntegrationCalls, 1); }); + +test('initializeOverlayRuntime re-syncs overlay shortcuts when tracker focus changes', () => { + let syncCalls = 0; + const tracker = { + onGeometryChange: null as ((...args: unknown[]) => void) | null, + onWindowFound: null as ((...args: unknown[]) => void) | null, + onWindowLost: null as (() => void) | null, + onWindowFocusChange: null as ((focused: boolean) => void) | null, + start: () => {}, + }; + + initializeOverlayRuntime({ + backendOverride: null, + createMainWindow: () => {}, + registerGlobalShortcuts: () => {}, + updateVisibleOverlayBounds: () => {}, + isVisibleOverlayVisible: () => false, + updateVisibleOverlayVisibility: () => {}, + getOverlayWindows: () => [], + syncOverlayShortcuts: () => { + syncCalls += 1; + }, + setWindowTracker: () => {}, + getMpvSocketPath: () => '/tmp/mpv.sock', + createWindowTracker: () => tracker as never, + getResolvedConfig: () => ({ + ankiConnect: { enabled: false } as never, + }), + getSubtitleTimingTracker: () => null, + getMpvClient: () => null, + getRuntimeOptionsManager: () => null, + setAnkiIntegration: () => {}, + showDesktopNotification: () => {}, + createFieldGroupingCallback: () => async () => ({ + keepNoteId: 1, + deleteNoteId: 2, + deleteDuplicate: false, + cancelled: false, + }), + getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json', + }); + + assert.equal(typeof tracker.onWindowFocusChange, 'function'); + tracker.onWindowFocusChange?.(true); + assert.equal(syncCalls, 1); +}); diff --git a/src/core/services/overlay-runtime-init.ts b/src/core/services/overlay-runtime-init.ts index 570fcbd..024e0a6 100644 --- a/src/core/services/overlay-runtime-init.ts +++ b/src/core/services/overlay-runtime-init.ts @@ -101,6 +101,9 @@ export function initializeOverlayRuntime(options: { } options.syncOverlayShortcuts(); }; + windowTracker.onWindowFocusChange = () => { + options.syncOverlayShortcuts(); + }; windowTracker.start(); } diff --git a/src/main.ts b/src/main.ts index 1b5f038..bc03bd0 100644 --- a/src/main.ts +++ b/src/main.ts @@ -969,6 +969,8 @@ const overlayShortcutsRuntime = createOverlayShortcutsRuntimeService( appState.shortcutsRegistered = registered; }, isOverlayRuntimeInitialized: () => appState.overlayRuntimeInitialized, + isMacOSPlatform: () => process.platform === 'darwin', + isTrackedMpvWindowFocused: () => appState.windowTracker?.isFocused() ?? false, showMpvOsd: (text: string) => showMpvOsd(text), openRuntimeOptionsPalette: () => { openRuntimeOptionsPalette(); diff --git a/src/types.ts b/src/types.ts index 577f488..b96621e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -227,24 +227,7 @@ export interface AnkiConnectConfig { miscInfo?: string; translation?: string; }; - ai?: { - enabled?: boolean; - alwaysUseAiTranslation?: boolean; - apiKey?: string; - model?: string; - baseUrl?: string; - targetLanguage?: string; - systemPrompt?: string; - }; - openRouter?: { - enabled?: boolean; - alwaysUseAiTranslation?: boolean; - apiKey?: string; - model?: string; - baseUrl?: string; - targetLanguage?: string; - systemPrompt?: string; - }; + ai?: boolean | AiFeatureConfig; media?: { generateAudio?: boolean; generateImage?: boolean; @@ -455,12 +438,29 @@ export interface DiscordPresenceConfig { debounceMs?: number; } -export type YoutubeSubgenMode = 'automatic' | 'preprocess' | 'off'; +export interface AiFeatureConfig { + enabled?: boolean; + model?: string; + systemPrompt?: string; +} + +export interface AiConfig { + enabled?: boolean; + apiKey?: string; + apiKeyCommand?: string; + baseUrl?: string; + model?: string; + systemPrompt?: string; + requestTimeoutMs?: number; +} export interface YoutubeSubgenConfig { - mode?: YoutubeSubgenMode; whisperBin?: string; whisperModel?: string; + whisperVadModel?: string; + whisperThreads?: number; + fixWithAi?: boolean; + ai?: AiFeatureConfig; primarySubLanguages?: string[]; } @@ -498,6 +498,7 @@ export interface Config { anilist?: AnilistConfig; jellyfin?: JellyfinConfig; discordPresence?: DiscordPresenceConfig; + ai?: AiConfig; youtubeSubgen?: YoutubeSubgenConfig; immersionTracking?: ImmersionTrackingConfig; logging?: { @@ -531,14 +532,8 @@ export interface ResolvedConfig { miscInfo: string; translation: string; }; - ai: { + ai: AiFeatureConfig & { enabled: boolean; - alwaysUseAiTranslation: boolean; - apiKey: string; - model: string; - baseUrl: string; - targetLanguage: string; - systemPrompt: string; }; media: { generateAudio: boolean; @@ -649,10 +644,22 @@ export interface ResolvedConfig { updateIntervalMs: number; debounceMs: number; }; + ai: AiConfig & { + enabled: boolean; + apiKey: string; + apiKeyCommand: string; + baseUrl: string; + model: string; + systemPrompt: string; + requestTimeoutMs: number; + }; youtubeSubgen: YoutubeSubgenConfig & { - mode: YoutubeSubgenMode; whisperBin: string; whisperModel: string; + whisperVadModel: string; + whisperThreads: number; + fixWithAi: boolean; + ai: AiFeatureConfig; primarySubLanguages: string[]; }; immersionTracking: {