diff --git a/backlog/tasks/task-117.1 - Harden-AI-subtitle-fix-against-non-SRT-model-responses.md b/backlog/tasks/task-117.1 - Harden-AI-subtitle-fix-against-non-SRT-model-responses.md new file mode 100644 index 0000000..97bc0ad --- /dev/null +++ b/backlog/tasks/task-117.1 - Harden-AI-subtitle-fix-against-non-SRT-model-responses.md @@ -0,0 +1,56 @@ +--- +id: TASK-117.1 +title: Harden AI subtitle fix against non-SRT model responses +status: Done +assignee: + - '@codex' +created_date: '2026-03-08 08:22' +updated_date: '2026-03-08 08:25' +labels: [] +dependencies: [] +references: + - >- + /Users/sudacode/projects/japanese/SubMiner/launcher/youtube/subtitle-fix-ai.ts + - /Users/sudacode/projects/japanese/SubMiner/launcher/youtube/srt.ts + - >- + /Users/sudacode/projects/japanese/SubMiner/launcher/youtube/subtitle-fix-ai.test.ts +parent_task_id: TASK-117 +--- + +## Description + + +Prevent optional YouTube AI subtitle post-processing from bailing out whenever the model returns usable cue text in a non-SRT wrapper or text-only format. The launcher should recover safe cases, preserve original timing, and fall back cleanly when the response cannot be mapped back to the source cues. + + +## Acceptance Criteria + +- [x] #1 AI subtitle fixing accepts safe AI responses that omit SRT framing but still provide one corrected text payload per original cue while preserving original cue timing. +- [x] #2 AI subtitle fixing still rejects responses that cannot be mapped back to the original cue batch without guessing and falls back to the raw subtitle file with a warning. +- [x] #3 Automated tests cover wrapped-SRT and text-only AI responses plus an unrecoverable invalid response case. + + +## Implementation Plan + + +1. Add failing tests in launcher/youtube/subtitle-fix-ai.test.ts for three cases: wrapped valid SRT, text-only one-block-per-cue output, and unrecoverable invalid output. +2. 
Extend launcher/youtube/subtitle-fix-ai.ts with a small response-normalization path that first strips markdown/code-fence wrappers, then accepts deterministic text-only cue batches only when they map 1:1 to the original cues without changing timestamps. +3. Keep existing safety rules: preserve cue count and timing, log a warning, and fall back to the raw subtitle file when normalization cannot recover a trustworthy batch. +4. Run focused launcher unit tests for subtitle-fix-ai and SRT parsing; expand only if the change affects adjacent behavior. + + +## Implementation Notes + + +Implemented deterministic AI subtitle-response recovery for fenced SRT, embedded SRT payloads, and text-only 1:1 cue batches while preserving original timing and existing fallback behavior. + +Verification: bun test launcher/youtube/*.test.ts passed; bun run typecheck passed; repo-wide format check still reports unrelated pre-existing warnings in launcher/youtube/orchestrator.ts and scripts/build-changelog*. + + +## Final Summary + + +Hardened the launcher AI subtitle-fix path so it can recover deterministic non-SRT model responses instead of immediately falling back. Added `parseAiSubtitleFixResponse` in `launcher/youtube/subtitle-fix-ai.ts` to normalize markdown-fenced or embedded SRT payloads first, then accept text-only responses only when they map 1:1 onto the original cue batch and preserve source timings. Added regression coverage in `launcher/youtube/subtitle-fix-ai.test.ts` for fenced SRT, text-only cue batches, and unrecoverable invalid output, plus a changelog fragment in `changes/task-117.1.md`. + +Verification: `bun test launcher/youtube/*.test.ts`, `bun run typecheck`, `bunx prettier --check launcher/youtube/subtitle-fix-ai.ts launcher/youtube/subtitle-fix-ai.test.ts`, and `bun run changelog:lint` passed. Repo-wide `bun run format:check:src` still reports unrelated pre-existing warnings in `launcher/youtube/orchestrator.ts` and `scripts/build-changelog*`. 
+ diff --git a/changes/task-117.1.md b/changes/task-117.1.md new file mode 100644 index 0000000..fda3f8a --- /dev/null +++ b/changes/task-117.1.md @@ -0,0 +1,4 @@ +type: fixed +area: launcher + +- Hardened YouTube AI subtitle fixing so fenced SRT output and text-only one-cue-per-block responses can still be applied without losing original cue timing. diff --git a/changes/task-130.md b/changes/task-130.md new file mode 100644 index 0000000..8db5270 --- /dev/null +++ b/changes/task-130.md @@ -0,0 +1,5 @@ +type: fixed +area: launcher + +- Keep the background SubMiner process running after a launcher-managed mpv session exits so the next mpv instance can reconnect without restarting the app. +- Reuse prior tokenization readiness after the background app is already warm so reopening a video does not pause again waiting for duplicate warmup completion. diff --git a/launcher/smoke.e2e.test.ts b/launcher/smoke.e2e.test.ts index b68cbcc..14e840f 100644 --- a/launcher/smoke.e2e.test.ts +++ b/launcher/smoke.e2e.test.ts @@ -295,7 +295,7 @@ test('launcher mpv status returns ready when socket is connectable', async () => }); test( - 'launcher start-overlay run forwards socket/backend and stops overlay after mpv exits', + 'launcher start-overlay run forwards socket/backend and keeps background app alive after mpv exits', { timeout: LONG_SMOKE_TEST_TIMEOUT_MS }, async () => { await withSmokeCase('overlay-start-stop', async (smokeCase) => { @@ -310,7 +310,6 @@ test( const appStartPath = path.join(smokeCase.artifactsDir, 'fake-app-start.log'); const appStopPath = path.join(smokeCase.artifactsDir, 'fake-app-stop.log'); await waitForJsonLines(appStartPath, 1); - await waitForJsonLines(appStopPath, 1); const appStartEntries = readJsonLines(appStartPath); const appStopEntries = readJsonLines(appStopPath); @@ -325,7 +324,7 @@ test( assert.match(result.stdout, /Starting SubMiner overlay/i); assert.equal(appStartEntries.length, 1); - assert.equal(appStopEntries.length, 1); + 
assert.equal(appStopEntries.length, 0); assert.equal(mpvEntries.length >= 1, true); const appStartArgs = appStartEntries[0]?.argv; @@ -337,9 +336,6 @@ test( assert.equal((appStartArgs as string[]).includes(smokeCase.socketPath), true); assert.equal(appStartEntries[0]?.subminerMpvLog, smokeCase.mpvOverlayLogPath); - const appStopArgs = appStopEntries[0]?.argv; - assert.deepEqual(appStopArgs, ['--stop']); - const mpvFirstArgs = mpvEntries[0]?.argv; assert.equal(Array.isArray(mpvFirstArgs), true); assert.equal( diff --git a/launcher/youtube/subtitle-fix-ai.test.ts b/launcher/youtube/subtitle-fix-ai.test.ts new file mode 100644 index 0000000..045cf43 --- /dev/null +++ b/launcher/youtube/subtitle-fix-ai.test.ts @@ -0,0 +1,126 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { applyFixedCueBatch, parseAiSubtitleFixResponse } from './subtitle-fix-ai'; +import { parseSrt } from './srt'; + +test('applyFixedCueBatch accepts content-only fixes with identical timing', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんいちは + +2 +00:00:03,000 --> 00:00:04,000 +世界 +`); + const fixed = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんにちは + +2 +00:00:03,000 --> 00:00:04,000 +世界 +`); + + const merged = applyFixedCueBatch(original, fixed); + assert.equal(merged[0]?.text, 'こんにちは'); +}); + +test('applyFixedCueBatch rejects changed timestamps', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんいちは +`); + const fixed = parseSrt(`1 +00:00:01,100 --> 00:00:02,000 +こんにちは +`); + + assert.throws(() => applyFixedCueBatch(original, fixed), /timestamps/i); +}); + +test('parseAiSubtitleFixResponse accepts valid SRT wrapped in markdown fences', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんいちは + +2 +00:00:03,000 --> 00:00:04,000 +世界 +`); + + const parsed = parseAiSubtitleFixResponse( + original, + '```srt\n1\n00:00:01,000 --> 00:00:02,000\nこんにちは\n\n2\n00:00:03,000 --> 00:00:04,000\n世界\n```', + 
); + + assert.equal(parsed[0]?.text, 'こんにちは'); + assert.equal(parsed[1]?.text, '世界'); +}); + +test('parseAiSubtitleFixResponse accepts text-only one-block-per-cue output', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんいちは + +2 +00:00:03,000 --> 00:00:04,000 +世界 +`); + + const parsed = parseAiSubtitleFixResponse( + original, + `こんにちは + +世界`, + ); + + assert.equal(parsed[0]?.start, '00:00:01,000'); + assert.equal(parsed[0]?.text, 'こんにちは'); + assert.equal(parsed[1]?.end, '00:00:04,000'); + assert.equal(parsed[1]?.text, '世界'); +}); + +test('parseAiSubtitleFixResponse rejects unrecoverable text-only output', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんいちは + +2 +00:00:03,000 --> 00:00:04,000 +世界 +`); + + assert.throws( + () => parseAiSubtitleFixResponse(original, 'こんにちは\n世界\n余分です'), + /cue block|cue count/i, + ); +}); + +test('parseAiSubtitleFixResponse rejects language drift for primary Japanese subtitles', () => { + const original = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんにちは + +2 +00:00:03,000 --> 00:00:04,000 +今日はいい天気ですね +`); + + assert.throws( + () => + parseAiSubtitleFixResponse( + original, + `1 +00:00:01,000 --> 00:00:02,000 +Hello + +2 +00:00:03,000 --> 00:00:04,000 +The weather is nice today +`, + 'ja', + ), + /language/i, + ); +}); diff --git a/launcher/youtube/subtitle-fix-ai.ts b/launcher/youtube/subtitle-fix-ai.ts new file mode 100644 index 0000000..df72043 --- /dev/null +++ b/launcher/youtube/subtitle-fix-ai.ts @@ -0,0 +1,213 @@ +import type { LauncherAiConfig } from '../types.js'; +import { requestAiChatCompletion, resolveAiApiKey } from '../../src/ai/client.js'; +import { parseSrt, stringifySrt, type SrtCue } from './srt.js'; + +const DEFAULT_SUBTITLE_FIX_PROMPT = + 'Fix transcription mistakes only. Preserve cue numbering, timestamps, and valid SRT formatting exactly. 
Return only corrected SRT.'; + +const SRT_BLOCK_PATTERN = + /(?:^|\n)(\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}[\s\S]*)$/; +const CODE_FENCE_PATTERN = /^```(?:\w+)?\s*\n([\s\S]*?)\n```$/; +const JAPANESE_CHAR_PATTERN = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}]/gu; +const LATIN_LETTER_PATTERN = /\p{Script=Latin}/gu; + +export function applyFixedCueBatch(original: SrtCue[], fixed: SrtCue[]): SrtCue[] { + if (original.length !== fixed.length) { + throw new Error('Fixed subtitle batch must preserve cue count.'); + } + + return original.map((cue, index) => { + const nextCue = fixed[index]; + if (!nextCue) { + throw new Error('Missing fixed subtitle cue.'); + } + if (cue.start !== nextCue.start || cue.end !== nextCue.end) { + throw new Error('Fixed subtitle batch must preserve cue timestamps.'); + } + return { + ...cue, + text: nextCue.text, + }; + }); +} + +function chunkCues(cues: SrtCue[], size: number): SrtCue[][] { + const chunks: SrtCue[][] = []; + for (let index = 0; index < cues.length; index += size) { + chunks.push(cues.slice(index, index + size)); + } + return chunks; +} + +function normalizeAiSubtitleFixCandidates(content: string): string[] { + const trimmed = content.replace(/\r\n/g, '\n').trim(); + if (!trimmed) { + return []; + } + + const candidates = new Set([trimmed]); + const fenced = CODE_FENCE_PATTERN.exec(trimmed)?.[1]?.trim(); + if (fenced) { + candidates.add(fenced); + } + + const srtBlock = SRT_BLOCK_PATTERN.exec(trimmed)?.[1]?.trim(); + if (srtBlock) { + candidates.add(srtBlock); + } + + return [...candidates]; +} + +function parseTextOnlyCueBatch(original: SrtCue[], content: string): SrtCue[] { + const paragraphBlocks = content + .split(/\n{2,}/) + .map((block) => block.trim()) + .filter((block) => block.length > 0); + if (paragraphBlocks.length === original.length) { + return original.map((cue, index) => ({ + ...cue, + text: paragraphBlocks[index]!, + })); + } + + const lineBlocks = content + .split('\n') + 
.map((line) => line.trim()) + .filter((line) => line.length > 0); + if (lineBlocks.length === original.length) { + return original.map((cue, index) => ({ + ...cue, + text: lineBlocks[index]!, + })); + } + + throw new Error('Fixed subtitle batch must preserve cue count.'); +} + +function countPatternMatches(content: string, pattern: RegExp): number { + pattern.lastIndex = 0; + return [...content.matchAll(pattern)].length; +} + +function isJapaneseLanguageCode(language: string | undefined): boolean { + if (!language) return false; + const normalized = language.trim().toLowerCase(); + return normalized === 'ja' || normalized === 'jp' || normalized === 'jpn'; +} + +function validateExpectedLanguage( + original: SrtCue[], + fixed: SrtCue[], + expectedLanguage: string | undefined, +): void { + if (!isJapaneseLanguageCode(expectedLanguage)) return; + + const originalText = original.map((cue) => cue.text).join('\n'); + const fixedText = fixed.map((cue) => cue.text).join('\n'); + const originalJapaneseChars = countPatternMatches(originalText, JAPANESE_CHAR_PATTERN); + if (originalJapaneseChars < 4) return; + + const fixedJapaneseChars = countPatternMatches(fixedText, JAPANESE_CHAR_PATTERN); + const fixedLatinLetters = countPatternMatches(fixedText, LATIN_LETTER_PATTERN); + if (fixedJapaneseChars === 0 && fixedLatinLetters >= 4) { + throw new Error('Fixed subtitle batch changed language away from expected Japanese.'); + } +} + +export function parseAiSubtitleFixResponse( + original: SrtCue[], + content: string, + expectedLanguage?: string, +): SrtCue[] { + const candidates = normalizeAiSubtitleFixCandidates(content); + let lastError: Error | null = null; + + for (const candidate of candidates) { + try { + const parsed = parseSrt(candidate); + validateExpectedLanguage(original, parsed, expectedLanguage); + return parsed; + } catch (error) { + lastError = error as Error; + } + } + + for (const candidate of candidates) { + try { + const parsed = parseTextOnlyCueBatch(original, 
candidate); + validateExpectedLanguage(original, parsed, expectedLanguage); + return parsed; + } catch (error) { + lastError = error as Error; + } + } + + throw lastError ?? new Error('AI subtitle fix returned empty content.'); +} + +export async function fixSubtitleWithAi( + subtitleContent: string, + aiConfig: LauncherAiConfig, + logWarning: (message: string) => void, + expectedLanguage?: string, +): Promise<string | null> { + if (aiConfig.enabled !== true) { + return null; + } + + const apiKey = await resolveAiApiKey(aiConfig); + if (!apiKey) { + return null; + } + + const cues = parseSrt(subtitleContent); + if (cues.length === 0) { + return null; + } + + const fixedChunks: SrtCue[] = []; + for (const chunk of chunkCues(cues, 25)) { + const fixedContent = await requestAiChatCompletion( + { + apiKey, + baseUrl: aiConfig.baseUrl, + model: aiConfig.model, + timeoutMs: aiConfig.requestTimeoutMs, + messages: [ + { + role: 'system', + content: aiConfig.systemPrompt?.trim() || DEFAULT_SUBTITLE_FIX_PROMPT, + }, + { + role: 'user', + content: stringifySrt(chunk), + }, + ], + }, + { + logWarning, + }, + ); + if (!fixedContent) { + return null; + } + + let parsedFixed: SrtCue[]; + try { + parsedFixed = parseAiSubtitleFixResponse(chunk, fixedContent, expectedLanguage); + } catch (error) { + logWarning(`AI subtitle fix returned invalid SRT: ${(error as Error).message}`); + return null; + } + + try { + fixedChunks.push(...applyFixedCueBatch(chunk, parsedFixed)); + } catch (error) { + logWarning(`AI subtitle fix validation failed: ${(error as Error).message}`); + return null; + } + } + + return stringifySrt(fixedChunks); +} diff --git a/plugin/subminer/lifecycle.lua b/plugin/subminer/lifecycle.lua index 069ae33..721724b 100644 --- a/plugin/subminer/lifecycle.lua +++ b/plugin/subminer/lifecycle.lua @@ -62,9 +62,7 @@ function M.create(ctx) hover.clear_hover_overlay() process.disarm_auto_play_ready_gate() if state.overlay_running or state.texthooker_running then - subminer_log("info", 
"lifecycle", "mpv shutting down, stopping SubMiner process") - show_osd("Shutting down...") - process.stop_overlay() + subminer_log("info", "lifecycle", "mpv shutting down, preserving SubMiner background process") end end diff --git a/scripts/get-mpv-window-macos.swift b/scripts/get-mpv-window-macos.swift index 2353256..0073a34 100644 --- a/scripts/get-mpv-window-macos.swift +++ b/scripts/get-mpv-window-macos.swift @@ -20,6 +20,11 @@ private struct WindowGeometry { let height: Int } +private struct WindowState { + let geometry: WindowGeometry + let focused: Bool +} + private let targetMpvSocketPath: String? = { guard CommandLine.arguments.count > 1 else { return nil @@ -136,7 +141,11 @@ private func geometryFromAXWindow(_ axWindow: AXUIElement) -> WindowGeometry? { return geometry } -private func geometryFromAccessibilityAPI() -> WindowGeometry? { +private func frontmostApplicationPid() -> pid_t? { + NSWorkspace.shared.frontmostApplication?.processIdentifier +} + +private func windowStateFromAccessibilityAPI() -> WindowState? { let runningApps = NSWorkspace.shared.runningApplications.filter { app in guard let name = app.localizedName else { return false @@ -144,6 +153,8 @@ private func geometryFromAccessibilityAPI() -> WindowGeometry? { return normalizedMpvName(name) } + let frontmostPid = frontmostApplicationPid() + for app in runningApps { let appElement = AXUIElementCreateApplication(app.processIdentifier) if !windowHasTargetSocket(app.processIdentifier) { @@ -173,7 +184,10 @@ private func geometryFromAccessibilityAPI() -> WindowGeometry? { } if let geometry = geometryFromAXWindow(window) { - return geometry + return WindowState( + geometry: geometry, + focused: frontmostPid == windowPid + ) } } } @@ -181,11 +195,12 @@ private func geometryFromAccessibilityAPI() -> WindowGeometry? { return nil } -private func geometryFromCoreGraphics() -> WindowGeometry? { +private func windowStateFromCoreGraphics() -> WindowState? 
{ // Keep the CG fallback for environments without Accessibility permissions. // Use on-screen layer-0 windows to avoid off-screen helpers/shadows. let options: CGWindowListOption = [.optionOnScreenOnly, .excludeDesktopElements] let windowList = CGWindowListCopyWindowInfo(options, kCGNullWindowID) as? [[String: Any]] ?? [] + let frontmostPid = frontmostApplicationPid() for window in windowList { guard let ownerName = window[kCGWindowOwnerName as String] as? String, @@ -226,14 +241,19 @@ private func geometryFromCoreGraphics() -> WindowGeometry? { continue } - return geometry + return WindowState( + geometry: geometry, + focused: frontmostPid == ownerPid + ) } return nil } -if let window = geometryFromAccessibilityAPI() ?? geometryFromCoreGraphics() { - print("\(window.x),\(window.y),\(window.width),\(window.height)") +if let window = windowStateFromAccessibilityAPI() ?? windowStateFromCoreGraphics() { + print( + "\(window.geometry.x),\(window.geometry.y),\(window.geometry.width),\(window.geometry.height),\(window.focused ? 
1 : 0)" + ) } else { print("not-found") } diff --git a/src/core/services/cli-command.test.ts b/src/core/services/cli-command.test.ts index 9700a5d..58dec2b 100644 --- a/src/core/services/cli-command.test.ts +++ b/src/core/services/cli-command.test.ts @@ -201,7 +201,7 @@ function createDeps(overrides: Partial = {}) { return { deps, calls, osd }; } -test('handleCliCommand ignores --start for second-instance when overlay runtime is already initialized', () => { +test('handleCliCommand reconnects MPV for second-instance --start when overlay runtime is already initialized', () => { const { deps, calls } = createDeps({ isOverlayRuntimeInitialized: () => true, }); @@ -209,11 +209,9 @@ test('handleCliCommand ignores --start for second-instance when overlay runtime handleCliCommand(args, 'second-instance', deps); - assert.ok(calls.includes('log:Ignoring --start because SubMiner is already running.')); - assert.equal( - calls.some((value) => value.includes('connectMpvClient')), - false, - ); + assert.ok(calls.includes('setMpvClientSocketPath:/tmp/subminer.sock')); + assert.equal(calls.some((value) => value.includes('connectMpvClient')), true); + assert.equal(calls.some((value) => value.includes('initializeOverlayRuntime')), false); }); test('handleCliCommand processes --start for second-instance when overlay runtime is not initialized', () => { diff --git a/src/core/services/cli-command.ts b/src/core/services/cli-command.ts index ed5acd0..05a91b5 100644 --- a/src/core/services/cli-command.ts +++ b/src/core/services/cli-command.ts @@ -259,10 +259,9 @@ export function handleCliCommand( deps.setLogLevel?.(args.logLevel); } - const ignoreSecondInstanceStart = + const reuseSecondInstanceStart = source === 'second-instance' && args.start && deps.isOverlayRuntimeInitialized(); - const shouldStart = - (!ignoreSecondInstanceStart && args.start) || args.toggle || args.toggleVisibleOverlay; + const shouldStart = args.start || args.toggle || args.toggleVisibleOverlay; const 
needsOverlayRuntime = commandNeedsOverlayRuntime(args); const shouldInitializeOverlayRuntime = needsOverlayRuntime || args.start; @@ -285,8 +284,8 @@ export function handleCliCommand( return; } - if (ignoreSecondInstanceStart) { - deps.log('Ignoring --start because SubMiner is already running.'); + if (reuseSecondInstanceStart) { + deps.log('Reusing running SubMiner instance for --start.'); } if (shouldInitializeOverlayRuntime && !deps.isOverlayRuntimeInitialized()) { diff --git a/src/core/services/overlay-shortcut-handler.test.ts b/src/core/services/overlay-shortcut-handler.test.ts index 6570138..8db2f9a 100644 --- a/src/core/services/overlay-shortcut-handler.test.ts +++ b/src/core/services/overlay-shortcut-handler.test.ts @@ -6,6 +6,7 @@ import { OverlayShortcutRuntimeDeps, runOverlayShortcutLocalFallback, } from './overlay-shortcut-handler'; +import { shouldActivateOverlayShortcuts } from './overlay-shortcut'; function makeShortcuts(overrides: Partial = {}): ConfiguredShortcuts { return { @@ -279,3 +280,36 @@ test('runOverlayShortcutLocalFallback returns false when no action matches', () assert.equal(result, false); assert.equal(called, false); }); + +test('shouldActivateOverlayShortcuts disables macOS overlay shortcuts when tracked mpv is unfocused', () => { + assert.equal( + shouldActivateOverlayShortcuts({ + overlayRuntimeInitialized: true, + isMacOSPlatform: true, + trackedMpvWindowFocused: false, + }), + false, + ); +}); + +test('shouldActivateOverlayShortcuts keeps macOS overlay shortcuts active when tracked mpv is focused', () => { + assert.equal( + shouldActivateOverlayShortcuts({ + overlayRuntimeInitialized: true, + isMacOSPlatform: true, + trackedMpvWindowFocused: true, + }), + true, + ); +}); + +test('shouldActivateOverlayShortcuts preserves non-macOS behavior', () => { + assert.equal( + shouldActivateOverlayShortcuts({ + overlayRuntimeInitialized: true, + isMacOSPlatform: false, + trackedMpvWindowFocused: false, + }), + true, + ); +}); diff --git 
a/src/core/services/overlay-shortcut.ts b/src/core/services/overlay-shortcut.ts index 4c566ef..09ea8f1 100644 --- a/src/core/services/overlay-shortcut.ts +++ b/src/core/services/overlay-shortcut.ts @@ -27,6 +27,20 @@ export interface OverlayShortcutLifecycleDeps { cancelPendingMineSentenceMultiple: () => void; } +export function shouldActivateOverlayShortcuts(args: { + overlayRuntimeInitialized: boolean; + isMacOSPlatform: boolean; + trackedMpvWindowFocused: boolean; +}): boolean { + if (!args.overlayRuntimeInitialized) { + return false; + } + if (!args.isMacOSPlatform) { + return true; + } + return args.trackedMpvWindowFocused; +} + export function registerOverlayShortcuts( shortcuts: ConfiguredShortcuts, handlers: OverlayShortcutHandlers, diff --git a/src/main/overlay-shortcuts-runtime.ts b/src/main/overlay-shortcuts-runtime.ts index a041c60..756fb59 100644 --- a/src/main/overlay-shortcuts-runtime.ts +++ b/src/main/overlay-shortcuts-runtime.ts @@ -6,9 +6,10 @@ import { import { refreshOverlayShortcutsRuntime, registerOverlayShortcuts, + shouldActivateOverlayShortcuts, syncOverlayShortcutsRuntime, unregisterOverlayShortcutsRuntime, -} from '../core/services'; +} from '../core/services/overlay-shortcut'; import { runOverlayShortcutLocalFallback } from '../core/services/overlay-shortcut-handler'; export interface OverlayShortcutRuntimeServiceInput { @@ -16,6 +17,8 @@ export interface OverlayShortcutRuntimeServiceInput { getShortcutsRegistered: () => boolean; setShortcutsRegistered: (registered: boolean) => void; isOverlayRuntimeInitialized: () => boolean; + isMacOSPlatform: () => boolean; + isTrackedMpvWindowFocused: () => boolean; showMpvOsd: (text: string) => void; openRuntimeOptionsPalette: () => void; openJimaku: () => void; @@ -89,7 +92,12 @@ export function createOverlayShortcutsRuntimeService( }; }; - const shouldOverlayShortcutsBeActive = () => input.isOverlayRuntimeInitialized(); + const shouldOverlayShortcutsBeActive = () => + 
shouldActivateOverlayShortcuts({ + overlayRuntimeInitialized: input.isOverlayRuntimeInitialized(), + isMacOSPlatform: input.isMacOSPlatform(), + trackedMpvWindowFocused: input.isTrackedMpvWindowFocused(), + }); return { tryHandleOverlayShortcutLocalFallback: (inputEvent) => diff --git a/src/main/runtime/mpv-main-event-actions.test.ts b/src/main/runtime/mpv-main-event-actions.test.ts index b35c86b..ed818ab 100644 --- a/src/main/runtime/mpv-main-event-actions.test.ts +++ b/src/main/runtime/mpv-main-event-actions.test.ts @@ -58,6 +58,7 @@ test('media path change handler reports stop for empty path and probes media key ensureAnilistMediaGuess: (mediaKey) => calls.push(`guess:${mediaKey}`), syncImmersionMediaState: () => calls.push('sync'), scheduleCharacterDictionarySync: () => calls.push('dict-sync'), + signalAutoplayReadyIfWarm: (path) => calls.push(`autoplay:${path}`), refreshDiscordPresence: () => calls.push('presence'), }); @@ -74,6 +75,34 @@ test('media path change handler reports stop for empty path and probes media key ]); }); +test('media path change handler signals autoplay-ready fast path for warm non-empty media', () => { + const calls: string[] = []; + const handler = createHandleMpvMediaPathChangeHandler({ + updateCurrentMediaPath: (path) => calls.push(`path:${path}`), + reportJellyfinRemoteStopped: () => calls.push('stopped'), + restoreMpvSubVisibility: () => calls.push('restore-mpv-sub'), + getCurrentAnilistMediaKey: () => null, + resetAnilistMediaTracking: (mediaKey) => calls.push(`reset:${String(mediaKey)}`), + maybeProbeAnilistDuration: (mediaKey) => calls.push(`probe:${mediaKey}`), + ensureAnilistMediaGuess: (mediaKey) => calls.push(`guess:${mediaKey}`), + syncImmersionMediaState: () => calls.push('sync'), + scheduleCharacterDictionarySync: () => calls.push('dict-sync'), + signalAutoplayReadyIfWarm: (path) => calls.push(`autoplay:${path}`), + refreshDiscordPresence: () => calls.push('presence'), + }); + + handler({ path: '/tmp/video.mkv' }); + + 
assert.deepEqual(calls, [ + 'path:/tmp/video.mkv', + 'reset:null', + 'sync', + 'dict-sync', + 'autoplay:/tmp/video.mkv', + 'presence', + ]); +}); + test('media title change handler clears guess state and syncs immersion', () => { const calls: string[] = []; const handler = createHandleMpvMediaTitleChangeHandler({ diff --git a/src/main/runtime/mpv-main-event-actions.ts b/src/main/runtime/mpv-main-event-actions.ts index ed61f55..14cf793 100644 --- a/src/main/runtime/mpv-main-event-actions.ts +++ b/src/main/runtime/mpv-main-event-actions.ts @@ -40,6 +40,7 @@ export function createHandleMpvMediaPathChangeHandler(deps: { ensureAnilistMediaGuess: (mediaKey: string) => void; syncImmersionMediaState: () => void; scheduleCharacterDictionarySync?: () => void; + signalAutoplayReadyIfWarm?: (path: string) => void; refreshDiscordPresence: () => void; }) { return ({ path }: { path: string | null }): void => { @@ -58,6 +59,7 @@ export function createHandleMpvMediaPathChangeHandler(deps: { deps.syncImmersionMediaState(); if (normalizedPath.trim().length > 0) { deps.scheduleCharacterDictionarySync?.(); + deps.signalAutoplayReadyIfWarm?.(normalizedPath); } deps.refreshDiscordPresence(); }; diff --git a/src/main/runtime/mpv-main-event-bindings.ts b/src/main/runtime/mpv-main-event-bindings.ts index 081ab7c..ba7e678 100644 --- a/src/main/runtime/mpv-main-event-bindings.ts +++ b/src/main/runtime/mpv-main-event-bindings.ts @@ -50,6 +50,7 @@ export function createBindMpvMainEventHandlersHandler(deps: { maybeProbeAnilistDuration: (mediaKey: string) => void; ensureAnilistMediaGuess: (mediaKey: string) => void; syncImmersionMediaState: () => void; + signalAutoplayReadyIfWarm?: (path: string) => void; updateCurrentMediaTitle: (title: string) => void; resetAnilistMediaGuessState: () => void; @@ -105,6 +106,7 @@ export function createBindMpvMainEventHandlersHandler(deps: { maybeProbeAnilistDuration: (mediaKey) => deps.maybeProbeAnilistDuration(mediaKey), ensureAnilistMediaGuess: (mediaKey) => 
deps.ensureAnilistMediaGuess(mediaKey), syncImmersionMediaState: () => deps.syncImmersionMediaState(), + signalAutoplayReadyIfWarm: (path) => deps.signalAutoplayReadyIfWarm?.(path), scheduleCharacterDictionarySync: () => deps.scheduleCharacterDictionarySync?.(), refreshDiscordPresence: () => deps.refreshDiscordPresence(), }); diff --git a/src/main/runtime/mpv-main-event-main-deps.ts b/src/main/runtime/mpv-main-event-main-deps.ts index 7cad89d..18e21c1 100644 --- a/src/main/runtime/mpv-main-event-main-deps.ts +++ b/src/main/runtime/mpv-main-event-main-deps.ts @@ -33,6 +33,7 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: { maybeProbeAnilistDuration: (mediaKey: string) => void; ensureAnilistMediaGuess: (mediaKey: string) => void; syncImmersionMediaState: () => void; + signalAutoplayReadyIfWarm?: (path: string) => void; scheduleCharacterDictionarySync?: () => void; updateCurrentMediaTitle: (title: string) => void; resetAnilistMediaGuessState: () => void; @@ -82,6 +83,7 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: { maybeProbeAnilistDuration: (mediaKey: string) => deps.maybeProbeAnilistDuration(mediaKey), ensureAnilistMediaGuess: (mediaKey: string) => deps.ensureAnilistMediaGuess(mediaKey), syncImmersionMediaState: () => deps.syncImmersionMediaState(), + signalAutoplayReadyIfWarm: (path: string) => deps.signalAutoplayReadyIfWarm?.(path), scheduleCharacterDictionarySync: () => deps.scheduleCharacterDictionarySync?.(), updateCurrentMediaTitle: (title: string) => deps.updateCurrentMediaTitle(title), resetAnilistMediaGuessState: () => deps.resetAnilistMediaGuessState(), diff --git a/src/main/runtime/overlay-shortcuts-runtime-main-deps.test.ts b/src/main/runtime/overlay-shortcuts-runtime-main-deps.test.ts index aa14001..1287271 100644 --- a/src/main/runtime/overlay-shortcuts-runtime-main-deps.test.ts +++ b/src/main/runtime/overlay-shortcuts-runtime-main-deps.test.ts @@ -13,6 +13,8 @@ test('overlay shortcuts runtime 
main deps builder maps lifecycle and action call calls.push(`registered:${registered}`); }, isOverlayRuntimeInitialized: () => true, + isMacOSPlatform: () => true, + isTrackedMpvWindowFocused: () => false, showMpvOsd: (text) => calls.push(`osd:${text}`), openRuntimeOptionsPalette: () => calls.push('runtime-options'), openJimaku: () => calls.push('jimaku'), @@ -40,6 +42,8 @@ test('overlay shortcuts runtime main deps builder maps lifecycle and action call })(); assert.equal(deps.isOverlayRuntimeInitialized(), true); + assert.equal(deps.isMacOSPlatform(), true); + assert.equal(deps.isTrackedMpvWindowFocused(), false); assert.equal(deps.getShortcutsRegistered(), false); deps.setShortcutsRegistered(true); assert.equal(shortcutsRegistered, true); diff --git a/src/main/runtime/overlay-shortcuts-runtime-main-deps.ts b/src/main/runtime/overlay-shortcuts-runtime-main-deps.ts index ac0dfa3..5915fe4 100644 --- a/src/main/runtime/overlay-shortcuts-runtime-main-deps.ts +++ b/src/main/runtime/overlay-shortcuts-runtime-main-deps.ts @@ -8,6 +8,8 @@ export function createBuildOverlayShortcutsRuntimeMainDepsHandler( getShortcutsRegistered: () => deps.getShortcutsRegistered(), setShortcutsRegistered: (registered: boolean) => deps.setShortcutsRegistered(registered), isOverlayRuntimeInitialized: () => deps.isOverlayRuntimeInitialized(), + isMacOSPlatform: () => deps.isMacOSPlatform(), + isTrackedMpvWindowFocused: () => deps.isTrackedMpvWindowFocused(), showMpvOsd: (text: string) => deps.showMpvOsd(text), openRuntimeOptionsPalette: () => deps.openRuntimeOptionsPalette(), openJimaku: () => deps.openJimaku(), diff --git a/src/window-trackers/base-tracker.ts b/src/window-trackers/base-tracker.ts index dcc7254..5778a80 100644 --- a/src/window-trackers/base-tracker.ts +++ b/src/window-trackers/base-tracker.ts @@ -21,13 +21,17 @@ import { WindowGeometry } from '../types'; export type GeometryChangeCallback = (geometry: WindowGeometry) => void; export type WindowFoundCallback = (geometry: 
WindowGeometry) => void; export type WindowLostCallback = () => void; +export type WindowFocusChangeCallback = (focused: boolean) => void; export abstract class BaseWindowTracker { protected currentGeometry: WindowGeometry | null = null; protected windowFound: boolean = false; + protected focusKnown: boolean = false; + protected windowFocused: boolean = false; public onGeometryChange: GeometryChangeCallback | null = null; public onWindowFound: WindowFoundCallback | null = null; public onWindowLost: WindowLostCallback | null = null; + public onWindowFocusChange: WindowFocusChangeCallback | null = null; abstract start(): void; abstract stop(): void; @@ -40,6 +44,19 @@ export abstract class BaseWindowTracker { return this.windowFound; } + isFocused(): boolean { + return this.focusKnown ? this.windowFocused : this.windowFound; + } + + protected updateFocus(focused: boolean): void { + const changed = !this.focusKnown || this.windowFocused !== focused; + this.focusKnown = true; + this.windowFocused = focused; + if (changed) { + this.onWindowFocusChange?.(focused); + } + } + protected updateGeometry(newGeometry: WindowGeometry | null): void { if (newGeometry) { if (!this.windowFound) { @@ -58,6 +75,12 @@ export abstract class BaseWindowTracker { if (this.onGeometryChange) this.onGeometryChange(newGeometry); } } else { + const focusChanged = this.focusKnown && this.windowFocused; + this.focusKnown = false; + this.windowFocused = false; + if (focusChanged) { + this.onWindowFocusChange?.(false); + } if (this.windowFound) { this.windowFound = false; this.currentGeometry = null; diff --git a/src/window-trackers/macos-tracker.ts b/src/window-trackers/macos-tracker.ts index 13b9272..cf4e798 100644 --- a/src/window-trackers/macos-tracker.ts +++ b/src/window-trackers/macos-tracker.ts @@ -22,9 +22,56 @@ import * as fs from 'fs'; import * as os from 'os'; import { BaseWindowTracker } from './base-tracker'; import { createLogger } from '../logger'; +import type { WindowGeometry } 
from '../types'; const log = createLogger('tracker').child('macos'); +export interface MacOSHelperWindowState { + geometry: WindowGeometry; + focused: boolean; +} + +export function parseMacOSHelperOutput(result: string): MacOSHelperWindowState | null { + const trimmed = result.trim(); + if (!trimmed || trimmed === 'not-found') { + return null; + } + + const parts = trimmed.split(','); + if (parts.length !== 4 && parts.length !== 5) { + return null; + } + + const x = parseInt(parts[0]!, 10); + const y = parseInt(parts[1]!, 10); + const width = parseInt(parts[2]!, 10); + const height = parseInt(parts[3]!, 10); + if ( + !Number.isFinite(x) || + !Number.isFinite(y) || + !Number.isFinite(width) || + !Number.isFinite(height) || + width <= 0 || + height <= 0 + ) { + return null; + } + + const focusedRaw = parts[4]?.trim().toLowerCase(); + const focused = + focusedRaw === undefined ? true : focusedRaw === '1' || focusedRaw === 'true'; + + return { + geometry: { + x, + y, + width, + height, + }, + focused, + }; +} + export class MacOSWindowTracker extends BaseWindowTracker { private pollInterval: ReturnType | null = null; private pollInFlight = false; @@ -173,33 +220,12 @@ export class MacOSWindowTracker extends BaseWindowTracker { return; } - const result = (stdout || '').trim(); - if (result && result !== 'not-found') { - const parts = result.split(','); - if (parts.length === 4) { - const x = parseInt(parts[0]!, 10); - const y = parseInt(parts[1]!, 10); - const width = parseInt(parts[2]!, 10); - const height = parseInt(parts[3]!, 10); - - if ( - Number.isFinite(x) && - Number.isFinite(y) && - Number.isFinite(width) && - Number.isFinite(height) && - width > 0 && - height > 0 - ) { - this.updateGeometry({ - x, - y, - width, - height, - }); - this.pollInFlight = false; - return; - } - } + const parsed = parseMacOSHelperOutput(stdout || ''); + if (parsed) { + this.updateFocus(parsed.focused); + this.updateGeometry(parsed.geometry); + this.pollInFlight = false; + return; } 
this.updateGeometry(null); diff --git a/src/window-trackers/x11-tracker.test.ts b/src/window-trackers/x11-tracker.test.ts index 0d1c42b..855e278 100644 --- a/src/window-trackers/x11-tracker.test.ts +++ b/src/window-trackers/x11-tracker.test.ts @@ -1,6 +1,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; import { parseX11WindowGeometry, parseX11WindowPid, X11WindowTracker } from './x11-tracker'; +import { parseMacOSHelperOutput } from './macos-tracker'; test('parseX11WindowGeometry parses xwininfo output', () => { const geometry = parseX11WindowGeometry(` @@ -52,3 +53,27 @@ Height: 360`; release(); await new Promise((resolve) => setTimeout(resolve, 0)); }); + +test('parseMacOSHelperOutput parses geometry and focused state', () => { + assert.deepEqual(parseMacOSHelperOutput('120,240,1280,720,1'), { + geometry: { + x: 120, + y: 240, + width: 1280, + height: 720, + }, + focused: true, + }); +}); + +test('parseMacOSHelperOutput tolerates unfocused helper output', () => { + assert.deepEqual(parseMacOSHelperOutput('120,240,1280,720,0'), { + geometry: { + x: 120, + y: 240, + width: 1280, + height: 720, + }, + focused: false, + }); +});