From f89aec31e8c585b5480f6c13b9dda420e5b7d99f Mon Sep 17 00:00:00 2001
From: sudacode
Date: Sun, 15 Mar 2026 13:04:26 -0700
Subject: [PATCH] feat: add subtitle prefetch service with priority window

Implements background tokenization of upcoming subtitle cues with a
configurable priority window. Supports stop, pause/resume, seek
re-prioritization, and cache-full stopping condition.
---
 src/core/services/subtitle-prefetch.test.ts | 142 ++++++++++++++++++
 src/core/services/subtitle-prefetch.ts      | 152 ++++++++++++++++++++
 2 files changed, 294 insertions(+)
 create mode 100644 src/core/services/subtitle-prefetch.test.ts
 create mode 100644 src/core/services/subtitle-prefetch.ts

diff --git a/src/core/services/subtitle-prefetch.test.ts b/src/core/services/subtitle-prefetch.test.ts
new file mode 100644
index 0000000..4f8b202
--- /dev/null
+++ b/src/core/services/subtitle-prefetch.test.ts
@@ -0,0 +1,142 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+import {
+  computePriorityWindow,
+  createSubtitlePrefetchService,
+} from './subtitle-prefetch';
+import type { SubtitleCue } from './subtitle-cue-parser';
+import type { SubtitleData } from '../../types';
+
+function makeCues(count: number, startOffset = 0): SubtitleCue[] {
+  return Array.from({ length: count }, (_, i) => ({
+    startTime: startOffset + i * 5,
+    endTime: startOffset + i * 5 + 4,
+    text: `line-${i}`,
+  }));
+}
+
+test('computePriorityWindow returns next N cues from current position', () => {
+  const cues = makeCues(20);
+  const window = computePriorityWindow(cues, 12.0, 5);
+
+  assert.equal(window.length, 5);
+  // Position 12.0 is during cue index 2 (start=10, end=14). Priority window starts from index 3.
+  assert.equal(window[0]!.text, 'line-3');
+  assert.equal(window[4]!.text, 'line-7');
+});
+
+test('computePriorityWindow clamps to remaining cues at end of file', () => {
+  const cues = makeCues(5);
+  const window = computePriorityWindow(cues, 18.0, 10);
+
+  // Position 18.0 is during cue 3 (start=15). Only cue 4 is ahead.
+  assert.equal(window.length, 1);
+  assert.equal(window[0]!.text, 'line-4');
+});
+
+test('computePriorityWindow returns empty when past all cues', () => {
+  const cues = makeCues(3);
+  const window = computePriorityWindow(cues, 999.0, 10);
+  assert.equal(window.length, 0);
+});
+
+test('computePriorityWindow at position 0 returns first N cues', () => {
+  const cues = makeCues(20);
+  const window = computePriorityWindow(cues, 0, 5);
+
+  assert.equal(window.length, 5);
+  assert.equal(window[0]!.text, 'line-0');
+});
+
+function flushMicrotasks(): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, 0));
+}
+
+test('prefetch service tokenizes priority window cues and caches them', async () => {
+  const cues = makeCues(20);
+  const cached: Map<string, SubtitleData> = new Map();
+  let tokenizeCalls = 0;
+
+  const service = createSubtitlePrefetchService({
+    cues,
+    tokenizeSubtitle: async (text) => {
+      tokenizeCalls += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: (text, data) => {
+      cached.set(text, data);
+    },
+    isCacheFull: () => false,
+    priorityWindowSize: 3,
+  });
+
+  service.start(0);
+  // Allow all async tokenization to complete
+  for (let i = 0; i < 25; i += 1) {
+    await flushMicrotasks();
+  }
+  service.stop();
+
+  // Priority window (first 3) should be cached
+  assert.ok(cached.has('line-0'));
+  assert.ok(cached.has('line-1'));
+  assert.ok(cached.has('line-2'));
+});
+
+test('prefetch service stops when cache is full', async () => {
+  const cues = makeCues(20);
+  let tokenizeCalls = 0;
+  let cacheSize = 0;
+
+  const service = createSubtitlePrefetchService({
+    cues,
+    tokenizeSubtitle: async (text) => {
+      tokenizeCalls += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: () => {
+      cacheSize += 1;
+    },
+    isCacheFull: () => cacheSize >= 5,
+    priorityWindowSize: 3,
+  });
+
+  service.start(0);
+  for (let i = 0; i < 30; i += 1) {
+    await flushMicrotasks();
+  }
+  service.stop();
+
+  // Should have stopped at 5 (cache full), not tokenized all 20
+  assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
+});
+
+test('prefetch service can be stopped mid-flight', async () => {
+  const cues = makeCues(100);
+  let tokenizeCalls = 0;
+
+  const service = createSubtitlePrefetchService({
+    cues,
+    tokenizeSubtitle: async (text) => {
+      tokenizeCalls += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: () => {},
+    isCacheFull: () => false,
+    priorityWindowSize: 3,
+  });
+
+  service.start(0);
+  await flushMicrotasks();
+  await flushMicrotasks();
+  service.stop();
+  const callsAtStop = tokenizeCalls;
+
+  // Wait more to confirm no further calls
+  for (let i = 0; i < 10; i += 1) {
+    await flushMicrotasks();
+  }
+
+  assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
+  assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
+});
diff --git a/src/core/services/subtitle-prefetch.ts b/src/core/services/subtitle-prefetch.ts
new file mode 100644
index 0000000..cf6d479
--- /dev/null
+++ b/src/core/services/subtitle-prefetch.ts
@@ -0,0 +1,152 @@
+import type { SubtitleCue } from './subtitle-cue-parser';
+import type { SubtitleData } from '../../types';
+
+export interface SubtitlePrefetchServiceDeps {
+  cues: SubtitleCue[];
+  tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
+  preCacheTokenization: (text: string, data: SubtitleData) => void;
+  isCacheFull: () => boolean;
+  priorityWindowSize?: number;
+}
+
+export interface SubtitlePrefetchService {
+  start: (currentTimeSeconds: number) => void;
+  stop: () => void;
+  onSeek: (newTimeSeconds: number) => void;
+  pause: () => void;
+  resume: () => void;
+}
+
+const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
+
+export function computePriorityWindow(
+  cues: SubtitleCue[],
+  currentTimeSeconds: number,
+  windowSize: number,
+): SubtitleCue[] {
+  if (cues.length === 0) {
+    return [];
+  }
+
+  // Find the first cue whose start time is >= current position.
+  // This includes cues that start exactly at the current time (they haven't
+  // been displayed yet and should be prefetched).
+  let startIndex = -1;
+  for (let i = 0; i < cues.length; i += 1) {
+    if (cues[i]!.startTime >= currentTimeSeconds) {
+      startIndex = i;
+      break;
+    }
+  }
+
+  if (startIndex < 0) {
+    // All cues are before current time
+    return [];
+  }
+
+  return cues.slice(startIndex, startIndex + windowSize);
+}
+
+export function createSubtitlePrefetchService(
+  deps: SubtitlePrefetchServiceDeps,
+): SubtitlePrefetchService {
+  const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
+  let stopped = true;
+  let paused = false;
+  let currentRunId = 0;
+
+  async function tokenizeCueList(
+    cuesToProcess: SubtitleCue[],
+    runId: number,
+  ): Promise<void> {
+    for (const cue of cuesToProcess) {
+      if (stopped || runId !== currentRunId) {
+        return;
+      }
+
+      // Wait while paused
+      while (paused && !stopped && runId === currentRunId) {
+        await new Promise((resolve) => setTimeout(resolve, 10));
+      }
+
+      if (stopped || runId !== currentRunId) {
+        return;
+      }
+
+      if (deps.isCacheFull()) {
+        return;
+      }
+
+      try {
+        const result = await deps.tokenizeSubtitle(cue.text);
+        if (result && !stopped && runId === currentRunId) {
+          deps.preCacheTokenization(cue.text, result);
+        }
+      } catch {
+        // Skip failed cues, continue prefetching
+      }
+
+      // Yield to allow live processing to take priority
+      await new Promise((resolve) => setTimeout(resolve, 0));
+    }
+  }
+
+  async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
+    const cues = deps.cues;
+
+    // Phase 1: Priority window
+    const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
+    await tokenizeCueList(priorityCues, runId);
+
+    if (stopped || runId !== currentRunId) {
+      return;
+    }
+
+    // Phase 2: Background - remaining cues forward from current position
+    const priorityTexts = new Set(priorityCues.map((c) => c.text));
+    const remainingCues = cues.filter(
+      (cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
+    );
+    await tokenizeCueList(remainingCues, runId);
+
+    if (stopped || runId !== currentRunId) {
+      return;
+    }
+
+    // Phase 3: Background - earlier cues (for rewind support)
+    const earlierCues = cues.filter(
+      (cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
+    );
+    await tokenizeCueList(earlierCues, runId);
+  }
+
+  return {
+    start(currentTimeSeconds: number) {
+      stopped = false;
+      paused = false;
+      currentRunId += 1;
+      const runId = currentRunId;
+      void startPrefetching(currentTimeSeconds, runId);
+    },
+
+    stop() {
+      stopped = true;
+      currentRunId += 1;
+    },
+
+    onSeek(newTimeSeconds: number) {
+      // Cancel current run and restart from new position
+      currentRunId += 1;
+      const runId = currentRunId;
+      void startPrefetching(newTimeSeconds, runId);
+    },
+
+    pause() {
+      paused = true;
+    },
+
+    resume() {
+      paused = false;
+    },
+  };
+}