feat: add subtitle prefetch service with priority window

Implements background tokenization of upcoming subtitle cues with a
configurable priority window. Supports stop, pause/resume, seek
re-prioritization, and cache-full stopping condition.
This commit is contained in:
2026-03-15 13:04:26 -07:00
parent 6cf0272e7e
commit f89aec31e8
2 changed files with 294 additions and 0 deletions

View File

@@ -0,0 +1,142 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
computePriorityWindow,
createSubtitlePrefetchService,
} from './subtitle-prefetch';
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
// Builds `count` synthetic cues spaced 5s apart, each 4s long, starting
// at `startOffset` seconds, with predictable `line-<i>` text.
function makeCues(count: number, startOffset = 0): SubtitleCue[] {
  const cues: SubtitleCue[] = [];
  for (let i = 0; i < count; i += 1) {
    const start = startOffset + i * 5;
    cues.push({ startTime: start, endTime: start + 4, text: `line-${i}` });
  }
  return cues;
}
test('computePriorityWindow returns next N cues from current position', () => {
  const allCues = makeCues(20);
  // Position 12.0 falls inside cue index 2 (start=10, end=14); the priority
  // window therefore begins at index 3.
  const result = computePriorityWindow(allCues, 12.0, 5);
  assert.equal(result.length, 5);
  assert.equal(result[0]!.text, 'line-3');
  assert.equal(result[4]!.text, 'line-7');
});
test('computePriorityWindow clamps to remaining cues at end of file', () => {
  const shortCues = makeCues(5);
  // At 18.0 we are inside cue 3 (start=15); only cue 4 lies ahead, so the
  // requested window of 10 collapses to a single cue.
  const result = computePriorityWindow(shortCues, 18.0, 10);
  assert.equal(result.length, 1);
  assert.equal(result[0]!.text, 'line-4');
});
test('computePriorityWindow returns empty when past all cues', () => {
  // A position far beyond the last cue has nothing left to prefetch.
  const result = computePriorityWindow(makeCues(3), 999.0, 10);
  assert.equal(result.length, 0);
});
test('computePriorityWindow at position 0 returns first N cues', () => {
  const result = computePriorityWindow(makeCues(20), 0, 5);
  assert.equal(result.length, 5);
  assert.equal(result[0]!.text, 'line-0');
});
// Resolves after one macrotask turn (setTimeout 0), letting the service's
// queued microtasks and zero-delay timers run before the test continues.
function flushMicrotasks(): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(() => resolve(), 0);
  });
}
// Verifies the happy path: starting at t=0 tokenizes and caches at least the
// priority window. Fix: the original declared and incremented `tokenizeCalls`
// but never asserted on it — the counter now backs an explicit assertion.
test('prefetch service tokenizes priority window cues and caches them', async () => {
  const cues = makeCues(20);
  const cached: Map<string, SubtitleData> = new Map();
  let tokenizeCalls = 0;
  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: (text, data) => {
      cached.set(text, data);
    },
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });
  service.start(0);
  // Allow all async tokenization to complete
  for (let i = 0; i < 25; i += 1) {
    await flushMicrotasks();
  }
  service.stop();
  // The tokenizer must have run at least once per priority-window cue.
  assert.ok(tokenizeCalls >= 3, `Expected >= 3 tokenize calls, got ${tokenizeCalls}`);
  // Priority window (first 3) should be cached
  assert.ok(cached.has('line-0'));
  assert.ok(cached.has('line-1'));
  assert.ok(cached.has('line-2'));
});
test('prefetch service stops when cache is full', async () => {
  let tokenizeCalls = 0;
  let cacheSize = 0;
  // `isCacheFull` flips to true after the fifth cached entry, which should
  // halt the prefetch loop well before the cue list is exhausted.
  const service = createSubtitlePrefetchService({
    cues: makeCues(20),
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {
      cacheSize += 1;
    },
    isCacheFull: () => cacheSize >= 5,
    priorityWindowSize: 3,
  });
  service.start(0);
  for (let tick = 0; tick < 30; tick += 1) {
    await flushMicrotasks();
  }
  service.stop();
  // Should have stopped at 5 (cache full), not tokenized all 20
  assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
});
test('prefetch service can be stopped mid-flight', async () => {
  let tokenizeCalls = 0;
  const service = createSubtitlePrefetchService({
    cues: makeCues(100),
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {},
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });
  service.start(0);
  // Let a couple of cues through, then cancel.
  await flushMicrotasks();
  await flushMicrotasks();
  service.stop();
  const callsAtStop = tokenizeCalls;
  // Wait more to confirm no further calls
  for (let tick = 0; tick < 10; tick += 1) {
    await flushMicrotasks();
  }
  assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
  assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
});

View File

@@ -0,0 +1,152 @@
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
/** Dependencies injected into the subtitle prefetch service. */
export interface SubtitlePrefetchServiceDeps {
  /** Ordered list of subtitle cues to prefetch from. */
  cues: SubtitleCue[];
  /** Tokenizes one cue's text; a null result is skipped (not cached). */
  tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
  /** Stores a tokenization result, keyed by the cue's text. */
  preCacheTokenization: (text: string, data: SubtitleData) => void;
  /** When this returns true, the service stops tokenizing further cues. */
  isCacheFull: () => boolean;
  /** How many upcoming cues to tokenize first; defaults to 10 when omitted. */
  priorityWindowSize?: number;
}
/** Control surface returned by {@link createSubtitlePrefetchService}. */
export interface SubtitlePrefetchService {
  /** Begins a fresh prefetch run from the given playback position (seconds). */
  start: (currentTimeSeconds: number) => void;
  /** Cancels the active run; no further tokenization or caching happens. */
  stop: () => void;
  /** Cancels the active run and re-prioritizes from the new position (seconds). */
  onSeek: (newTimeSeconds: number) => void;
  /** Temporarily suspends tokenization without discarding the active run. */
  pause: () => void;
  /** Resumes a previously paused run. */
  resume: () => void;
}

// Number of upcoming cues prioritized when deps.priorityWindowSize is unset.
const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
/**
 * Returns up to `windowSize` cues starting at or after the given playback
 * position. A cue that starts exactly at the current time is included — it
 * has not been displayed yet and should still be prefetched. Returns an
 * empty array when no cue lies at or ahead of the position (including the
 * empty-cue-list case).
 */
export function computePriorityWindow(
  cues: SubtitleCue[],
  currentTimeSeconds: number,
  windowSize: number,
): SubtitleCue[] {
  // First cue whose start time is at or past the playhead; -1 covers both
  // "all cues are behind us" and "no cues at all".
  const firstUpcoming = cues.findIndex(
    (cue) => cue.startTime >= currentTimeSeconds,
  );
  if (firstUpcoming === -1) {
    return [];
  }
  return cues.slice(firstUpcoming, firstUpcoming + windowSize);
}
/**
 * Creates a background prefetch service that tokenizes subtitle cues and
 * stores the results through the injected cache callbacks.
 *
 * Each run proceeds in three phases:
 *   1. the priority window — the next `windowSize` cues from the start position;
 *   2. remaining cues after the position (forward playback);
 *   3. cues at or before the position (rewind support).
 *
 * Cancellation uses a monotonically increasing run id: `start()`, `stop()`,
 * and `onSeek()` each bump `currentRunId`, and every async loop re-checks
 * its captured id after each await so superseded runs abandon their work.
 *
 * @param deps - Cue list, tokenizer, cache callbacks, and optional window size.
 * @returns Handle exposing start/stop/onSeek/pause/resume.
 */
export function createSubtitlePrefetchService(
  deps: SubtitlePrefetchServiceDeps,
): SubtitlePrefetchService {
  const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
  // Shared mutable state: `stopped` is the hard kill switch, `paused` a
  // temporary hold, and `currentRunId` identifies the only run permitted to
  // keep working.
  let stopped = true;
  let paused = false;
  let currentRunId = 0;

  // Tokenizes each cue in order, caching successful results. Returns early
  // when stopped, superseded by a newer run, or when the cache reports full.
  async function tokenizeCueList(
    cuesToProcess: SubtitleCue[],
    runId: number,
  ): Promise<void> {
    for (const cue of cuesToProcess) {
      if (stopped || runId !== currentRunId) {
        return;
      }
      // Wait while paused
      while (paused && !stopped && runId === currentRunId) {
        await new Promise((resolve) => setTimeout(resolve, 10));
      }
      // Re-check after the pause wait: a stop or seek may have happened.
      if (stopped || runId !== currentRunId) {
        return;
      }
      if (deps.isCacheFull()) {
        return;
      }
      try {
        const result = await deps.tokenizeSubtitle(cue.text);
        // Only cache while this run is still current; null results are dropped.
        if (result && !stopped && runId === currentRunId) {
          deps.preCacheTokenization(cue.text, result);
        }
      } catch {
        // Skip failed cues, continue prefetching
      }
      // Yield to allow live processing to take priority
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  }

  // Drives the three prefetch phases for one run, checking for cancellation
  // between phases.
  async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
    const cues = deps.cues;
    // Phase 1: Priority window
    const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
    await tokenizeCueList(priorityCues, runId);
    if (stopped || runId !== currentRunId) {
      return;
    }
    // Phase 2: Background - remaining cues forward from current position.
    // Phase-1 cues are excluded by text; the cache is keyed by cue text, so
    // duplicate-text cues need no second tokenization.
    const priorityTexts = new Set(priorityCues.map((c) => c.text));
    const remainingCues = cues.filter(
      (cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
    );
    await tokenizeCueList(remainingCues, runId);
    if (stopped || runId !== currentRunId) {
      return;
    }
    // Phase 3: Background - earlier cues (for rewind support)
    const earlierCues = cues.filter(
      (cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
    );
    await tokenizeCueList(earlierCues, runId);
  }

  return {
    start(currentTimeSeconds: number) {
      stopped = false;
      paused = false;
      // Invalidate any prior run before launching a new fire-and-forget one.
      currentRunId += 1;
      const runId = currentRunId;
      void startPrefetching(currentTimeSeconds, runId);
    },
    stop() {
      // Kill switch plus run-id bump: in-flight awaits notice either one.
      stopped = true;
      currentRunId += 1;
    },
    onSeek(newTimeSeconds: number) {
      // Cancel current run and restart from new position
      // NOTE(review): does not clear `stopped`, so onSeek after stop() spawns
      // a run that exits immediately — confirm that is the intended contract.
      currentRunId += 1;
      const runId = currentRunId;
      void startPrefetching(newTimeSeconds, runId);
    },
    pause() {
      paused = true;
    },
    resume() {
      paused = false;
    },
  };
}