feat: add subtitle prefetch service with priority window

Implements background tokenization of upcoming subtitle cues with a configurable priority window. Supports stop, pause/resume, seek re-prioritization, and cache-full stopping condition.
2026-03-20 12:11:28 -07:00 · 2026-03-15 13:04:26 -07:00
parent 6cf0272e7e
commit f89aec31e8
2 changed files with 294 additions and 0 deletions
--- a/src/core/services/subtitle-prefetch.test.ts
+++ b/src/core/services/subtitle-prefetch.test.ts
@@ -0,0 +1,142 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+import {
+  computePriorityWindow,
+  createSubtitlePrefetchService,
+} from './subtitle-prefetch';
+import type { SubtitleCue } from './subtitle-cue-parser';
+import type { SubtitleData } from '../../types';
+
+/**
+ * Builds `count` synthetic cues spaced 5 seconds apart, each lasting 4 seconds.
+ * Cue `i` starts at `startOffset + 5 * i` and carries the text `line-<i>`.
+ */
+function makeCues(count: number, startOffset = 0): SubtitleCue[] {
+  return Array.from({ length: count }, (_unused, index) => {
+    const startTime = startOffset + index * 5;
+    return { startTime, endTime: startTime + 4, text: `line-${index}` };
+  });
+}
+
+test('computePriorityWindow returns next N cues from current position', () => {
+  // makeCues places a cue every 5s, so at t=12.0 cue 2 (10-14) is the one in
+  // progress and cue 3 (start=15) is the first that has not started yet.
+  const upcoming = computePriorityWindow(makeCues(20), 12.0, 5);
+
+  assert.equal(upcoming.length, 5);
+  const [first, , , , last] = upcoming;
+  assert.equal(first!.text, 'line-3');
+  assert.equal(last!.text, 'line-7');
+});
+
+test('computePriorityWindow clamps to remaining cues at end of file', () => {
+  // The five cues start at 0, 5, 10, 15 and 20. At t=18.0 cue 3 is in progress,
+  // leaving cue 4 as the only one still ahead even though 10 were requested.
+  const upcoming = computePriorityWindow(makeCues(5), 18.0, 10);
+
+  assert.deepEqual(
+    upcoming.map((cue) => cue.text),
+    ['line-4'],
+  );
+});
+
+test('computePriorityWindow returns empty when past all cues', () => {
+  // The last of the three cues starts at t=10, long before the queried position.
+  const upcoming = computePriorityWindow(makeCues(3), 999.0, 10);
+
+  assert.strictEqual(upcoming.length, 0);
+});
+
+test('computePriorityWindow at position 0 returns first N cues', () => {
+  // Cue 0 starts exactly at t=0. A cue starting at the current position counts
+  // as upcoming, so the window begins with it.
+  const requested = 5;
+  const upcoming = computePriorityWindow(makeCues(20), 0, requested);
+
+  assert.equal(upcoming.length, requested);
+  assert.equal(upcoming[0]!.text, 'line-0');
+});
+
+/**
+ * Resolves after one zero-delay timer has fired.
+ *
+ * Despite the name this waits for a timer tick rather than only draining the
+ * microtask queue: the promise is settled from a `setTimeout(..., 0)` callback.
+ */
+function flushMicrotasks(): Promise<void> {
+  return new Promise<void>((done) => {
+    setTimeout(done, 0);
+  });
+}
+
+test('prefetch service tokenizes priority window cues and caches them', async () => {
+  const cues = makeCues(20);
+  const cached: Map<string, SubtitleData> = new Map();
+  let tokenizeCalls = 0;
+
+  const service = createSubtitlePrefetchService({
+    cues,
+    tokenizeSubtitle: async (text) => {
+      tokenizeCalls += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: (text, data) => {
+      cached.set(text, data);
+    },
+    isCacheFull: () => false,
+    priorityWindowSize: 3,
+  });
+
+  service.start(0);
+  // Allow all async tokenization to complete: turn the event loop more times
+  // than there are cues.
+  for (let i = 0; i < 25; i += 1) {
+    await flushMicrotasks();
+  }
+  service.stop();
+
+  // Priority window (first 3) should be cached, each entry holding exactly what
+  // the tokenizer produced for that cue's text.
+  for (const text of ['line-0', 'line-1', 'line-2']) {
+    assert.deepEqual(cached.get(text), { text, tokens: [] }, `Expected ${text} to be cached`);
+  }
+
+  // The counter was previously incremented but never checked; the cached
+  // entries above can only have come from real tokenizer invocations.
+  assert.ok(tokenizeCalls >= 3, `Expected >= 3 tokenize calls, got ${tokenizeCalls}`);
+});
+
+test('prefetch service stops when cache is full', async () => {
+  // The fake cache reports itself full once five entries have been stored.
+  const cacheCapacity = 5;
+  const counters = { tokenized: 0, stored: 0 };
+
+  const service = createSubtitlePrefetchService({
+    cues: makeCues(20),
+    tokenizeSubtitle: async (text) => {
+      counters.tokenized += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: () => {
+      counters.stored += 1;
+    },
+    isCacheFull: () => counters.stored >= cacheCapacity,
+    priorityWindowSize: 3,
+  });
+
+  service.start(0);
+  let remainingTicks = 30;
+  while (remainingTicks > 0) {
+    await flushMicrotasks();
+    remainingTicks -= 1;
+  }
+  service.stop();
+
+  // Prefetching must halt around the capacity instead of walking all 20 cues.
+  const tokenizeCalls = counters.tokenized;
+  assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
+});
+
+test('prefetch service can be stopped mid-flight', async () => {
+  const totalCues = 100;
+  let tokenizeCalls = 0;
+
+  const service = createSubtitlePrefetchService({
+    cues: makeCues(totalCues),
+    tokenizeSubtitle: async (text) => {
+      tokenizeCalls += 1;
+      return { text, tokens: [] };
+    },
+    preCacheTokenization: () => {},
+    isCacheFull: () => false,
+    priorityWindowSize: 3,
+  });
+
+  // Give the run two event-loop turns of head start, then cancel it.
+  service.start(0);
+  await flushMicrotasks();
+  await flushMicrotasks();
+  service.stop();
+  const callsAtStop = tokenizeCalls;
+
+  // Keep the event loop turning; a cancelled run must not tokenize anything else.
+  for (let tick = 0; tick < 10; tick += 1) {
+    await flushMicrotasks();
+  }
+
+  assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
+  assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
+});
--- a/src/core/services/subtitle-prefetch.ts
+++ b/src/core/services/subtitle-prefetch.ts
@@ -0,0 +1,152 @@
+import type { SubtitleCue } from './subtitle-cue-parser';
+import type { SubtitleData } from '../../types';
+
+/** Collaborators and settings supplied to `createSubtitlePrefetchService`. */
+export interface SubtitlePrefetchServiceDeps {
+  /**
+   * Cues to prefetch.
+   * NOTE(review): the priority window starts at the first cue, in array order,
+   * whose start time is at or after the playback position, so this list is
+   * presumably sorted by start time — confirm with the caller.
+   */
+  cues: SubtitleCue[];
+  /** Tokenizes one cue's text. A `null` result or a rejection makes the service skip that cue. */
+  tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
+  /** Receives each successful tokenization together with the cue text it was produced from. */
+  preCacheTokenization: (text: string, data: SubtitleData) => void;
+  /** Consulted before every cue; returning `true` ends the cue list currently being processed. */
+  isCacheFull: () => boolean;
+  /** How many upcoming cues are tokenized ahead of everything else. Defaults to 10. */
+  priorityWindowSize?: number;
+}
+
+/** Control surface returned by `createSubtitlePrefetchService`. */
+export interface SubtitlePrefetchService {
+  /** Clears the stopped and paused flags and begins a new prefetch run from the given position. */
+  start: (currentTimeSeconds: number) => void;
+  /** Marks the service stopped and invalidates the run in progress. */
+  stop: () => void;
+  /**
+   * Invalidates the run in progress and begins a new one from the new position.
+   * It does not clear the stopped flag, so after `stop()` no cues are processed
+   * until `start()` is called again.
+   */
+  onSeek: (newTimeSeconds: number) => void;
+  /** Holds the run before its next cue; a tokenization already awaiting its result is not interrupted. */
+  pause: () => void;
+  /** Clears the paused flag so a held run can continue. */
+  resume: () => void;
+}
+
+/** Number of upcoming cues tokenized first when `priorityWindowSize` is not supplied. */
+const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
+
+/**
+ * Selects the cues that should be tokenized first: up to `windowSize` cues,
+ * beginning with the first one that starts at or after `currentTimeSeconds`.
+ *
+ * A cue that started before the current position is never part of the window.
+ * The search stops at the first match in array order.
+ * NOTE(review): that presumes `cues` is sorted by `startTime` — confirm with
+ * the cue parser.
+ *
+ * @param cues - Subtitle cues to choose from; the array is not modified.
+ * @param currentTimeSeconds - Playback position in seconds.
+ * @param windowSize - Maximum number of cues to return. Zero, negative, and
+ *   `NaN` sizes all produce an empty window.
+ * @returns A new array of at most `windowSize` cues, or `[]` when nothing qualifies.
+ */
+export function computePriorityWindow(
+  cues: SubtitleCue[],
+  currentTimeSeconds: number,
+  windowSize: number,
+): SubtitleCue[] {
+  // Written as a negation so that NaN is rejected as well. Without this guard a
+  // negative size can reach `slice` as a negative end index, which counts from
+  // the end of the array and returns cues that were never requested.
+  if (!(windowSize > 0)) {
+    return [];
+  }
+
+  // Find the first cue whose start time is >= current position.
+  // This includes cues that start exactly at the current time (they haven't
+  // been displayed yet and should be prefetched).
+  const startIndex = cues.findIndex((cue) => cue.startTime >= currentTimeSeconds);
+
+  if (startIndex < 0) {
+    // All cues are before current time, or there are no cues at all
+    return [];
+  }
+
+  return cues.slice(startIndex, startIndex + windowSize);
+}
+
+/**
+ * Creates a service that tokenizes subtitle cues in the background, one cue at
+ * a time, handing each result to `deps.preCacheTokenization`.
+ *
+ * Each run works through three cue lists in order:
+ *   1. the priority window just ahead of the playback position,
+ *   2. every other cue that starts after the position,
+ *   3. cues that start at or before the position (for rewind support).
+ * Cues whose text already appeared in the priority window are left out of the
+ * later lists.
+ *
+ * The service is created in the stopped state; nothing happens until `start()`.
+ *
+ * @param deps - Cue list, tokenizer, cache callbacks and optional window size.
+ * @returns Controls for starting, stopping, seeking, pausing and resuming.
+ */
+export function createSubtitlePrefetchService(
+  deps: SubtitlePrefetchServiceDeps,
+): SubtitlePrefetchService {
+  const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
+  let stopped = true;
+  let paused = false;
+  // Bumped whenever a run is started or cancelled. A run whose id no longer
+  // matches has been superseded and must bail out.
+  let currentRunId = 0;
+
+  /** True while the run identified by `runId` is still the one allowed to work. */
+  function isRunActive(runId: number): boolean {
+    return !stopped && runId === currentRunId;
+  }
+
+  /** Resolves after a timer of `ms` milliseconds has fired. */
+  function delay(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  /** Tokenizes the given cues in order until the run is cancelled or the cache is full. */
+  async function tokenizeCueList(
+    cuesToProcess: SubtitleCue[],
+    runId: number,
+  ): Promise<void> {
+    for (const cue of cuesToProcess) {
+      if (!isRunActive(runId)) {
+        return;
+      }
+
+      // Wait while paused, polling so that stop/seek can still cancel the run
+      while (paused && isRunActive(runId)) {
+        await delay(10);
+      }
+
+      if (!isRunActive(runId)) {
+        return;
+      }
+
+      if (deps.isCacheFull()) {
+        return;
+      }
+
+      try {
+        const result = await deps.tokenizeSubtitle(cue.text);
+        // The run may have been cancelled while the tokenizer was working;
+        // in that case the result is dropped rather than cached.
+        if (result && isRunActive(runId)) {
+          deps.preCacheTokenization(cue.text, result);
+        }
+      } catch {
+        // Skip failed cues, continue prefetching
+      }
+
+      // Yield to allow live processing to take priority
+      await delay(0);
+    }
+  }
+
+  /** Runs the three prefetch phases for one run, stopping early once it is cancelled. */
+  async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
+    const cues = deps.cues;
+
+    // Phase 1: Priority window
+    const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
+    await tokenizeCueList(priorityCues, runId);
+
+    if (!isRunActive(runId)) {
+      return;
+    }
+
+    // Phase 2: Background - remaining cues forward from current position
+    const priorityTexts = new Set(priorityCues.map((c) => c.text));
+    const remainingCues = cues.filter(
+      (cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
+    );
+    await tokenizeCueList(remainingCues, runId);
+
+    if (!isRunActive(runId)) {
+      return;
+    }
+
+    // Phase 3: Background - earlier cues (for rewind support)
+    const earlierCues = cues.filter(
+      (cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
+    );
+    await tokenizeCueList(earlierCues, runId);
+  }
+
+  /** Supersedes any run in progress and starts a new one from `fromTimeSeconds`. */
+  function launchRun(fromTimeSeconds: number): void {
+    currentRunId += 1;
+    const runId = currentRunId;
+    // The run is fire-and-forget, so it needs its own rejection handler:
+    // `deps.isCacheFull()` is called outside the per-cue try/catch and a throw
+    // there would otherwise surface as an unhandled promise rejection.
+    void startPrefetching(fromTimeSeconds, runId).catch(() => {
+      // Prefetching is best-effort; a failing dependency ends this run only.
+    });
+  }
+
+  return {
+    start(currentTimeSeconds: number) {
+      stopped = false;
+      paused = false;
+      launchRun(currentTimeSeconds);
+    },
+
+    stop() {
+      stopped = true;
+      currentRunId += 1;
+    },
+
+    onSeek(newTimeSeconds: number) {
+      // A stopped service stays idle until start(); a run launched now could
+      // never process a cue, so skip building its cue lists altogether.
+      if (stopped) {
+        return;
+      }
+      // Cancel current run and restart from new position
+      launchRun(newTimeSeconds);
+    },
+
+    pause() {
+      paused = true;
+    },
+
+    resume() {
+      paused = false;
+    },
+  };
+}