Feature/renderer performance (#24)

This commit is contained in:
2026-03-15 17:21:57 -07:00
parent 46fbea902a
commit 650e95cdc3
28 changed files with 3435 additions and 107 deletions

View File

@@ -63,6 +63,8 @@ const MPV_SUBTITLE_PROPERTY_OBSERVATIONS: string[] = [
'media-title',
'secondary-sub-visibility',
'sub-visibility',
'sid',
'track-list',
];
const MPV_INITIAL_PROPERTY_REQUESTS: Array<MpvProtocolCommand> = [

View File

@@ -60,6 +60,8 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
emitSubtitleAssChange: (payload) => state.events.push(payload),
emitSubtitleTiming: (payload) => state.events.push(payload),
emitSecondarySubtitleChange: (payload) => state.events.push(payload),
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
getCurrentSubText: () => state.subText,
setCurrentSubText: (text) => {
state.subText = text;
@@ -120,6 +122,24 @@ test('dispatchMpvProtocolMessage emits subtitle text on property change', async
assert.deepEqual(state.events, [{ text: '字幕', isOverlayVisible: false }]);
});
test('dispatchMpvProtocolMessage emits subtitle track changes', async () => {
const { deps, state } = createDeps({
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
});
await dispatchMpvProtocolMessage(
{ event: 'property-change', name: 'sid', data: '3' },
deps,
);
await dispatchMpvProtocolMessage(
{ event: 'property-change', name: 'track-list', data: [{ type: 'sub', id: 3 }] },
deps,
);
assert.deepEqual(state.events, [{ sid: 3 }, { trackList: [{ type: 'sub', id: 3 }] }]);
});
test('dispatchMpvProtocolMessage enforces sub-visibility hidden when overlay suppression is enabled', async () => {
const { deps, state } = createDeps({
isVisibleOverlayVisible: () => true,

View File

@@ -52,6 +52,8 @@ export interface MpvProtocolHandleMessageDeps {
emitSubtitleAssChange: (payload: { text: string }) => void;
emitSubtitleTiming: (payload: { text: string; start: number; end: number }) => void;
emitSecondarySubtitleChange: (payload: { text: string }) => void;
emitSubtitleTrackChange: (payload: { sid: number | null }) => void;
emitSubtitleTrackListChange: (payload: { trackList: unknown[] | null }) => void;
getCurrentSubText: () => string;
setCurrentSubText: (text: string) => void;
setCurrentSubStart: (value: number) => void;
@@ -160,6 +162,18 @@ export async function dispatchMpvProtocolMessage(
const nextSubText = (msg.data as string) || '';
deps.setCurrentSecondarySubText(nextSubText);
deps.emitSecondarySubtitleChange({ text: nextSubText });
} else if (msg.name === 'sid') {
const sid =
typeof msg.data === 'number'
? msg.data
: typeof msg.data === 'string'
? Number(msg.data)
: null;
deps.emitSubtitleTrackChange({ sid: sid !== null && Number.isFinite(sid) ? sid : null });
} else if (msg.name === 'track-list') {
deps.emitSubtitleTrackListChange({
trackList: Array.isArray(msg.data) ? (msg.data as unknown[]) : null,
});
} else if (msg.name === 'aid') {
deps.setCurrentAudioTrackId(typeof msg.data === 'number' ? (msg.data as number) : null);
deps.syncCurrentAudioStreamIndex();

View File

@@ -118,6 +118,8 @@ export interface MpvIpcClientEventMap {
'duration-change': { duration: number };
'pause-change': { paused: boolean };
'secondary-subtitle-change': { text: string };
'subtitle-track-change': { sid: number | null };
'subtitle-track-list-change': { trackList: unknown[] | null };
'media-path-change': { path: string };
'media-title-change': { title: string | null };
'subtitle-metrics-change': { patch: Partial<MpvSubtitleRenderMetrics> };
@@ -325,6 +327,12 @@ export class MpvIpcClient implements MpvClient {
emitSecondarySubtitleChange: (payload) => {
this.emit('secondary-subtitle-change', payload);
},
emitSubtitleTrackChange: (payload) => {
this.emit('subtitle-track-change', payload);
},
emitSubtitleTrackListChange: (payload) => {
this.emit('subtitle-track-list-change', payload);
},
getCurrentSubText: () => this.currentSubText,
setCurrentSubText: (text: string) => {
this.currentSubText = text;

View File

@@ -0,0 +1,274 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { parseSrtCues, parseAssCues, parseSubtitleCues } from './subtitle-cue-parser';
import type { SubtitleCue } from './subtitle-cue-parser';
test('parseSrtCues parses basic SRT content', () => {
const content = [
'1',
'00:00:01,000 --> 00:00:04,000',
'こんにちは',
'',
'2',
'00:00:05,000 --> 00:00:08,500',
'元気ですか',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
test('parseSrtCues handles multi-line subtitle text', () => {
const content = [
'1',
'00:01:00,000 --> 00:01:05,000',
'これは',
'テストです',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは\nテストです');
});
test('parseSrtCues handles hours in timestamps', () => {
const content = [
'1',
'01:30:00,000 --> 01:30:05,000',
'テスト',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
test('parseSrtCues handles VTT-style dot separator', () => {
const content = [
'1',
'00:00:01.000 --> 00:00:04.000',
'VTTスタイル',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
});
test('parseSrtCues returns empty array for empty content', () => {
assert.deepEqual(parseSrtCues(''), []);
assert.deepEqual(parseSrtCues(' \n\n '), []);
});
test('parseSrtCues skips malformed timing lines gracefully', () => {
const content = [
'1',
'NOT A TIMING LINE',
'テスト',
'',
'2',
'00:00:01,000 --> 00:00:02,000',
'有効',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, '有効');
});
test('parseAssCues parses basic ASS dialogue lines', () => {
const content = [
'[Script Info]',
'Title: Test',
'',
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,こんにちは',
'Dialogue: 0,0:00:05.00,0:00:08.50,Default,,0,0,0,,元気ですか',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
test('parseAssCues strips override tags from text', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\b1}太字{\\b0}テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '太字テスト');
});
test('parseAssCues handles text containing commas', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,はい、そうです、ね',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, 'はい、そうです、ね');
});
test('parseAssCues handles \\N line breaks', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,一行目\\N二行目',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '一行目\\N二行目');
});
test('parseAssCues returns empty for content without Events section', () => {
const content = [
'[Script Info]',
'Title: Test',
].join('\n');
assert.deepEqual(parseAssCues(content), []);
});
test('parseAssCues skips Comment lines', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Comment: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,これはコメント',
'Dialogue: 0,0:00:05.00,0:00:08.00,Default,,0,0,0,,これは字幕',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは字幕');
});
test('parseAssCues handles hour timestamps', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,1:30:00.00,1:30:05.00,Default,,0,0,0,,テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
test('parseAssCues respects dynamic field ordering from the Format row', () => {
const content = [
'[Events]',
'Format: Layer, Style, Start, End, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,Default,0:00:01.00,0:00:04.00,,0,0,0,,順番が違う',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, '順番が違う');
});
test('parseSubtitleCues auto-detects SRT format', () => {
const content = [
'1',
'00:00:01,000 --> 00:00:04,000',
'SRTテスト',
'',
].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'SRTテスト');
});
test('parseSubtitleCues auto-detects ASS format', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,ASSテスト',
].join('\n');
const cues = parseSubtitleCues(content, 'test.ass');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'ASSテスト');
});
test('parseSubtitleCues auto-detects VTT format', () => {
const content = [
'1',
'00:00:01.000 --> 00:00:04.000',
'VTTテスト',
'',
].join('\n');
const cues = parseSubtitleCues(content, 'test.vtt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'VTTテスト');
});
test('parseSubtitleCues returns empty for unknown format', () => {
assert.deepEqual(parseSubtitleCues('random content', 'test.xyz'), []);
});
test('parseSubtitleCues returns cues sorted by start time', () => {
const content = [
'1',
'00:00:10,000 --> 00:00:14,000',
'二番目',
'',
'2',
'00:00:01,000 --> 00:00:04,000',
'一番目',
'',
].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues[0]!.text, '一番目');
assert.equal(cues[1]!.text, '二番目');
});
test('parseSubtitleCues detects subtitle formats from remote URLs', () => {
const assContent = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,URLテスト',
].join('\n');
const cues = parseSubtitleCues(assContent, 'https://host/subs.ass?lang=ja#track');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'URLテスト');
});

View File

@@ -0,0 +1,180 @@
export interface SubtitleCue {
startTime: number;
endTime: number;
text: string;
}
const SRT_TIMING_PATTERN =
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
function parseTimestamp(
hours: string | undefined,
minutes: string,
seconds: string,
millis: string,
): number {
return (
Number(hours || 0) * 3600 +
Number(minutes) * 60 +
Number(seconds) +
Number(millis.padEnd(3, '0')) / 1000
);
}
export function parseSrtCues(content: string): SubtitleCue[] {
const cues: SubtitleCue[] = [];
const lines = content.split(/\r?\n/);
let i = 0;
while (i < lines.length) {
const line = lines[i]!;
const timingMatch = SRT_TIMING_PATTERN.exec(line);
if (!timingMatch) {
i += 1;
continue;
}
const startTime = parseTimestamp(timingMatch[1], timingMatch[2]!, timingMatch[3]!, timingMatch[4]!);
const endTime = parseTimestamp(timingMatch[5], timingMatch[6]!, timingMatch[7]!, timingMatch[8]!);
i += 1;
const textLines: string[] = [];
while (i < lines.length && lines[i]!.trim() !== '') {
textLines.push(lines[i]!);
i += 1;
}
const text = textLines.join('\n').trim();
if (text) {
cues.push({ startTime, endTime, text });
}
}
return cues;
}
const ASS_OVERRIDE_TAG_PATTERN = /\{[^}]*\}/g;
const ASS_TIMING_PATTERN = /^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/;
const ASS_FORMAT_PREFIX = 'Format:';
const ASS_DIALOGUE_PREFIX = 'Dialogue:';
function parseAssTimestamp(raw: string): number | null {
const match = ASS_TIMING_PATTERN.exec(raw.trim());
if (!match) {
return null;
}
const hours = Number(match[1]);
const minutes = Number(match[2]);
const seconds = Number(match[3]);
const centiseconds = Number(match[4]!.padEnd(2, '0'));
return hours * 3600 + minutes * 60 + seconds + centiseconds / 100;
}
export function parseAssCues(content: string): SubtitleCue[] {
const cues: SubtitleCue[] = [];
const lines = content.split(/\r?\n/);
let inEventsSection = false;
let startFieldIndex = -1;
let endFieldIndex = -1;
let textFieldIndex = -1;
for (const line of lines) {
const trimmed = line.trim();
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
inEventsSection = trimmed.toLowerCase() === '[events]';
if (!inEventsSection) {
startFieldIndex = -1;
endFieldIndex = -1;
textFieldIndex = -1;
}
continue;
}
if (!inEventsSection) {
continue;
}
if (trimmed.startsWith(ASS_FORMAT_PREFIX)) {
const formatFields = trimmed
.slice(ASS_FORMAT_PREFIX.length)
.split(',')
.map((field) => field.trim().toLowerCase());
startFieldIndex = formatFields.indexOf('start');
endFieldIndex = formatFields.indexOf('end');
textFieldIndex = formatFields.indexOf('text');
continue;
}
if (!trimmed.startsWith(ASS_DIALOGUE_PREFIX)) {
continue;
}
if (startFieldIndex < 0 || endFieldIndex < 0 || textFieldIndex < 0) {
continue;
}
const fields = trimmed.slice(ASS_DIALOGUE_PREFIX.length).split(',');
if (
startFieldIndex >= fields.length ||
endFieldIndex >= fields.length ||
textFieldIndex >= fields.length
) {
continue;
}
const startTime = parseAssTimestamp(fields[startFieldIndex]!);
const endTime = parseAssTimestamp(fields[endFieldIndex]!);
if (startTime === null || endTime === null) {
continue;
}
const rawText = fields
.slice(textFieldIndex)
.join(',')
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
.trim();
if (rawText) {
cues.push({ startTime, endTime, text: rawText });
}
}
return cues;
}
function detectSubtitleFormat(source: string): 'srt' | 'vtt' | 'ass' | 'ssa' | null {
const [normalizedSource = source] = (() => {
try {
return /^[a-z]+:\/\//i.test(source) ? new URL(source).pathname : source;
} catch {
return source;
}
})().split(/[?#]/, 1)[0] ?? '';
const ext = normalizedSource.split('.').pop()?.toLowerCase() ?? '';
if (ext === 'srt') return 'srt';
if (ext === 'vtt') return 'vtt';
if (ext === 'ass' || ext === 'ssa') return 'ass';
return null;
}
export function parseSubtitleCues(content: string, filename: string): SubtitleCue[] {
const format = detectSubtitleFormat(filename);
let cues: SubtitleCue[];
switch (format) {
case 'srt':
case 'vtt':
cues = parseSrtCues(content);
break;
case 'ass':
case 'ssa':
cues = parseAssCues(content);
break;
default:
return [];
}
cues.sort((a, b) => a.startTime - b.startTime);
return cues;
}

View File

@@ -0,0 +1,234 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
computePriorityWindow,
createSubtitlePrefetchService,
} from './subtitle-prefetch';
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
function makeCues(count: number, startOffset = 0): SubtitleCue[] {
return Array.from({ length: count }, (_, i) => ({
startTime: startOffset + i * 5,
endTime: startOffset + i * 5 + 4,
text: `line-${i}`,
}));
}
test('computePriorityWindow returns next N cues from current position', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 12.0, 5);
assert.equal(window.length, 5);
// Position 12.0 falls during cue 2, so the window starts at cue 3 (startTime >= 12.0).
assert.equal(window[0]!.text, 'line-3');
assert.equal(window[4]!.text, 'line-7');
});
test('computePriorityWindow clamps to remaining cues at end of file', () => {
const cues = makeCues(5);
const window = computePriorityWindow(cues, 18.0, 10);
// Position 18.0 is during cue 3 (start=15). Only cue 4 is ahead.
assert.equal(window.length, 1);
assert.equal(window[0]!.text, 'line-4');
});
test('computePriorityWindow returns empty when past all cues', () => {
const cues = makeCues(3);
const window = computePriorityWindow(cues, 999.0, 10);
assert.equal(window.length, 0);
});
test('computePriorityWindow at position 0 returns first N cues', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 0, 5);
assert.equal(window.length, 5);
assert.equal(window[0]!.text, 'line-0');
});
function flushMicrotasks(): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, 0));
}
test('prefetch service tokenizes priority window cues and caches them', async () => {
const cues = makeCues(20);
const cached: Map<string, SubtitleData> = new Map();
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: (text, data) => {
cached.set(text, data);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Allow all async tokenization to complete
for (let i = 0; i < 25; i += 1) {
await flushMicrotasks();
}
service.stop();
// Priority window (first 3) should be cached
assert.ok(cached.has('line-0'));
assert.ok(cached.has('line-1'));
assert.ok(cached.has('line-2'));
});
test('prefetch service stops when cache is full', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
let cacheSize = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {
cacheSize += 1;
},
isCacheFull: () => cacheSize >= 5,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// Should have stopped at 5 (cache full), not tokenized all 20
assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
});
test('prefetch service can be stopped mid-flight', async () => {
const cues = makeCues(100);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.stop();
const callsAtStop = tokenizeCalls;
// Wait more to confirm no further calls
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
});
test('prefetch service onSeek re-prioritizes from new position', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Let a few cues process
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Seek to near the end
service.onSeek(80.0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// After seek to 80.0, cues starting after 80.0 (line-17, line-18, line-19) should appear in cached
const hasPostSeekCue = cachedTexts.some((t) => t === 'line-17' || t === 'line-18' || t === 'line-19');
assert.ok(hasPostSeekCue, 'Should have cached cues after seek position');
});
test('prefetch service still warms the priority window when cache is full', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => true,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.deepEqual(cachedTexts.slice(0, 3), ['line-0', 'line-1', 'line-2']);
});
test('prefetch service pause/resume halts and continues tokenization', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.pause();
const callsWhenPaused = tokenizeCalls;
// Wait while paused
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Should not have advanced much (may have 1 in-flight)
assert.ok(tokenizeCalls <= callsWhenPaused + 1, 'Should not tokenize much while paused');
service.resume();
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.ok(tokenizeCalls > callsWhenPaused + 1, 'Should resume tokenizing after unpause');
});

View File

@@ -0,0 +1,153 @@
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
export interface SubtitlePrefetchServiceDeps {
cues: SubtitleCue[];
tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
preCacheTokenization: (text: string, data: SubtitleData) => void;
isCacheFull: () => boolean;
priorityWindowSize?: number;
}
export interface SubtitlePrefetchService {
start: (currentTimeSeconds: number) => void;
stop: () => void;
onSeek: (newTimeSeconds: number) => void;
pause: () => void;
resume: () => void;
}
const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
export function computePriorityWindow(
cues: SubtitleCue[],
currentTimeSeconds: number,
windowSize: number,
): SubtitleCue[] {
if (cues.length === 0) {
return [];
}
// Find the first cue whose start time is >= current position.
// This includes cues that start exactly at the current time (they haven't
// been displayed yet and should be prefetched).
let startIndex = -1;
for (let i = 0; i < cues.length; i += 1) {
if (cues[i]!.startTime >= currentTimeSeconds) {
startIndex = i;
break;
}
}
if (startIndex < 0) {
// All cues are before current time
return [];
}
return cues.slice(startIndex, startIndex + windowSize);
}
export function createSubtitlePrefetchService(
deps: SubtitlePrefetchServiceDeps,
): SubtitlePrefetchService {
const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
let stopped = true;
let paused = false;
let currentRunId = 0;
async function tokenizeCueList(
cuesToProcess: SubtitleCue[],
runId: number,
options: { allowWhenCacheFull?: boolean } = {},
): Promise<void> {
for (const cue of cuesToProcess) {
if (stopped || runId !== currentRunId) {
return;
}
// Wait while paused
while (paused && !stopped && runId === currentRunId) {
await new Promise((resolve) => setTimeout(resolve, 10));
}
if (stopped || runId !== currentRunId) {
return;
}
if (!options.allowWhenCacheFull && deps.isCacheFull()) {
return;
}
try {
const result = await deps.tokenizeSubtitle(cue.text);
if (result && !stopped && runId === currentRunId) {
deps.preCacheTokenization(cue.text, result);
}
} catch {
// Skip failed cues, continue prefetching
}
// Yield to allow live processing to take priority
await new Promise((resolve) => setTimeout(resolve, 0));
}
}
async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
const cues = deps.cues;
// Phase 1: Priority window
const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
await tokenizeCueList(priorityCues, runId, { allowWhenCacheFull: true });
if (stopped || runId !== currentRunId) {
return;
}
// Phase 2: Background - remaining cues forward from current position
const priorityTexts = new Set(priorityCues.map((c) => c.text));
const remainingCues = cues.filter(
(cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
);
await tokenizeCueList(remainingCues, runId);
if (stopped || runId !== currentRunId) {
return;
}
// Phase 3: Background - earlier cues (for rewind support)
const earlierCues = cues.filter(
(cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
);
await tokenizeCueList(earlierCues, runId);
}
return {
start(currentTimeSeconds: number) {
stopped = false;
paused = false;
currentRunId += 1;
const runId = currentRunId;
void startPrefetching(currentTimeSeconds, runId);
},
stop() {
stopped = true;
currentRunId += 1;
},
onSeek(newTimeSeconds: number) {
// Cancel current run and restart from new position
currentRunId += 1;
const runId = currentRunId;
void startPrefetching(newTimeSeconds, runId);
},
pause() {
paused = true;
},
resume() {
paused = false;
},
};
}

View File

@@ -170,3 +170,45 @@ test('subtitle processing cache invalidation only affects future subtitle events
assert.equal(callsByText.get('same'), 2);
});
test('preCacheTokenization stores entry that is returned on next subtitle change', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('予め', { text: '予め', tokens: [] });
controller.onSubtitleChange('予め');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'should not call tokenize when pre-cached');
assert.deepEqual(emitted, [{ text: '予め', tokens: [] }]);
});
test('isCacheFull returns false when cache is below limit', () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: null }),
emitSubtitle: () => {},
});
assert.equal(controller.isCacheFull(), false);
});
test('isCacheFull returns true when cache reaches limit', async () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
emitSubtitle: () => {},
});
// Fill cache to the 256 limit
for (let i = 0; i < 256; i += 1) {
controller.preCacheTokenization(`line-${i}`, { text: `line-${i}`, tokens: [] });
}
assert.equal(controller.isCacheFull(), true);
});

View File

@@ -11,6 +11,8 @@ export interface SubtitleProcessingController {
onSubtitleChange: (text: string) => void;
refreshCurrentSubtitle: (textOverride?: string) => void;
invalidateTokenizationCache: () => void;
preCacheTokenization: (text: string, data: SubtitleData) => void;
isCacheFull: () => boolean;
}
export function createSubtitleProcessingController(
@@ -130,5 +132,11 @@ export function createSubtitleProcessingController(
invalidateTokenizationCache: () => {
tokenizationCache.clear();
},
preCacheTokenization: (text: string, data: SubtitleData) => {
setCachedTokenization(text, data);
},
isCacheFull: () => {
return tokenizationCache.size >= SUBTITLE_TOKENIZATION_CACHE_LIMIT;
},
};
}

View File

@@ -2707,6 +2707,63 @@ test('tokenizeSubtitle excludes merged function/content token from frequency hig
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, true);
});
test('tokenizeSubtitle keeps frequency for content-led merged token with trailing colloquial suffixes', async () => {
const result = await tokenizeSubtitle(
'張り切ってんじゃ',
makeDepsFromYomitanTokens([{ surface: '張り切ってん', reading: 'はき', headword: '張り切る' }], {
getFrequencyDictionaryEnabled: () => true,
getFrequencyRank: (text) => (text === '張り切る' ? 5468 : null),
tokenizeWithMecab: async () => [
{
headword: '張り切る',
surface: '張り切っ',
reading: 'ハリキッ',
startPos: 0,
endPos: 4,
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'て',
surface: 'て',
reading: 'テ',
startPos: 4,
endPos: 5,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
pos2: '接続助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'んじゃ',
surface: 'んじゃ',
reading: 'ンジャ',
startPos: 5,
endPos: 8,
partOfSpeech: PartOfSpeech.other,
pos1: '接続詞',
pos2: '*',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
],
getMinSentenceWordsForNPlusOne: () => 1,
}),
);
assert.equal(result.tokens?.length, 1);
assert.equal(result.tokens?.[0]?.surface, '張り切ってん');
assert.equal(result.tokens?.[0]?.pos1, '動詞|助詞|接続詞');
assert.equal(result.tokens?.[0]?.frequencyRank, 5468);
});
test('tokenizeSubtitle excludes default non-independent pos2 from N+1 when JLPT/frequency are disabled', async () => {
let mecabCalls = 0;
const result = await tokenizeSubtitle(

View File

@@ -43,33 +43,24 @@ function resolveKnownWordText(
return matchMode === 'surface' ? surface : headword;
}
function applyKnownWordMarking(
tokens: MergedToken[],
isKnownWord: (text: string) => boolean,
knownWordMatchMode: NPlusOneMatchMode,
): MergedToken[] {
return tokens.map((token) => {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return {
...token,
isKnown: token.isKnown || (matchText ? isKnownWord(matchText) : false),
};
});
}
function normalizePos1Tag(pos1: string | undefined): string {
return typeof pos1 === 'string' ? pos1.trim() : '';
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
function splitNormalizedTagParts(normalizedTag: string): string[] {
if (!normalizedTag) {
return false;
return [];
}
const parts = normalizedTag
return normalizedTag
.split('|')
.map((part) => part.trim())
.filter((part) => part.length > 0);
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const parts = splitNormalizedTagParts(normalizedTag);
if (parts.length === 0) {
return false;
}
@@ -98,6 +89,44 @@ function normalizePos2Tag(pos2: string | undefined): string {
return typeof pos2 === 'string' ? pos2.trim() : '';
}
function isExcludedComponent(
pos1: string | undefined,
pos2: string | undefined,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): boolean {
return (
(typeof pos1 === 'string' && pos1Exclusions.has(pos1)) ||
(typeof pos2 === 'string' && pos2Exclusions.has(pos2))
);
}
function shouldAllowContentLedMergedTokenFrequency(
normalizedPos1: string,
normalizedPos2: string,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): boolean {
const pos1Parts = splitNormalizedTagParts(normalizedPos1);
if (pos1Parts.length < 2) {
return false;
}
const pos2Parts = splitNormalizedTagParts(normalizedPos2);
if (isExcludedComponent(pos1Parts[0], pos2Parts[0], pos1Exclusions, pos2Exclusions)) {
return false;
}
const componentCount = Math.max(pos1Parts.length, pos2Parts.length);
for (let index = 1; index < componentCount; index += 1) {
if (!isExcludedComponent(pos1Parts[index], pos2Parts[index], pos1Exclusions, pos2Exclusions)) {
return false;
}
}
return true;
}
function isFrequencyExcludedByPos(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
@@ -109,13 +138,20 @@ function isFrequencyExcludedByPos(
const normalizedPos1 = normalizePos1Tag(token.pos1);
const hasPos1 = normalizedPos1.length > 0;
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
const allowContentLedMergedToken = shouldAllowContentLedMergedTokenFrequency(
normalizedPos1,
normalizedPos2,
pos1Exclusions,
pos2Exclusions,
);
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions) && !allowContentLedMergedToken) {
return true;
}
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions) && !allowContentLedMergedToken) {
return true;
}
@@ -144,27 +180,6 @@ export function shouldExcludeTokenFromVocabularyPersistence(
);
}
function applyFrequencyMarking(
tokens: MergedToken[],
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): MergedToken[] {
return tokens.map((token) => {
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
return { ...token, frequencyRank: undefined };
}
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
const rank = Math.max(1, Math.floor(token.frequencyRank));
return { ...token, frequencyRank: rank };
}
return {
...token,
frequencyRank: undefined,
};
});
}
function getCachedJlptLevel(
lookupText: string,
@@ -425,24 +440,45 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return true;
}
function applyJlptMarking(
tokens: MergedToken[],
function computeTokenKnownStatus(
token: MergedToken,
isKnownWord: (text: string) => boolean,
knownWordMatchMode: NPlusOneMatchMode,
): boolean {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
}
function filterTokenFrequencyRank(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): number | undefined {
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
return undefined;
}
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
return Math.max(1, Math.floor(token.frequencyRank));
}
return undefined;
}
function computeTokenJlptLevel(
token: MergedToken,
getJlptLevel: (text: string) => JlptLevel | null,
): MergedToken[] {
return tokens.map((token) => {
if (!isJlptEligibleToken(token)) {
return { ...token, jlptLevel: undefined };
}
): JlptLevel | undefined {
if (!isJlptEligibleToken(token)) {
return undefined;
}
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
const fallbackLevel =
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
const fallbackLevel =
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
return {
...token,
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
};
});
const level = primaryLevel ?? fallbackLevel ?? token.jlptLevel;
return level ?? undefined;
}
export function annotateTokens(
@@ -453,36 +489,34 @@ export function annotateTokens(
const pos1Exclusions = resolvePos1Exclusions(options);
const pos2Exclusions = resolvePos2Exclusions(options);
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
const knownMarkedTokens = nPlusOneEnabled
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
: tokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
const frequencyEnabled = options.frequencyEnabled !== false;
const frequencyMarkedTokens = frequencyEnabled
? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
: knownMarkedTokens.map((token) => ({
...token,
frequencyRank: undefined,
}));
const jlptEnabled = options.jlptEnabled !== false;
const jlptMarkedTokens = jlptEnabled
? applyJlptMarking(frequencyMarkedTokens, deps.getJlptLevel)
: frequencyMarkedTokens.map((token) => ({
...token,
jlptLevel: undefined,
}));
// Single pass: compute known word status, frequency filtering, and JLPT level together
const annotated = tokens.map((token) => {
const isKnown = nPlusOneEnabled
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
: false;
const frequencyRank = frequencyEnabled
? filterTokenFrequencyRank(token, pos1Exclusions, pos2Exclusions)
: undefined;
const jlptLevel = jlptEnabled
? computeTokenJlptLevel(token, deps.getJlptLevel)
: undefined;
return {
...token,
isKnown,
isNPlusOneTarget: nPlusOneEnabled ? token.isNPlusOneTarget : false,
frequencyRank,
jlptLevel,
};
});
if (!nPlusOneEnabled) {
return jlptMarkedTokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
return annotated;
}
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
@@ -494,7 +528,7 @@ export function annotateTokens(
: 3;
return markNPlusOneTargets(
jlptMarkedTokens,
annotated,
sanitizedMinSentenceWordsForNPlusOne,
pos1Exclusions,
pos2Exclusions,