fix: make tokenization warmup one-shot

This commit is contained in:
2026-03-02 01:33:09 -08:00
parent 9a91951656
commit 7161fc3513
3 changed files with 150 additions and 2 deletions

View File

@@ -4,7 +4,7 @@ title: 'Tokenization performance: disable Yomitan MeCab parser, gate local MeCab
status: Done
assignee: []
created_date: '2026-03-02 07:44'
updated_date: '2026-03-02 07:46'
updated_date: '2026-03-02 09:20'
labels: []
dependencies: []
priority: high
@@ -43,6 +43,8 @@ Implemented tokenizer latency optimizations:
- added annotation-aware MeCab initialization gating in runtime warmup flow;
- added persistent local MeCab process (default idle shutdown: 30s) with queued requests, retry-on-process-end, idle auto-shutdown, and automatic restart on new work;
- added regression tests for Yomitan parse flag, MeCab warmup gating, and persistent/idle lifecycle behavior;
- fixed the tokenization warmup gate so that first-use warmup completion is sticky (`tokenizationWarmupCompleted`) and sequential `tokenizeSubtitle` calls no longer re-run the Yomitan/dictionary warmup path;
- added regression coverage in `src/main/runtime/composers/mpv-runtime-composer.test.ts` for sequential tokenize calls (`warmup` side effects run once);
- validated with targeted tests and `tsc --noEmit`.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -377,3 +377,142 @@ test('composeMpvRuntimeHandlers skips MeCab warmup when all POS-dependent annota
assert.deepEqual(calls, []);
});
// Regression test: two sequential `tokenizeSubtitle` calls on the composed
// handlers must run the warmup side effects (Yomitan extension load, JLPT
// dictionary prewarm, frequency dictionary prewarm) exactly once, while the
// stubbed tokenizer is still invoked for every call.
test('composeMpvRuntimeHandlers runs tokenization warmup once across sequential tokenize calls', async () => {
// Counters incremented by the stubbed warmup dependencies further down.
let yomitanWarmupCalls = 0;
let prewarmJlptCalls = 0;
let prewarmFrequencyCalls = 0;
// Texts received by the stubbed tokenizer, in call order.
const tokenizeCalls: string[] = [];
const composed = composeMpvRuntimeHandlers<
{ connect: () => void; on: () => void },
{ isKnownWord: () => boolean },
{ text: string }
>({
// Event-handler dependencies: all inert stubs; none of them record anything.
bindMpvMainEventHandlersMainDeps: {
appState: {
initialArgs: null,
overlayRuntimeInitialized: true,
mpvClient: null,
immersionTracker: null,
subtitleTimingTracker: null,
currentSubText: '',
currentSubAssText: '',
playbackPaused: null,
previousSecondarySubVisibility: null,
},
getQuitOnDisconnectArmed: () => false,
scheduleQuitCheck: () => {},
quitApp: () => {},
reportJellyfinRemoteStopped: () => {},
syncOverlayMpvSubtitleSuppression: () => {},
maybeRunAnilistPostWatchUpdate: async () => {},
logSubtitleTimingError: () => {},
broadcastToOverlayWindows: () => {},
onSubtitleChange: () => {},
refreshDiscordPresence: () => {},
ensureImmersionTrackerInitialized: () => {},
updateCurrentMediaPath: () => {},
restoreMpvSubVisibility: () => {},
getCurrentAnilistMediaKey: () => null,
resetAnilistMediaTracking: () => {},
maybeProbeAnilistDuration: () => {},
ensureAnilistMediaGuess: () => {},
syncImmersionMediaState: () => {},
updateCurrentMediaTitle: () => {},
resetAnilistMediaGuessState: () => {},
reportJellyfinRemoteProgress: () => {},
updateSubtitleRenderMetrics: () => {},
},
// mpv client factory dependencies: a client class whose methods do nothing.
mpvClientRuntimeServiceFactoryMainDeps: {
createClient: class {
connect(): void {}
on(): void {}
},
getSocketPath: () => '/tmp/mpv.sock',
getResolvedConfig: () => ({ auto_start_overlay: false }),
isAutoStartOverlayEnabled: () => false,
setOverlayVisible: () => {},
isVisibleOverlayVisible: () => false,
getReconnectTimer: () => null,
setReconnectTimer: () => {},
},
updateMpvSubtitleRenderMetricsMainDeps: {
getCurrentMetrics: () => BASE_METRICS,
setCurrentMetrics: () => {},
applyPatch: (current, patch) => ({ next: { ...current, ...patch }, changed: true }),
broadcastMetrics: () => {},
},
tokenizer: {
// Every annotation feature (N+1, JLPT, frequency) is reported as disabled
// and no MeCab tokenizer is available.
buildTokenizerDepsMainDeps: {
getYomitanExt: () => null,
getYomitanParserWindow: () => null,
setYomitanParserWindow: () => {},
getYomitanParserReadyPromise: () => null,
setYomitanParserReadyPromise: () => {},
getYomitanParserInitPromise: () => null,
setYomitanParserInitPromise: () => {},
isKnownWord: () => false,
recordLookup: () => {},
getKnownWordMatchMode: () => 'headword',
getNPlusOneEnabled: () => false,
getMinSentenceWordsForNPlusOne: () => 3,
getJlptLevel: () => null,
getJlptEnabled: () => false,
getFrequencyDictionaryEnabled: () => false,
getFrequencyDictionaryMatchMode: () => 'headword',
getFrequencyRank: () => null,
getYomitanGroupDebugEnabled: () => false,
getMecabTokenizer: () => null,
},
createTokenizerRuntimeDeps: () => ({ isKnownWord: () => false }),
// Stub tokenizer: records the input text and echoes it back.
tokenizeSubtitle: async (text) => {
tokenizeCalls.push(text);
return { text };
},
createMecabTokenizerAndCheckMainDeps: {
getMecabTokenizer: () => null,
setMecabTokenizer: () => {},
createMecabTokenizer: () => ({ id: 'mecab' }),
checkAvailability: async () => {},
},
// Dictionary prewarm stubs: each only counts how often it is invoked.
prewarmSubtitleDictionariesMainDeps: {
ensureJlptDictionaryLookup: async () => {
prewarmJlptCalls += 1;
},
ensureFrequencyDictionaryLookup: async () => {
prewarmFrequencyCalls += 1;
},
},
},
warmups: {
launchBackgroundWarmupTaskMainDeps: {
now: () => 0,
logDebug: () => {},
logWarn: () => {},
},
startBackgroundWarmupsMainDeps: {
getStarted: () => false,
setStarted: () => {},
isTexthookerOnlyMode: () => false,
// Counts Yomitan extension load requests.
ensureYomitanExtensionLoaded: async () => {
yomitanWarmupCalls += 1;
},
// All background warmup flags are off.
// NOTE(review): presumably so the counted warmup calls can only come from
// the tokenize path — confirm against composeMpvRuntimeHandlers.
shouldWarmupMecab: () => false,
shouldWarmupYomitanExtension: () => false,
shouldWarmupSubtitleDictionaries: () => false,
shouldWarmupJellyfinRemoteSession: () => false,
shouldAutoConnectJellyfinRemote: () => false,
startJellyfinRemoteSession: async () => {},
},
},
});
// Two sequential calls; the second must not repeat the warmup side effects.
await composed.tokenizeSubtitle('first');
await composed.tokenizeSubtitle('second');
// Both calls reach the tokenizer, in order.
assert.deepEqual(tokenizeCalls, ['first', 'second']);
// Each warmup side effect ran exactly once across both calls.
assert.equal(yomitanWarmupCalls, 1);
assert.equal(prewarmJlptCalls, 1);
assert.equal(prewarmFrequencyCalls, 1);
});

View File

@@ -142,7 +142,11 @@ export function composeMpvRuntimeHandlers<
return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
};
let tokenizationWarmupInFlight: Promise<void> | null = null;
let tokenizationWarmupCompleted = false;
const startTokenizationWarmups = (): Promise<void> => {
if (tokenizationWarmupCompleted) {
return Promise.resolve();
}
if (!tokenizationWarmupInFlight) {
tokenizationWarmupInFlight = (async () => {
await options.warmups.startBackgroundWarmupsMainDeps.ensureYomitanExtensionLoaded();
@@ -153,6 +157,7 @@ export function composeMpvRuntimeHandlers<
await createMecabTokenizerAndCheck().catch(() => {});
}
await prewarmSubtitleDictionaries({ showLoadingOsd: true });
tokenizationWarmupCompleted = true;
})().finally(() => {
tokenizationWarmupInFlight = null;
});
@@ -160,7 +165,9 @@ export function composeMpvRuntimeHandlers<
return tokenizationWarmupInFlight;
};
const tokenizeSubtitle = async (text: string): Promise<TTokenizedSubtitle> => {
await startTokenizationWarmups();
if (!tokenizationWarmupCompleted) {
await startTokenizationWarmups();
}
return options.tokenizer.tokenizeSubtitle(
text,
options.tokenizer.createTokenizerRuntimeDeps(buildTokenizerDepsHandler()),