mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-02 06:22:42 -08:00
fix: make tokenization warmup one-shot
This commit is contained in:
@@ -4,7 +4,7 @@ title: 'Tokenization performance: disable Yomitan MeCab parser, gate local MeCab
|
|||||||
status: Done
|
status: Done
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-03-02 07:44'
|
created_date: '2026-03-02 07:44'
|
||||||
updated_date: '2026-03-02 07:46'
|
updated_date: '2026-03-02 09:20'
|
||||||
labels: []
|
labels: []
|
||||||
dependencies: []
|
dependencies: []
|
||||||
priority: high
|
priority: high
|
||||||
@@ -43,6 +43,8 @@ Implemented tokenizer latency optimizations:
|
|||||||
- added annotation-aware MeCab initialization gating in runtime warmup flow;
|
- added annotation-aware MeCab initialization gating in runtime warmup flow;
|
||||||
- added persistent local MeCab process (default idle shutdown: 30s) with queued requests, retry-on-process-end, idle auto-shutdown, and automatic restart on new work;
|
- added persistent local MeCab process (default idle shutdown: 30s) with queued requests, retry-on-process-end, idle auto-shutdown, and automatic restart on new work;
|
||||||
- added regression tests for Yomitan parse flag, MeCab warmup gating, and persistent/idle lifecycle behavior;
|
- added regression tests for Yomitan parse flag, MeCab warmup gating, and persistent/idle lifecycle behavior;
|
||||||
|
- fixed the tokenization warmup gate so first-use warmup completion is sticky (`tokenizationWarmupCompleted`) and sequential `tokenizeSubtitle` calls no longer re-run the Yomitan/dictionary warmup path;
|
||||||
|
- added regression coverage in `src/main/runtime/composers/mpv-runtime-composer.test.ts` for sequential tokenize calls (warmup side effects run only once);
|
||||||
- validated with targeted tests and `tsc --noEmit`.
|
- validated with targeted tests and `tsc --noEmit`.
|
||||||
|
|
||||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||||
|
|||||||
@@ -377,3 +377,142 @@ test('composeMpvRuntimeHandlers skips MeCab warmup when all POS-dependent annota
|
|||||||
|
|
||||||
assert.deepEqual(calls, []);
|
assert.deepEqual(calls, []);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('composeMpvRuntimeHandlers runs tokenization warmup once across sequential tokenize calls', async () => {
|
||||||
|
let yomitanWarmupCalls = 0;
|
||||||
|
let prewarmJlptCalls = 0;
|
||||||
|
let prewarmFrequencyCalls = 0;
|
||||||
|
const tokenizeCalls: string[] = [];
|
||||||
|
|
||||||
|
const composed = composeMpvRuntimeHandlers<
|
||||||
|
{ connect: () => void; on: () => void },
|
||||||
|
{ isKnownWord: () => boolean },
|
||||||
|
{ text: string }
|
||||||
|
>({
|
||||||
|
bindMpvMainEventHandlersMainDeps: {
|
||||||
|
appState: {
|
||||||
|
initialArgs: null,
|
||||||
|
overlayRuntimeInitialized: true,
|
||||||
|
mpvClient: null,
|
||||||
|
immersionTracker: null,
|
||||||
|
subtitleTimingTracker: null,
|
||||||
|
currentSubText: '',
|
||||||
|
currentSubAssText: '',
|
||||||
|
playbackPaused: null,
|
||||||
|
previousSecondarySubVisibility: null,
|
||||||
|
},
|
||||||
|
getQuitOnDisconnectArmed: () => false,
|
||||||
|
scheduleQuitCheck: () => {},
|
||||||
|
quitApp: () => {},
|
||||||
|
reportJellyfinRemoteStopped: () => {},
|
||||||
|
syncOverlayMpvSubtitleSuppression: () => {},
|
||||||
|
maybeRunAnilistPostWatchUpdate: async () => {},
|
||||||
|
logSubtitleTimingError: () => {},
|
||||||
|
broadcastToOverlayWindows: () => {},
|
||||||
|
onSubtitleChange: () => {},
|
||||||
|
refreshDiscordPresence: () => {},
|
||||||
|
ensureImmersionTrackerInitialized: () => {},
|
||||||
|
updateCurrentMediaPath: () => {},
|
||||||
|
restoreMpvSubVisibility: () => {},
|
||||||
|
getCurrentAnilistMediaKey: () => null,
|
||||||
|
resetAnilistMediaTracking: () => {},
|
||||||
|
maybeProbeAnilistDuration: () => {},
|
||||||
|
ensureAnilistMediaGuess: () => {},
|
||||||
|
syncImmersionMediaState: () => {},
|
||||||
|
updateCurrentMediaTitle: () => {},
|
||||||
|
resetAnilistMediaGuessState: () => {},
|
||||||
|
reportJellyfinRemoteProgress: () => {},
|
||||||
|
updateSubtitleRenderMetrics: () => {},
|
||||||
|
},
|
||||||
|
mpvClientRuntimeServiceFactoryMainDeps: {
|
||||||
|
createClient: class {
|
||||||
|
connect(): void {}
|
||||||
|
on(): void {}
|
||||||
|
},
|
||||||
|
getSocketPath: () => '/tmp/mpv.sock',
|
||||||
|
getResolvedConfig: () => ({ auto_start_overlay: false }),
|
||||||
|
isAutoStartOverlayEnabled: () => false,
|
||||||
|
setOverlayVisible: () => {},
|
||||||
|
isVisibleOverlayVisible: () => false,
|
||||||
|
getReconnectTimer: () => null,
|
||||||
|
setReconnectTimer: () => {},
|
||||||
|
},
|
||||||
|
updateMpvSubtitleRenderMetricsMainDeps: {
|
||||||
|
getCurrentMetrics: () => BASE_METRICS,
|
||||||
|
setCurrentMetrics: () => {},
|
||||||
|
applyPatch: (current, patch) => ({ next: { ...current, ...patch }, changed: true }),
|
||||||
|
broadcastMetrics: () => {},
|
||||||
|
},
|
||||||
|
tokenizer: {
|
||||||
|
buildTokenizerDepsMainDeps: {
|
||||||
|
getYomitanExt: () => null,
|
||||||
|
getYomitanParserWindow: () => null,
|
||||||
|
setYomitanParserWindow: () => {},
|
||||||
|
getYomitanParserReadyPromise: () => null,
|
||||||
|
setYomitanParserReadyPromise: () => {},
|
||||||
|
getYomitanParserInitPromise: () => null,
|
||||||
|
setYomitanParserInitPromise: () => {},
|
||||||
|
isKnownWord: () => false,
|
||||||
|
recordLookup: () => {},
|
||||||
|
getKnownWordMatchMode: () => 'headword',
|
||||||
|
getNPlusOneEnabled: () => false,
|
||||||
|
getMinSentenceWordsForNPlusOne: () => 3,
|
||||||
|
getJlptLevel: () => null,
|
||||||
|
getJlptEnabled: () => false,
|
||||||
|
getFrequencyDictionaryEnabled: () => false,
|
||||||
|
getFrequencyDictionaryMatchMode: () => 'headword',
|
||||||
|
getFrequencyRank: () => null,
|
||||||
|
getYomitanGroupDebugEnabled: () => false,
|
||||||
|
getMecabTokenizer: () => null,
|
||||||
|
},
|
||||||
|
createTokenizerRuntimeDeps: () => ({ isKnownWord: () => false }),
|
||||||
|
tokenizeSubtitle: async (text) => {
|
||||||
|
tokenizeCalls.push(text);
|
||||||
|
return { text };
|
||||||
|
},
|
||||||
|
createMecabTokenizerAndCheckMainDeps: {
|
||||||
|
getMecabTokenizer: () => null,
|
||||||
|
setMecabTokenizer: () => {},
|
||||||
|
createMecabTokenizer: () => ({ id: 'mecab' }),
|
||||||
|
checkAvailability: async () => {},
|
||||||
|
},
|
||||||
|
prewarmSubtitleDictionariesMainDeps: {
|
||||||
|
ensureJlptDictionaryLookup: async () => {
|
||||||
|
prewarmJlptCalls += 1;
|
||||||
|
},
|
||||||
|
ensureFrequencyDictionaryLookup: async () => {
|
||||||
|
prewarmFrequencyCalls += 1;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
warmups: {
|
||||||
|
launchBackgroundWarmupTaskMainDeps: {
|
||||||
|
now: () => 0,
|
||||||
|
logDebug: () => {},
|
||||||
|
logWarn: () => {},
|
||||||
|
},
|
||||||
|
startBackgroundWarmupsMainDeps: {
|
||||||
|
getStarted: () => false,
|
||||||
|
setStarted: () => {},
|
||||||
|
isTexthookerOnlyMode: () => false,
|
||||||
|
ensureYomitanExtensionLoaded: async () => {
|
||||||
|
yomitanWarmupCalls += 1;
|
||||||
|
},
|
||||||
|
shouldWarmupMecab: () => false,
|
||||||
|
shouldWarmupYomitanExtension: () => false,
|
||||||
|
shouldWarmupSubtitleDictionaries: () => false,
|
||||||
|
shouldWarmupJellyfinRemoteSession: () => false,
|
||||||
|
shouldAutoConnectJellyfinRemote: () => false,
|
||||||
|
startJellyfinRemoteSession: async () => {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await composed.tokenizeSubtitle('first');
|
||||||
|
await composed.tokenizeSubtitle('second');
|
||||||
|
|
||||||
|
assert.deepEqual(tokenizeCalls, ['first', 'second']);
|
||||||
|
assert.equal(yomitanWarmupCalls, 1);
|
||||||
|
assert.equal(prewarmJlptCalls, 1);
|
||||||
|
assert.equal(prewarmFrequencyCalls, 1);
|
||||||
|
});
|
||||||
|
|||||||
@@ -142,7 +142,11 @@ export function composeMpvRuntimeHandlers<
|
|||||||
return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
|
return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
|
||||||
};
|
};
|
||||||
let tokenizationWarmupInFlight: Promise<void> | null = null;
|
let tokenizationWarmupInFlight: Promise<void> | null = null;
|
||||||
|
let tokenizationWarmupCompleted = false;
|
||||||
const startTokenizationWarmups = (): Promise<void> => {
|
const startTokenizationWarmups = (): Promise<void> => {
|
||||||
|
if (tokenizationWarmupCompleted) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
if (!tokenizationWarmupInFlight) {
|
if (!tokenizationWarmupInFlight) {
|
||||||
tokenizationWarmupInFlight = (async () => {
|
tokenizationWarmupInFlight = (async () => {
|
||||||
await options.warmups.startBackgroundWarmupsMainDeps.ensureYomitanExtensionLoaded();
|
await options.warmups.startBackgroundWarmupsMainDeps.ensureYomitanExtensionLoaded();
|
||||||
@@ -153,6 +157,7 @@ export function composeMpvRuntimeHandlers<
|
|||||||
await createMecabTokenizerAndCheck().catch(() => {});
|
await createMecabTokenizerAndCheck().catch(() => {});
|
||||||
}
|
}
|
||||||
await prewarmSubtitleDictionaries({ showLoadingOsd: true });
|
await prewarmSubtitleDictionaries({ showLoadingOsd: true });
|
||||||
|
tokenizationWarmupCompleted = true;
|
||||||
})().finally(() => {
|
})().finally(() => {
|
||||||
tokenizationWarmupInFlight = null;
|
tokenizationWarmupInFlight = null;
|
||||||
});
|
});
|
||||||
@@ -160,7 +165,9 @@ export function composeMpvRuntimeHandlers<
|
|||||||
return tokenizationWarmupInFlight;
|
return tokenizationWarmupInFlight;
|
||||||
};
|
};
|
||||||
const tokenizeSubtitle = async (text: string): Promise<TTokenizedSubtitle> => {
|
const tokenizeSubtitle = async (text: string): Promise<TTokenizedSubtitle> => {
|
||||||
await startTokenizationWarmups();
|
if (!tokenizationWarmupCompleted) {
|
||||||
|
await startTokenizationWarmups();
|
||||||
|
}
|
||||||
return options.tokenizer.tokenizeSubtitle(
|
return options.tokenizer.tokenizeSubtitle(
|
||||||
text,
|
text,
|
||||||
options.tokenizer.createTokenizerRuntimeDeps(buildTokenizerDepsHandler()),
|
options.tokenizer.createTokenizerRuntimeDeps(buildTokenizerDepsHandler()),
|
||||||
|
|||||||
Reference in New Issue
Block a user