diff --git a/backlog/tasks/task-81 - Tokenization-performance-disable-yomitan-mecab-and-persistent-local-mecab.md b/backlog/tasks/task-81 - Tokenization-performance-disable-yomitan-mecab-and-persistent-local-mecab.md new file mode 100644 index 0000000..f809f19 --- /dev/null +++ b/backlog/tasks/task-81 - Tokenization-performance-disable-yomitan-mecab-and-persistent-local-mecab.md @@ -0,0 +1,48 @@ +--- +id: TASK-81 +title: 'Tokenization performance: disable Yomitan MeCab parser, gate local MeCab init, and add persistent MeCab process' +status: Done +assignee: [] +created_date: '2026-03-02 07:44' +updated_date: '2026-03-02 07:46' +labels: [] +dependencies: [] +priority: high +ordinal: 9001 +--- + +## Description + + + +Reduce subtitle annotation latency by: +- disabling Yomitan-side MeCab parser requests (`useMecabParser=false`); +- initializing local MeCab only when POS-dependent annotations are enabled (N+1 / JLPT / frequency); +- replacing per-line local MeCab process spawning with a persistent parser process that auto-shuts down after idle time and restarts on demand. + + + +## Acceptance Criteria + + + +- [x] #1 Yomitan parse requests disable MeCab parser path. +- [x] #2 MeCab warmup/init is skipped when all POS-dependent annotation toggles are off. +- [x] #3 Local MeCab tokenizer uses persistent process across subtitle lines. +- [x] #4 Persistent MeCab process auto-shuts down after idle timeout and restarts on next tokenize activity. +- [x] #5 Tests cover parser flag, warmup gating, and persistent MeCab lifecycle behavior. + + + +## Final Summary + + + +Implemented tokenizer latency optimizations: +- switched Yomitan parse requests to `useMecabParser: false`; +- added annotation-aware MeCab initialization gating in runtime warmup flow; +- added persistent local MeCab process (default idle shutdown: 30s) with queued requests, retry-on-process-end, idle auto-shutdown, and automatic restart on new work; +- added regression tests for Yomitan parse flag, MeCab warmup gating, and persistent/idle lifecycle behavior; +- validated with targeted tests and `tsc --noEmit`. + + diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts index 702f084..c3eb09d 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts @@ -1,6 +1,7 @@ import assert from 'node:assert/strict'; import test from 'node:test'; import { + requestYomitanParseResults, requestYomitanTermFrequencies, syncYomitanDefaultAnkiServer, } from './yomitan-parser-runtime'; @@ -246,3 +247,32 @@ test('requestYomitanTermFrequencies caches repeated term+reading lookups', async const frequencyCalls = scripts.filter((script) => script.includes('getTermFrequencies')).length; assert.equal(frequencyCalls, 1); }); + +test('requestYomitanParseResults disables Yomitan MeCab parser path', async () => { + const scripts: string[] = []; + const deps = createDeps(async (script) => { + scripts.push(script); + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profiles: [ + { + options: { + scanning: { length: 40 }, + }, + }, + ], + }; + } + return []; + }); + + const result = await requestYomitanParseResults('猫です', deps, { + error: () => undefined, + }); + + assert.deepEqual(result, []); + const parseScript = scripts.find((script) => script.includes('parseText')); + assert.ok(parseScript, 'expected parseText request script'); + assert.match(parseScript ?? '', /useMecabParser:\s*false/); +}); diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.ts b/src/core/services/tokenizer/yomitan-parser-runtime.ts index 6b77670..ba24854 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.ts @@ -529,7 +529,7 @@ export async function requestYomitanParseResults( optionsContext: { index: ${metadata.profileIndex} }, scanLength: ${metadata.scanLength}, useInternalParser: true, - useMecabParser: true + useMecabParser: false }); })(); ` @@ -564,7 +564,7 @@ export async function requestYomitanParseResults( optionsContext: { index: profileIndex }, scanLength, useInternalParser: true, - useMecabParser: true + useMecabParser: false }); })(); `; diff --git a/src/main/runtime/composers/mpv-runtime-composer.test.ts b/src/main/runtime/composers/mpv-runtime-composer.test.ts index 10ab215..5267c06 100644 --- a/src/main/runtime/composers/mpv-runtime-composer.test.ts +++ b/src/main/runtime/composers/mpv-runtime-composer.test.ts @@ -236,3 +236,144 @@ test('composeMpvRuntimeHandlers returns callable handlers and forwards to inject assert.ok(calls.includes('warmup-yomitan')); assert.ok(calls.indexOf('create-mecab') < calls.indexOf('set-started:true')); }); + +test('composeMpvRuntimeHandlers skips MeCab warmup when all POS-dependent annotations are disabled', async () => { + const calls: string[] = []; + let mecabTokenizer: { id: string } | null = null; + + class FakeMpvClient { + connected = false; + constructor( + public socketPath: string, + public options: unknown, + ) {} + on(): void {} + connect(): void { + this.connected = true; + } + } + + const composed = composeMpvRuntimeHandlers< + FakeMpvClient, + { isKnownWord: (text: string) => boolean }, + { text: string } + >({ + bindMpvMainEventHandlersMainDeps: { + appState: { + initialArgs: null, + overlayRuntimeInitialized: true, + mpvClient: null, + immersionTracker: null, + subtitleTimingTracker: null, + currentSubText: '', + currentSubAssText: '', + playbackPaused: null, + previousSecondarySubVisibility: null, + }, + getQuitOnDisconnectArmed: () => false, + scheduleQuitCheck: () => {}, + quitApp: () => {}, + reportJellyfinRemoteStopped: () => {}, + syncOverlayMpvSubtitleSuppression: () => {}, + maybeRunAnilistPostWatchUpdate: async () => {}, + logSubtitleTimingError: () => {}, + broadcastToOverlayWindows: () => {}, + onSubtitleChange: () => {}, + refreshDiscordPresence: () => {}, + ensureImmersionTrackerInitialized: () => {}, + updateCurrentMediaPath: () => {}, + restoreMpvSubVisibility: () => {}, + getCurrentAnilistMediaKey: () => null, + resetAnilistMediaTracking: () => {}, + maybeProbeAnilistDuration: () => {}, + ensureAnilistMediaGuess: () => {}, + syncImmersionMediaState: () => {}, + updateCurrentMediaTitle: () => {}, + resetAnilistMediaGuessState: () => {}, + reportJellyfinRemoteProgress: () => {}, + updateSubtitleRenderMetrics: () => {}, + }, + mpvClientRuntimeServiceFactoryMainDeps: { + createClient: FakeMpvClient, + getSocketPath: () => '/tmp/mpv.sock', + getResolvedConfig: () => ({ auto_start_overlay: false }), + isAutoStartOverlayEnabled: () => true, + setOverlayVisible: () => {}, + isVisibleOverlayVisible: () => false, + getReconnectTimer: () => null, + setReconnectTimer: () => {}, + }, + updateMpvSubtitleRenderMetricsMainDeps: { + getCurrentMetrics: () => BASE_METRICS, + setCurrentMetrics: () => {}, + applyPatch: (current, patch) => ({ next: { ...current, ...patch }, changed: true }), + broadcastMetrics: () => {}, + }, + tokenizer: { + buildTokenizerDepsMainDeps: { + getYomitanExt: () => null, + getYomitanParserWindow: () => null, + setYomitanParserWindow: () => {}, + getYomitanParserReadyPromise: () => null, + setYomitanParserReadyPromise: () => {}, + getYomitanParserInitPromise: () => null, + setYomitanParserInitPromise: () => {}, + isKnownWord: () => false, + recordLookup: () => {}, + getKnownWordMatchMode: () => 'headword', + getNPlusOneEnabled: () => false, + getMinSentenceWordsForNPlusOne: () => 3, + getJlptLevel: () => null, + getJlptEnabled: () => false, + getFrequencyDictionaryEnabled: () => false, + getFrequencyDictionaryMatchMode: () => 'headword', + getFrequencyRank: () => null, + getYomitanGroupDebugEnabled: () => false, + getMecabTokenizer: () => null, + }, + createTokenizerRuntimeDeps: () => ({ isKnownWord: () => false }), + tokenizeSubtitle: async (text) => ({ text }), + createMecabTokenizerAndCheckMainDeps: { + getMecabTokenizer: () => mecabTokenizer, + setMecabTokenizer: (next) => { + mecabTokenizer = next as { id: string }; + calls.push('set-mecab'); + }, + createMecabTokenizer: () => { + calls.push('create-mecab'); + return { id: 'mecab' }; + }, + checkAvailability: async () => { + calls.push('check-mecab'); + }, + }, + prewarmSubtitleDictionariesMainDeps: { + ensureJlptDictionaryLookup: async () => {}, + ensureFrequencyDictionaryLookup: async () => {}, + }, + }, + warmups: { + launchBackgroundWarmupTaskMainDeps: { + now: () => 0, + logDebug: () => {}, + logWarn: () => {}, + }, + startBackgroundWarmupsMainDeps: { + getStarted: () => false, + setStarted: () => {}, + isTexthookerOnlyMode: () => false, + ensureYomitanExtensionLoaded: async () => {}, + shouldWarmupMecab: () => false, + shouldWarmupYomitanExtension: () => false, + shouldWarmupSubtitleDictionaries: () => false, + shouldWarmupJellyfinRemoteSession: () => false, + shouldAutoConnectJellyfinRemote: () => false, + startJellyfinRemoteSession: async () => {}, + }, + }, + }); + + await composed.startTokenizationWarmups(); + + assert.deepEqual(calls, []); +}); diff --git a/src/main/runtime/composers/mpv-runtime-composer.ts b/src/main/runtime/composers/mpv-runtime-composer.ts index b1146af..5ed695a 100644 --- a/src/main/runtime/composers/mpv-runtime-composer.ts +++ b/src/main/runtime/composers/mpv-runtime-composer.ts @@ -133,12 +133,23 @@ export function composeMpvRuntimeHandlers< const prewarmSubtitleDictionaries = createPrewarmSubtitleDictionariesMainHandler( options.tokenizer.prewarmSubtitleDictionariesMainDeps, ); + const shouldInitializeMecabForAnnotations = (): boolean => { + const nPlusOneEnabled = + options.tokenizer.buildTokenizerDepsMainDeps.getNPlusOneEnabled?.() !== false; + const jlptEnabled = options.tokenizer.buildTokenizerDepsMainDeps.getJlptEnabled() !== false; + const frequencyEnabled = + options.tokenizer.buildTokenizerDepsMainDeps.getFrequencyDictionaryEnabled() !== false; + return nPlusOneEnabled || jlptEnabled || frequencyEnabled; + }; let tokenizationWarmupInFlight: Promise | null = null; const startTokenizationWarmups = (): Promise => { if (!tokenizationWarmupInFlight) { tokenizationWarmupInFlight = (async () => { await options.warmups.startBackgroundWarmupsMainDeps.ensureYomitanExtensionLoaded(); - if (!options.tokenizer.createMecabTokenizerAndCheckMainDeps.getMecabTokenizer()) { + if ( + shouldInitializeMecabForAnnotations() && + !options.tokenizer.createMecabTokenizerAndCheckMainDeps.getMecabTokenizer() + ) { await createMecabTokenizerAndCheck().catch(() => {}); } await prewarmSubtitleDictionaries({ showLoadingOsd: true }); diff --git a/src/mecab-tokenizer.test.ts b/src/mecab-tokenizer.test.ts new file mode 100644 index 0000000..7a4ebfa --- /dev/null +++ b/src/mecab-tokenizer.test.ts @@ -0,0 +1,114 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { EventEmitter } from 'node:events'; +import * as childProcess from 'node:child_process'; +import { PassThrough, Writable } from 'node:stream'; +import { MecabTokenizer } from './mecab-tokenizer'; + +function createFakeMecabProcess(onKill: () => void): ReturnType { + const stdout = new PassThrough(); + const stderr = new PassThrough(); + const stdin = new Writable({ + write(chunk, _encoding, callback) { + const text = String(chunk).replace(/\n+$/, '').trim(); + if (!text) { + stdout.write('EOS\n'); + callback(); + return; + } + + const payload = `${text}\t名詞,一般,*,*,*,*,${text},${text},${text}\nEOS\n`; + stdout.write(payload); + callback(); + }, + }); + + const process = new EventEmitter() as unknown as ReturnType & { + stdin: Writable; + stdout: PassThrough; + stderr: PassThrough; + }; + process.stdin = stdin; + process.stdout = stdout; + process.stderr = stderr; + process.kill = () => { + onKill(); + process.emit('close', 0); + return true; + }; + return process; +} + +test('MecabTokenizer reuses a persistent parser process across subtitle lines', async () => { + let spawnCalls = 0; + let killCalls = 0; + let timerId = 0; + const timers = new Map void>(); + + const tokenizer = new MecabTokenizer({ + execSyncFn: (() => '/usr/bin/mecab') as unknown as typeof childProcess.execSync, + spawnFn: (() => { + spawnCalls += 1; + return createFakeMecabProcess(() => { + killCalls += 1; + }); + }) as unknown as typeof childProcess.spawn, + setTimeoutFn: (callback) => { + timerId += 1; + timers.set(timerId, callback); + return timerId as unknown as ReturnType; + }, + clearTimeoutFn: (timeout) => { + timers.delete(timeout as unknown as number); + }, + idleShutdownMs: 60_000, + }); + + assert.equal(await tokenizer.checkAvailability(), true); + + const first = await tokenizer.tokenize('猫'); + const second = await tokenizer.tokenize('犬'); + + assert.equal(first?.[0]?.word, '猫'); + assert.equal(second?.[0]?.word, '犬'); + assert.equal(spawnCalls, 1); + assert.equal(killCalls, 0); +}); + +test('MecabTokenizer shuts down after idle timeout and restarts on new activity', async () => { + let spawnCalls = 0; + let killCalls = 0; + let timerId = 0; + const timers = new Map void>(); + + const tokenizer = new MecabTokenizer({ + execSyncFn: (() => '/usr/bin/mecab') as unknown as typeof childProcess.execSync, + spawnFn: (() => { + spawnCalls += 1; + return createFakeMecabProcess(() => { + killCalls += 1; + }); + }) as unknown as typeof childProcess.spawn, + setTimeoutFn: (callback) => { + timerId += 1; + timers.set(timerId, callback); + return timerId as unknown as ReturnType; + }, + clearTimeoutFn: (timeout) => { + timers.delete(timeout as unknown as number); + }, + idleShutdownMs: 5_000, + }); + + assert.equal(await tokenizer.checkAvailability(), true); + await tokenizer.tokenize('猫'); + assert.equal(spawnCalls, 1); + + const pendingTimer = [...timers.values()][0]; + assert.ok(pendingTimer, 'expected idle shutdown timer'); + pendingTimer?.(); + assert.equal(killCalls, 1); + + await tokenizer.tokenize('犬'); + assert.equal(spawnCalls, 2); +}); diff --git a/src/mecab-tokenizer.ts b/src/mecab-tokenizer.ts index e4560c6..e64d346 100644 --- a/src/mecab-tokenizer.ts +++ b/src/mecab-tokenizer.ts @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -import { spawn, execSync } from 'child_process'; +import * as childProcess from 'child_process'; import { PartOfSpeech, Token, MecabStatus } from './types'; import { createLogger } from './logger'; @@ -89,18 +89,58 @@ export function parseMecabLine(line: string): Token | null { export interface MecabTokenizerOptions { mecabCommand?: string; dictionaryPath?: string; + idleShutdownMs?: number; + spawnFn?: typeof childProcess.spawn; + execSyncFn?: typeof childProcess.execSync; + setTimeoutFn?: (callback: () => void, delayMs: number) => ReturnType; + clearTimeoutFn?: (timer: ReturnType) => void; +} + +interface MecabQueuedRequest { + text: string; + retryCount: number; + resolve: (tokens: Token[] | null) => void; +} + +interface MecabActiveRequest extends MecabQueuedRequest { + lines: string[]; + stderr: string; } export class MecabTokenizer { + private static readonly DEFAULT_IDLE_SHUTDOWN_MS = 30_000; + private static readonly MAX_RETRY_COUNT = 1; + private mecabPath: string | null = null; private mecabCommand: string; private dictionaryPath: string | null; private available: boolean = false; private enabled: boolean = true; + private idleShutdownMs: number; + private readonly spawnFn: typeof childProcess.spawn; + private readonly execSyncFn: typeof childProcess.execSync; + private readonly setTimeoutFn: ( + callback: () => void, + delayMs: number, + ) => ReturnType; + private readonly clearTimeoutFn: (timer: ReturnType) => void; + private mecabProcess: ReturnType | null = null; + private idleShutdownTimer: ReturnType | null = null; + private stdoutBuffer = ''; + private requestQueue: MecabQueuedRequest[] = []; + private activeRequest: MecabActiveRequest | null = null; constructor(options: MecabTokenizerOptions = {}) { this.mecabCommand = options.mecabCommand?.trim() || 'mecab'; this.dictionaryPath = options.dictionaryPath?.trim() || null; + this.idleShutdownMs = Math.max( + 0, + Math.floor(options.idleShutdownMs ?? MecabTokenizer.DEFAULT_IDLE_SHUTDOWN_MS), + ); + this.spawnFn = options.spawnFn ?? childProcess.spawn; + this.execSyncFn = options.execSyncFn ?? childProcess.execSync; + this.setTimeoutFn = options.setTimeoutFn ?? ((callback, delayMs) => setTimeout(callback, delayMs)); + this.clearTimeoutFn = options.clearTimeoutFn ?? ((timer) => clearTimeout(timer)); } async checkAvailability(): Promise { @@ -108,9 +148,10 @@ export class MecabTokenizer { const command = this.mecabCommand; const result = command.includes('/') ? command - : execSync(`which ${command}`, { encoding: 'utf-8' }).trim(); - if (result) { - this.mecabPath = result; + : this.execSyncFn(`which ${command}`, { encoding: 'utf-8' }); + const resolvedPath = String(result).trim(); + if (resolvedPath) { + this.mecabPath = resolvedPath; this.available = true; log.info('MeCab found at:', this.mecabPath); return true; @@ -119,81 +160,259 @@ export class MecabTokenizer { log.info('MeCab not found on system'); } + this.stopPersistentProcess(); this.available = false; return false; } async tokenize(text: string): Promise { - if (!this.available || !this.enabled || !text) { + const normalizedText = text.replace(/\r?\n/g, ' ').trim(); + if (!this.available || !this.enabled || !normalizedText) { return null; } return new Promise((resolve) => { - const mecabArgs: string[] = []; - if (this.dictionaryPath) { - mecabArgs.push('-d', this.dictionaryPath); - } - const mecab = spawn(this.mecabPath ?? this.mecabCommand, mecabArgs, { + this.clearIdleShutdownTimer(); + this.requestQueue.push({ + text: normalizedText, + retryCount: 0, + resolve, + }); + this.processQueue(); + }); + } + + private processQueue(): void { + if (this.activeRequest) { + return; + } + + const request = this.requestQueue.shift(); + if (!request) { + this.scheduleIdleShutdown(); + return; + } + + if (!this.ensurePersistentProcess()) { + this.retryOrResolveRequest(request); + this.processQueue(); + return; + } + + this.activeRequest = { + ...request, + lines: [], + stderr: '', + }; + + try { + this.mecabProcess?.stdin?.write(`${request.text}\n`); + } catch (error) { + log.error('Failed to write to MeCab process:', (error as Error).message); + this.retryOrResolveRequest(request); + this.activeRequest = null; + this.stopPersistentProcess(); + this.processQueue(); + } + } + + private retryOrResolveRequest(request: MecabQueuedRequest): void { + if ( + request.retryCount < MecabTokenizer.MAX_RETRY_COUNT && + this.enabled && + this.available + ) { + this.requestQueue.push({ + ...request, + retryCount: request.retryCount + 1, + }); + return; + } + request.resolve(null); + } + + private ensurePersistentProcess(): boolean { + if (this.mecabProcess) { + return true; + } + + const mecabArgs: string[] = []; + if (this.dictionaryPath) { + mecabArgs.push('-d', this.dictionaryPath); + } + + let mecab: ReturnType; + try { + mecab = this.spawnFn(this.mecabPath ?? this.mecabCommand, mecabArgs, { stdio: ['pipe', 'pipe', 'pipe'], }); + } catch (error) { + log.error('Failed to spawn MeCab:', (error as Error).message); + return false; + } - let stdout = ''; - let stderr = ''; + if (!mecab.stdin || !mecab.stdout || !mecab.stderr) { + log.error('Failed to spawn MeCab: missing stdio pipes'); + try { + mecab.kill(); + } catch {} + return false; + } - mecab.stdout.on('data', (data: Buffer) => { - stdout += data.toString(); - }); - - mecab.stderr.on('data', (data: Buffer) => { - stderr += data.toString(); - }); - - mecab.on('close', (code: number | null) => { - if (code !== 0) { - log.error('MeCab process exited with code:', code); - if (stderr) { - log.error('MeCab stderr:', stderr); - } - resolve(null); - return; - } - - const lines = stdout.split('\n'); - const tokens: Token[] = []; - - for (const line of lines) { - const token = parseMecabLine(line); - if (token) { - tokens.push(token); - } - } - - if (tokens.length === 0 && text.trim().length > 0) { - const trimmedStdout = stdout.trim(); - const trimmedStderr = stderr.trim(); - if (trimmedStdout) { - log.warn( - 'MeCab returned no parseable tokens.', - `command=${this.mecabPath ?? this.mecabCommand}`, - `stdout=${trimmedStdout.slice(0, 1024)}`, - ); - } - if (trimmedStderr) { - log.warn('MeCab stderr while tokenizing:', trimmedStderr); - } - } - - resolve(tokens); - }); - - mecab.on('error', (err: Error) => { - log.error('Failed to spawn MeCab:', err.message); - resolve(null); - }); - - mecab.stdin.write(text); - mecab.stdin.end(); + this.stdoutBuffer = ''; + mecab.stdout.on('data', (data: Buffer | string) => { + this.handleStdoutChunk(data.toString()); }); + mecab.stderr.on('data', (data: Buffer | string) => { + if (!this.activeRequest) { + return; + } + this.activeRequest.stderr += data.toString(); + }); + mecab.on('error', (error: Error) => { + this.handlePersistentProcessEnded(mecab, `spawn error: ${error.message}`); + }); + mecab.on('close', (code: number | null) => { + this.handlePersistentProcessEnded(mecab, `exit code ${String(code)}`); + }); + + this.mecabProcess = mecab; + return true; + } + + private handleStdoutChunk(chunk: string): void { + this.stdoutBuffer += chunk; + while (true) { + const newlineIndex = this.stdoutBuffer.indexOf('\n'); + if (newlineIndex === -1) { + break; + } + const line = this.stdoutBuffer.slice(0, newlineIndex).replace(/\r$/, ''); + this.stdoutBuffer = this.stdoutBuffer.slice(newlineIndex + 1); + this.handleStdoutLine(line); + } + } + + private handleStdoutLine(line: string): void { + if (!this.activeRequest) { + return; + } + if (line === 'EOS') { + this.resolveActiveRequest(); + return; + } + if (!line.trim()) { + return; + } + this.activeRequest.lines.push(line); + } + + private resolveActiveRequest(): void { + const current = this.activeRequest; + if (!current) { + return; + } + this.activeRequest = null; + + const tokens: Token[] = []; + for (const line of current.lines) { + const token = parseMecabLine(line); + if (token) { + tokens.push(token); + } + } + + if (tokens.length === 0 && current.text.trim().length > 0) { + const trimmedStdout = current.lines.join('\n').trim(); + const trimmedStderr = current.stderr.trim(); + if (trimmedStdout) { + log.warn( + 'MeCab returned no parseable tokens.', + `command=${this.mecabPath ?? this.mecabCommand}`, + `stdout=${trimmedStdout.slice(0, 1024)}`, + ); + } + if (trimmedStderr) { + log.warn('MeCab stderr while tokenizing:', trimmedStderr); + } + } + + current.resolve(tokens); + this.processQueue(); + } + + private handlePersistentProcessEnded( + process: ReturnType, + reason: string, + ): void { + if (this.mecabProcess !== process) { + return; + } + + this.mecabProcess = null; + this.stdoutBuffer = ''; + this.clearIdleShutdownTimer(); + + const pending: MecabQueuedRequest[] = []; + if (this.activeRequest) { + pending.push({ + text: this.activeRequest.text, + retryCount: this.activeRequest.retryCount, + resolve: this.activeRequest.resolve, + }); + } + this.activeRequest = null; + if (this.requestQueue.length > 0) { + pending.push(...this.requestQueue); + } + this.requestQueue = []; + + if (pending.length > 0) { + log.warn(`MeCab parser process ended during active work (${reason}); retrying pending request(s).`); + for (const request of pending) { + this.retryOrResolveRequest(request); + } + this.processQueue(); + } + } + + private scheduleIdleShutdown(): void { + this.clearIdleShutdownTimer(); + if (this.idleShutdownMs <= 0 || !this.mecabProcess) { + return; + } + this.idleShutdownTimer = this.setTimeoutFn(() => { + this.idleShutdownTimer = null; + if (this.activeRequest || this.requestQueue.length > 0) { + return; + } + this.stopPersistentProcess(); + }, this.idleShutdownMs); + const timerWithUnref = this.idleShutdownTimer as { unref?: () => void }; + if (typeof timerWithUnref.unref === 'function') { + timerWithUnref.unref(); + } + } + + private clearIdleShutdownTimer(): void { + if (!this.idleShutdownTimer) { + return; + } + this.clearTimeoutFn(this.idleShutdownTimer); + this.idleShutdownTimer = null; + } + + private stopPersistentProcess(): void { + const process = this.mecabProcess; + if (!process) { + return; + } + this.mecabProcess = null; + this.stdoutBuffer = ''; + this.clearIdleShutdownTimer(); + try { + process.kill(); + } catch {} } getStatus(): MecabStatus { @@ -206,6 +425,25 @@ export class MecabTokenizer { setEnabled(enabled: boolean): void { this.enabled = enabled; + if (!enabled) { + const pending: MecabQueuedRequest[] = []; + if (this.activeRequest) { + pending.push({ + text: this.activeRequest.text, + retryCount: MecabTokenizer.MAX_RETRY_COUNT, + resolve: this.activeRequest.resolve, + }); + } + if (this.requestQueue.length > 0) { + pending.push(...this.requestQueue); + } + this.activeRequest = null; + this.requestQueue = []; + for (const request of pending) { + request.resolve(null); + } + this.stopPersistentProcess(); + } } }