mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-02 06:22:42 -08:00
feat(anki): add proxy transport and tokenizer annotation controls
This commit is contained in:
@@ -41,7 +41,13 @@ test('initializeOverlayRuntime skips Anki integration when ankiConnect.enabled i
|
||||
setIntegrationCalls += 1;
|
||||
},
|
||||
showDesktopNotification: () => {},
|
||||
createFieldGroupingCallback: () => async () => 'auto',
|
||||
createFieldGroupingCallback: () =>
|
||||
async () => ({
|
||||
keepNoteId: 1,
|
||||
deleteNoteId: 2,
|
||||
deleteDuplicate: false,
|
||||
cancelled: false,
|
||||
}),
|
||||
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
|
||||
});
|
||||
|
||||
@@ -90,7 +96,13 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
|
||||
setIntegrationCalls += 1;
|
||||
},
|
||||
showDesktopNotification: () => {},
|
||||
createFieldGroupingCallback: () => async () => 'manual',
|
||||
createFieldGroupingCallback: () =>
|
||||
async () => ({
|
||||
keepNoteId: 3,
|
||||
deleteNoteId: 4,
|
||||
deleteDuplicate: false,
|
||||
cancelled: false,
|
||||
}),
|
||||
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
|
||||
});
|
||||
|
||||
|
||||
@@ -1696,3 +1696,169 @@ test('createTokenizerDepsRuntime checks MeCab availability before first tokenize
|
||||
assert.equal(first?.[0]?.surface, '仮面');
|
||||
assert.equal(second?.[0]?.surface, '仮面');
|
||||
});
|
||||
|
||||
// When deps supply their own enrichTokensWithMecab, its output must be what ends up on the tokens.
test('tokenizeSubtitle uses async MeCab enrichment override when provided', async () => {
  // Built per call so every tokenizeWithMecab invocation gets a fresh token object.
  const buildMecabNoun = () => ({
    headword: '猫',
    surface: '猫',
    reading: 'ネコ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  });

  const deps = makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
    tokenizeWithMecab: async () => [buildMecabNoun()],
    enrichTokensWithMecab: async (tokens) =>
      tokens.map((token) => ({ ...token, pos1: 'override-pos' })),
  });

  const result = await tokenizeSubtitle('猫', deps);
  const firstToken = result.tokens?.[0];

  assert.equal(result.tokens?.length, 1);
  assert.equal(firstToken?.pos1, 'override-pos');
});
|
||||
|
||||
// The runtime-built deps must expose enrichTokensWithMecab and copy pos1 from the MeCab token.
test('createTokenizerDepsRuntime exposes async MeCab enrichment helper', async () => {
  const runtimeDeps = createTokenizerDepsRuntime({
    getYomitanExt: () => null,
    getYomitanParserWindow: () => null,
    setYomitanParserWindow: () => {},
    getYomitanParserReadyPromise: () => null,
    setYomitanParserReadyPromise: () => {},
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: () => {},
    isKnownWord: () => false,
    getKnownWordMatchMode: () => 'headword',
    getJlptLevel: () => null,
    getMecabTokenizer: () => null,
  });

  // Token as selected from Yomitan: it carries no pos1 yet.
  const yomitanParticle = {
    headword: 'は',
    surface: 'は',
    reading: 'は',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.other,
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };

  // MeCab token covering the same span, carrying the pos1 the test expects to be applied.
  const mecabParticle = {
    headword: 'は',
    surface: 'は',
    reading: 'ハ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.particle,
    pos1: '助詞',
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  };

  const enriched = await runtimeDeps.enrichTokensWithMecab?.([yomitanParticle], [mecabParticle]);

  assert.equal(enriched?.[0]?.pos1, '助詞');
});
|
||||
|
||||
// With n+1, JLPT and frequency all switched off, none of the lookup hooks may be invoked.
test('tokenizeSubtitle skips all enrichment stages when disabled', async () => {
  const calls = { known: 0, mecab: 0, jlpt: 0, frequency: 0 };

  const deps = makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
    isKnownWord: () => {
      calls.known += 1;
      return true;
    },
    getNPlusOneEnabled: () => false,
    getJlptEnabled: () => false,
    getFrequencyDictionaryEnabled: () => false,
    getJlptLevel: () => {
      calls.jlpt += 1;
      return 'N5';
    },
    getFrequencyRank: () => {
      calls.frequency += 1;
      return 10;
    },
    tokenizeWithMecab: async () => {
      calls.mecab += 1;
      return null;
    },
  });

  const result = await tokenizeSubtitle('猫', deps);
  const firstToken = result.tokens?.[0];

  assert.equal(result.tokens?.length, 1);
  assert.equal(firstToken?.isKnown, false);
  assert.equal(firstToken?.isNPlusOneTarget, false);
  assert.equal(firstToken?.jlptLevel, undefined);
  assert.equal(firstToken?.frequencyRank, undefined);
  assert.equal(calls.known, 0);
  assert.equal(calls.mecab, 0);
  assert.equal(calls.jlpt, 0);
  assert.equal(calls.frequency, 0);
});
|
||||
|
||||
// Frequency ranking must keep working (including the MeCab pass) while known-word lookups stay off.
test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async () => {
  const calls = { known: 0, mecab: 0, frequency: 0 };

  // Built per call so every tokenizeWithMecab invocation gets a fresh token object.
  const buildMecabNoun = () => ({
    headword: '猫',
    surface: '猫',
    reading: 'ネコ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  });

  const deps = makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
    isKnownWord: () => {
      calls.known += 1;
      return true;
    },
    getNPlusOneEnabled: () => false,
    getJlptEnabled: () => false,
    getFrequencyDictionaryEnabled: () => true,
    getFrequencyRank: () => {
      calls.frequency += 1;
      return 7;
    },
    tokenizeWithMecab: async () => {
      calls.mecab += 1;
      return [buildMecabNoun()];
    },
  });

  const result = await tokenizeSubtitle('猫', deps);
  const firstToken = result.tokens?.[0];

  assert.equal(firstToken?.frequencyRank, 7);
  assert.equal(firstToken?.isKnown, false);
  assert.equal(calls.known, 0);
  assert.equal(calls.mecab, 1);
  assert.equal(calls.frequency, 1);
});
|
||||
|
||||
@@ -9,13 +9,16 @@ import {
|
||||
FrequencyDictionaryLookup,
|
||||
JlptLevel,
|
||||
} from '../../types';
|
||||
import { annotateTokens } from './tokenizer/annotation-stage';
|
||||
import { enrichTokensWithMecabPos1 } from './tokenizer/parser-enrichment-stage';
|
||||
import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage';
|
||||
import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime';
|
||||
|
||||
const logger = createLogger('main:tokenizer');
|
||||
|
||||
type MecabTokenEnrichmentFn = (
|
||||
tokens: MergedToken[],
|
||||
mecabTokens: MergedToken[] | null,
|
||||
) => Promise<MergedToken[]>;
|
||||
|
||||
export interface TokenizerServiceDeps {
|
||||
getYomitanExt: () => Extension | null;
|
||||
getYomitanParserWindow: () => BrowserWindow | null;
|
||||
@@ -27,12 +30,14 @@ export interface TokenizerServiceDeps {
|
||||
isKnownWord: (text: string) => boolean;
|
||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
getMinSentenceWordsForNPlusOne?: () => number;
|
||||
getYomitanGroupDebugEnabled?: () => boolean;
|
||||
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
|
||||
enrichTokensWithMecab?: MecabTokenEnrichmentFn;
|
||||
}
|
||||
|
||||
interface MecabTokenizerLike {
|
||||
@@ -52,6 +57,7 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
isKnownWord: (text: string) => boolean;
|
||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
@@ -60,6 +66,82 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
getMecabTokenizer: () => MecabTokenizerLike | null;
|
||||
}
|
||||
|
||||
/** Annotation switches resolved once per tokenize call and passed to the stages in this file. */
interface TokenizerAnnotationOptions {
  /** When false, known-word lookups are replaced by a predicate that always answers false. */
  nPlusOneEnabled: boolean;
  /** JLPT annotation switch; together with frequencyEnabled it gates the MeCab POS pass. */
  jlptEnabled: boolean;
  /** Frequency annotation switch; together with jlptEnabled it gates the MeCab POS pass. */
  frequencyEnabled: boolean;
  /** Forwarded unchanged to the annotation stage; undefined when the dep is not provided. */
  minSentenceWordsForNPlusOne: number | undefined;
}

// Cached dynamic-import promises: each stays null until the first time its module is requested,
// after which the same promise is reused.
let parserEnrichmentWorkerRuntimeModulePromise: Promise<
  typeof import('./tokenizer/parser-enrichment-worker-runtime')
> | null = null;

let annotationStageModulePromise: Promise<
  typeof import('./tokenizer/annotation-stage')
> | null = null;

let parserEnrichmentFallbackModulePromise: Promise<
  typeof import('./tokenizer/parser-enrichment-stage')
> | null = null;
|
||||
|
||||
/**
 * Chooses the known-word predicate for the current annotation options.
 *
 * @param deps - Tokenizer deps supplying the real `isKnownWord` lookup.
 * @param options - Annotation switches; only `nPlusOneEnabled` is consulted.
 * @returns `deps.isKnownWord` when n+1 is enabled, otherwise a predicate that always returns false.
 */
function getKnownWordLookup(
  deps: TokenizerServiceDeps,
  options: TokenizerAnnotationOptions,
): (text: string) => boolean {
  return options.nPlusOneEnabled ? deps.isKnownWord : () => false;
}
|
||||
|
||||
/**
 * Reports whether the MeCab POS enrichment pass is required.
 *
 * @param options - Annotation switches for the current call.
 * @returns True when JLPT or frequency annotation is enabled; n+1 alone does not require it.
 */
function needsMecabPosEnrichment(options: TokenizerAnnotationOptions): boolean {
  const { jlptEnabled, frequencyEnabled } = options;
  return jlptEnabled || frequencyEnabled;
}
|
||||
|
||||
/**
 * Reports whether at least one annotation feature is switched on.
 *
 * @param options - Annotation switches for the current call.
 * @returns True when n+1, JLPT, or frequency annotation is enabled.
 */
function hasAnyAnnotationEnabled(options: TokenizerAnnotationOptions): boolean {
  const { nPlusOneEnabled, jlptEnabled, frequencyEnabled } = options;
  return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
}
|
||||
|
||||
/**
 * Enriches tokens with MeCab POS data, preferring the worker-backed runtime module.
 *
 * The worker runtime module is imported lazily and its import promise is cached. If importing
 * or running it fails, the failure is logged at debug level and the synchronous
 * `parser-enrichment-stage` implementation is loaded (also lazily) and used instead.
 *
 * @param tokens - Tokens to enrich.
 * @param mecabTokens - MeCab tokens for the same text, or null when none are available.
 * @returns The enriched tokens.
 * @throws Whatever the fallback import or the fallback enrichment itself throws.
 */
async function enrichTokensWithMecabAsync(
  tokens: MergedToken[],
  mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
  if (!parserEnrichmentWorkerRuntimeModulePromise) {
    parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime');
  }

  try {
    const runtime = await parserEnrichmentWorkerRuntimeModulePromise;
    return await runtime.enrichTokensWithMecabPos1Async(tokens, mecabTokens);
  } catch (err) {
    // Best-effort path: never fail tokenization because the worker runtime is unavailable,
    // but record why so a permanent fallback to the main thread can be diagnosed.
    const reason = err instanceof Error ? err.message : String(err);
    logger.debug(`MeCab enrichment worker runtime unavailable, using main-thread fallback: ${reason}`);

    if (!parserEnrichmentFallbackModulePromise) {
      parserEnrichmentFallbackModulePromise = import('./tokenizer/parser-enrichment-stage');
    }
    const fallback = await parserEnrichmentFallbackModulePromise;
    return fallback.enrichTokensWithMecabPos1(tokens, mecabTokens);
  }
}
|
||||
|
||||
/**
 * Runs the annotation stage over already-selected tokens.
 *
 * Returns the input untouched when every annotation feature is off, so the annotation module
 * is only imported (lazily, with the import promise cached) when it is actually needed.
 *
 * @param tokens - Tokens to annotate.
 * @param deps - Tokenizer deps providing the lookup callbacks.
 * @param options - Annotation switches; also forwarded to `annotateTokens`.
 * @returns The annotated tokens, or `tokens` itself when nothing is enabled.
 */
async function applyAnnotationStage(
  tokens: MergedToken[],
  deps: TokenizerServiceDeps,
  options: TokenizerAnnotationOptions,
): Promise<MergedToken[]> {
  if (hasAnyAnnotationEnabled(options)) {
    if (!annotationStageModulePromise) {
      annotationStageModulePromise = import('./tokenizer/annotation-stage');
    }
    const { annotateTokens } = await annotationStageModulePromise;

    const stageDeps = {
      // Collapses to an always-false predicate when n+1 is disabled.
      isKnownWord: getKnownWordLookup(deps, options),
      knownWordMatchMode: deps.getKnownWordMatchMode(),
      getJlptLevel: deps.getJlptLevel,
      getFrequencyRank: deps.getFrequencyRank,
    };
    return annotateTokens(tokens, stageDeps, options);
  }

  return tokens;
}
|
||||
|
||||
export function createTokenizerDepsRuntime(
|
||||
options: TokenizerDepsRuntimeOptions,
|
||||
): TokenizerServiceDeps {
|
||||
@@ -76,6 +158,7 @@ export function createTokenizerDepsRuntime(
|
||||
isKnownWord: options.isKnownWord,
|
||||
getKnownWordMatchMode: options.getKnownWordMatchMode,
|
||||
getJlptLevel: options.getJlptLevel,
|
||||
getNPlusOneEnabled: options.getNPlusOneEnabled,
|
||||
getJlptEnabled: options.getJlptEnabled,
|
||||
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
|
||||
getFrequencyRank: options.getFrequencyRank,
|
||||
@@ -104,8 +187,11 @@ export function createTokenizerDepsRuntime(
|
||||
return null;
|
||||
}
|
||||
|
||||
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode());
|
||||
const isKnownWordLookup = options.getNPlusOneEnabled?.() === false ? () => false : options.isKnownWord;
|
||||
return mergeTokens(rawTokens, isKnownWordLookup, options.getKnownWordMatchMode());
|
||||
},
|
||||
enrichTokensWithMecab: async (tokens, mecabTokens) =>
|
||||
enrichTokensWithMecabAsync(tokens, mecabTokens),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -128,36 +214,19 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void {
|
||||
});
|
||||
}
|
||||
|
||||
function getAnnotationOptions(deps: TokenizerServiceDeps): {
|
||||
jlptEnabled: boolean;
|
||||
frequencyEnabled: boolean;
|
||||
minSentenceWordsForNPlusOne: number | undefined;
|
||||
} {
|
||||
function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions {
|
||||
return {
|
||||
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
|
||||
jlptEnabled: deps.getJlptEnabled?.() !== false,
|
||||
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
|
||||
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
|
||||
};
|
||||
}
|
||||
|
||||
function applyAnnotationStage(tokens: MergedToken[], deps: TokenizerServiceDeps): MergedToken[] {
|
||||
const options = getAnnotationOptions(deps);
|
||||
|
||||
return annotateTokens(
|
||||
tokens,
|
||||
{
|
||||
isKnownWord: deps.isKnownWord,
|
||||
knownWordMatchMode: deps.getKnownWordMatchMode(),
|
||||
getJlptLevel: deps.getJlptLevel,
|
||||
getFrequencyRank: deps.getFrequencyRank,
|
||||
},
|
||||
options,
|
||||
);
|
||||
}
|
||||
|
||||
async function parseWithYomitanInternalParser(
|
||||
text: string,
|
||||
deps: TokenizerServiceDeps,
|
||||
options: TokenizerAnnotationOptions,
|
||||
): Promise<MergedToken[] | null> {
|
||||
const parseResults = await requestYomitanParseResults(text, deps, logger);
|
||||
if (!parseResults) {
|
||||
@@ -166,7 +235,7 @@ async function parseWithYomitanInternalParser(
|
||||
|
||||
const selectedTokens = selectYomitanParseTokens(
|
||||
parseResults,
|
||||
deps.isKnownWord,
|
||||
getKnownWordLookup(deps, options),
|
||||
deps.getKnownWordMatchMode(),
|
||||
);
|
||||
if (!selectedTokens || selectedTokens.length === 0) {
|
||||
@@ -177,9 +246,14 @@ async function parseWithYomitanInternalParser(
|
||||
logSelectedYomitanGroups(text, selectedTokens);
|
||||
}
|
||||
|
||||
if (!needsMecabPosEnrichment(options)) {
|
||||
return selectedTokens;
|
||||
}
|
||||
|
||||
try {
|
||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
return enrichTokensWithMecabPos1(selectedTokens, mecabTokens);
|
||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logger.warn(
|
||||
@@ -207,12 +281,13 @@ export async function tokenizeSubtitle(
|
||||
}
|
||||
|
||||
const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
|
||||
const annotationOptions = getAnnotationOptions(deps);
|
||||
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
|
||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||
return {
|
||||
text: displayText,
|
||||
tokens: applyAnnotationStage(yomitanTokens, deps),
|
||||
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ export interface AnnotationStageDeps {
|
||||
}
|
||||
|
||||
export interface AnnotationStageOptions {
|
||||
nPlusOneEnabled?: boolean;
|
||||
jlptEnabled?: boolean;
|
||||
frequencyEnabled?: boolean;
|
||||
minSentenceWordsForNPlusOne?: number;
|
||||
@@ -340,11 +341,14 @@ export function annotateTokens(
|
||||
deps: AnnotationStageDeps,
|
||||
options: AnnotationStageOptions = {},
|
||||
): MergedToken[] {
|
||||
const knownMarkedTokens = applyKnownWordMarking(
|
||||
tokens,
|
||||
deps.isKnownWord,
|
||||
deps.knownWordMatchMode,
|
||||
);
|
||||
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
|
||||
const knownMarkedTokens = nPlusOneEnabled
|
||||
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: tokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
|
||||
const frequencyEnabled = options.frequencyEnabled !== false;
|
||||
const frequencyMarkedTokens =
|
||||
@@ -363,6 +367,14 @@ export function annotateTokens(
|
||||
jlptLevel: undefined,
|
||||
}));
|
||||
|
||||
if (!nPlusOneEnabled) {
|
||||
return jlptMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
}
|
||||
|
||||
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
|
||||
const sanitizedMinSentenceWordsForNPlusOne =
|
||||
minSentenceWordsForNPlusOne !== undefined &&
|
||||
|
||||
147
src/core/services/tokenizer/parser-enrichment-worker-runtime.ts
Normal file
147
src/core/services/tokenizer/parser-enrichment-worker-runtime.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import type { MergedToken } from '../../../types';
|
||||
import { createLogger } from '../../../logger';
|
||||
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
|
||||
|
||||
const logger = createLogger('main:tokenizer');
|
||||
const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER';
|
||||
|
||||
/** Message posted to the enrichment worker; `id` is used to match the reply to its request. */
interface WorkerRequest {
  id: number;
  tokens: MergedToken[];
  mecabTokens: MergedToken[] | null;
}

/** Reply received from the worker. Every field is untrusted and validated before use. */
interface WorkerResponse {
  id?: unknown;
  result?: unknown;
  error?: unknown;
}

/** Settle callbacks of an in-flight request, stored until its reply (or a worker failure) arrives. */
interface PendingRequest {
  resolve: (value: MergedToken[]) => void;
  reject: (reason?: unknown) => void;
}
|
||||
|
||||
/**
 * Runs MeCab POS enrichment on a lazily started worker thread.
 *
 * Worker start-up is attempted at most once. Whenever no worker is available (disabled through
 * the environment, start-up failed, or the worker has gone away) enrichment runs synchronously
 * on the calling thread instead. Requests and replies are correlated by a numeric id.
 */
class ParserEnrichmentWorkerRuntime {
  private worker: import('node:worker_threads').Worker | null = null;
  private nextRequestId = 1;
  private pending = new Map<number, PendingRequest>();
  private initAttempted = false;

  /**
   * Enriches `tokens` using the worker when one is available.
   *
   * @param tokens - Tokens to enrich.
   * @param mecabTokens - MeCab tokens for the same text, or null.
   * @returns The enriched tokens.
   * @throws Rejects when the request cannot be posted, the worker replies with an error or an
   *   invalid payload, or the worker fails or exits before replying.
   */
  async enrichTokens(
    tokens: MergedToken[],
    mecabTokens: MergedToken[] | null,
  ): Promise<MergedToken[]> {
    const worker = await this.getWorker();
    if (!worker) {
      return enrichTokensWithMecabPos1(tokens, mecabTokens);
    }

    return new Promise<MergedToken[]>((resolve, reject) => {
      const id = this.nextRequestId++;
      this.pending.set(id, { resolve, reject });
      const request: WorkerRequest = { id, tokens, mecabTokens };
      try {
        worker.postMessage(request);
      } catch (error) {
        // postMessage can throw synchronously (for example when the payload cannot be cloned);
        // drop the bookkeeping entry so it does not linger in the pending map forever.
        this.pending.delete(id);
        reject(error);
      }
    });
  }

  /**
   * Returns the running worker, starting it on first use.
   *
   * @returns The worker, or null when it is disabled, could not be started, or start-up was
   *   already attempted and no worker is currently alive.
   */
  private async getWorker(): Promise<import('node:worker_threads').Worker | null> {
    if (process.env[DISABLE_WORKER_ENV] === '1') {
      return null;
    }
    if (this.worker) {
      return this.worker;
    }
    if (this.initAttempted) {
      return null;
    }

    // Set before any await so concurrent callers do not race to start a second worker.
    this.initAttempted = true;

    let workerThreads: typeof import('node:worker_threads');
    try {
      workerThreads = await import('node:worker_threads');
    } catch {
      // worker_threads not available in this runtime: callers use the main-thread path.
      return null;
    }

    let workerPath = '';
    try {
      workerPath = require.resolve('./parser-enrichment-worker-thread.js');
    } catch {
      // Worker script could not be resolved: callers use the main-thread path.
      return null;
    }

    try {
      const worker = new workerThreads.Worker(workerPath);
      worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message));
      worker.on('error', (error: Error) => this.handleWorkerFailure(error));
      worker.on('exit', (code: number) => {
        if (code !== 0) {
          this.handleWorkerFailure(new Error(`parser enrichment worker exited with code ${code}`));
          return;
        }
        // Even a clean exit strands requests that were still in flight; reject them so the
        // awaiting callers are released instead of hanging forever.
        this.rejectAllPending(new Error('parser enrichment worker exited before responding'));
        this.worker = null;
      });
      this.worker = worker;
      return worker;
    } catch (error) {
      logger.debug(`Failed to start parser enrichment worker: ${(error as Error).message}`);
      return null;
    }
  }

  /** Validates a worker reply and settles the matching pending request, if there is one. */
  private handleWorkerMessage(message: WorkerResponse): void {
    if (typeof message.id !== 'number') {
      return;
    }

    const request = this.pending.get(message.id);
    if (!request) {
      return;
    }
    this.pending.delete(message.id);

    if (typeof message.error === 'string' && message.error.length > 0) {
      request.reject(new Error(message.error));
      return;
    }

    if (!Array.isArray(message.result)) {
      request.reject(new Error('Parser enrichment worker returned invalid payload'));
      return;
    }

    request.resolve(message.result as MergedToken[]);
  }

  /** Rejects every in-flight request with `error` and detaches from the failed worker. */
  private handleWorkerFailure(error: Error): void {
    logger.debug(`Parser enrichment worker unavailable, falling back to main thread: ${error.message}`);
    this.rejectAllPending(error);

    if (this.worker) {
      this.worker.removeAllListeners();
      this.worker = null;
    }
  }

  /** Rejects and forgets every request that is still waiting for a reply. */
  private rejectAllPending(error: Error): void {
    for (const pending of this.pending.values()) {
      pending.reject(error);
    }
    this.pending.clear();
  }
}
|
||||
|
||||
/** Module-wide runtime instance, created the first time enrichment is requested. */
let runtime: ParserEnrichmentWorkerRuntime | null = null;

/**
 * Enriches tokens with MeCab POS data through the shared worker runtime.
 *
 * Any failure reported by the runtime is absorbed by running the synchronous
 * `enrichTokensWithMecabPos1` on the calling thread instead.
 *
 * @param tokens - Tokens to enrich.
 * @param mecabTokens - MeCab tokens for the same text, or null.
 * @returns The enriched tokens.
 */
export async function enrichTokensWithMecabPos1Async(
  tokens: MergedToken[],
  mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
  const activeRuntime = runtime ?? (runtime = new ParserEnrichmentWorkerRuntime());

  try {
    return await activeRuntime.enrichTokens(tokens, mecabTokens);
  } catch {
    return enrichTokensWithMecabPos1(tokens, mecabTokens);
  }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { parentPort } from 'node:worker_threads';
|
||||
import type { MergedToken } from '../../../types';
|
||||
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
|
||||
|
||||
/** Request received from the parent thread; `id` is echoed back in the reply. */
interface WorkerRequest {
  id: number;
  tokens: MergedToken[];
  mecabTokens: MergedToken[] | null;
}

// This module only makes sense as a worker entry point, where a parent port exists.
if (parentPort === null) {
  throw new Error('parser-enrichment worker missing parent port');
}

// Bound to a const so the non-null narrowing above still holds inside the listener.
const port = parentPort;

// Reply to each request with either `{ id, result }` or `{ id, error }`.
port.on('message', (request: WorkerRequest) => {
  try {
    const enriched = enrichTokensWithMecabPos1(request.tokens, request.mecabTokens);
    port.postMessage({ id: request.id, result: enriched });
  } catch (error) {
    let reason: string;
    if (error instanceof Error) {
      reason = error.message;
    } else {
      reason = String(error);
    }
    port.postMessage({ id: request.id, error: reason });
  }
});
|
||||
83
src/core/services/tokenizer/yomitan-parser-runtime.test.ts
Normal file
83
src/core/services/tokenizer/yomitan-parser-runtime.test.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime';
|
||||
|
||||
/**
 * Builds parser-runtime deps around a fake, never-destroyed parser window.
 *
 * @param executeJavaScript - Stub that receives every script the code under test executes.
 * @returns Deps whose getters expose the fake window and whose setters do nothing.
 */
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
  const webContents = {
    executeJavaScript: async (script: string) => await executeJavaScript(script),
  };
  const parserWindow = {
    isDestroyed: () => false,
    webContents,
  };

  return {
    getYomitanExt: () => ({ id: 'ext-id' }) as never,
    getYomitanParserWindow: () => parserWindow as never,
    setYomitanParserWindow: () => undefined,
    getYomitanParserReadyPromise: () => null,
    setYomitanParserReadyPromise: () => undefined,
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: () => undefined,
  };
}
|
||||
|
||||
// A `{ updated: true }` script result must surface as true and produce exactly one info log.
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
  let capturedScript = '';
  const infoLogs: string[] = [];

  const deps = createDeps(async (script) => {
    capturedScript = script;
    return { updated: true };
  });
  const logger = {
    error: () => undefined,
    info: (message) => infoLogs.push(message),
  };

  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, logger);

  assert.equal(updated, true);
  assert.match(capturedScript, /optionsGetFull/);
  assert.match(capturedScript, /setAllSettings/);
  assert.equal(infoLogs.length, 1);
});
|
||||
|
||||
// A `{ updated: false }` script result must surface as false.
test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
  const silentLogger = {
    error: () => undefined,
    info: () => undefined,
  };
  const deps = createDeps(async () => ({ updated: false }));

  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, silentLogger);

  assert.equal(updated, false);
});
|
||||
|
||||
// A rejected script execution must be reported through logger.error and surface as false.
test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
  const errorLogs: string[] = [];
  const deps = createDeps(async () => {
    throw new Error('execute failed');
  });
  const logger = {
    error: (message) => errorLogs.push(message),
    info: () => undefined,
  };

  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, logger);

  assert.equal(updated, false);
  assert.equal(errorLogs.length, 1);
});
|
||||
|
||||
// A whitespace-only URL must short-circuit before any script is executed.
test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
  let executeCount = 0;
  const deps = createDeps(async () => {
    executeCount += 1;
    return { updated: true };
  });
  const silentLogger = {
    error: () => undefined,
    info: () => undefined,
  };

  const updated = await syncYomitanDefaultAnkiServer(' ', deps, silentLogger);

  assert.equal(updated, false);
  assert.equal(executeCount, 0);
});
|
||||
@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';
|
||||
|
||||
interface LoggerLike {
|
||||
error: (message: string, ...args: unknown[]) => void;
|
||||
info?: (message: string, ...args: unknown[]) => void;
|
||||
}
|
||||
|
||||
interface YomitanParserRuntimeDeps {
|
||||
@@ -152,3 +153,90 @@ export async function requestYomitanParseResults(
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the script that is executed inside the Yomitan parser window.
 *
 * The script reads the full Yomitan options, inspects the first profile, and overwrites its
 * Anki server only when the current value is empty or the stock `http://127.0.0.1:8765`
 * and differs from the target. It resolves to an object whose `updated` flag tells the caller
 * whether settings were written.
 */
function buildAnkiServerSyncScript(targetServer: string): string {
  return `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const targetServer = ${JSON.stringify(targetServer)};
      const optionsFull = await invoke("optionsGetFull", undefined);
      const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
      if (profiles.length === 0) {
        return { updated: false, reason: "no-profiles" };
      }

      const defaultProfile = profiles[0];
      if (!defaultProfile || typeof defaultProfile !== "object") {
        return { updated: false, reason: "invalid-default-profile" };
      }

      defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
        ? defaultProfile.options
        : {};
      defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
        ? defaultProfile.options.anki
        : {};

      const currentServerRaw = defaultProfile.options.anki.server;
      const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
      const canReplaceDefault =
        currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
      if (!canReplaceDefault || currentServer === targetServer) {
        return { updated: false, reason: "no-change", currentServer, targetServer };
      }

      defaultProfile.options.anki.server = targetServer;
      await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
      return { updated: true, currentServer, targetServer };
    })();
  `;
}

/**
 * Points the first Yomitan profile's Anki server at `serverUrl` when it is still unset or stock.
 *
 * @param serverUrl - Target AnkiConnect URL; surrounding whitespace is trimmed.
 * @param deps - Parser-window accessors used to obtain the window the script runs in.
 * @param logger - Receives an error on script failure and, if `info` exists, a note on success.
 * @returns True only when the in-page script reports `updated: true`; false for an empty URL,
 *   an unavailable parser window, no change, or any script failure.
 */
export async function syncYomitanDefaultAnkiServer(
  serverUrl: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTargetServer = serverUrl.trim();
  if (normalizedTargetServer.length === 0) {
    return false;
  }

  const isReady = await ensureYomitanParserWindow(deps, logger);
  const parserWindow = deps.getYomitanParserWindow();
  if (!isReady) {
    return false;
  }
  if (!parserWindow || parserWindow.isDestroyed()) {
    return false;
  }

  const script = buildAnkiServerSyncScript(normalizedTargetServer);

  try {
    const result = await parserWindow.webContents.executeJavaScript(script, true);
    const reportedUpdate =
      typeof result === 'object' &&
      result !== null &&
      (result as { updated?: unknown }).updated === true;
    if (!reportedUpdate) {
      return false;
    }
    logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
    return true;
  } catch (err) {
    logger.error('Failed to sync Yomitan default profile Anki server:', (err as Error).message);
    return false;
  }
}
|
||||
|
||||
Reference in New Issue
Block a user