mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-01 18:22:41 -08:00
perf(subtitles): parallelize annotation stages and cache tokenizer lookups
This commit is contained in:
@@ -64,6 +64,32 @@ test('subtitle processing skips duplicate subtitle emission', async () => {
|
|||||||
assert.equal(tokenizeCalls, 1);
|
assert.equal(tokenizeCalls, 1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Shows 'first', then 'second', then 'first' again. The tokenizer must run only
// twice (the repeated 'first' is served from the cache) while all three
// subtitle changes are still emitted in order.
test('subtitle processing reuses cached tokenization for repeated subtitle text', async () => {
  const emitted: SubtitleData[] = [];
  let tokenizeCalls = 0;
  const controller = createSubtitleProcessingController({
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    emitSubtitle: (payload) => emitted.push(payload),
  });

  controller.onSubtitleChange('first');
  await flushMicrotasks();
  controller.onSubtitleChange('second');
  await flushMicrotasks();
  controller.onSubtitleChange('first');
  await flushMicrotasks();

  assert.equal(tokenizeCalls, 2);
  assert.deepEqual(emitted, [
    { text: 'first', tokens: [] },
    { text: 'second', tokens: [] },
    { text: 'first', tokens: [] },
  ]);
});
|
||||||
|
|
||||||
test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
|
test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
|
||||||
const emitted: SubtitleData[] = [];
|
const emitted: SubtitleData[] = [];
|
||||||
const controller = createSubtitleProcessingController({
|
const controller = createSubtitleProcessingController({
|
||||||
|
|||||||
@@ -15,13 +15,36 @@ export interface SubtitleProcessingController {
|
|||||||
export function createSubtitleProcessingController(
|
export function createSubtitleProcessingController(
|
||||||
deps: SubtitleProcessingControllerDeps,
|
deps: SubtitleProcessingControllerDeps,
|
||||||
): SubtitleProcessingController {
|
): SubtitleProcessingController {
|
||||||
|
const SUBTITLE_TOKENIZATION_CACHE_LIMIT = 256;
|
||||||
let latestText = '';
|
let latestText = '';
|
||||||
let lastEmittedText = '';
|
let lastEmittedText = '';
|
||||||
let processing = false;
|
let processing = false;
|
||||||
let staleDropCount = 0;
|
let staleDropCount = 0;
|
||||||
let refreshRequested = false;
|
let refreshRequested = false;
|
||||||
|
const tokenizationCache = new Map<string, SubtitleData>();
|
||||||
const now = deps.now ?? (() => Date.now());
|
const now = deps.now ?? (() => Date.now());
|
||||||
|
|
||||||
|
/**
 * Returns the cached payload for `text`, or `null` when nothing is cached.
 *
 * On a hit the entry is removed and re-inserted so it moves to the end of the
 * Map's insertion order; eviction removes keys from the front, so recently
 * read entries are evicted last.
 */
const getCachedTokenization = (text: string): SubtitleData | null => {
  const cached = tokenizationCache.get(text);
  if (cached === undefined) {
    return null;
  }

  tokenizationCache.delete(text);
  tokenizationCache.set(text, cached);
  return cached;
};
|
||||||
|
|
||||||
|
/**
 * Stores the payload produced for `text` and trims the cache back to
 * SUBTITLE_TOKENIZATION_CACHE_LIMIT entries, dropping the oldest keys first.
 *
 * A payload without tokens is the plain-text fallback used when tokenization
 * produced nothing. It is never cached (and any older entry for the same text
 * is discarded) so that the next occurrence of the line retries tokenization
 * instead of being pinned to the fallback.
 */
const setCachedTokenization = (text: string, payload: SubtitleData): void => {
  // Remove first so a re-stored key moves to the most-recently-used position.
  tokenizationCache.delete(text);
  if (payload.tokens == null) {
    return;
  }

  tokenizationCache.set(text, payload);
  while (tokenizationCache.size > SUBTITLE_TOKENIZATION_CACHE_LIMIT) {
    const oldestKey = tokenizationCache.keys().next().value;
    if (oldestKey === undefined) {
      // Nothing left to evict; bail out rather than spin.
      break;
    }
    tokenizationCache.delete(oldestKey);
  }
};
|
||||||
|
|
||||||
const processLatest = (): void => {
|
const processLatest = (): void => {
|
||||||
if (processing) {
|
if (processing) {
|
||||||
return;
|
return;
|
||||||
@@ -44,9 +67,15 @@ export function createSubtitleProcessingController(
|
|||||||
|
|
||||||
let output: SubtitleData = { text, tokens: null };
|
let output: SubtitleData = { text, tokens: null };
|
||||||
try {
|
try {
|
||||||
const tokenized = await deps.tokenizeSubtitle(text);
|
const cachedTokenized = forceRefresh ? null : getCachedTokenization(text);
|
||||||
if (tokenized) {
|
if (cachedTokenized) {
|
||||||
output = tokenized;
|
output = cachedTokenized;
|
||||||
|
} else {
|
||||||
|
const tokenized = await deps.tokenizeSubtitle(text);
|
||||||
|
if (tokenized) {
|
||||||
|
output = tokenized;
|
||||||
|
}
|
||||||
|
setCachedTokenization(text, output);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
|
deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
|
||||||
|
|||||||
@@ -55,6 +55,19 @@ function makeDepsFromYomitanTokens(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a promise together with the functions that settle it, so a test can
 * decide exactly when an awaited dependency completes.
 *
 * The Promise executor runs synchronously, so both settle functions are
 * assigned before this function returns.
 *
 * @returns `promise` plus `resolve`/`reject` handles that settle it.
 */
function createDeferred<T>() {
  let settle!: (value: T) => void;
  let fail!: (reason?: unknown) => void;
  const promise = new Promise<T>((innerResolve, innerReject) => {
    settle = innerResolve;
    fail = innerReject;
  });

  return {
    promise,
    resolve: (value: T): void => {
      settle(value);
    },
    reject: (reason?: unknown): void => {
      fail(reason);
    },
  };
}
|
||||||
|
|
||||||
test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () => {
|
test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () => {
|
||||||
const result = await tokenizeSubtitle(
|
const result = await tokenizeSubtitle(
|
||||||
'猫です',
|
'猫です',
|
||||||
@@ -218,6 +231,72 @@ test('tokenizeSubtitle loads frequency ranks from Yomitan installed dictionaries
|
|||||||
assert.equal(result.tokens?.[0]?.frequencyRank, 77);
|
assert.equal(result.tokens?.[0]?.frequencyRank, 77);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Both the Yomitan frequency request and the MeCab call are held open with
// deferreds. After one macrotask tick both must already have been started,
// which only happens when the two stages are launched concurrently.
test('tokenizeSubtitle starts Yomitan frequency lookup and MeCab enrichment in parallel', async () => {
  const frequencyDeferred = createDeferred<unknown[]>();
  const mecabDeferred = createDeferred<null>();
  let frequencyRequested = false;
  let mecabRequested = false;

  const pendingResult = tokenizeSubtitle(
    '猫',
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
      getYomitanParserWindow: () =>
        ({
          isDestroyed: () => false,
          webContents: {
            executeJavaScript: async (script: string) => {
              if (script.includes('getTermFrequencies')) {
                frequencyRequested = true;
                return await frequencyDeferred.promise;
              }

              // Any other script is answered with a single parsed token.
              return [
                {
                  source: 'scanning-parser',
                  index: 0,
                  content: [
                    [
                      {
                        text: '猫',
                        reading: 'ねこ',
                        headwords: [[{ term: '猫' }]],
                      },
                    ],
                  ],
                },
              ];
            },
          },
        }) as unknown as Electron.BrowserWindow,
      tokenizeWithMecab: async () => {
        mecabRequested = true;
        return await mecabDeferred.promise;
      },
    }),
  );

  await new Promise((resolve) => setTimeout(resolve, 0));
  assert.equal(frequencyRequested, true);
  assert.equal(mecabRequested, true);

  frequencyDeferred.resolve([
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ]);
  mecabDeferred.resolve(null);

  const result = await pendingResult;
  assert.equal(result.tokens?.[0]?.frequencyRank, 77);
});
|
||||||
|
|
||||||
test('tokenizeSubtitle queries headword frequencies without forcing surface reading', async () => {
|
test('tokenizeSubtitle queries headword frequencies without forcing surface reading', async () => {
|
||||||
const result = await tokenizeSubtitle(
|
const result = await tokenizeSubtitle(
|
||||||
'鍛えた',
|
'鍛えた',
|
||||||
|
|||||||
@@ -433,33 +433,41 @@ async function parseWithYomitanInternalParser(
|
|||||||
logSelectedYomitanGroups(text, selectedTokens);
|
logSelectedYomitanGroups(text, selectedTokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
let yomitanRankByTerm = new Map<string, number>();
|
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
|
||||||
if (options.frequencyEnabled) {
|
? (async () => {
|
||||||
const frequencyMatchMode = options.frequencyMatchMode;
|
const frequencyMatchMode = options.frequencyMatchMode;
|
||||||
const termReadingList = buildYomitanFrequencyTermReadingList(
|
const termReadingList = buildYomitanFrequencyTermReadingList(
|
||||||
selectedTokens,
|
selectedTokens,
|
||||||
frequencyMatchMode,
|
frequencyMatchMode,
|
||||||
);
|
);
|
||||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||||
yomitanRankByTerm = buildYomitanFrequencyRankMap(yomitanFrequencies);
|
return buildYomitanFrequencyRankMap(yomitanFrequencies);
|
||||||
}
|
})()
|
||||||
|
: Promise.resolve(new Map<string, number>());
|
||||||
|
|
||||||
let enrichedTokens = selectedTokens;
|
const mecabEnrichmentPromise: Promise<MergedToken[]> = needsMecabPosEnrichment(options)
|
||||||
if (needsMecabPosEnrichment(options)) {
|
? (async () => {
|
||||||
try {
|
try {
|
||||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||||
enrichedTokens = await enrichTokensWithMecab(enrichedTokens, mecabTokens);
|
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const error = err as Error;
|
const error = err as Error;
|
||||||
logger.warn(
|
logger.warn(
|
||||||
'Failed to enrich Yomitan tokens with MeCab POS:',
|
'Failed to enrich Yomitan tokens with MeCab POS:',
|
||||||
error.message,
|
error.message,
|
||||||
`tokenCount=${selectedTokens.length}`,
|
`tokenCount=${selectedTokens.length}`,
|
||||||
`textLength=${text.length}`,
|
`textLength=${text.length}`,
|
||||||
);
|
);
|
||||||
}
|
return selectedTokens;
|
||||||
}
|
}
|
||||||
|
})()
|
||||||
|
: Promise.resolve(selectedTokens);
|
||||||
|
|
||||||
|
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
|
||||||
|
frequencyRankPromise,
|
||||||
|
mecabEnrichmentPromise,
|
||||||
|
]);
|
||||||
|
|
||||||
if (options.frequencyEnabled) {
|
if (options.frequencyEnabled) {
|
||||||
return applyFrequencyRanks(
|
return applyFrequencyRanks(
|
||||||
|
|||||||
@@ -129,3 +129,98 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
|
|||||||
assert.match(scriptValue, /getTermFrequencies/);
|
assert.match(scriptValue, /getTermFrequencies/);
|
||||||
assert.match(scriptValue, /optionsGetFull/);
|
assert.match(scriptValue, /optionsGetFull/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Two lookups for different terms against the same deps: the profile options
// ("optionsGetFull") must be requested only once.
test('requestYomitanTermFrequencies caches profile metadata between calls', async () => {
  const scripts: string[] = [];
  const deps = createDeps(async (script) => {
    scripts.push(script);
    if (script.includes('optionsGetFull')) {
      return {
        profileCurrent: 0,
        profiles: [
          {
            options: {
              scanning: { length: 40 },
              dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
            },
          },
        ],
      };
    }

    if (script.includes('"term":"犬"')) {
      return [
        {
          term: '犬',
          reading: 'いぬ',
          dictionary: 'freq-dict',
          frequency: 12,
          displayValue: '12',
          displayValueParsed: true,
        },
      ];
    }

    return [
      {
        term: '猫',
        reading: 'ねこ',
        dictionary: 'freq-dict',
        frequency: 77,
        displayValue: '77',
        displayValueParsed: true,
      },
    ];
  });

  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });
  await requestYomitanTermFrequencies([{ term: '犬', reading: 'いぬ' }], deps, {
    error: () => undefined,
  });

  const optionsCalls = scripts.filter((script) => script.includes('optionsGetFull')).length;
  assert.equal(optionsCalls, 1);
});
|
||||||
|
|
||||||
|
// The same term+reading pair is requested twice: only the first call may issue
// a "getTermFrequencies" script, the second must be answered from the cache.
test('requestYomitanTermFrequencies caches repeated term+reading lookups', async () => {
  const scripts: string[] = [];
  const deps = createDeps(async (script) => {
    scripts.push(script);
    if (script.includes('optionsGetFull')) {
      return {
        profileCurrent: 0,
        profiles: [
          {
            options: {
              scanning: { length: 40 },
              dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
            },
          },
        ],
      };
    }

    return [
      {
        term: '猫',
        reading: 'ねこ',
        dictionary: 'freq-dict',
        frequency: 77,
        displayValue: '77',
        displayValueParsed: true,
      },
    ];
  });

  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });
  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });

  const frequencyCalls = scripts.filter((script) => script.includes('getTermFrequencies')).length;
  assert.equal(frequencyCalls, 1);
});
|
||||||
|
|||||||
@@ -30,10 +30,39 @@ export interface YomitanTermReadingPair {
|
|||||||
reading: string | null;
|
reading: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Per-profile Yomitan settings that are fetched once and reused across requests. */
interface YomitanProfileMetadata {
  /** Zero-based index of the active profile, passed as `optionsContext.index`. */
  profileIndex: number;
  /** Scan length forwarded to `parseText`. */
  scanLength: number;
  /** Names of the dictionaries to query, ordered by priority. */
  dictionaries: string[];
  /** Priority value per dictionary name. */
  dictionaryPriorityByName: Record<string, number>;
}
|
||||||
|
|
||||||
|
/** Scan length used when the profile does not supply a finite numeric one. */
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;

/** Profile metadata already fetched for a given parser window. */
const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();

/** Frequency entries already fetched for a given parser window, keyed by term+reading. */
const yomitanFrequencyCacheByWindow = new WeakMap<
  BrowserWindow,
  Map<string, YomitanTermFrequency[]>
>();
|
||||||
|
|
||||||
/**
 * Type guard for any non-null value whose `typeof` is 'object'.
 * Arrays satisfy this check as well.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  return Boolean(value && typeof value === 'object');
}
|
||||||
|
|
||||||
|
/**
 * Builds the cache key for a term/reading pair.
 *
 * The two parts are joined with a NUL character. A `null` reading is encoded
 * as an empty string, so it yields the same key as an empty reading.
 */
function makeTermReadingCacheKey(term: string, reading: string | null): string {
  return `${term}\u0000${reading ?? ''}`;
}
|
||||||
|
|
||||||
|
/**
 * Returns the frequency cache that belongs to `window`, creating and
 * registering an empty one on first use.
 */
function getWindowFrequencyCache(window: BrowserWindow): Map<string, YomitanTermFrequency[]> {
  const existing = yomitanFrequencyCacheByWindow.get(window);
  if (existing) {
    return existing;
  }

  const created = new Map<string, YomitanTermFrequency[]>();
  yomitanFrequencyCacheByWindow.set(window, created);
  return created;
}
|
||||||
|
|
||||||
|
/** Drops the cached profile metadata and the frequency cache stored for `window`. */
function clearWindowCaches(window: BrowserWindow): void {
  yomitanProfileMetadataByWindow.delete(window);
  yomitanFrequencyCacheByWindow.delete(window);
}
|
||||||
|
|
||||||
function asPositiveInteger(value: unknown): number | null {
|
function asPositiveInteger(value: unknown): number | null {
|
||||||
if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) {
|
if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) {
|
||||||
return null;
|
return null;
|
||||||
@@ -135,6 +164,224 @@ function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): Yo
|
|||||||
return normalized;
|
return normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Validates and normalizes a value into YomitanProfileMetadata.
 *
 * Two input shapes are understood:
 * - an already-summarized object with `profileIndex`, `scanLength`,
 *   `dictionaries` and `dictionaryPriorityByName`;
 * - a full options object with `profileCurrent` and `profiles[...]`, from
 *   which scan length and dictionaries of the selected profile are read.
 *
 * Summarized fields win over the profile-derived ones when both are present.
 *
 * @returns the normalized metadata, or `null` when `value` is not an object.
 */
function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null {
  if (!isObject(value)) {
    return null;
  }

  const profileIndexRaw = value.profileIndex ?? value.profileCurrent;
  const profileIndex =
    typeof profileIndexRaw === 'number' && Number.isFinite(profileIndexRaw)
      ? Math.max(0, Math.floor(profileIndexRaw))
      : 0;

  // Options of the selected profile, when `value` carries a `profiles` array.
  const selectedProfile: unknown = Array.isArray(value.profiles)
    ? value.profiles[profileIndex]
    : undefined;
  const profileOptions =
    isObject(selectedProfile) && isObject(selectedProfile.options)
      ? selectedProfile.options
      : undefined;

  // Only read `.length` from an object so a string-valued `scanning` cannot
  // leak its character count as a scan length.
  const scanning = profileOptions?.scanning;
  const scanLengthRaw = value.scanLength ?? (isObject(scanning) ? scanning.length : undefined);
  const scanLength =
    typeof scanLengthRaw === 'number' && Number.isFinite(scanLengthRaw)
      ? Math.max(1, Math.floor(scanLengthRaw))
      : DEFAULT_YOMITAN_SCAN_LENGTH;

  const dictionariesRaw = value.dictionaries ?? profileOptions?.dictionaries;
  const dictionaries = Array.isArray(dictionariesRaw)
    ? dictionariesRaw
        .map((entry: unknown, index: number) => {
          // Plain names keep their list position as priority.
          if (typeof entry === 'string') {
            return { name: entry.trim(), priority: index };
          }
          if (!isObject(entry) || entry.enabled === false || typeof entry.name !== 'string') {
            return null;
          }
          const normalizedName = entry.name.trim();
          if (!normalizedName) {
            return null;
          }
          const priorityRaw = entry.id;
          const priority =
            typeof priorityRaw === 'number' && Number.isFinite(priorityRaw)
              ? Math.max(0, Math.floor(priorityRaw))
              : index;
          return { name: normalizedName, priority };
        })
        .filter((entry): entry is { name: string; priority: number } => entry !== null)
        .sort((a, b) => a.priority - b.priority)
        .map((entry) => entry.name)
        .filter((name) => name.length > 0)
    : [];

  // Null-prototype table: a dictionary named like an Object.prototype member
  // ("constructor", "toString", ...) must not resolve to an inherited value.
  const dictionaryPriorityByName: Record<string, number> = Object.create(null);
  const dictionaryPriorityByNameRaw = value.dictionaryPriorityByName;
  if (isObject(dictionaryPriorityByNameRaw)) {
    for (const [name, priorityRaw] of Object.entries(dictionaryPriorityByNameRaw)) {
      if (typeof priorityRaw !== 'number' || !Number.isFinite(priorityRaw)) {
        continue;
      }
      const normalizedName = name.trim();
      if (!normalizedName) {
        continue;
      }
      dictionaryPriorityByName[normalizedName] = Math.max(0, Math.floor(priorityRaw));
    }
  }

  // Dictionaries without an explicit priority fall back to their list position.
  dictionaries.forEach((dictionary, index) => {
    if (dictionary && dictionaryPriorityByName[dictionary] === undefined) {
      dictionaryPriorityByName[dictionary] = index;
    }
  });

  return {
    profileIndex,
    scanLength,
    dictionaries,
    dictionaryPriorityByName,
  };
}
|
||||||
|
|
||||||
|
/**
 * Converts raw backend entries into YomitanTermFrequency values and stamps
 * each with the priority configured for its dictionary.
 *
 * Entries rejected by `toYomitanTermFrequency` are skipped. When the table has
 * no entry for a dictionary, the entry's own `dictionaryPriority` is kept.
 */
function normalizeFrequencyEntriesWithPriority(
  rawResult: unknown[],
  dictionaryPriorityByName: Record<string, number>,
): YomitanTermFrequency[] {
  const normalized: YomitanTermFrequency[] = [];
  for (const entry of rawResult) {
    const frequency = toYomitanTermFrequency(entry);
    if (!frequency) {
      continue;
    }

    // Own-property lookup only: a dictionary called "constructor" or
    // "toString" must not pick up an inherited Object.prototype member.
    const configuredPriority = Object.prototype.hasOwnProperty.call(
      dictionaryPriorityByName,
      frequency.dictionary,
    )
      ? dictionaryPriorityByName[frequency.dictionary]
      : undefined;

    normalized.push({
      ...frequency,
      dictionaryPriority:
        configuredPriority !== undefined ? configuredPriority : frequency.dictionaryPriority,
    });
  }

  return normalized;
}
|
||||||
|
|
||||||
|
/**
 * Groups frequency entries by their term+reading cache key.
 *
 * Term and reading are trimmed first; a missing or blank reading is treated
 * as `null` when the key is built.
 */
function groupFrequencyEntriesByPair(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const grouped = new Map<string, YomitanTermFrequency[]>();
  for (const entry of entries) {
    const reading =
      typeof entry.reading === 'string' && entry.reading.trim().length > 0
        ? entry.reading.trim()
        : null;
    const key = makeTermReadingCacheKey(entry.term.trim(), reading);
    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(key, [entry]);
    }
  }
  return grouped;
}
|
||||||
|
|
||||||
|
/**
 * Groups frequency entries by trimmed term, ignoring the reading.
 * Entries whose term is blank after trimming are skipped.
 */
function groupFrequencyEntriesByTerm(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const grouped = new Map<string, YomitanTermFrequency[]>();
  for (const entry of entries) {
    const term = entry.term.trim();
    if (!term) {
      continue;
    }

    const bucket = grouped.get(term);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(term, [entry]);
    }
  }
  return grouped;
}
|
||||||
|
|
||||||
|
/**
 * Returns the Yomitan profile metadata for `parserWindow`.
 *
 * The first successful result is stored per window and returned on every
 * later call without contacting the backend again. Otherwise a script is
 * executed in the window that calls "optionsGetFull" and summarizes the
 * active profile (index, scan length, enabled dictionaries and their order).
 *
 * NOTE(review): nothing in this function refreshes a stored result, so
 * profile or dictionary changes made while the window stays open appear to
 * go unnoticed until the window's caches are cleared — confirm this is
 * intended.
 *
 * @param parserWindow window whose web contents can reach the Yomitan backend.
 * @param logger receives an error message when the script fails.
 * @returns the metadata, or `null` when the request fails or the result
 *   cannot be normalized (failures are not cached).
 */
async function requestYomitanProfileMetadata(
  parserWindow: BrowserWindow,
  logger: LoggerLike,
): Promise<YomitanProfileMetadata | null> {
  const cached = yomitanProfileMetadataByWindow.get(parserWindow);
  if (cached) {
    return cached;
  }

  // Runs inside the parser window; only the default scan length is
  // interpolated from this module.
  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex =
        typeof optionsFull.profileCurrent === "number" && Number.isFinite(optionsFull.profileCurrent)
          ? Math.max(0, Math.floor(optionsFull.profileCurrent))
          : 0;
      const scanLengthRaw = optionsFull.profiles?.[profileIndex]?.options?.scanning?.length;
      const scanLength =
        typeof scanLengthRaw === "number" && Number.isFinite(scanLengthRaw)
          ? Math.max(1, Math.floor(scanLengthRaw))
          : ${DEFAULT_YOMITAN_SCAN_LENGTH};
      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
      const dictionaryEntries = Array.isArray(dictionariesRaw)
        ? dictionariesRaw
            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
            .map((entry, index) => ({
              name: entry.name,
              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.max(0, Math.floor(entry.id)) : index
            }))
            .sort((a, b) => a.id - b.id)
        : [];
      const dictionaries = dictionaryEntries.map((entry) => entry.name);
      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
        acc[entry.name] = index;
        return acc;
      }, {});

      return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
    })();
  `;

  try {
    const rawMetadata = await parserWindow.webContents.executeJavaScript(script, true);
    const metadata = toYomitanProfileMetadata(rawMetadata);
    if (!metadata) {
      return null;
    }
    yomitanProfileMetadataByWindow.set(parserWindow, metadata);
    return metadata;
  } catch (err) {
    logger.error('Yomitan parser metadata request failed:', (err as Error).message);
    return null;
  }
}
|
||||||
|
|
||||||
async function ensureYomitanParserWindow(
|
async function ensureYomitanParserWindow(
|
||||||
deps: YomitanParserRuntimeDeps,
|
deps: YomitanParserRuntimeDeps,
|
||||||
logger: LoggerLike,
|
logger: LoggerLike,
|
||||||
@@ -179,6 +426,7 @@ async function ensureYomitanParserWindow(
|
|||||||
);
|
);
|
||||||
|
|
||||||
parserWindow.on('closed', () => {
|
parserWindow.on('closed', () => {
|
||||||
|
clearWindowCaches(parserWindow);
|
||||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||||
deps.setYomitanParserWindow(null);
|
deps.setYomitanParserWindow(null);
|
||||||
deps.setYomitanParserReadyPromise(null);
|
deps.setYomitanParserReadyPromise(null);
|
||||||
@@ -198,6 +446,7 @@ async function ensureYomitanParserWindow(
|
|||||||
if (!parserWindow.isDestroyed()) {
|
if (!parserWindow.isDestroyed()) {
|
||||||
parserWindow.destroy();
|
parserWindow.destroy();
|
||||||
}
|
}
|
||||||
|
clearWindowCaches(parserWindow);
|
||||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||||
deps.setYomitanParserWindow(null);
|
deps.setYomitanParserWindow(null);
|
||||||
deps.setYomitanParserReadyPromise(null);
|
deps.setYomitanParserReadyPromise(null);
|
||||||
@@ -229,7 +478,40 @@ export async function requestYomitanParseResults(
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const script = `
|
const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
|
||||||
|
const script =
|
||||||
|
metadata !== null
|
||||||
|
? `
|
||||||
|
(async () => {
|
||||||
|
const invoke = (action, params) =>
|
||||||
|
new Promise((resolve, reject) => {
|
||||||
|
chrome.runtime.sendMessage({ action, params }, (response) => {
|
||||||
|
if (chrome.runtime.lastError) {
|
||||||
|
reject(new Error(chrome.runtime.lastError.message));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!response || typeof response !== "object") {
|
||||||
|
reject(new Error("Invalid response from Yomitan backend"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (response.error) {
|
||||||
|
reject(new Error(response.error.message || "Yomitan backend error"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
resolve(response.result);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return await invoke("parseText", {
|
||||||
|
text: ${JSON.stringify(text)},
|
||||||
|
optionsContext: { index: ${metadata.profileIndex} },
|
||||||
|
scanLength: ${metadata.scanLength},
|
||||||
|
useInternalParser: true,
|
||||||
|
useMecabParser: true
|
||||||
|
});
|
||||||
|
})();
|
||||||
|
`
|
||||||
|
: `
|
||||||
(async () => {
|
(async () => {
|
||||||
const invoke = (action, params) =>
|
const invoke = (action, params) =>
|
||||||
new Promise((resolve, reject) => {
|
new Promise((resolve, reject) => {
|
||||||
@@ -253,7 +535,7 @@ export async function requestYomitanParseResults(
|
|||||||
const optionsFull = await invoke("optionsGetFull", undefined);
|
const optionsFull = await invoke("optionsGetFull", undefined);
|
||||||
const profileIndex = optionsFull.profileCurrent;
|
const profileIndex = optionsFull.profileCurrent;
|
||||||
const scanLength =
|
const scanLength =
|
||||||
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? 40;
|
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? ${DEFAULT_YOMITAN_SCAN_LENGTH};
|
||||||
|
|
||||||
return await invoke("parseText", {
|
return await invoke("parseText", {
|
||||||
text: ${JSON.stringify(text)},
|
text: ${JSON.stringify(text)},
|
||||||
@@ -291,6 +573,88 @@ export async function requestYomitanTermFrequencies(
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
|
||||||
|
const frequencyCache = getWindowFrequencyCache(parserWindow);
|
||||||
|
const missingTermReadingList: YomitanTermReadingPair[] = [];
|
||||||
|
|
||||||
|
const buildCachedResult = (): YomitanTermFrequency[] => {
|
||||||
|
const result: YomitanTermFrequency[] = [];
|
||||||
|
for (const pair of normalizedTermReadingList) {
|
||||||
|
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||||
|
const cached = frequencyCache.get(key);
|
||||||
|
if (cached && cached.length > 0) {
|
||||||
|
result.push(...cached);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const pair of normalizedTermReadingList) {
|
||||||
|
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||||
|
if (!frequencyCache.has(key)) {
|
||||||
|
missingTermReadingList.push(pair);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (missingTermReadingList.length === 0) {
|
||||||
|
return buildCachedResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (metadata && metadata.dictionaries.length > 0) {
|
||||||
|
const script = `
|
||||||
|
(async () => {
|
||||||
|
const invoke = (action, params) =>
|
||||||
|
new Promise((resolve, reject) => {
|
||||||
|
chrome.runtime.sendMessage({ action, params }, (response) => {
|
||||||
|
if (chrome.runtime.lastError) {
|
||||||
|
reject(new Error(chrome.runtime.lastError.message));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!response || typeof response !== "object") {
|
||||||
|
reject(new Error("Invalid response from Yomitan backend"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (response.error) {
|
||||||
|
reject(new Error(response.error.message || "Yomitan backend error"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
resolve(response.result);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return await invoke("getTermFrequencies", {
|
||||||
|
termReadingList: ${JSON.stringify(missingTermReadingList)},
|
||||||
|
dictionaries: ${JSON.stringify(metadata.dictionaries)}
|
||||||
|
});
|
||||||
|
})();
|
||||||
|
`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
||||||
|
const fetchedEntries = Array.isArray(rawResult)
|
||||||
|
? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName)
|
||||||
|
: [];
|
||||||
|
const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
|
||||||
|
const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);
|
||||||
|
const missingTerms = new Set(missingTermReadingList.map((pair) => pair.term));
|
||||||
|
|
||||||
|
for (const pair of missingTermReadingList) {
|
||||||
|
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||||
|
const exactEntries = groupedByPair.get(key);
|
||||||
|
const termEntries = groupedByTerm.get(pair.term) ?? [];
|
||||||
|
frequencyCache.set(key, exactEntries ?? termEntries);
|
||||||
|
}
|
||||||
|
|
||||||
|
const cachedResult = buildCachedResult();
|
||||||
|
const unmatchedEntries = fetchedEntries.filter((entry) => !missingTerms.has(entry.term.trim()));
|
||||||
|
return [...cachedResult, ...unmatchedEntries];
|
||||||
|
} catch (err) {
|
||||||
|
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return buildCachedResult();
|
||||||
|
}
|
||||||
|
|
||||||
const script = `
|
const script = `
|
||||||
(async () => {
|
(async () => {
|
||||||
const invoke = (action, params) =>
|
const invoke = (action, params) =>
|
||||||
@@ -335,7 +699,7 @@ export async function requestYomitanTermFrequencies(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const rawFrequencies = await invoke("getTermFrequencies", {
|
const rawFrequencies = await invoke("getTermFrequencies", {
|
||||||
termReadingList: ${JSON.stringify(normalizedTermReadingList)},
|
termReadingList: ${JSON.stringify(missingTermReadingList)},
|
||||||
dictionaries
|
dictionaries
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -357,16 +721,26 @@ export async function requestYomitanTermFrequencies(
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
||||||
if (!Array.isArray(rawResult)) {
|
const fetchedEntries = Array.isArray(rawResult)
|
||||||
return [];
|
? rawResult
|
||||||
|
.map((entry) => toYomitanTermFrequency(entry))
|
||||||
|
.filter((entry): entry is YomitanTermFrequency => entry !== null)
|
||||||
|
: [];
|
||||||
|
const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
|
||||||
|
const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);
|
||||||
|
const missingTerms = new Set(missingTermReadingList.map((pair) => pair.term));
|
||||||
|
for (const pair of missingTermReadingList) {
|
||||||
|
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||||
|
const exactEntries = groupedByPair.get(key);
|
||||||
|
const termEntries = groupedByTerm.get(pair.term) ?? [];
|
||||||
|
frequencyCache.set(key, exactEntries ?? termEntries);
|
||||||
}
|
}
|
||||||
|
const cachedResult = buildCachedResult();
|
||||||
return rawResult
|
const unmatchedEntries = fetchedEntries.filter((entry) => !missingTerms.has(entry.term.trim()));
|
||||||
.map((entry) => toYomitanTermFrequency(entry))
|
return [...cachedResult, ...unmatchedEntries];
|
||||||
.filter((entry): entry is YomitanTermFrequency => entry !== null);
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
||||||
return [];
|
return buildCachedResult();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user