mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-02 06:22:42 -08:00
perf(subtitles): parallelize annotation stages and cache tokenizer lookups
This commit is contained in:
@@ -64,6 +64,32 @@ test('subtitle processing skips duplicate subtitle emission', async () => {
|
||||
assert.equal(tokenizeCalls, 1);
|
||||
});
|
||||
|
||||
// Verifies the controller's tokenization cache: switching back to a subtitle
// text that was already tokenized must not call the tokenizer a second time,
// while the cached payload is still emitted.
test('subtitle processing reuses cached tokenization for repeated subtitle text', async () => {
  const emitted: SubtitleData[] = [];
  let tokenizeCalls = 0;
  const controller = createSubtitleProcessingController({
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    emitSubtitle: (payload) => emitted.push(payload),
  });

  // 'first' is shown twice with 'second' in between, so the duplicate-emission
  // guard does not apply and only the cache can avoid the third tokenize call.
  controller.onSubtitleChange('first');
  await flushMicrotasks();
  controller.onSubtitleChange('second');
  await flushMicrotasks();
  controller.onSubtitleChange('first');
  await flushMicrotasks();

  assert.equal(tokenizeCalls, 2);
  assert.deepEqual(emitted, [
    { text: 'first', tokens: [] },
    { text: 'second', tokens: [] },
    { text: 'first', tokens: [] },
  ]);
});
|
||||
|
||||
test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
const controller = createSubtitleProcessingController({
|
||||
|
||||
@@ -15,13 +15,36 @@ export interface SubtitleProcessingController {
|
||||
export function createSubtitleProcessingController(
|
||||
deps: SubtitleProcessingControllerDeps,
|
||||
): SubtitleProcessingController {
|
||||
const SUBTITLE_TOKENIZATION_CACHE_LIMIT = 256;
|
||||
let latestText = '';
|
||||
let lastEmittedText = '';
|
||||
let processing = false;
|
||||
let staleDropCount = 0;
|
||||
let refreshRequested = false;
|
||||
const tokenizationCache = new Map<string, SubtitleData>();
|
||||
const now = deps.now ?? (() => Date.now());
|
||||
|
||||
// Returns the cached tokenization for `text`, or null on a miss. A hit is
// removed and re-inserted so it moves to the end of the Map's insertion order,
// i.e. it becomes the most recently used entry.
const getCachedTokenization = (text: string): SubtitleData | null => {
  const hit = tokenizationCache.get(text);
  if (hit === undefined) {
    return null;
  }

  tokenizationCache.delete(text);
  tokenizationCache.set(text, hit);
  return hit;
};
|
||||
|
||||
// Stores `payload` for `text` as the most recently used entry and evicts the
// oldest entries until the cache is back within
// SUBTITLE_TOKENIZATION_CACHE_LIMIT.
const setCachedTokenization = (text: string, payload: SubtitleData): void => {
  // Map.set on an existing key keeps the key's original insertion position, so
  // delete first; otherwise a refreshed entry would still be evicted as if it
  // were old.
  tokenizationCache.delete(text);
  tokenizationCache.set(text, payload);

  while (tokenizationCache.size > SUBTITLE_TOKENIZATION_CACHE_LIMIT) {
    // The first key in iteration order is the least recently used one.
    const oldest = tokenizationCache.keys().next();
    if (oldest.done) {
      // Nothing left to evict; never spin on an exhausted iterator.
      break;
    }
    tokenizationCache.delete(oldest.value);
  }
};
|
||||
|
||||
const processLatest = (): void => {
|
||||
if (processing) {
|
||||
return;
|
||||
@@ -44,9 +67,15 @@ export function createSubtitleProcessingController(
|
||||
|
||||
let output: SubtitleData = { text, tokens: null };
|
||||
try {
|
||||
const tokenized = await deps.tokenizeSubtitle(text);
|
||||
if (tokenized) {
|
||||
output = tokenized;
|
||||
const cachedTokenized = forceRefresh ? null : getCachedTokenization(text);
|
||||
if (cachedTokenized) {
|
||||
output = cachedTokenized;
|
||||
} else {
|
||||
const tokenized = await deps.tokenizeSubtitle(text);
|
||||
if (tokenized) {
|
||||
output = tokenized;
|
||||
}
|
||||
setCachedTokenization(text, output);
|
||||
}
|
||||
} catch (error) {
|
||||
deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
|
||||
|
||||
@@ -55,6 +55,19 @@ function makeDepsFromYomitanTokens(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Creates a promise together with a handle that resolves it from the outside.
 *
 * Tests use this to keep an async dependency pending until they have observed
 * intermediate state.
 *
 * @returns The pending `promise` and a `resolve` function that settles it with
 *   the given value. Calls after the first have no effect on the settled value.
 */
function createDeferred<T>(): { promise: Promise<T>; resolve: (value: T) => void } {
  let settle: ((value: T) => void) | undefined;
  const promise = new Promise<T>((resolveInner) => {
    // The executor runs synchronously, so `settle` is set before we return.
    settle = resolveInner;
  });

  return {
    promise,
    resolve: (value: T): void => {
      settle?.(value);
    },
  };
}
|
||||
|
||||
test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'猫です',
|
||||
@@ -218,6 +231,72 @@ test('tokenizeSubtitle loads frequency ranks from Yomitan installed dictionaries
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, 77);
|
||||
});
|
||||
|
||||
// Verifies that the Yomitan frequency lookup and the MeCab call are both
// started before either one completes. Both dependencies are backed by
// deferreds that stay pending until the test has checked the request flags.
test('tokenizeSubtitle starts Yomitan frequency lookup and MeCab enrichment in parallel', async () => {
  const frequencyDeferred = createDeferred<unknown[]>();
  const mecabDeferred = createDeferred<null>();
  let frequencyRequested = false;
  let mecabRequested = false;

  const pendingResult = tokenizeSubtitle(
    '猫',
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
      getYomitanParserWindow: () =>
        ({
          isDestroyed: () => false,
          webContents: {
            executeJavaScript: async (script: string) => {
              // Frequency scripts block on the deferred; any other script is
              // answered immediately with a canned parse result.
              if (script.includes('getTermFrequencies')) {
                frequencyRequested = true;
                return await frequencyDeferred.promise;
              }

              return [
                {
                  source: 'scanning-parser',
                  index: 0,
                  content: [
                    [
                      {
                        text: '猫',
                        reading: 'ねこ',
                        headwords: [[{ term: '猫' }]],
                      },
                    ],
                  ],
                },
              ];
            },
          },
        }) as unknown as Electron.BrowserWindow,
      tokenizeWithMecab: async () => {
        mecabRequested = true;
        return await mecabDeferred.promise;
      },
    }),
  );

  // Yield one macrotask turn; neither deferred has been resolved yet, so both
  // flags being set shows that neither stage waited for the other to finish.
  await new Promise((resolve) => setTimeout(resolve, 0));
  assert.equal(frequencyRequested, true);
  assert.equal(mecabRequested, true);

  frequencyDeferred.resolve([
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ]);
  mecabDeferred.resolve(null);

  const result = await pendingResult;
  assert.equal(result.tokens?.[0]?.frequencyRank, 77);
});
|
||||
|
||||
test('tokenizeSubtitle queries headword frequencies without forcing surface reading', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'鍛えた',
|
||||
|
||||
@@ -433,33 +433,41 @@ async function parseWithYomitanInternalParser(
|
||||
logSelectedYomitanGroups(text, selectedTokens);
|
||||
}
|
||||
|
||||
let yomitanRankByTerm = new Map<string, number>();
|
||||
if (options.frequencyEnabled) {
|
||||
const frequencyMatchMode = options.frequencyMatchMode;
|
||||
const termReadingList = buildYomitanFrequencyTermReadingList(
|
||||
selectedTokens,
|
||||
frequencyMatchMode,
|
||||
);
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||
yomitanRankByTerm = buildYomitanFrequencyRankMap(yomitanFrequencies);
|
||||
}
|
||||
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
|
||||
? (async () => {
|
||||
const frequencyMatchMode = options.frequencyMatchMode;
|
||||
const termReadingList = buildYomitanFrequencyTermReadingList(
|
||||
selectedTokens,
|
||||
frequencyMatchMode,
|
||||
);
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||
return buildYomitanFrequencyRankMap(yomitanFrequencies);
|
||||
})()
|
||||
: Promise.resolve(new Map<string, number>());
|
||||
|
||||
let enrichedTokens = selectedTokens;
|
||||
if (needsMecabPosEnrichment(options)) {
|
||||
try {
|
||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||
enrichedTokens = await enrichTokensWithMecab(enrichedTokens, mecabTokens);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logger.warn(
|
||||
'Failed to enrich Yomitan tokens with MeCab POS:',
|
||||
error.message,
|
||||
`tokenCount=${selectedTokens.length}`,
|
||||
`textLength=${text.length}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const mecabEnrichmentPromise: Promise<MergedToken[]> = needsMecabPosEnrichment(options)
|
||||
? (async () => {
|
||||
try {
|
||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logger.warn(
|
||||
'Failed to enrich Yomitan tokens with MeCab POS:',
|
||||
error.message,
|
||||
`tokenCount=${selectedTokens.length}`,
|
||||
`textLength=${text.length}`,
|
||||
);
|
||||
return selectedTokens;
|
||||
}
|
||||
})()
|
||||
: Promise.resolve(selectedTokens);
|
||||
|
||||
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
|
||||
frequencyRankPromise,
|
||||
mecabEnrichmentPromise,
|
||||
]);
|
||||
|
||||
if (options.frequencyEnabled) {
|
||||
return applyFrequencyRanks(
|
||||
|
||||
@@ -129,3 +129,98 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
|
||||
assert.match(scriptValue, /getTermFrequencies/);
|
||||
assert.match(scriptValue, /optionsGetFull/);
|
||||
});
|
||||
|
||||
// Verifies that profile metadata is fetched once and reused: two lookups for
// different terms must produce only one script containing 'optionsGetFull'.
test('requestYomitanTermFrequencies caches profile metadata between calls', async () => {
  const scripts: string[] = [];
  const deps = createDeps(async (script) => {
    scripts.push(script);

    // Metadata request: answer with a raw Yomitan options object.
    if (script.includes('optionsGetFull')) {
      return {
        profileCurrent: 0,
        profiles: [
          {
            options: {
              scanning: { length: 40 },
              dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
            },
          },
        ],
      };
    }

    // Frequency request for the second term.
    if (script.includes('"term":"犬"')) {
      return [
        {
          term: '犬',
          reading: 'いぬ',
          dictionary: 'freq-dict',
          frequency: 12,
          displayValue: '12',
          displayValueParsed: true,
        },
      ];
    }

    // Any other script: frequency data for the first term.
    return [
      {
        term: '猫',
        reading: 'ねこ',
        dictionary: 'freq-dict',
        frequency: 77,
        displayValue: '77',
        displayValueParsed: true,
      },
    ];
  });

  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });
  await requestYomitanTermFrequencies([{ term: '犬', reading: 'いぬ' }], deps, {
    error: () => undefined,
  });

  const optionsCalls = scripts.filter((script) => script.includes('optionsGetFull')).length;
  assert.equal(optionsCalls, 1);
});
|
||||
|
||||
// Verifies the per-term frequency cache: asking for the same term+reading pair
// twice must execute only one script containing 'getTermFrequencies'.
test('requestYomitanTermFrequencies caches repeated term+reading lookups', async () => {
  const scripts: string[] = [];
  const deps = createDeps(async (script) => {
    scripts.push(script);

    // Metadata request: answer with a raw Yomitan options object.
    if (script.includes('optionsGetFull')) {
      return {
        profileCurrent: 0,
        profiles: [
          {
            options: {
              scanning: { length: 40 },
              dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
            },
          },
        ],
      };
    }

    // Any other script: frequency data for the requested term.
    return [
      {
        term: '猫',
        reading: 'ねこ',
        dictionary: 'freq-dict',
        frequency: 77,
        displayValue: '77',
        displayValueParsed: true,
      },
    ];
  });

  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });
  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });

  const frequencyCalls = scripts.filter((script) => script.includes('getTermFrequencies')).length;
  assert.equal(frequencyCalls, 1);
});
|
||||
|
||||
@@ -30,10 +30,39 @@ export interface YomitanTermReadingPair {
|
||||
reading: string | null;
|
||||
}
|
||||
|
||||
/** Profile settings read from Yomitan once and reused for later requests. */
interface YomitanProfileMetadata {
  /** Non-negative profile index, sent as `optionsContext.index` to `parseText`. */
  profileIndex: number;
  /** Scan length (at least 1) sent to `parseText`. */
  scanLength: number;
  /** Dictionary names ordered by ascending priority, sent to `getTermFrequencies`. */
  dictionaries: string[];
  /** Priority for each dictionary name, applied to fetched frequency entries. */
  dictionaryPriorityByName: Record<string, number>;
}
|
||||
|
||||
/** Scan length used when the active profile does not provide a usable one. */
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;

/** Profile metadata cached per parser window; a WeakMap so entries never keep a window alive. */
const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();

/** Frequency entries cached per parser window, keyed by term+reading cache key. */
const yomitanFrequencyCacheByWindow = new WeakMap<
  BrowserWindow,
  Map<string, YomitanTermFrequency[]>
>();
|
||||
|
||||
/**
 * Type guard for non-null values whose `typeof` is `'object'`.
 *
 * Arrays satisfy this check as well; functions and primitives do not.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}
|
||||
|
||||
/**
 * Builds the cache key for a term/reading pair.
 *
 * The two parts are joined with a NUL character. A `null` reading and an empty
 * reading produce the same key.
 */
function makeTermReadingCacheKey(term: string, reading: string | null): string {
  const readingPart = reading ?? '';
  return `${term}\u0000${readingPart}`;
}
|
||||
|
||||
/**
 * Returns the frequency cache belonging to `window`, creating and registering
 * an empty one on first use.
 */
function getWindowFrequencyCache(window: BrowserWindow): Map<string, YomitanTermFrequency[]> {
  const existing = yomitanFrequencyCacheByWindow.get(window);
  if (existing) {
    return existing;
  }

  const created = new Map<string, YomitanTermFrequency[]>();
  yomitanFrequencyCacheByWindow.set(window, created);
  return created;
}
|
||||
|
||||
/** Drops the cached profile metadata and frequency entries held for `window`. */
function clearWindowCaches(window: BrowserWindow): void {
  yomitanProfileMetadataByWindow.delete(window);
  yomitanFrequencyCacheByWindow.delete(window);
}
|
||||
|
||||
function asPositiveInteger(value: unknown): number | null {
|
||||
if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) {
|
||||
return null;
|
||||
@@ -135,6 +164,224 @@ function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): Yo
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
 * Normalizes an untrusted value into {@link YomitanProfileMetadata}.
 *
 * Two input shapes are accepted, and flat fields win over nested ones:
 * - a flat object with `profileIndex`, `scanLength`, `dictionaries` and
 *   `dictionaryPriorityByName`;
 * - a raw options object with `profileCurrent` and
 *   `profiles[profileIndex].options.{scanning.length, dictionaries}`.
 *
 * Dictionary entries may be plain names or objects with `name`, optional
 * `enabled` and optional numeric `id`. Objects with `enabled === false`, a
 * non-string name or a blank name are dropped. Names are trimmed and ordered by
 * ascending priority (`id` when it is a finite number, otherwise the entry's
 * position in the list).
 *
 * @param value Value to normalize.
 * @returns The normalized metadata, or `null` when `value` is not an object.
 */
function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null {
  if (!isObject(value)) {
    return null;
  }

  // Finite numbers are floored and clamped to >= 0; anything else is rejected.
  const toNonNegativeInteger = (raw: unknown): number | null =>
    typeof raw === 'number' && Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : null;

  const profileIndex = toNonNegativeInteger(value.profileIndex ?? value.profileCurrent) ?? 0;

  // Active profile of a raw options object, if the input has that shape.
  const profiles = value.profiles;
  const profileCandidate: unknown = Array.isArray(profiles) ? profiles[profileIndex] : undefined;
  const activeProfile = isObject(profileCandidate)
    ? (profileCandidate as {
        options?: { scanning?: { length?: unknown }; dictionaries?: unknown };
      })
    : undefined;

  const scanLengthRaw = value.scanLength ?? activeProfile?.options?.scanning?.length;
  const scanLength =
    typeof scanLengthRaw === 'number' && Number.isFinite(scanLengthRaw)
      ? Math.max(1, Math.floor(scanLengthRaw))
      : DEFAULT_YOMITAN_SCAN_LENGTH;

  const dictionariesRaw = value.dictionaries ?? activeProfile?.options?.dictionaries;
  const rankedDictionaries: { name: string; priority: number }[] = [];
  if (Array.isArray(dictionariesRaw)) {
    dictionariesRaw.forEach((entry: unknown, index: number) => {
      if (typeof entry === 'string') {
        const name = entry.trim();
        if (name) {
          rankedDictionaries.push({ name, priority: index });
        }
        return;
      }
      if (!isObject(entry) || entry.enabled === false || typeof entry.name !== 'string') {
        return;
      }
      const name = entry.name.trim();
      if (!name) {
        return;
      }
      rankedDictionaries.push({ name, priority: toNonNegativeInteger(entry.id) ?? index });
    });
  }
  // Array.prototype.sort is stable, so equal priorities keep their input order.
  rankedDictionaries.sort((a, b) => a.priority - b.priority);
  const dictionaries = rankedDictionaries.map((entry) => entry.name);

  const dictionaryPriorityByName: Record<string, number> = {};
  // Own-property check: comparing a lookup with `undefined` would also see
  // members inherited from Object.prototype (e.g. a dictionary named
  // "constructor") and leave that dictionary without a priority.
  const hasPriority = (name: string): boolean =>
    Object.prototype.hasOwnProperty.call(dictionaryPriorityByName, name);

  const explicitPriorities = value.dictionaryPriorityByName;
  if (isObject(explicitPriorities)) {
    for (const [name, priorityRaw] of Object.entries(explicitPriorities)) {
      const priority = toNonNegativeInteger(priorityRaw);
      const normalizedName = name.trim();
      if (priority === null || !normalizedName) {
        continue;
      }
      dictionaryPriorityByName[normalizedName] = priority;
    }
  }

  // Dictionaries without an explicit priority fall back to their sorted position.
  dictionaries.forEach((name, index) => {
    if (!hasPriority(name)) {
      dictionaryPriorityByName[name] = index;
    }
  });

  return {
    profileIndex,
    scanLength,
    dictionaries,
    dictionaryPriorityByName,
  };
}
|
||||
|
||||
/**
 * Converts raw frequency results into `YomitanTermFrequency` entries and
 * overrides each entry's `dictionaryPriority` with the priority recorded for
 * its dictionary, when one exists.
 *
 * @param rawResult Raw entries; those rejected by `toYomitanTermFrequency` are skipped.
 * @param dictionaryPriorityByName Priority per dictionary name.
 * @returns New entry objects in input order.
 */
function normalizeFrequencyEntriesWithPriority(
  rawResult: unknown[],
  dictionaryPriorityByName: Record<string, number>,
): YomitanTermFrequency[] {
  const normalized: YomitanTermFrequency[] = [];

  for (const rawEntry of rawResult) {
    const frequency = toYomitanTermFrequency(rawEntry);
    if (!frequency) {
      continue;
    }

    // Only own properties count: a plain lookup would return inherited
    // Object.prototype members for names such as "constructor".
    const override = Object.prototype.hasOwnProperty.call(
      dictionaryPriorityByName,
      frequency.dictionary,
    )
      ? dictionaryPriorityByName[frequency.dictionary]
      : undefined;

    normalized.push({
      ...frequency,
      dictionaryPriority: override ?? frequency.dictionaryPriority,
    });
  }

  return normalized;
}
|
||||
|
||||
/**
 * Groups frequency entries by their term+reading cache key.
 *
 * Terms and readings are trimmed first; a blank or non-string reading is
 * treated as `null`. Entries keep their input order within each group.
 */
function groupFrequencyEntriesByPair(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const grouped = new Map<string, YomitanTermFrequency[]>();

  for (const entry of entries) {
    const trimmedReading = typeof entry.reading === 'string' ? entry.reading.trim() : '';
    const reading = trimmedReading.length > 0 ? trimmedReading : null;
    const key = makeTermReadingCacheKey(entry.term.trim(), reading);

    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(key, [entry]);
    }
  }

  return grouped;
}
|
||||
|
||||
/**
 * Groups frequency entries by trimmed term, ignoring readings.
 *
 * Entries whose term is blank after trimming are skipped. Entries keep their
 * input order within each group.
 */
function groupFrequencyEntriesByTerm(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const grouped = new Map<string, YomitanTermFrequency[]>();

  for (const entry of entries) {
    const term = entry.term.trim();
    if (!term) {
      continue;
    }

    const bucket = grouped.get(term);
    if (bucket) {
      bucket.push(entry);
    } else {
      grouped.set(term, [entry]);
    }
  }

  return grouped;
}
|
||||
|
||||
/**
 * Returns the Yomitan profile metadata for `parserWindow`, fetching it on the
 * first call and serving it from the per-window cache afterwards.
 *
 * The metadata is read by running a script in the parser window that sends the
 * `optionsGetFull` action to the Yomitan backend and reduces the result to
 * `{ profileIndex, scanLength, dictionaries, dictionaryPriorityByName }`.
 * Only dictionaries with `enabled === true` and a string name are included,
 * ordered by ascending `id` (or list position when `id` is not a finite number).
 *
 * @param parserWindow Window in which the script is executed.
 * @param logger Receives an error message when the request fails.
 * @returns The metadata, or `null` when the script fails or its result cannot
 *   be normalized. Failures are not cached, so a later call tries again.
 */
async function requestYomitanProfileMetadata(
  parserWindow: BrowserWindow,
  logger: LoggerLike,
): Promise<YomitanProfileMetadata | null> {
  const cached = yomitanProfileMetadataByWindow.get(parserWindow);
  if (cached) {
    return cached;
  }

  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex =
        typeof optionsFull.profileCurrent === "number" && Number.isFinite(optionsFull.profileCurrent)
          ? Math.max(0, Math.floor(optionsFull.profileCurrent))
          : 0;
      const scanLengthRaw = optionsFull.profiles?.[profileIndex]?.options?.scanning?.length;
      const scanLength =
        typeof scanLengthRaw === "number" && Number.isFinite(scanLengthRaw)
          ? Math.max(1, Math.floor(scanLengthRaw))
          : ${DEFAULT_YOMITAN_SCAN_LENGTH};
      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
      const dictionaryEntries = Array.isArray(dictionariesRaw)
        ? dictionariesRaw
            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
            .map((entry, index) => ({
              name: entry.name,
              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.max(0, Math.floor(entry.id)) : index
            }))
            .sort((a, b) => a.id - b.id)
        : [];
      const dictionaries = dictionaryEntries.map((entry) => entry.name);
      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
        acc[entry.name] = index;
        return acc;
      }, {});

      return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
    })();
  `;

  try {
    // The second argument is Electron's `userGesture` flag.
    const rawMetadata: unknown = await parserWindow.webContents.executeJavaScript(script, true);
    const metadata = toYomitanProfileMetadata(rawMetadata);
    if (!metadata) {
      return null;
    }

    yomitanProfileMetadataByWindow.set(parserWindow, metadata);
    return metadata;
  } catch (err) {
    // A rejection is not guaranteed to be an Error instance; avoid logging
    // `undefined` when it is not.
    const message = err instanceof Error ? err.message : String(err);
    logger.error('Yomitan parser metadata request failed:', message);
    return null;
  }
}
|
||||
|
||||
async function ensureYomitanParserWindow(
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
logger: LoggerLike,
|
||||
@@ -179,6 +426,7 @@ async function ensureYomitanParserWindow(
|
||||
);
|
||||
|
||||
parserWindow.on('closed', () => {
|
||||
clearWindowCaches(parserWindow);
|
||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||
deps.setYomitanParserWindow(null);
|
||||
deps.setYomitanParserReadyPromise(null);
|
||||
@@ -198,6 +446,7 @@ async function ensureYomitanParserWindow(
|
||||
if (!parserWindow.isDestroyed()) {
|
||||
parserWindow.destroy();
|
||||
}
|
||||
clearWindowCaches(parserWindow);
|
||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||
deps.setYomitanParserWindow(null);
|
||||
deps.setYomitanParserReadyPromise(null);
|
||||
@@ -229,7 +478,40 @@ export async function requestYomitanParseResults(
|
||||
return null;
|
||||
}
|
||||
|
||||
const script = `
|
||||
const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
|
||||
const script =
|
||||
metadata !== null
|
||||
? `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
new Promise((resolve, reject) => {
|
||||
chrome.runtime.sendMessage({ action, params }, (response) => {
|
||||
if (chrome.runtime.lastError) {
|
||||
reject(new Error(chrome.runtime.lastError.message));
|
||||
return;
|
||||
}
|
||||
if (!response || typeof response !== "object") {
|
||||
reject(new Error("Invalid response from Yomitan backend"));
|
||||
return;
|
||||
}
|
||||
if (response.error) {
|
||||
reject(new Error(response.error.message || "Yomitan backend error"));
|
||||
return;
|
||||
}
|
||||
resolve(response.result);
|
||||
});
|
||||
});
|
||||
|
||||
return await invoke("parseText", {
|
||||
text: ${JSON.stringify(text)},
|
||||
optionsContext: { index: ${metadata.profileIndex} },
|
||||
scanLength: ${metadata.scanLength},
|
||||
useInternalParser: true,
|
||||
useMecabParser: true
|
||||
});
|
||||
})();
|
||||
`
|
||||
: `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
new Promise((resolve, reject) => {
|
||||
@@ -253,7 +535,7 @@ export async function requestYomitanParseResults(
|
||||
const optionsFull = await invoke("optionsGetFull", undefined);
|
||||
const profileIndex = optionsFull.profileCurrent;
|
||||
const scanLength =
|
||||
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? 40;
|
||||
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? ${DEFAULT_YOMITAN_SCAN_LENGTH};
|
||||
|
||||
return await invoke("parseText", {
|
||||
text: ${JSON.stringify(text)},
|
||||
@@ -291,6 +573,88 @@ export async function requestYomitanTermFrequencies(
|
||||
return [];
|
||||
}
|
||||
|
||||
const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
|
||||
const frequencyCache = getWindowFrequencyCache(parserWindow);
|
||||
const missingTermReadingList: YomitanTermReadingPair[] = [];
|
||||
|
||||
const buildCachedResult = (): YomitanTermFrequency[] => {
|
||||
const result: YomitanTermFrequency[] = [];
|
||||
for (const pair of normalizedTermReadingList) {
|
||||
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||
const cached = frequencyCache.get(key);
|
||||
if (cached && cached.length > 0) {
|
||||
result.push(...cached);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
for (const pair of normalizedTermReadingList) {
|
||||
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||
if (!frequencyCache.has(key)) {
|
||||
missingTermReadingList.push(pair);
|
||||
}
|
||||
}
|
||||
|
||||
if (missingTermReadingList.length === 0) {
|
||||
return buildCachedResult();
|
||||
}
|
||||
|
||||
if (metadata && metadata.dictionaries.length > 0) {
|
||||
const script = `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
new Promise((resolve, reject) => {
|
||||
chrome.runtime.sendMessage({ action, params }, (response) => {
|
||||
if (chrome.runtime.lastError) {
|
||||
reject(new Error(chrome.runtime.lastError.message));
|
||||
return;
|
||||
}
|
||||
if (!response || typeof response !== "object") {
|
||||
reject(new Error("Invalid response from Yomitan backend"));
|
||||
return;
|
||||
}
|
||||
if (response.error) {
|
||||
reject(new Error(response.error.message || "Yomitan backend error"));
|
||||
return;
|
||||
}
|
||||
resolve(response.result);
|
||||
});
|
||||
});
|
||||
|
||||
return await invoke("getTermFrequencies", {
|
||||
termReadingList: ${JSON.stringify(missingTermReadingList)},
|
||||
dictionaries: ${JSON.stringify(metadata.dictionaries)}
|
||||
});
|
||||
})();
|
||||
`;
|
||||
|
||||
try {
|
||||
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
||||
const fetchedEntries = Array.isArray(rawResult)
|
||||
? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName)
|
||||
: [];
|
||||
const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
|
||||
const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);
|
||||
const missingTerms = new Set(missingTermReadingList.map((pair) => pair.term));
|
||||
|
||||
for (const pair of missingTermReadingList) {
|
||||
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||
const exactEntries = groupedByPair.get(key);
|
||||
const termEntries = groupedByTerm.get(pair.term) ?? [];
|
||||
frequencyCache.set(key, exactEntries ?? termEntries);
|
||||
}
|
||||
|
||||
const cachedResult = buildCachedResult();
|
||||
const unmatchedEntries = fetchedEntries.filter((entry) => !missingTerms.has(entry.term.trim()));
|
||||
return [...cachedResult, ...unmatchedEntries];
|
||||
} catch (err) {
|
||||
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
||||
}
|
||||
|
||||
return buildCachedResult();
|
||||
}
|
||||
|
||||
const script = `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
@@ -335,7 +699,7 @@ export async function requestYomitanTermFrequencies(
|
||||
}
|
||||
|
||||
const rawFrequencies = await invoke("getTermFrequencies", {
|
||||
termReadingList: ${JSON.stringify(normalizedTermReadingList)},
|
||||
termReadingList: ${JSON.stringify(missingTermReadingList)},
|
||||
dictionaries
|
||||
});
|
||||
|
||||
@@ -357,16 +721,26 @@ export async function requestYomitanTermFrequencies(
|
||||
|
||||
try {
|
||||
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
||||
if (!Array.isArray(rawResult)) {
|
||||
return [];
|
||||
const fetchedEntries = Array.isArray(rawResult)
|
||||
? rawResult
|
||||
.map((entry) => toYomitanTermFrequency(entry))
|
||||
.filter((entry): entry is YomitanTermFrequency => entry !== null)
|
||||
: [];
|
||||
const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
|
||||
const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);
|
||||
const missingTerms = new Set(missingTermReadingList.map((pair) => pair.term));
|
||||
for (const pair of missingTermReadingList) {
|
||||
const key = makeTermReadingCacheKey(pair.term, pair.reading);
|
||||
const exactEntries = groupedByPair.get(key);
|
||||
const termEntries = groupedByTerm.get(pair.term) ?? [];
|
||||
frequencyCache.set(key, exactEntries ?? termEntries);
|
||||
}
|
||||
|
||||
return rawResult
|
||||
.map((entry) => toYomitanTermFrequency(entry))
|
||||
.filter((entry): entry is YomitanTermFrequency => entry !== null);
|
||||
const cachedResult = buildCachedResult();
|
||||
const unmatchedEntries = fetchedEntries.filter((entry) => !missingTerms.has(entry.term.trim()));
|
||||
return [...cachedResult, ...unmatchedEntries];
|
||||
} catch (err) {
|
||||
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
||||
return [];
|
||||
return buildCachedResult();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user