mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-07 03:22:17 -08:00
Overlay 2.0 (#12)
This commit is contained in:
@@ -51,15 +51,20 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
|
||||
});
|
||||
|
||||
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
|
||||
const lookupCalls: string[] = [];
|
||||
const tokens = [
|
||||
makeToken({ surface: 'は', headword: 'は', partOfSpeech: PartOfSpeech.particle }),
|
||||
makeToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
frequencyRank: 3,
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'です',
|
||||
headword: 'です',
|
||||
partOfSpeech: PartOfSpeech.bound_auxiliary,
|
||||
startPos: 1,
|
||||
endPos: 3,
|
||||
frequencyRank: 4,
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'の',
|
||||
@@ -68,6 +73,7 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
|
||||
pos1: '助詞',
|
||||
startPos: 3,
|
||||
endPos: 4,
|
||||
frequencyRank: 5,
|
||||
}),
|
||||
makeToken({
|
||||
surface: '猫',
|
||||
@@ -75,24 +81,36 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
startPos: 4,
|
||||
endPos: 5,
|
||||
frequencyRank: 11,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
getFrequencyRank: (text) => {
|
||||
lookupCalls.push(text);
|
||||
return text === '猫' ? 11 : 999;
|
||||
},
|
||||
}),
|
||||
);
|
||||
const result = annotateTokens(tokens, makeDeps());
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[1]?.frequencyRank, undefined);
|
||||
assert.equal(result[2]?.frequencyRank, undefined);
|
||||
assert.equal(result[3]?.frequencyRank, 11);
|
||||
assert.deepEqual(lookupCalls, ['猫']);
|
||||
});
|
||||
|
||||
test('annotateTokens preserves existing frequency rank when frequency is enabled', () => {
|
||||
const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 })];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps());
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 42);
|
||||
});
|
||||
|
||||
test('annotateTokens drops invalid frequency rank values', () => {
|
||||
const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: Number.NaN })];
|
||||
const result = annotateTokens(tokens, makeDeps());
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears frequency rank when frequency is disabled', () => {
|
||||
const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 })];
|
||||
const result = annotateTokens(tokens, makeDeps(), { frequencyEnabled: false });
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => {
|
||||
@@ -157,3 +175,206 @@ test('annotateTokens N+1 handoff marks expected target when threshold is satisfi
|
||||
assert.equal(result[1]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[2]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens N+1 minimum sentence words counts only eligible word tokens', () => {
|
||||
const tokens = [
|
||||
makeToken({ surface: '猫', headword: '猫', startPos: 0, endPos: 1 }),
|
||||
makeToken({
|
||||
surface: 'が',
|
||||
headword: 'が',
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
startPos: 1,
|
||||
endPos: 2,
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'です',
|
||||
headword: 'です',
|
||||
partOfSpeech: PartOfSpeech.bound_auxiliary,
|
||||
pos1: '助動詞',
|
||||
startPos: 2,
|
||||
endPos: 4,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'が' || text === 'です',
|
||||
}),
|
||||
{ minSentenceWordsForNPlusOne: 3 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[1]?.isKnown, true);
|
||||
assert.equal(result[2]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens applies configured pos1 exclusions to both frequency and N+1', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '猫',
|
||||
headword: '猫',
|
||||
pos1: '名詞',
|
||||
frequencyRank: 21,
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
}),
|
||||
makeToken({
|
||||
surface: '走る',
|
||||
headword: '走る',
|
||||
pos1: '動詞',
|
||||
partOfSpeech: PartOfSpeech.verb,
|
||||
startPos: 1,
|
||||
endPos: 3,
|
||||
frequencyRank: 22,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === '走る',
|
||||
}),
|
||||
{
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
pos1Exclusions: new Set(['名詞']),
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[1]?.frequencyRank, 22);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[1]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens allows previously default-excluded pos1 when removed from effective set', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '助詞',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
frequencyRank: 8,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
pos1Exclusions: new Set(),
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 8);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes default non-independent pos2 from frequency and N+1', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'になれば',
|
||||
headword: 'なる',
|
||||
partOfSpeech: PartOfSpeech.verb,
|
||||
pos1: '動詞',
|
||||
pos2: '非自立',
|
||||
startPos: 0,
|
||||
endPos: 4,
|
||||
frequencyRank: 7,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes likely kana SFX tokens from frequency when POS tags are missing', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'ぐわっ',
|
||||
reading: 'ぐわっ',
|
||||
headword: 'ぐわっ',
|
||||
pos1: '',
|
||||
pos2: '',
|
||||
frequencyRank: 12,
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens allows previously default-excluded pos2 when removed from effective set', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'になれば',
|
||||
headword: 'なる',
|
||||
partOfSpeech: PartOfSpeech.verb,
|
||||
pos1: '動詞',
|
||||
pos2: '非自立',
|
||||
startPos: 0,
|
||||
endPos: 4,
|
||||
frequencyRank: 9,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
pos2Exclusions: new Set(),
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 9);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
});
|
||||
|
||||
test('annotateTokens keeps composite tokens when any component pos tag is content-bearing', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'になれば',
|
||||
headword: 'なる',
|
||||
pos1: '助詞|動詞',
|
||||
pos2: '格助詞|自立|接続助詞',
|
||||
startPos: 0,
|
||||
endPos: 4,
|
||||
frequencyRank: 5,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 5);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes composite tokens when all component pos tags are excluded', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'けど',
|
||||
headword: 'けど',
|
||||
pos1: '助詞|助詞',
|
||||
pos2: '接続助詞|終助詞',
|
||||
startPos: 0,
|
||||
endPos: 2,
|
||||
frequencyRank: 6,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
@@ -1,39 +1,38 @@
|
||||
import { markNPlusOneTargets } from '../../../token-merger';
|
||||
import {
|
||||
FrequencyDictionaryLookup,
|
||||
JlptLevel,
|
||||
MergedToken,
|
||||
NPlusOneMatchMode,
|
||||
PartOfSpeech,
|
||||
} from '../../../types';
|
||||
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
|
||||
resolveAnnotationPos1ExclusionSet,
|
||||
} from '../../../token-pos1-exclusions';
|
||||
import {
|
||||
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
|
||||
resolveAnnotationPos2ExclusionSet,
|
||||
} from '../../../token-pos2-exclusions';
|
||||
import { JlptLevel, MergedToken, NPlusOneMatchMode, PartOfSpeech } from '../../../types';
|
||||
import { shouldIgnoreJlptByTerm, shouldIgnoreJlptForMecabPos1 } from '../jlpt-token-filter';
|
||||
|
||||
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
|
||||
const FREQUENCY_RANK_LOOKUP_CACHE_LIMIT = 2048;
|
||||
|
||||
const jlptLevelLookupCaches = new WeakMap<
|
||||
(text: string) => JlptLevel | null,
|
||||
Map<string, JlptLevel | null>
|
||||
>();
|
||||
const frequencyRankLookupCaches = new WeakMap<
|
||||
FrequencyDictionaryLookup,
|
||||
Map<string, number | null>
|
||||
>();
|
||||
|
||||
export interface AnnotationStageDeps {
|
||||
isKnownWord: (text: string) => boolean;
|
||||
knownWordMatchMode: NPlusOneMatchMode;
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
}
|
||||
|
||||
export interface AnnotationStageOptions {
|
||||
nPlusOneEnabled?: boolean;
|
||||
jlptEnabled?: boolean;
|
||||
frequencyEnabled?: boolean;
|
||||
minSentenceWordsForNPlusOne?: number;
|
||||
pos1Exclusions?: ReadonlySet<string>;
|
||||
pos2Exclusions?: ReadonlySet<string>;
|
||||
}
|
||||
|
||||
function resolveKnownWordText(
|
||||
@@ -59,106 +58,94 @@ function applyKnownWordMarking(
|
||||
});
|
||||
}
|
||||
|
||||
function normalizeFrequencyLookupText(rawText: string): string {
|
||||
return rawText.trim().toLowerCase();
|
||||
function normalizePos1Tag(pos1: string | undefined): string {
|
||||
return typeof pos1 === 'string' ? pos1.trim() : '';
|
||||
}
|
||||
|
||||
function getCachedFrequencyRank(
|
||||
lookupText: string,
|
||||
getFrequencyRank: FrequencyDictionaryLookup,
|
||||
): number | null {
|
||||
const normalizedText = normalizeFrequencyLookupText(lookupText);
|
||||
if (!normalizedText) {
|
||||
return null;
|
||||
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
|
||||
if (!normalizedTag) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let cache = frequencyRankLookupCaches.get(getFrequencyRank);
|
||||
if (!cache) {
|
||||
cache = new Map<string, number | null>();
|
||||
frequencyRankLookupCaches.set(getFrequencyRank, cache);
|
||||
const parts = normalizedTag
|
||||
.split('|')
|
||||
.map((part) => part.trim())
|
||||
.filter((part) => part.length > 0);
|
||||
if (parts.length === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (cache.has(normalizedText)) {
|
||||
return cache.get(normalizedText) ?? null;
|
||||
}
|
||||
|
||||
let rank: number | null;
|
||||
try {
|
||||
rank = getFrequencyRank(normalizedText);
|
||||
} catch {
|
||||
rank = null;
|
||||
}
|
||||
if (rank !== null) {
|
||||
if (!Number.isFinite(rank) || rank <= 0) {
|
||||
rank = null;
|
||||
}
|
||||
}
|
||||
|
||||
cache.set(normalizedText, rank);
|
||||
while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {
|
||||
const firstKey = cache.keys().next().value;
|
||||
if (firstKey !== undefined) {
|
||||
cache.delete(firstKey);
|
||||
}
|
||||
}
|
||||
|
||||
return rank;
|
||||
// Composite tags like "助詞|名詞" stay eligible unless every component is excluded.
|
||||
return parts.every((part) => exclusions.has(part));
|
||||
}
|
||||
|
||||
function resolveFrequencyLookupText(token: MergedToken): string {
|
||||
if (token.headword && token.headword.length > 0) {
|
||||
return token.headword;
|
||||
function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
|
||||
if (options.pos1Exclusions) {
|
||||
return options.pos1Exclusions;
|
||||
}
|
||||
if (token.reading && token.reading.length > 0) {
|
||||
return token.reading;
|
||||
}
|
||||
return token.surface;
|
||||
|
||||
return resolveAnnotationPos1ExclusionSet(DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG);
|
||||
}
|
||||
|
||||
function getFrequencyLookupTextCandidates(token: MergedToken): string[] {
|
||||
const lookupText = resolveFrequencyLookupText(token).trim();
|
||||
return lookupText ? [lookupText] : [];
|
||||
function resolvePos2Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
|
||||
if (options.pos2Exclusions) {
|
||||
return options.pos2Exclusions;
|
||||
}
|
||||
|
||||
return resolveAnnotationPos2ExclusionSet(DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG);
|
||||
}
|
||||
|
||||
function isFrequencyExcludedByPos(token: MergedToken): boolean {
|
||||
if (
|
||||
token.partOfSpeech === PartOfSpeech.particle ||
|
||||
token.partOfSpeech === PartOfSpeech.bound_auxiliary
|
||||
) {
|
||||
function normalizePos2Tag(pos2: string | undefined): string {
|
||||
return typeof pos2 === 'string' ? pos2.trim() : '';
|
||||
}
|
||||
|
||||
function isFrequencyExcludedByPos(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): boolean {
|
||||
const normalizedPos1 = normalizePos1Tag(token.pos1);
|
||||
const hasPos1 = normalizedPos1.length > 0;
|
||||
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return token.pos1 === '助詞' || token.pos1 === '助動詞';
|
||||
const normalizedPos2 = normalizePos2Tag(token.pos2);
|
||||
const hasPos2 = normalizedPos2.length > 0;
|
||||
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (hasPos1 || hasPos2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isLikelyFrequencyNoiseToken(token)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return (
|
||||
token.partOfSpeech === PartOfSpeech.particle ||
|
||||
token.partOfSpeech === PartOfSpeech.bound_auxiliary
|
||||
);
|
||||
}
|
||||
|
||||
function applyFrequencyMarking(
|
||||
tokens: MergedToken[],
|
||||
getFrequencyRank: FrequencyDictionaryLookup,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): MergedToken[] {
|
||||
return tokens.map((token) => {
|
||||
if (isFrequencyExcludedByPos(token)) {
|
||||
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
|
||||
return { ...token, frequencyRank: undefined };
|
||||
}
|
||||
|
||||
const lookupTexts = getFrequencyLookupTextCandidates(token);
|
||||
if (lookupTexts.length === 0) {
|
||||
return { ...token, frequencyRank: undefined };
|
||||
}
|
||||
|
||||
let bestRank: number | null = null;
|
||||
for (const lookupText of lookupTexts) {
|
||||
const rank = getCachedFrequencyRank(lookupText, getFrequencyRank);
|
||||
if (rank === null) {
|
||||
continue;
|
||||
}
|
||||
if (bestRank === null || rank < bestRank) {
|
||||
bestRank = rank;
|
||||
}
|
||||
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
|
||||
const rank = Math.max(1, Math.floor(token.frequencyRank));
|
||||
return { ...token, frequencyRank: rank };
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
frequencyRank: bestRank ?? undefined,
|
||||
frequencyRank: undefined,
|
||||
};
|
||||
});
|
||||
}
|
||||
@@ -282,6 +269,98 @@ function isRepeatedKanaSfx(text: string): boolean {
|
||||
return topCount >= Math.ceil(chars.length / 2);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `text` looks like a short kana interjection ending in a
 * small "っ" (for example "ぐわっ").
 *
 * The input is first passed through `normalizeJlptTextForExclusion`. The
 * normalized text must be 2 to 4 code points long, every code point must be
 * accepted by `isKanaChar`, and the final one must be "っ".
 *
 * @param text Raw candidate text (surface or headword).
 * @returns `true` when all of the conditions above hold, otherwise `false`.
 */
function isTrailingSmallTsuKanaSfx(text: string): boolean {
  const normalizedText = normalizeJlptTextForExclusion(text);
  if (!normalizedText) {
    return false;
  }

  // Spread into code points so the length check is not thrown off by
  // surrogate pairs.
  const codePoints = [...normalizedText];
  const hasAllowedLength = codePoints.length >= 2 && codePoints.length <= 4;

  return (
    hasAllowedLength &&
    codePoints.every(isKanaChar) &&
    codePoints[codePoints.length - 1] === 'っ'
  );
}
|
||||
|
||||
/**
 * Reports whether `text` is an all-kana string whose second half repeats its
 * first half exactly (an "ABAB"-shaped string).
 *
 * The input is first passed through `normalizeJlptTextForExclusion`. The
 * normalized text must have an even length of at least 4 code points and
 * every code point must be accepted by `isKanaChar`.
 *
 * @param text Raw candidate text (surface or headword).
 * @returns `true` when the two halves of the normalized text are identical.
 */
function isReduplicatedKanaSfx(text: string): boolean {
  const normalizedText = normalizeJlptTextForExclusion(text);
  if (!normalizedText) {
    return false;
  }

  const codePoints = [...normalizedText];
  const total = codePoints.length;
  // Needs at least two characters per half, and the halves must be equal in size.
  if (total < 4 || total % 2 !== 0) {
    return false;
  }
  if (!codePoints.every(isKanaChar)) {
    return false;
  }

  const midpoint = total / 2;
  for (let offset = 0; offset < midpoint; offset += 1) {
    if (codePoints[offset] !== codePoints[offset + midpoint]) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Reports whether `text` is an all-kana string in which some character is
 * immediately followed by an identical character.
 *
 * The input is first passed through `normalizeJlptTextForExclusion`; every
 * code point of the normalized text must be accepted by `isKanaChar`.
 *
 * @param text Raw candidate text (surface or headword).
 * @returns `true` when at least one adjacent pair of code points is equal.
 */
function hasAdjacentKanaRepeat(text: string): boolean {
  const normalizedText = normalizeJlptTextForExclusion(text);
  if (!normalizedText) {
    return false;
  }

  const codePoints = [...normalizedText];
  if (!codePoints.every(isKanaChar)) {
    return false;
  }

  // Compare each code point with its predecessor; index 0 has none.
  return codePoints.some(
    (current, index) => index > 0 && current === codePoints[index - 1],
  );
}
|
||||
|
||||
/**
 * Heuristically decides whether a token is "noise" that should not receive a
 * frequency rank.
 *
 * The token's headword and surface are checked in that order. A candidate
 * counts as noise when either its trimmed or its normalized form is rejected
 * by `shouldIgnoreJlptByTerm`, or matches one of the kana patterns
 * (`hasAdjacentKanaRepeat`, `isReduplicatedKanaSfx`,
 * `isTrailingSmallTsuKanaSfx`). Candidates that are not strings, are empty,
 * or become empty after trimming or normalization are skipped.
 *
 * @param token Token whose headword and surface are inspected.
 * @returns `true` as soon as any candidate matches, otherwise `false`.
 */
function isLikelyFrequencyNoiseToken(token: MergedToken): boolean {
  const looksLikeNoise = (rawCandidate: unknown): boolean => {
    if (typeof rawCandidate !== 'string' || rawCandidate.length === 0) {
      return false;
    }

    const trimmed = rawCandidate.trim();
    if (!trimmed) {
      return false;
    }

    const normalized = normalizeJlptTextForExclusion(trimmed);
    if (!normalized) {
      return false;
    }

    if (shouldIgnoreJlptByTerm(trimmed) || shouldIgnoreJlptByTerm(normalized)) {
      return true;
    }

    // Each pattern is tried on the trimmed text first, then on the normalized text.
    return (
      hasAdjacentKanaRepeat(trimmed) ||
      hasAdjacentKanaRepeat(normalized) ||
      isReduplicatedKanaSfx(trimmed) ||
      isReduplicatedKanaSfx(normalized) ||
      isTrailingSmallTsuKanaSfx(trimmed) ||
      isTrailingSmallTsuKanaSfx(normalized)
    );
  };

  return [token.headword, token.surface].some((candidate) => looksLikeNoise(candidate));
}
|
||||
|
||||
function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) {
|
||||
return false;
|
||||
@@ -340,20 +419,24 @@ export function annotateTokens(
|
||||
deps: AnnotationStageDeps,
|
||||
options: AnnotationStageOptions = {},
|
||||
): MergedToken[] {
|
||||
const knownMarkedTokens = applyKnownWordMarking(
|
||||
tokens,
|
||||
deps.isKnownWord,
|
||||
deps.knownWordMatchMode,
|
||||
);
|
||||
const pos1Exclusions = resolvePos1Exclusions(options);
|
||||
const pos2Exclusions = resolvePos2Exclusions(options);
|
||||
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
|
||||
const knownMarkedTokens = nPlusOneEnabled
|
||||
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: tokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
|
||||
const frequencyEnabled = options.frequencyEnabled !== false;
|
||||
const frequencyMarkedTokens =
|
||||
frequencyEnabled && deps.getFrequencyRank
|
||||
? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank)
|
||||
: knownMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
frequencyRank: undefined,
|
||||
}));
|
||||
const frequencyMarkedTokens = frequencyEnabled
|
||||
? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
|
||||
: knownMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
frequencyRank: undefined,
|
||||
}));
|
||||
|
||||
const jlptEnabled = options.jlptEnabled !== false;
|
||||
const jlptMarkedTokens = jlptEnabled
|
||||
@@ -363,6 +446,14 @@ export function annotateTokens(
|
||||
jlptLevel: undefined,
|
||||
}));
|
||||
|
||||
if (!nPlusOneEnabled) {
|
||||
return jlptMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
}
|
||||
|
||||
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
|
||||
const sanitizedMinSentenceWordsForNPlusOne =
|
||||
minSentenceWordsForNPlusOne !== undefined &&
|
||||
@@ -371,5 +462,10 @@ export function annotateTokens(
|
||||
? minSentenceWordsForNPlusOne
|
||||
: 3;
|
||||
|
||||
return markNPlusOneTargets(jlptMarkedTokens, sanitizedMinSentenceWordsForNPlusOne);
|
||||
return markNPlusOneTargets(
|
||||
jlptMarkedTokens,
|
||||
sanitizedMinSentenceWordsForNPlusOne,
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -22,12 +22,13 @@ function makeToken(overrides: Partial<MergedToken>): MergedToken {
|
||||
test('enrichTokensWithMecabPos1 picks pos1 by best overlap when no surface match exists', () => {
|
||||
const tokens = [makeToken({ surface: 'grouped', startPos: 2, endPos: 7 })];
|
||||
const mecabTokens = [
|
||||
makeToken({ surface: 'left', startPos: 0, endPos: 4, pos1: 'A' }),
|
||||
makeToken({ surface: 'right', startPos: 2, endPos: 6, pos1: 'B' }),
|
||||
makeToken({ surface: 'left', startPos: 0, endPos: 4, pos1: 'A', pos2: 'L2' }),
|
||||
makeToken({ surface: 'right', startPos: 2, endPos: 6, pos1: 'B', pos2: '非自立' }),
|
||||
];
|
||||
|
||||
const enriched = enrichTokensWithMecabPos1(tokens, mecabTokens);
|
||||
assert.equal(enriched[0]?.pos1, 'B');
|
||||
assert.equal(enriched[0]?.pos1, 'A|B');
|
||||
assert.equal(enriched[0]?.pos2, 'L2|非自立');
|
||||
});
|
||||
|
||||
test('enrichTokensWithMecabPos1 fills missing pos1 using surface-sequence fallback', () => {
|
||||
|
||||
@@ -1,13 +1,45 @@
|
||||
import { MergedToken } from '../../../types';
|
||||
|
||||
function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): string | undefined {
|
||||
if (mecabTokens.length === 0) {
|
||||
type MecabPosMetadata = {
|
||||
pos1: string;
|
||||
pos2?: string;
|
||||
pos3?: string;
|
||||
};
|
||||
|
||||
/**
 * Collapses a list of tag strings into a single `|`-separated composite tag.
 *
 * Each value is trimmed; `undefined`, empty and whitespace-only values are
 * dropped, and duplicates are removed while keeping first-seen order.
 *
 * @param values Tags to combine; entries may be `undefined`.
 * @returns The distinct tags joined with `|` (a single tag is returned as-is),
 *   or `undefined` when no usable tag remains.
 */
function joinUniqueTags(values: Array<string | undefined>): string | undefined {
  // A Set iterates in insertion order, so it dedupes in O(n) while preserving
  // the order in which tags were first encountered.
  const unique = new Set<string>();
  for (const value of values) {
    const trimmed = value?.trim();
    if (trimmed) {
      unique.add(trimmed);
    }
  }

  if (unique.size === 0) {
    return undefined;
  }

  // join() on a single element yields that element unchanged, so no special
  // case is needed for one tag.
  return [...unique].join('|');
}
|
||||
|
||||
function pickClosestMecabPosMetadata(
|
||||
token: MergedToken,
|
||||
mecabTokens: MergedToken[],
|
||||
): MecabPosMetadata | null {
|
||||
if (mecabTokens.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const tokenStart = token.startPos ?? 0;
|
||||
const tokenEnd = token.endPos ?? tokenStart + token.surface.length;
|
||||
let bestSurfaceMatchPos1: string | undefined;
|
||||
let bestSurfaceMatchToken: MergedToken | null = null;
|
||||
let bestSurfaceMatchDistance = Number.MAX_SAFE_INTEGER;
|
||||
let bestSurfaceMatchEndDistance = Number.MAX_SAFE_INTEGER;
|
||||
|
||||
@@ -31,19 +63,24 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
|
||||
) {
|
||||
bestSurfaceMatchDistance = startDistance;
|
||||
bestSurfaceMatchEndDistance = endDistance;
|
||||
bestSurfaceMatchPos1 = mecabToken.pos1;
|
||||
bestSurfaceMatchToken = mecabToken;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestSurfaceMatchPos1) {
|
||||
return bestSurfaceMatchPos1;
|
||||
if (bestSurfaceMatchToken) {
|
||||
return {
|
||||
pos1: bestSurfaceMatchToken.pos1 as string,
|
||||
pos2: bestSurfaceMatchToken.pos2,
|
||||
pos3: bestSurfaceMatchToken.pos3,
|
||||
};
|
||||
}
|
||||
|
||||
let bestPos1: string | undefined;
|
||||
let bestToken: MergedToken | null = null;
|
||||
let bestOverlap = 0;
|
||||
let bestSpan = 0;
|
||||
let bestStartDistance = Number.MAX_SAFE_INTEGER;
|
||||
let bestStart = Number.MAX_SAFE_INTEGER;
|
||||
const overlappingTokens: MergedToken[] = [];
|
||||
|
||||
for (const mecabToken of mecabTokens) {
|
||||
if (!mecabToken.pos1) {
|
||||
@@ -58,6 +95,7 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
|
||||
if (overlap === 0) {
|
||||
continue;
|
||||
}
|
||||
overlappingTokens.push(mecabToken);
|
||||
|
||||
const span = mecabEnd - mecabStart;
|
||||
if (
|
||||
@@ -71,11 +109,23 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
|
||||
bestSpan = span;
|
||||
bestStartDistance = Math.abs(mecabStart - tokenStart);
|
||||
bestStart = mecabStart;
|
||||
bestPos1 = mecabToken.pos1;
|
||||
bestToken = mecabToken;
|
||||
}
|
||||
}
|
||||
|
||||
return bestOverlap > 0 ? bestPos1 : undefined;
|
||||
if (bestOverlap === 0 || !bestToken) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const overlapPos1 = joinUniqueTags(overlappingTokens.map((token) => token.pos1));
|
||||
const overlapPos2 = joinUniqueTags(overlappingTokens.map((token) => token.pos2));
|
||||
const overlapPos3 = joinUniqueTags(overlappingTokens.map((token) => token.pos3));
|
||||
|
||||
return {
|
||||
pos1: overlapPos1 ?? (bestToken.pos1 as string),
|
||||
pos2: overlapPos2 ?? bestToken.pos2,
|
||||
pos3: overlapPos3 ?? bestToken.pos3,
|
||||
};
|
||||
}
|
||||
|
||||
function fillMissingPos1BySurfaceSequence(
|
||||
@@ -101,7 +151,7 @@ function fillMissingPos1BySurfaceSequence(
|
||||
return token;
|
||||
}
|
||||
|
||||
let best: { pos1: string; index: number } | null = null;
|
||||
let best: { token: MergedToken; index: number } | null = null;
|
||||
for (const candidate of indexedMecabTokens) {
|
||||
if (candidate.token.surface !== surface) {
|
||||
continue;
|
||||
@@ -109,7 +159,7 @@ function fillMissingPos1BySurfaceSequence(
|
||||
if (candidate.index < cursor) {
|
||||
continue;
|
||||
}
|
||||
best = { pos1: candidate.token.pos1 as string, index: candidate.index };
|
||||
best = { token: candidate.token, index: candidate.index };
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -118,7 +168,7 @@ function fillMissingPos1BySurfaceSequence(
|
||||
if (candidate.token.surface !== surface) {
|
||||
continue;
|
||||
}
|
||||
best = { pos1: candidate.token.pos1 as string, index: candidate.index };
|
||||
best = { token: candidate.token, index: candidate.index };
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -130,7 +180,9 @@ function fillMissingPos1BySurfaceSequence(
|
||||
cursor = best.index + 1;
|
||||
return {
|
||||
...token,
|
||||
pos1: best.pos1,
|
||||
pos1: best.token.pos1,
|
||||
pos2: best.token.pos2,
|
||||
pos3: best.token.pos3,
|
||||
};
|
||||
});
|
||||
}
|
||||
@@ -152,14 +204,16 @@ export function enrichTokensWithMecabPos1(
|
||||
return token;
|
||||
}
|
||||
|
||||
const pos1 = pickClosestMecabPos1(token, mecabTokens);
|
||||
if (!pos1) {
|
||||
const metadata = pickClosestMecabPosMetadata(token, mecabTokens);
|
||||
if (!metadata) {
|
||||
return token;
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
pos1,
|
||||
pos1: metadata.pos1,
|
||||
pos2: metadata.pos2,
|
||||
pos3: metadata.pos3,
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
149
src/core/services/tokenizer/parser-enrichment-worker-runtime.ts
Normal file
149
src/core/services/tokenizer/parser-enrichment-worker-runtime.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
import type { MergedToken } from '../../../types';
|
||||
import { createLogger } from '../../../logger';
|
||||
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
|
||||
|
||||
const logger = createLogger('main:tokenizer');
|
||||
const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER';
|
||||
|
||||
interface WorkerRequest {
|
||||
id: number;
|
||||
tokens: MergedToken[];
|
||||
mecabTokens: MergedToken[] | null;
|
||||
}
|
||||
|
||||
interface WorkerResponse {
|
||||
id?: unknown;
|
||||
result?: unknown;
|
||||
error?: unknown;
|
||||
}
|
||||
|
||||
type PendingRequest = {
|
||||
resolve: (value: MergedToken[]) => void;
|
||||
reject: (reason?: unknown) => void;
|
||||
};
|
||||
|
||||
class ParserEnrichmentWorkerRuntime {
|
||||
private worker: import('node:worker_threads').Worker | null = null;
|
||||
private nextRequestId = 1;
|
||||
private pending = new Map<number, PendingRequest>();
|
||||
private initAttempted = false;
|
||||
|
||||
async enrichTokens(
|
||||
tokens: MergedToken[],
|
||||
mecabTokens: MergedToken[] | null,
|
||||
): Promise<MergedToken[]> {
|
||||
const worker = await this.getWorker();
|
||||
if (!worker) {
|
||||
return enrichTokensWithMecabPos1(tokens, mecabTokens);
|
||||
}
|
||||
|
||||
return new Promise<MergedToken[]>((resolve, reject) => {
|
||||
const id = this.nextRequestId++;
|
||||
this.pending.set(id, { resolve, reject });
|
||||
const request: WorkerRequest = { id, tokens, mecabTokens };
|
||||
worker.postMessage(request);
|
||||
});
|
||||
}
|
||||
|
||||
private async getWorker(): Promise<import('node:worker_threads').Worker | null> {
|
||||
if (process.env[DISABLE_WORKER_ENV] === '1') {
|
||||
return null;
|
||||
}
|
||||
if (this.worker) {
|
||||
return this.worker;
|
||||
}
|
||||
if (this.initAttempted) {
|
||||
return null;
|
||||
}
|
||||
|
||||
this.initAttempted = true;
|
||||
|
||||
let workerThreads: typeof import('node:worker_threads');
|
||||
try {
|
||||
workerThreads = await import('node:worker_threads');
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
let workerPath = '';
|
||||
try {
|
||||
workerPath = require.resolve('./parser-enrichment-worker-thread.js');
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const worker = new workerThreads.Worker(workerPath);
|
||||
worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message));
|
||||
worker.on('error', (error: Error) => this.handleWorkerFailure(error));
|
||||
worker.on('exit', (code: number) => {
|
||||
if (code !== 0) {
|
||||
this.handleWorkerFailure(new Error(`parser enrichment worker exited with code ${code}`));
|
||||
} else {
|
||||
this.worker = null;
|
||||
}
|
||||
});
|
||||
this.worker = worker;
|
||||
return worker;
|
||||
} catch (error) {
|
||||
logger.debug(`Failed to start parser enrichment worker: ${(error as Error).message}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private handleWorkerMessage(message: WorkerResponse): void {
|
||||
if (typeof message.id !== 'number') {
|
||||
return;
|
||||
}
|
||||
|
||||
const request = this.pending.get(message.id);
|
||||
if (!request) {
|
||||
return;
|
||||
}
|
||||
this.pending.delete(message.id);
|
||||
|
||||
if (typeof message.error === 'string' && message.error.length > 0) {
|
||||
request.reject(new Error(message.error));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!Array.isArray(message.result)) {
|
||||
request.reject(new Error('Parser enrichment worker returned invalid payload'));
|
||||
return;
|
||||
}
|
||||
|
||||
request.resolve(message.result as MergedToken[]);
|
||||
}
|
||||
|
||||
/**
 * Tears down worker state after a failure.
 *
 * Logs the reason at debug level, rejects every pending request with the
 * given error, and detaches from the current worker (if any) so it is no
 * longer handed out.
 */
private handleWorkerFailure(error: Error): void {
  logger.debug(
    `Parser enrichment worker unavailable, falling back to main thread: ${error.message}`,
  );

  this.pending.forEach((waiting) => {
    waiting.reject(error);
  });
  this.pending.clear();

  const failedWorker = this.worker;
  if (!failedWorker) {
    return;
  }
  // Drop our listeners so later events from this worker are not handled again.
  failedWorker.removeAllListeners();
  this.worker = null;
}
|
||||
}
|
||||
|
||||
/** Module-wide worker runtime, created on the first async enrichment call. */
let runtime: ParserEnrichmentWorkerRuntime | null = null;

/**
 * Enriches tokens with MeCab part-of-speech data through the worker runtime.
 *
 * If the runtime rejects for any reason, the synchronous
 * `enrichTokensWithMecabPos1` is used on the calling thread instead.
 *
 * @param tokens Tokens to enrich.
 * @param mecabTokens MeCab tokens used as the enrichment source, or `null`.
 * @returns The enriched tokens.
 */
export async function enrichTokensWithMecabPos1Async(
  tokens: MergedToken[],
  mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
  const activeRuntime = runtime ?? (runtime = new ParserEnrichmentWorkerRuntime());

  try {
    return await activeRuntime.enrichTokens(tokens, mecabTokens);
  } catch {
    // Worker path failed; fall back to the synchronous implementation.
    return enrichTokensWithMecabPos1(tokens, mecabTokens);
  }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { parentPort } from 'node:worker_threads';
|
||||
import type { MergedToken } from '../../../types';
|
||||
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
|
||||
|
||||
/** Message posted by the main thread to request one enrichment pass. */
interface WorkerRequest {
  /** Echoed back in the reply so the sender can match it to its request. */
  id: number;
  tokens: MergedToken[];
  mecabTokens: MergedToken[] | null;
}

// This script only makes sense as a worker thread; without a parent port
// there is nobody to reply to.
if (!parentPort) {
  throw new Error('parser-enrichment worker missing parent port');
}

const port = parentPort;

/**
 * Runs the enrichment for one request and posts either `{ id, result }` or
 * `{ id, error }` back to the parent.
 */
const respondToRequest = (request: WorkerRequest): void => {
  try {
    const enriched = enrichTokensWithMecabPos1(request.tokens, request.mecabTokens);
    port.postMessage({ id: request.id, result: enriched });
  } catch (failure) {
    // Thrown values are not guaranteed to be Error instances.
    const description = failure instanceof Error ? failure.message : String(failure);
    port.postMessage({ id: request.id, error: description });
  }
};

port.on('message', respondToRequest);
|
||||
248
src/core/services/tokenizer/yomitan-parser-runtime.test.ts
Normal file
248
src/core/services/tokenizer/yomitan-parser-runtime.test.ts
Normal file
@@ -0,0 +1,248 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import {
|
||||
requestYomitanTermFrequencies,
|
||||
syncYomitanDefaultAnkiServer,
|
||||
} from './yomitan-parser-runtime';
|
||||
|
||||
/**
 * Builds a minimal runtime dependency stub for the parser runtime tests.
 *
 * The stub exposes a never-destroyed parser window whose
 * `webContents.executeJavaScript` forwards to the supplied callback; all
 * setters are no-ops and both promise getters return `null`.
 */
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
  const ignore = () => undefined;
  const webContents = {
    executeJavaScript: async (script: string) => await executeJavaScript(script),
  };
  const parserWindow = {
    isDestroyed: () => false,
    webContents,
  };

  return {
    getYomitanExt: () => ({ id: 'ext-id' }) as never,
    getYomitanParserWindow: () => parserWindow as never,
    setYomitanParserWindow: ignore,
    getYomitanParserReadyPromise: () => null,
    setYomitanParserReadyPromise: ignore,
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: ignore,
  };
}
|
||||
|
||||
// A script result of `{ updated: true }` must surface as `true`, log exactly
// one info message, and come from a script that reads and writes the options.
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
  const infoMessages: string[] = [];
  let lastScript = '';

  const deps = createDeps(async (script) => {
    lastScript = script;
    return { updated: true };
  });
  const logger = {
    error: () => undefined,
    info: (message: string) => infoMessages.push(message),
  };

  const wasUpdated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, logger);

  assert.equal(wasUpdated, true);
  assert.match(lastScript, /optionsGetFull/);
  assert.match(lastScript, /setAllSettings/);
  assert.equal(infoMessages.length, 1);
});
|
||||
|
||||
// A script result of `{ updated: false }` must surface as `false`.
test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
  const silentLogger = {
    error: () => undefined,
    info: () => undefined,
  };
  const deps = createDeps(async () => ({ updated: false }));

  const wasUpdated = await syncYomitanDefaultAnkiServer(
    'http://127.0.0.1:8766',
    deps,
    silentLogger,
  );

  assert.equal(wasUpdated, false);
});
|
||||
|
||||
// A rejected script execution must be reported once through `error` and
// reported to the caller as `false`.
test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
  const errorMessages: string[] = [];
  const failingExecute = async (): Promise<unknown> => {
    throw new Error('execute failed');
  };
  const deps = createDeps(failingExecute);

  const wasUpdated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
    error: (message) => errorMessages.push(message),
    info: () => undefined,
  });

  assert.equal(wasUpdated, false);
  assert.equal(errorMessages.length, 1);
});
|
||||
|
||||
// A whitespace-only target URL must not execute any script at all.
test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
  const executedScripts: string[] = [];
  const deps = createDeps(async (script) => {
    executedScripts.push(script);
    return { updated: true };
  });
  const silentLogger = {
    error: () => undefined,
    info: () => undefined,
  };

  const wasUpdated = await syncYomitanDefaultAnkiServer(' ', deps, silentLogger);

  assert.equal(wasUpdated, false);
  assert.equal(executedScripts.length, 0);
});
|
||||
|
||||
// Entries without a positive frequency are dropped, and a "rank,occurrence"
// display value is reduced to its leading rank.
test('requestYomitanTermFrequencies returns normalized frequency entries', async () => {
  const backendRows = [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      dictionaryPriority: 0,
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
    {
      term: '鍛える',
      reading: 'きたえる',
      dictionary: 'freq-dict',
      dictionaryPriority: 1,
      frequency: 46961,
      displayValue: '2847,46961',
      displayValueParsed: true,
    },
    {
      term: 'invalid',
      dictionary: 'freq-dict',
      frequency: 0,
    },
  ];

  let lastScript = '';
  const deps = createDeps(async (script) => {
    lastScript = script;
    return backendRows;
  });

  const entries = await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });

  assert.equal(entries.length, 2);

  const [catEntry, trainEntry] = entries;
  assert.equal(catEntry?.term, '猫');
  assert.equal(catEntry?.frequency, 77);
  assert.equal(catEntry?.dictionaryPriority, 0);
  assert.equal(trainEntry?.term, '鍛える');
  assert.equal(trainEntry?.frequency, 2847);

  assert.match(lastScript, /getTermFrequencies/);
  assert.match(lastScript, /optionsGetFull/);
});
|
||||
|
||||
// When `displayValue` is an array pair, its first usable number wins over the
// raw `frequency` field.
test('requestYomitanTermFrequencies prefers primary rank from displayValue array pair', async () => {
  const backendRow = {
    term: '無人',
    reading: 'むじん',
    dictionary: 'freq-dict',
    dictionaryPriority: 0,
    frequency: 157632,
    displayValue: [7141, 157632],
    displayValueParsed: true,
  };
  const deps = createDeps(async () => [backendRow]);

  const entries = await requestYomitanTermFrequencies(
    [{ term: '無人', reading: 'むじん' }],
    deps,
    { error: () => undefined },
  );

  assert.equal(entries.length, 1);
  const onlyEntry = entries[0];
  assert.equal(onlyEntry?.term, '無人');
  assert.equal(onlyEntry?.frequency, 7141);
});
|
||||
|
||||
// Two lookups against the same parser window must fetch the profile options
// only once.
test('requestYomitanTermFrequencies caches profile metadata between calls', async () => {
  const optionsPayload = {
    profileCurrent: 0,
    profiles: [
      {
        options: {
          scanning: { length: 40 },
          dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
        },
      },
    ],
  };
  const dogRows = [
    {
      term: '犬',
      reading: 'いぬ',
      dictionary: 'freq-dict',
      frequency: 12,
      displayValue: '12',
      displayValueParsed: true,
    },
  ];
  const catRows = [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ];

  const executedScripts: string[] = [];
  const deps = createDeps(async (script) => {
    executedScripts.push(script);
    if (script.includes('optionsGetFull')) {
      return optionsPayload;
    }
    return script.includes('"term":"犬"') ? dogRows : catRows;
  });
  const logger = { error: () => undefined };

  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, logger);
  await requestYomitanTermFrequencies([{ term: '犬', reading: 'いぬ' }], deps, logger);

  const optionsCalls = executedScripts.filter((script) => script.includes('optionsGetFull'));
  assert.equal(optionsCalls.length, 1);
});
|
||||
|
||||
// Repeating the same term+reading lookup must be served from the cache: only
// one frequency script may reach the parser window.
test('requestYomitanTermFrequencies caches repeated term+reading lookups', async () => {
  const optionsPayload = {
    profileCurrent: 0,
    profiles: [
      {
        options: {
          scanning: { length: 40 },
          dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
        },
      },
    ],
  };
  const catRows = [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ];

  const executedScripts: string[] = [];
  const deps = createDeps(async (script) => {
    executedScripts.push(script);
    return script.includes('optionsGetFull') ? optionsPayload : catRows;
  });
  const logger = { error: () => undefined };
  const lookup = [{ term: '猫', reading: 'ねこ' }];

  await requestYomitanTermFrequencies(lookup, deps, logger);
  await requestYomitanTermFrequencies(lookup, deps, logger);

  const frequencyCalls = executedScripts.filter((script) =>
    script.includes('getTermFrequencies'),
  );
  assert.equal(frequencyCalls.length, 1);
});
|
||||
@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';
|
||||
|
||||
/** Minimal logger surface accepted by the Yomitan parser runtime helpers. */
interface LoggerLike {
  /** Reports a failure; required. */
  error: (message: string, ...args: unknown[]) => void;
  /** Optional informational channel; callers must tolerate its absence. */
  info?: (message: string, ...args: unknown[]) => void;
}
|
||||
|
||||
interface YomitanParserRuntimeDeps {
|
||||
@@ -14,6 +15,395 @@ interface YomitanParserRuntimeDeps {
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
}
|
||||
|
||||
/** One normalized frequency entry returned for a term by a Yomitan dictionary. */
export interface YomitanTermFrequency {
  /** Term text the entry belongs to. */
  term: string;
  /** Reading reported with the entry, or `null` when none was reported. */
  reading: string | null;
  /** Name of the dictionary that supplied the entry. */
  dictionary: string;
  /** Ordering priority of the supplying dictionary. */
  dictionaryPriority: number;
  /** Frequency value for the term. */
  frequency: number;
  /** Display string reported with the entry, or `null` when unavailable. */
  displayValue: string | null;
  /** Whether the backend flagged the display value as parsed. */
  displayValueParsed: boolean;
}
|
||||
|
||||
/** A term together with the reading to look it up under. */
export interface YomitanTermReadingPair {
  /** Term text to look up. */
  term: string;
  /** Reading of the term, or `null` when no reading is known. */
  reading: string | null;
}
|
||||
|
||||
/** Subset of the active Yomitan profile that the parser runtime needs. */
interface YomitanProfileMetadata {
  /** Index of the profile the metadata was read from. */
  profileIndex: number;
  /** Scan length to pass along with parse requests. */
  scanLength: number;
  /** Names of the dictionaries to query, in priority order. */
  dictionaries: string[];
  /** Priority for each dictionary name. */
  dictionaryPriorityByName: Record<string, number>;
}
|
||||
|
||||
/** Scan length used when the profile does not provide a usable one. */
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;

/** Profile metadata remembered per parser window. */
const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();

/** Frequency lookup results remembered per parser window, keyed by term+reading. */
const yomitanFrequencyCacheByWindow = new WeakMap<
  BrowserWindow,
  Map<string, YomitanTermFrequency[]>
>();
|
||||
|
||||
/**
 * Type guard for any non-null value whose `typeof` is `'object'`.
 * Arrays pass this check as well.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}
|
||||
|
||||
/**
 * Builds the cache key for a term/reading pair: the term and the reading
 * joined by a NUL character, with a `null` reading encoded as an empty string.
 */
function makeTermReadingCacheKey(term: string, reading: string | null): string {
  const readingPart = reading ?? '';
  return [term, readingPart].join('\u0000');
}
|
||||
|
||||
/**
 * Returns the frequency cache belonging to a parser window, creating and
 * registering an empty one on first use.
 */
function getWindowFrequencyCache(window: BrowserWindow): Map<string, YomitanTermFrequency[]> {
  const existing = yomitanFrequencyCacheByWindow.get(window);
  if (existing) {
    return existing;
  }

  const created = new Map<string, YomitanTermFrequency[]>();
  yomitanFrequencyCacheByWindow.set(window, created);
  return created;
}
|
||||
|
||||
/** Forgets both the profile metadata and the frequency cache of a window. */
function clearWindowCaches(window: BrowserWindow): void {
  // The two caches are independent, so the order of removal does not matter.
  yomitanFrequencyCacheByWindow.delete(window);
  yomitanProfileMetadataByWindow.delete(window);
}
|
||||
/**
 * Public entry point for dropping everything cached for a parser window:
 * its profile metadata and its frequency lookup results.
 */
export function clearYomitanParserCachesForWindow(window: BrowserWindow): void {
  yomitanProfileMetadataByWindow.delete(window);
  yomitanFrequencyCacheByWindow.delete(window);
}
|
||||
|
||||
/**
 * Converts a finite, strictly positive number to an integer of at least 1 by
 * flooring it. Every other input, including numeric strings, yields `null`.
 */
function asPositiveInteger(value: unknown): number | null {
  return typeof value === 'number' && Number.isFinite(value) && value > 0
    ? Math.max(1, Math.floor(value)) // fractions below 1 are lifted to 1
    : null;
}
|
||||
|
||||
/**
 * Extracts a positive integer from the start of a frequency display string.
 *
 * Only the leading run of digits and commas is considered. That run is read
 * as a single thousands-grouped number (for example `"12,345"` gives 12345)
 * only when it really is grouped that way: a leading group of one to three
 * digits followed by groups of exactly three digits. Any other
 * comma-separated run is treated as a list whose first number is used, so
 * `"2847,46961"` gives 2847 and `"7141,157"` gives 7141 rather than 7141157.
 *
 * @param value Raw display string.
 * @returns The parsed positive integer, or `null` when the string does not
 *   start with a digit or the number is not finite and greater than zero.
 */
function parsePositiveFrequencyString(value: string): number | null {
  const trimmed = value.trim();
  if (!trimmed) {
    return null;
  }

  const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
  if (!numericPrefix) {
    return null;
  }

  const [leadingGroup = '', ...followingGroups] = numericPrefix.split(',');
  // Without the length check on the leading group, a rank pair whose second
  // number happens to have three digits would be merged into one huge number.
  const isThousandsGrouped =
    followingGroups.length > 0 &&
    leadingGroup.length <= 3 &&
    followingGroups.every((group) => /^\d{3}$/.test(group));
  const digits = isThousandsGrouped ? leadingGroup + followingGroups.join('') : leadingGroup;

  const parsed = Number.parseInt(digits, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return null;
  }

  return parsed;
}
|
||||
|
||||
/**
 * Reads a positive integer frequency out of a loosely typed value.
 *
 * Numbers go through `asPositiveInteger`, strings through
 * `parsePositiveFrequencyString`, and arrays are searched recursively for the
 * first element that yields a value. Anything else yields `null`.
 */
function parsePositiveFrequencyValue(value: unknown): number | null {
  const direct = asPositiveInteger(value);
  if (direct !== null) {
    return direct;
  }

  if (typeof value === 'string') {
    return parsePositiveFrequencyString(value);
  }

  if (!Array.isArray(value)) {
    return null;
  }

  // First usable element wins; later elements are not inspected.
  for (const candidate of value) {
    const fromCandidate = parsePositiveFrequencyValue(candidate);
    if (fromCandidate !== null) {
      return fromCandidate;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Validates and normalizes one raw frequency entry from the Yomitan backend.
 *
 * The entry is rejected (`null`) unless it is an object with a non-blank
 * `term`, a non-blank `dictionary`, and a usable positive frequency. A number
 * parsed from `displayValue` takes precedence over the raw `frequency` field.
 * A missing or non-numeric `dictionaryPriority` becomes
 * `Number.MAX_SAFE_INTEGER`.
 */
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
  if (!isObject(value)) {
    return null;
  }

  const rawDisplayValue = value.displayValue;
  const fromDisplayValue =
    rawDisplayValue === null || rawDisplayValue === undefined
      ? null
      : parsePositiveFrequencyValue(rawDisplayValue);
  const frequency = fromDisplayValue ?? parsePositiveFrequencyValue(value.frequency);

  const term = typeof value.term === 'string' ? value.term.trim() : '';
  const dictionary = typeof value.dictionary === 'string' ? value.dictionary.trim() : '';
  if (term.length === 0 || dictionary.length === 0 || frequency === null) {
    return null;
  }

  const rawPriority = (value as { dictionaryPriority?: unknown }).dictionaryPriority;
  const hasUsablePriority = typeof rawPriority === 'number' && Number.isFinite(rawPriority);

  return {
    term,
    // The reading is passed through untrimmed; non-strings collapse to null.
    reading: typeof value.reading === 'string' ? value.reading : null,
    dictionary,
    dictionaryPriority: hasUsablePriority
      ? Math.max(0, Math.floor(rawPriority as number))
      : Number.MAX_SAFE_INTEGER,
    frequency,
    displayValue: typeof rawDisplayValue === 'string' ? rawDisplayValue : null,
    displayValueParsed: value.displayValueParsed === true,
  };
}
|
||||
|
||||
/**
 * Trims and de-duplicates a list of term/reading pairs.
 *
 * Pairs with a blank or non-string term are dropped, blank or non-string
 * readings become `null`, and only the first occurrence of each resulting
 * term+reading combination is kept, in input order.
 */
function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
  const uniquePairs = new Map<string, YomitanTermReadingPair>();

  for (const { term: rawTerm, reading: rawReading } of termReadingList) {
    const term = typeof rawTerm === 'string' ? rawTerm.trim() : '';
    if (term.length === 0) {
      continue;
    }

    const trimmedReading = typeof rawReading === 'string' ? rawReading.trim() : '';
    const reading = trimmedReading.length > 0 ? trimmedReading : null;

    const key = `${term}\u0000${reading ?? ''}`;
    if (!uniquePairs.has(key)) {
      uniquePairs.set(key, { term, reading });
    }
  }

  // Map iteration follows insertion order, i.e. first-seen order.
  return Array.from(uniquePairs.values());
}
|
||||
|
||||
/**
 * Normalizes profile metadata coming back from the parser window.
 *
 * Accepts either the pre-digested shape (`profileIndex`, `scanLength`,
 * `dictionaries`, `dictionaryPriorityByName`) or a full options object
 * (`profileCurrent` plus `profiles[index].options`); pre-digested fields take
 * precedence when both are present. Unusable numbers fall back to profile 0
 * and `DEFAULT_YOMITAN_SCAN_LENGTH`.
 *
 * @returns The normalized metadata, or `null` when `value` is not an object.
 */
function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null {
  if (!isObject(value)) {
    return null;
  }

  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate);

  const rawProfileIndex = value.profileIndex ?? value.profileCurrent;
  const profileIndex = isFiniteNumber(rawProfileIndex)
    ? Math.max(0, Math.floor(rawProfileIndex))
    : 0;

  // Options of the active profile, used only for fields not given directly.
  const rawProfiles = value.profiles;
  const activeProfile: unknown = Array.isArray(rawProfiles) ? rawProfiles[profileIndex] : undefined;
  const activeOptions = isObject(activeProfile)
    ? (
        activeProfile as {
          options?: { scanning?: { length?: unknown }; dictionaries?: unknown[] };
        }
      ).options
    : undefined;

  const rawScanLength = value.scanLength ?? activeOptions?.scanning?.length;
  const scanLength = isFiniteNumber(rawScanLength)
    ? Math.max(1, Math.floor(rawScanLength))
    : DEFAULT_YOMITAN_SCAN_LENGTH;

  const rawDictionaries = value.dictionaries ?? activeOptions?.dictionaries;
  const ranked: Array<{ name: string; priority: number }> = [];
  if (Array.isArray(rawDictionaries)) {
    rawDictionaries.forEach((entry: unknown, index: number) => {
      if (typeof entry === 'string') {
        // Bare names keep their list position as priority.
        ranked.push({ name: entry.trim(), priority: index });
        return;
      }
      if (!isObject(entry) || entry.enabled === false || typeof entry.name !== 'string') {
        return;
      }
      const name = entry.name.trim();
      if (!name) {
        return;
      }
      const rawId = (entry as { id?: unknown }).id;
      ranked.push({
        name,
        priority: isFiniteNumber(rawId) ? Math.max(0, Math.floor(rawId)) : index,
      });
    });
  }
  ranked.sort((left, right) => left.priority - right.priority);
  const dictionaries = ranked.map((entry) => entry.name).filter((name) => name.length > 0);

  const dictionaryPriorityByName: Record<string, number> = {};
  const rawPriorityByName = value.dictionaryPriorityByName;
  if (isObject(rawPriorityByName)) {
    for (const [rawName, rawPriority] of Object.entries(rawPriorityByName)) {
      if (!isFiniteNumber(rawPriority)) {
        continue;
      }
      const name = rawName.trim();
      if (name) {
        dictionaryPriorityByName[name] = Math.max(0, Math.floor(rawPriority));
      }
    }
  }

  // Dictionaries without an explicit priority get their sorted position.
  dictionaries.forEach((name, position) => {
    if (dictionaryPriorityByName[name] === undefined) {
      dictionaryPriorityByName[name] = position;
    }
  });

  return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
}
|
||||
|
||||
/**
 * Normalizes raw backend entries and stamps each one with the priority of its
 * dictionary.
 *
 * Entries rejected by `toYomitanTermFrequency` are dropped. When the
 * dictionary has no entry in `dictionaryPriorityByName`, the priority already
 * carried by the normalized entry is kept.
 */
function normalizeFrequencyEntriesWithPriority(
  rawResult: unknown[],
  dictionaryPriorityByName: Record<string, number>,
): YomitanTermFrequency[] {
  return rawResult
    .map((rawEntry) => toYomitanTermFrequency(rawEntry))
    .filter((entry): entry is YomitanTermFrequency => entry !== null)
    .map((entry) => {
      const profilePriority = dictionaryPriorityByName[entry.dictionary];
      return {
        ...entry,
        dictionaryPriority:
          profilePriority !== undefined ? profilePriority : entry.dictionaryPriority,
      };
    });
}
|
||||
|
||||
/**
 * Buckets frequency entries by their term+reading cache key, preserving the
 * input order inside each bucket. Blank readings are treated as `null`.
 */
function groupFrequencyEntriesByPair(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const buckets = new Map<string, YomitanTermFrequency[]>();

  for (const entry of entries) {
    const trimmedReading = typeof entry.reading === 'string' ? entry.reading.trim() : '';
    const key = makeTermReadingCacheKey(
      entry.term.trim(),
      trimmedReading.length > 0 ? trimmedReading : null,
    );

    const bucket = buckets.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      buckets.set(key, [entry]);
    }
  }

  return buckets;
}
|
||||
|
||||
/**
 * Buckets frequency entries by trimmed term, ignoring readings. Entries whose
 * term is blank are skipped; input order is preserved inside each bucket.
 */
function groupFrequencyEntriesByTerm(
  entries: YomitanTermFrequency[],
): Map<string, YomitanTermFrequency[]> {
  const buckets = new Map<string, YomitanTermFrequency[]>();

  for (const entry of entries) {
    const term = entry.term.trim();
    if (term.length === 0) {
      continue;
    }

    const bucket = buckets.get(term);
    if (bucket) {
      bucket.push(entry);
    } else {
      buckets.set(term, [entry]);
    }
  }

  return buckets;
}
|
||||
|
||||
/**
 * Returns the active Yomitan profile metadata for a parser window.
 *
 * A previously stored result for the window is returned as-is. Otherwise a
 * script is executed inside the window that asks the Yomitan backend for its
 * full options and reduces them to the profile index, scan length, enabled
 * dictionary names and their priorities. A successful result is stored for
 * the window; failures are logged through `logger.error`, yield `null`, and
 * are not stored.
 */
async function requestYomitanProfileMetadata(
  parserWindow: BrowserWindow,
  logger: LoggerLike,
): Promise<YomitanProfileMetadata | null> {
  const remembered = yomitanProfileMetadataByWindow.get(parserWindow);
  if (remembered) {
    return remembered;
  }

  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex =
        typeof optionsFull.profileCurrent === "number" && Number.isFinite(optionsFull.profileCurrent)
          ? Math.max(0, Math.floor(optionsFull.profileCurrent))
          : 0;
      const scanLengthRaw = optionsFull.profiles?.[profileIndex]?.options?.scanning?.length;
      const scanLength =
        typeof scanLengthRaw === "number" && Number.isFinite(scanLengthRaw)
          ? Math.max(1, Math.floor(scanLengthRaw))
          : ${DEFAULT_YOMITAN_SCAN_LENGTH};
      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
      const dictionaryEntries = Array.isArray(dictionariesRaw)
        ? dictionariesRaw
            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
            .map((entry, index) => ({
              name: entry.name,
              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.max(0, Math.floor(entry.id)) : index
            }))
            .sort((a, b) => a.id - b.id)
        : [];
      const dictionaries = dictionaryEntries.map((entry) => entry.name);
      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
        acc[entry.name] = index;
        return acc;
      }, {});

      return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
    })();
  `;

  try {
    const metadata = toYomitanProfileMetadata(
      await parserWindow.webContents.executeJavaScript(script, true),
    );
    if (metadata !== null) {
      // Only usable results are remembered, so a bad reply is retried later.
      yomitanProfileMetadataByWindow.set(parserWindow, metadata);
    }
    return metadata;
  } catch (err) {
    logger.error('Yomitan parser metadata request failed:', (err as Error).message);
    return null;
  }
}
|
||||
|
||||
async function ensureYomitanParserWindow(
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
logger: LoggerLike,
|
||||
@@ -58,6 +448,7 @@ async function ensureYomitanParserWindow(
|
||||
);
|
||||
|
||||
parserWindow.on('closed', () => {
|
||||
clearWindowCaches(parserWindow);
|
||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||
deps.setYomitanParserWindow(null);
|
||||
deps.setYomitanParserReadyPromise(null);
|
||||
@@ -77,6 +468,7 @@ async function ensureYomitanParserWindow(
|
||||
if (!parserWindow.isDestroyed()) {
|
||||
parserWindow.destroy();
|
||||
}
|
||||
clearWindowCaches(parserWindow);
|
||||
if (deps.getYomitanParserWindow() === parserWindow) {
|
||||
deps.setYomitanParserWindow(null);
|
||||
deps.setYomitanParserReadyPromise(null);
|
||||
@@ -108,7 +500,40 @@ export async function requestYomitanParseResults(
|
||||
return null;
|
||||
}
|
||||
|
||||
const script = `
|
||||
const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
|
||||
const script =
|
||||
metadata !== null
|
||||
? `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
new Promise((resolve, reject) => {
|
||||
chrome.runtime.sendMessage({ action, params }, (response) => {
|
||||
if (chrome.runtime.lastError) {
|
||||
reject(new Error(chrome.runtime.lastError.message));
|
||||
return;
|
||||
}
|
||||
if (!response || typeof response !== "object") {
|
||||
reject(new Error("Invalid response from Yomitan backend"));
|
||||
return;
|
||||
}
|
||||
if (response.error) {
|
||||
reject(new Error(response.error.message || "Yomitan backend error"));
|
||||
return;
|
||||
}
|
||||
resolve(response.result);
|
||||
});
|
||||
});
|
||||
|
||||
return await invoke("parseText", {
|
||||
text: ${JSON.stringify(text)},
|
||||
optionsContext: { index: ${metadata.profileIndex} },
|
||||
scanLength: ${metadata.scanLength},
|
||||
useInternalParser: true,
|
||||
useMecabParser: true
|
||||
});
|
||||
})();
|
||||
`
|
||||
: `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
new Promise((resolve, reject) => {
|
||||
@@ -132,7 +557,7 @@ export async function requestYomitanParseResults(
|
||||
const optionsFull = await invoke("optionsGetFull", undefined);
|
||||
const profileIndex = optionsFull.profileCurrent;
|
||||
const scanLength =
|
||||
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? 40;
|
||||
optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? ${DEFAULT_YOMITAN_SCAN_LENGTH};
|
||||
|
||||
return await invoke("parseText", {
|
||||
text: ${JSON.stringify(text)},
|
||||
@@ -152,3 +577,278 @@ export async function requestYomitanParseResults(
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * In-page source that defines `invoke(action, params)`: a promise wrapper
 * around `chrome.runtime.sendMessage` that rejects on a runtime error, a
 * non-object response, or a backend-reported error.
 */
const TERM_FREQUENCY_INVOKE_BOOTSTRAP = `
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });
`;

/** Builds the lookup script used when the dictionary list is already known. */
function buildKnownDictionariesFrequencyScript(
  pairs: YomitanTermReadingPair[],
  dictionaries: string[],
): string {
  return `
    (async () => {
${TERM_FREQUENCY_INVOKE_BOOTSTRAP}
      return await invoke("getTermFrequencies", {
        termReadingList: ${JSON.stringify(pairs)},
        dictionaries: ${JSON.stringify(dictionaries)}
      });
    })();
  `;
}

/**
 * Builds the lookup script used when no usable profile metadata is available:
 * the script resolves the enabled dictionaries itself and attaches a
 * `dictionaryPriority` to every returned entry.
 */
function buildSelfResolvingFrequencyScript(pairs: YomitanTermReadingPair[]): string {
  return `
    (async () => {
${TERM_FREQUENCY_INVOKE_BOOTSTRAP}
      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex = optionsFull.profileCurrent;
      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
      const dictionaryEntries = Array.isArray(dictionariesRaw)
        ? dictionariesRaw
            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
            .map((entry, index) => ({
              name: entry.name,
              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.floor(entry.id) : index
            }))
            .sort((a, b) => a.id - b.id)
        : [];
      const dictionaries = dictionaryEntries.map((entry) => entry.name);
      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
        acc[entry.name] = index;
        return acc;
      }, {});

      if (dictionaries.length === 0) {
        return [];
      }

      const rawFrequencies = await invoke("getTermFrequencies", {
        termReadingList: ${JSON.stringify(pairs)},
        dictionaries
      });

      if (!Array.isArray(rawFrequencies)) {
        return [];
      }

      return rawFrequencies
        .filter((entry) => entry && typeof entry === "object")
        .map((entry) => ({
          ...entry,
          dictionaryPriority:
            typeof entry.dictionary === "string" && dictionaryPriorityByName[entry.dictionary] !== undefined
              ? dictionaryPriorityByName[entry.dictionary]
              : Number.MAX_SAFE_INTEGER
        }));
    })();
  `;
}

/**
 * Stores freshly fetched entries in the window cache for every requested pair.
 * A pair gets its exact term+reading matches when there are any, otherwise all
 * entries for the term; pairs without any entry are stored as an empty list so
 * they are not requested again.
 *
 * @returns Fetched entries whose term was not among the requested pairs.
 */
function storeFetchedFrequencyEntries(
  frequencyCache: Map<string, YomitanTermFrequency[]>,
  requestedPairs: YomitanTermReadingPair[],
  fetchedEntries: YomitanTermFrequency[],
): YomitanTermFrequency[] {
  const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
  const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);

  for (const pair of requestedPairs) {
    const key = makeTermReadingCacheKey(pair.term, pair.reading);
    frequencyCache.set(key, groupedByPair.get(key) ?? groupedByTerm.get(pair.term) ?? []);
  }

  const requestedTerms = new Set(requestedPairs.map((pair) => pair.term));
  return fetchedEntries.filter((entry) => !requestedTerms.has(entry.term.trim()));
}

/**
 * Looks up frequency entries for a list of term/reading pairs through the
 * Yomitan parser window.
 *
 * Pairs are normalized and de-duplicated first. Results are cached per parser
 * window, so only pairs not seen before are sent to the backend. When profile
 * metadata with at least one dictionary is available, the known dictionary
 * list is sent along and priorities are applied on this side; otherwise the
 * in-page script resolves dictionaries and priorities itself.
 *
 * @param termReadingList Pairs to look up.
 * @param deps Accessors for the Yomitan extension and parser window.
 * @param logger Receives an error message when the backend request fails.
 * @returns Cached entries for the requested pairs in request order, followed
 *   by any fetched entries for terms that were not requested. Returns an empty
 *   list when there is nothing to look up, no extension, or no usable parser
 *   window; on a failed request only previously cached entries are returned.
 */
export async function requestYomitanTermFrequencies(
  termReadingList: YomitanTermReadingPair[],
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<YomitanTermFrequency[]> {
  const normalizedTermReadingList = normalizeTermReadingList(termReadingList);
  const yomitanExt = deps.getYomitanExt();
  if (normalizedTermReadingList.length === 0 || !yomitanExt) {
    return [];
  }

  const isReady = await ensureYomitanParserWindow(deps, logger);
  const parserWindow = deps.getYomitanParserWindow();
  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
    return [];
  }

  const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
  const frequencyCache = getWindowFrequencyCache(parserWindow);

  const buildCachedResult = (): YomitanTermFrequency[] => {
    const result: YomitanTermFrequency[] = [];
    for (const pair of normalizedTermReadingList) {
      const cached = frequencyCache.get(makeTermReadingCacheKey(pair.term, pair.reading));
      if (cached && cached.length > 0) {
        result.push(...cached);
      }
    }
    return result;
  };

  const missingTermReadingList = normalizedTermReadingList.filter(
    (pair) => !frequencyCache.has(makeTermReadingCacheKey(pair.term, pair.reading)),
  );
  if (missingTermReadingList.length === 0) {
    return buildCachedResult();
  }

  // Non-null only when the profile metadata lists at least one dictionary.
  const knownProfile = metadata && metadata.dictionaries.length > 0 ? metadata : null;
  const script = knownProfile
    ? buildKnownDictionariesFrequencyScript(missingTermReadingList, knownProfile.dictionaries)
    : buildSelfResolvingFrequencyScript(missingTermReadingList);

  try {
    const rawResult: unknown = await parserWindow.webContents.executeJavaScript(script, true);
    const rawEntries: unknown[] = Array.isArray(rawResult) ? rawResult : [];
    const fetchedEntries = knownProfile
      ? normalizeFrequencyEntriesWithPriority(rawEntries, knownProfile.dictionaryPriorityByName)
      : rawEntries
          .map((entry) => toYomitanTermFrequency(entry))
          .filter((entry): entry is YomitanTermFrequency => entry !== null);

    const unmatchedEntries = storeFetchedFrequencyEntries(
      frequencyCache,
      missingTermReadingList,
      fetchedEntries,
    );
    return [...buildCachedResult(), ...unmatchedEntries];
  } catch (err) {
    // A rejection is not guaranteed to carry an Error instance.
    const reason = err instanceof Error ? err.message : String(err);
    logger.error('Yomitan term frequency request failed:', reason);
    return buildCachedResult();
  }
}
|
||||
|
||||
/**
 * Rewrites the Anki server setting of the first Yomitan profile to `serverUrl`, but only while
 * that setting is still blank or equal to `http://127.0.0.1:8765` (the value this code treats as
 * the replaceable default). A server the user has already customized is left alone.
 *
 * The change is made by a script executed in the Yomitan parser window. The script reads the
 * complete options with the `optionsGetFull` backend action, edits
 * `profiles[0].options.anki.server` in place, and writes the whole options object back with
 * `setAllSettings`.
 *
 * @param serverUrl - Desired Anki server URL; surrounding whitespace is trimmed before use.
 * @param deps - Runtime dependencies used to create and look up the parser window.
 * @param logger - Receives an info message after an update and an error message on failure.
 * @returns `true` only when the setting was actually rewritten. `false` when the URL is blank,
 *   the parser window is unavailable or destroyed, nothing needed to change, or the request
 *   failed (failures are logged, never thrown).
 */
export async function syncYomitanDefaultAnkiServer(
  serverUrl: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTargetServer = serverUrl.trim();
  if (!normalizedTargetServer) {
    return false;
  }

  const isReady = await ensureYomitanParserWindow(deps, logger);
  const parserWindow = deps.getYomitanParserWindow();
  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
    return false;
  }

  // The target URL is embedded with JSON.stringify so it becomes a correctly escaped string
  // literal inside the injected script.
  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const targetServer = ${JSON.stringify(normalizedTargetServer)};
      const optionsFull = await invoke("optionsGetFull", undefined);
      if (!optionsFull || typeof optionsFull !== "object") {
        throw new Error("Yomitan backend returned invalid options");
      }
      const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
      if (profiles.length === 0) {
        return { updated: false, reason: "no-profiles" };
      }

      const defaultProfile = profiles[0];
      if (!defaultProfile || typeof defaultProfile !== "object") {
        return { updated: false, reason: "invalid-default-profile" };
      }

      defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
        ? defaultProfile.options
        : {};
      defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
        ? defaultProfile.options.anki
        : {};

      const currentServerRaw = defaultProfile.options.anki.server;
      const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
      const canReplaceDefault =
        currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
      if (!canReplaceDefault || currentServer === targetServer) {
        return { updated: false, reason: "no-change", currentServer, targetServer };
      }

      defaultProfile.options.anki.server = targetServer;
      await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
      return { updated: true, currentServer, targetServer };
    })();
  `;

  try {
    const result: unknown = await parserWindow.webContents.executeJavaScript(script, true);
    const updated =
      typeof result === 'object' &&
      result !== null &&
      (result as { updated?: unknown }).updated === true;
    if (updated) {
      logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
      return true;
    }
    return false;
  } catch (err: unknown) {
    // A rejection is not guaranteed to carry an Error instance; narrow before reading `.message`
    // so the log never ends up with `undefined` as its detail.
    const message = err instanceof Error ? err.message : String(err);
    logger.error('Failed to sync Yomitan default profile Anki server:', message);
    return false;
  }
}
|
||||
|
||||
Reference in New Issue
Block a user