Add vendor dict fallback logic

This commit is contained in:
2026-02-15 22:45:03 -08:00
parent dae1f817e0
commit 01a48f4714
21 changed files with 1194 additions and 19 deletions

View File

@@ -190,6 +190,75 @@ test("tokenizeSubtitleService skips JLPT lookups when disabled", async () => {
assert.equal(lookupCalls, 0);
});
// With the frequency dictionary enabled, every token returned for "猫です"
// should carry the rank reported by the getFrequencyRank dependency.
test("tokenizeSubtitleService applies frequency dictionary ranks", async () => {
  // Builds a fresh copy of the stubbed MeCab output on every call so the
  // service never sees objects shared between invocations.
  const buildMecabTokens = () => [
    {
      headword: "猫",
      surface: "猫",
      reading: "ネコ",
      startPos: 0,
      endPos: 1,
      partOfSpeech: PartOfSpeech.noun,
      isMerged: false,
      isKnown: false,
      isNPlusOneTarget: false,
    },
    {
      headword: "です",
      surface: "です",
      reading: "デス",
      startPos: 1,
      // NOTE(review): endPos 2 covers one character although the surface
      // "です" has two — confirm whether this fixture offset is intentional.
      endPos: 2,
      partOfSpeech: PartOfSpeech.bound_auxiliary,
      isMerged: false,
      isKnown: false,
      isNPlusOneTarget: false,
    },
  ];

  const deps = makeDeps({
    getFrequencyDictionaryEnabled: () => true,
    tokenizeWithMecab: async () => buildMecabTokens(),
    // Rank 23 for "猫", rank 1200 for any other lookup text.
    getFrequencyRank: (text) => {
      if (text === "猫") {
        return 23;
      }
      return 1200;
    },
  });

  const { tokens } = await tokenizeSubtitleService("猫です", deps);

  const tokenCount = tokens?.length;
  const firstRank = tokens?.[0]?.frequencyRank;
  const secondRank = tokens?.[1]?.frequencyRank;

  assert.equal(tokenCount, 2);
  assert.equal(firstRank, 23);
  assert.equal(secondRank, 1200);
});
// With the frequency dictionary disabled, the service must neither call
// getFrequencyRank nor attach a frequencyRank to the returned token.
test("tokenizeSubtitleService skips frequency lookups when disabled", async () => {
  // Counts how many times the rank lookup dependency gets invoked.
  let rankLookupCount = 0;

  // Builds a fresh copy of the single stubbed MeCab token on every call.
  const buildMecabTokens = () => [
    {
      headword: "猫",
      surface: "猫",
      reading: "ネコ",
      startPos: 0,
      endPos: 1,
      partOfSpeech: PartOfSpeech.noun,
      isMerged: false,
      isKnown: false,
      isNPlusOneTarget: false,
    },
  ];

  const deps = makeDeps({
    getFrequencyDictionaryEnabled: () => false,
    tokenizeWithMecab: async () => buildMecabTokens(),
    // Would yield rank 10 if it were ever called; the counter proves it is not.
    getFrequencyRank: () => {
      rankLookupCount += 1;
      return 10;
    },
  });

  const { tokens } = await tokenizeSubtitleService("猫", deps);

  const tokenCount = tokens?.length;
  const firstRank = tokens?.[0]?.frequencyRank;

  assert.equal(tokenCount, 1);
  assert.equal(firstRank, undefined);
  assert.equal(rankLookupCount, 0);
});
test("tokenizeSubtitleService skips JLPT level for excluded demonstratives", async () => {
const result = await tokenizeSubtitleService(
"この",