fix(subtitle): restore known and JLPT token annotations

This commit is contained in:
2026-03-19 18:03:20 -07:00
parent 1b5f0c6999
commit 43a0d11446
16 changed files with 258 additions and 43 deletions

View File

@@ -55,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
assert.equal(surfaceResult[0]?.isKnown, false);
});
// Regression case: the known-word lookup only recognises the kana reading, so a
// match against the kanji surface/headword fails. The token must still be
// marked as known, while its JLPT level (resolved from the kanji form by the
// stub below) and its frequency rank are expected to come through as given.
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
  const kanjiToken = makeToken({
    surface: '大体',
    headword: '大体',
    reading: 'だいたい',
    frequencyRank: 1895,
  });

  const annotated = annotateTokens(
    [kanjiToken],
    makeDeps({
      // Known only by reading; the kanji form is deliberately a miss.
      isKnownWord: (text) => text === 'だいたい',
      // JLPT data is keyed by the kanji form only.
      getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
    }),
  );

  const first = annotated[0];
  assert.equal(first?.isKnown, true);
  assert.equal(first?.jlptLevel, 'N4');
  assert.equal(first?.frequencyRank, 1895);
});
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
const tokens = [
makeToken({

View File

@@ -560,12 +560,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return false;
}
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.reading,
token.headword,
].filter(
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -659,7 +654,16 @@ function computeTokenKnownStatus(
knownWordMatchMode: NPlusOneMatchMode,
): boolean {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
return true;
}
const normalizedReading = token.reading.trim();
if (!normalizedReading) {
return false;
}
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
}
function filterTokenFrequencyRank(