mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-12 04:19:25 -07:00
fix(subtitle): restore known and JLPT token annotations
This commit is contained in:
@@ -55,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
|
||||
assert.equal(surfaceResult[0]?.isKnown, false);
|
||||
});
|
||||
|
||||
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '大体',
|
||||
headword: '大体',
|
||||
reading: 'だいたい',
|
||||
frequencyRank: 1895,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'だいたい',
|
||||
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.jlptLevel, 'N4');
|
||||
assert.equal(result[0]?.frequencyRank, 1895);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
|
||||
@@ -560,12 +560,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
const candidates = [
|
||||
resolveJlptLookupText(token),
|
||||
token.surface,
|
||||
token.reading,
|
||||
token.headword,
|
||||
].filter(
|
||||
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
|
||||
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
|
||||
);
|
||||
|
||||
@@ -659,7 +654,16 @@ function computeTokenKnownStatus(
|
||||
knownWordMatchMode: NPlusOneMatchMode,
|
||||
): boolean {
|
||||
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
|
||||
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
|
||||
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const normalizedReading = token.reading.trim();
|
||||
if (!normalizedReading) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
|
||||
}
|
||||
|
||||
function filterTokenFrequencyRank(
|
||||
|
||||
Reference in New Issue
Block a user