fix(tokenizer): preserve known-word highlight when POS filters suppress other annotations

- Known-word cache matches now set isKnown=true even for tokens excluded by POS filters
- POS exclusion gate suppresses only the N+1, frequency, and JLPT annotations; known status is computed before the gate
- Jellyfin subtitle preload continues after cleanup failures instead of aborting
- Update config docs and option description to document the known-word bypass behavior
This commit is contained in:
2026-05-22 02:27:40 -07:00
parent 3de7ed8b54
commit 9ba7f909b5
9 changed files with 111 additions and 52 deletions
+10 -10
View File
@@ -129,7 +129,7 @@ test('tokenizeSubtitle splits same-line grammar endings before applying annotati
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
assert.equal(result.tokens?.[0]?.frequencyRank, 40);
assert.equal(result.tokens?.[1]?.surface, 'です');
assert.equal(result.tokens?.[1]?.isKnown, false);
assert.equal(result.tokens?.[1]?.isKnown, true);
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
@@ -3365,7 +3365,7 @@ test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and freque
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
});
test('tokenizeSubtitle clears known-word highlight for exact non-independent kanji noun tokens', async () => {
test('tokenizeSubtitle keeps known-word highlight for exact non-independent kanji noun tokens', async () => {
const result = await tokenizeSubtitle(
'その点',
makeDepsFromYomitanTokens(
@@ -3413,7 +3413,7 @@ test('tokenizeSubtitle clears known-word highlight for exact non-independent kan
assert.equal(result.tokens?.length, 2);
assert.equal(result.tokens?.[0]?.isKnown, false);
assert.equal(result.tokens?.[1]?.surface, '点');
assert.equal(result.tokens?.[1]?.isKnown, false);
assert.equal(result.tokens?.[1]?.isKnown, true);
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
@@ -4028,7 +4028,7 @@ test('tokenizeSubtitle clears all annotations for kana-only demonstrative helper
{
surface: 'これで',
headword: 'これ',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -4143,7 +4143,7 @@ test('tokenizeSubtitle clears all annotations for explanatory pondering endings'
{
surface: 'のかな',
headword: 'の',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -4672,7 +4672,7 @@ test('tokenizeSubtitle clears annotations for ja-nai explanatory endings and aru
{
surface: 'ある',
headword: 'ある',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -4717,7 +4717,7 @@ test('tokenizeSubtitle clears annotations for standalone polite copula endings w
{
surface: 'ですよ',
headword: 'です',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -5044,7 +5044,7 @@ test('tokenizeSubtitle clears annotations for auxiliary inflection fragments whi
{
surface: 'れた',
headword: 'れる',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -5181,7 +5181,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
{
surface: 'てく',
headword: 'てく',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
@@ -5192,7 +5192,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
{
surface: 'れた',
headword: 'れる',
isKnown: false,
isKnown: true,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,