fix: preserve known highlighting for filtered tokens

This commit is contained in:
2026-05-03 22:03:42 -07:00
parent 00a94d6bd1
commit 9bcea2fc5f
6 changed files with 158 additions and 68 deletions
@@ -0,0 +1,53 @@
---
id: TASK-333
title: Suppress aru subtitle annotations
status: Done
assignee: []
created_date: '2026-05-04 04:39'
updated_date: '2026-05-04 05:02'
labels:
- tokenizer
- annotations
- bug
dependencies: []
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Add `ある` / `有る` to the subtitle annotation suppression path so `aru` tokens remain hoverable and never receive N+1, JLPT, frequency, or name-match annotation metadata. Known-word highlighting is special: if a filtered `aru` token is known and known highlighting is enabled, it should still render as known.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 `ある` and kanji headword/surface variants such as `有る` are excluded by the subtitle annotation filter.
- [x] #2 Annotation stripping clears N+1, JLPT, frequency, and name metadata for `aru` tokens while preserving token hover data.
- [x] #3 Known-word highlighting still applies to filtered tokens, including `aru`, when known-word lookup marks them known.
- [x] #4 Regression coverage fails before the fix and passes after.
<!-- AC:END -->
## Implementation Plan
<!-- SECTION:PLAN:BEGIN -->
1. Add `ある`/`有る`/`在る` to the shared subtitle annotation hard-exclusion terms.
2. Preserve/recompute known-word status for filtered tokens while stripping N+1, JLPT, frequency, and name metadata.
3. Add RED/GREEN unit and tokenizer regression coverage, plus a changelog fragment.
4. Run targeted tests and full handoff gate.
<!-- SECTION:PLAN:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
TDD path: added failing annotation-stage coverage first. Initial implementation made targeted tests pass, then broader tokenizer coverage revealed an older fixture expecting `ある` to remain lexical; updated that integration expectation to match the newly requested behavior. Follow-up correction: known-word highlighting is the lone annotation exception for filtered tokens, so the strip path now preserves known state and `annotateTokens` recomputes known status for filtered tokens while still clearing N+1/JLPT/frequency/name metadata.
<!-- SECTION:NOTES:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Suppressed non-known subtitle annotations for `aru` existence verbs by adding `ある`, `有る`, and `在る` to the shared hard-exclusion list. Corrected the filtered-token path so known-word highlighting still applies whenever known highlighting is enabled; filtered tokens now keep/gain `isKnown` but still lose N+1, JLPT, frequency, and name metadata.
Added and updated annotation-stage and tokenizer regression coverage for `aru`, particles, helper fragments, interjections, and other filtered known tokens. Added `changes/333-aru-annotation-filter.md`.
Validation passed: RED failures observed before implementation/correction; `bun test src/core/services/tokenizer/annotation-stage.test.ts`; `bun test src/core/services/tokenizer.test.ts`; `bun run typecheck`; `bun run format:check:src`; `bun run changelog:lint`; `bun run test:fast`; `bun run test:env`; `bun run build`; `bun run test:smoke:dist`.
<!-- SECTION:FINAL_SUMMARY:END -->
+4
View File
@@ -0,0 +1,4 @@
type: fixed
area: tokenizer
- Suppressed N+1, JLPT, frequency, and name styling for `ある` / `有る` / `在る` existence verbs while still allowing known-word highlighting.
+12 -12
View File
@@ -129,7 +129,7 @@ test('tokenizeSubtitle splits same-line grammar endings before applying annotati
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5'); assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
assert.equal(result.tokens?.[0]?.frequencyRank, 40); assert.equal(result.tokens?.[0]?.frequencyRank, 40);
assert.equal(result.tokens?.[1]?.surface, 'です'); assert.equal(result.tokens?.[1]?.surface, 'です');
assert.equal(result.tokens?.[1]?.isKnown, false); assert.equal(result.tokens?.[1]?.isKnown, true);
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false); assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined); assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
assert.equal(result.tokens?.[1]?.jlptLevel, undefined); assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
@@ -3893,7 +3893,7 @@ test('tokenizeSubtitle clears all annotations for kana-only demonstrative helper
{ {
surface: 'これで', surface: 'これで',
headword: 'これ', headword: 'これ',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -4008,7 +4008,7 @@ test('tokenizeSubtitle clears all annotations for explanatory pondering endings'
{ {
surface: 'のかな', surface: 'のかな',
headword: 'の', headword: 'の',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -4306,7 +4306,7 @@ test('tokenizeSubtitle clears all annotations for explanatory contrast endings',
); );
}); });
test('tokenizeSubtitle clears annotations for ja-nai explanatory endings while preserving lexical content', async () => { test('tokenizeSubtitle clears annotations for ja-nai explanatory endings and aru verbs', async () => {
const result = await tokenizeSubtitle( const result = await tokenizeSubtitle(
'みたいなのあるじゃないですか', 'みたいなのあるじゃないですか',
makeDepsFromYomitanTokens( makeDepsFromYomitanTokens(
@@ -4322,7 +4322,7 @@ test('tokenizeSubtitle clears annotations for ja-nai explanatory endings while p
text === 'みたい' ? 320 : text === 'ある' ? 240 : text === 'じゃない' ? 80 : null, text === 'みたい' ? 320 : text === 'ある' ? 240 : text === 'じゃない' ? 80 : null,
getJlptLevel: (text) => getJlptLevel: (text) =>
text === 'みたい' ? 'N4' : text === 'ある' ? 'N5' : text === 'じゃない' ? 'N5' : null, text === 'みたい' ? 'N4' : text === 'ある' ? 'N5' : text === 'じゃない' ? 'N5' : null,
isKnownWord: (text) => text === 'みたい' || text === 'の', isKnownWord: (text) => text === 'みたい' || text === 'の' || text === 'ある',
getMinSentenceWordsForNPlusOne: () => 1, getMinSentenceWordsForNPlusOne: () => 1,
tokenizeWithMecab: async () => [ tokenizeWithMecab: async () => [
{ {
@@ -4447,10 +4447,10 @@ test('tokenizeSubtitle clears annotations for ja-nai explanatory endings while p
{ {
surface: 'ある', surface: 'ある',
headword: 'ある', headword: 'ある',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: 240, frequencyRank: undefined,
jlptLevel: 'N5', jlptLevel: undefined,
}, },
); );
}); });
@@ -4492,7 +4492,7 @@ test('tokenizeSubtitle clears annotations for standalone polite copula endings w
{ {
surface: 'ですよ', surface: 'ですよ',
headword: 'です', headword: 'です',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -4819,7 +4819,7 @@ test('tokenizeSubtitle clears annotations for auxiliary inflection fragments whi
{ {
surface: 'れた', surface: 'れた',
headword: 'れる', headword: 'れる',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -4956,7 +4956,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
{ {
surface: 'てく', surface: 'てく',
headword: 'てく', headword: 'てく',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -4967,7 +4967,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
{ {
surface: 'れた', surface: 'れた',
headword: 'れる', headword: 'れる',
isKnown: false, isKnown: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
frequencyRank: undefined, frequencyRank: undefined,
jlptLevel: undefined, jlptLevel: undefined,
@@ -608,6 +608,29 @@ test('shouldExcludeTokenFromSubtitleAnnotations excludes bare くれ auxiliary f
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true); assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
}); });
test('shouldExcludeTokenFromSubtitleAnnotations excludes aru existence verbs', () => {
for (const token of [
makeToken({
surface: 'ある',
headword: 'ある',
reading: 'アル',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
}),
makeToken({
surface: '有る',
headword: '有る',
reading: 'アル',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
}),
]) {
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
}
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone quote particle and auxiliary grammar terms', () => { test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone quote particle and auxiliary grammar terms', () => {
for (const token of [ for (const token of [
makeToken({ makeToken({
@@ -654,7 +677,7 @@ test('shouldExcludeTokenFromSubtitleAnnotations excludes single-kana surface fra
} }
}); });
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => { test('stripSubtitleAnnotationMetadata keeps known hover data while clearing non-known annotation fields', () => {
const token = makeToken({ const token = makeToken({
surface: 'は', surface: 'は',
headword: 'は', headword: 'は',
@@ -670,7 +693,6 @@ test('stripSubtitleAnnotationMetadata keeps token hover data while clearing anno
assert.deepEqual(stripSubtitleAnnotationMetadata(token), { assert.deepEqual(stripSubtitleAnnotationMetadata(token), {
...token, ...token,
isKnown: false,
isNPlusOneTarget: false, isNPlusOneTarget: false,
isNameMatch: false, isNameMatch: false,
jlptLevel: undefined, jlptLevel: undefined,
@@ -876,8 +898,8 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, false);
assert.equal(result[1]?.isKnown, false); assert.equal(result[1]?.isKnown, true);
assert.equal(result[2]?.isKnown, false); assert.equal(result[2]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
}); });
@@ -1330,13 +1352,13 @@ test('annotateTokens applies one shared exclusion gate across known N+1 frequenc
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for kana-only non-independent noun helper merges', () => { test('annotateTokens keeps known status while clearing other annotations for kana-only non-independent noun helper merges', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'ことに', surface: 'ことに',
@@ -1360,13 +1382,13 @@ test('annotateTokens clears all annotations for kana-only non-independent noun h
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for standalone auxiliary inflection fragments', () => { test('annotateTokens keeps known status while clearing other annotations for standalone auxiliary inflection fragments', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'れる', surface: 'れる',
@@ -1402,14 +1424,14 @@ test('annotateTokens clears all annotations for standalone auxiliary inflection
); );
for (const token of result) { for (const token of result) {
assert.equal(token.isKnown, false, token.surface); assert.equal(token.isKnown, true, token.surface);
assert.equal(token.isNPlusOneTarget, false, token.surface); assert.equal(token.isNPlusOneTarget, false, token.surface);
assert.equal(token.frequencyRank, undefined, token.surface); assert.equal(token.frequencyRank, undefined, token.surface);
assert.equal(token.jlptLevel, undefined, token.surface); assert.equal(token.jlptLevel, undefined, token.surface);
} }
}); });
test('annotateTokens clears all annotations for auxiliary-only te-kureru helper spans', () => { test('annotateTokens keeps known status while clearing other annotations for auxiliary-only te-kureru helper spans', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'てく', surface: 'てく',
@@ -1445,7 +1467,7 @@ test('annotateTokens clears all annotations for auxiliary-only te-kureru helper
); );
for (const token of result) { for (const token of result) {
assert.equal(token.isKnown, false, token.surface); assert.equal(token.isKnown, true, token.surface);
assert.equal(token.isNPlusOneTarget, false, token.surface); assert.equal(token.isNPlusOneTarget, false, token.surface);
assert.equal(token.frequencyRank, undefined, token.surface); assert.equal(token.frequencyRank, undefined, token.surface);
assert.equal(token.jlptLevel, undefined, token.surface); assert.equal(token.jlptLevel, undefined, token.surface);
@@ -1481,7 +1503,7 @@ test('annotateTokens keeps lexical くれる forms eligible for annotation', ()
assert.equal(result[0]?.jlptLevel, 'N4'); assert.equal(result[0]?.jlptLevel, 'N4');
}); });
test('annotateTokens clears all annotations for standalone して helper fragments', () => { test('annotateTokens keeps known status while clearing other annotations for standalone して helper fragments', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'してる', surface: 'してる',
@@ -1505,13 +1527,13 @@ test('annotateTokens clears all annotations for standalone して helper fragmen
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for standalone particle fragments without POS tags', () => { test('annotateTokens keeps known status while clearing other annotations for standalone particle fragments without POS tags', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'と', surface: 'と',
@@ -1535,7 +1557,7 @@ test('annotateTokens clears all annotations for standalone particle fragments wi
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
@@ -1591,7 +1613,7 @@ test('annotateTokens does not mark standalone connective particles as N+1', () =
assert.equal(result[1]?.jlptLevel, undefined); assert.equal(result[1]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for rhetorical もんか grammar particle phrases', () => { test('annotateTokens keeps known status while clearing other annotations for rhetorical もんか grammar particle phrases', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'もんか', surface: 'もんか',
@@ -1615,13 +1637,13 @@ test('annotateTokens clears all annotations for rhetorical もんか grammar par
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for bare くれ auxiliary fragments', () => { test('annotateTokens keeps known status while clearing other annotations for bare くれ auxiliary fragments', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'くれ', surface: 'くれ',
@@ -1645,13 +1667,50 @@ test('annotateTokens clears all annotations for bare くれ auxiliary fragments'
{ minSentenceWordsForNPlusOne: 1 }, { minSentenceWordsForNPlusOne: 1 },
); );
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
}); });
test('annotateTokens clears all annotations for standalone quote particle and auxiliary grammar terms', () => { test('annotateTokens keeps known status while clearing other annotations for aru existence verbs', () => {
const tokens = [
makeToken({
surface: '有る',
headword: '有る',
reading: 'アル',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
startPos: 0,
endPos: 2,
frequencyRank: 8447,
isKnown: true,
isNPlusOneTarget: true,
isNameMatch: true,
jlptLevel: 'N5',
}),
];
const result = annotateTokens(
tokens,
makeDeps({
isKnownWord: (text) => text === '有る' || text === 'ある',
getJlptLevel: (text) => (text === '有る' || text === 'ある' ? 'N5' : null),
}),
{ minSentenceWordsForNPlusOne: 1 },
);
assert.equal(result[0]?.surface, '有る');
assert.equal(result[0]?.headword, '有る');
assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.isNameMatch, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
});
test('annotateTokens keeps known status while clearing other annotations for standalone quote particle and auxiliary grammar terms', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'って', surface: 'って',
@@ -1687,14 +1746,14 @@ test('annotateTokens clears all annotations for standalone quote particle and au
); );
for (const token of result) { for (const token of result) {
assert.equal(token.isKnown, false, token.surface); assert.equal(token.isKnown, true, token.surface);
assert.equal(token.isNPlusOneTarget, false, token.surface); assert.equal(token.isNPlusOneTarget, false, token.surface);
assert.equal(token.frequencyRank, undefined, token.surface); assert.equal(token.frequencyRank, undefined, token.surface);
assert.equal(token.jlptLevel, undefined, token.surface); assert.equal(token.jlptLevel, undefined, token.surface);
} }
}); });
test('annotateTokens clears all annotations from standalone あ interjections without POS tags', () => { test('annotateTokens keeps known status while clearing other annotations from standalone あ interjections without POS tags', () => {
const tokens = [ const tokens = [
makeToken({ makeToken({
surface: 'あ', surface: 'あ',
@@ -1724,7 +1783,7 @@ test('annotateTokens clears all annotations from standalone あ interjections wi
assert.equal(result[0]?.surface, 'あ'); assert.equal(result[0]?.surface, 'あ');
assert.equal(result[0]?.headword, 'あ'); assert.equal(result[0]?.headword, 'あ');
assert.equal(result[0]?.reading, 'あ'); assert.equal(result[0]?.reading, 'あ');
assert.equal(result[0]?.isKnown, false); assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.isNPlusOneTarget, false); assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined); assert.equal(result[0]?.jlptLevel, undefined);
@@ -1786,7 +1845,7 @@ test('annotateTokens clears all annotations from expressive subtitle interjectio
); );
for (const token of result.slice(0, 2)) { for (const token of result.slice(0, 2)) {
assert.equal(token.isKnown, false, token.surface); assert.equal(token.isKnown, true, token.surface);
assert.equal(token.isNPlusOneTarget, false, token.surface); assert.equal(token.isNPlusOneTarget, false, token.surface);
assert.equal(token.frequencyRank, undefined, token.surface); assert.equal(token.frequencyRank, undefined, token.surface);
assert.equal(token.jlptLevel, undefined, token.surface); assert.equal(token.jlptLevel, undefined, token.surface);
@@ -559,36 +559,6 @@ function computeTokenKnownStatus(
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading); return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
} }
function computeExcludedTokenKnownStatus(
token: MergedToken,
isKnownWord: (text: string) => boolean,
): boolean {
const normalizedSurface = token.surface.trim();
if (!hasKanjiChar(normalizedSurface)) {
return false;
}
if (normalizedSurface && isKnownWord(normalizedSurface)) {
return true;
}
const normalizedReading = token.reading.trim();
if (
normalizedReading &&
normalizedReading !== normalizedSurface &&
isKnownWord(normalizedReading)
) {
return true;
}
const normalizedHeadword = token.headword.trim();
return (
normalizedHeadword.length > 0 &&
normalizedHeadword === normalizedSurface &&
isKnownWord(normalizedHeadword)
);
}
function filterTokenFrequencyRank( function filterTokenFrequencyRank(
token: MergedToken, token: MergedToken,
pos1Exclusions: ReadonlySet<string>, pos1Exclusions: ReadonlySet<string>,
@@ -657,7 +627,9 @@ export function annotateTokens(
}); });
return { return {
...strippedToken, ...strippedToken,
isKnown: nPlusOneEnabled && computeExcludedTokenKnownStatus(token, deps.isKnownWord), isKnown: nPlusOneEnabled
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
: false,
}; };
} }
@@ -22,6 +22,7 @@ const STANDALONE_GRAMMAR_PARTICLE_PHRASES_SET: ReadonlySet<string> = new Set(
export const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([ export const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'あ', 'あ',
'ああ', 'ああ',
'ある',
'あなた', 'あなた',
'あんた', 'あんた',
'ええ', 'ええ',
@@ -51,6 +52,8 @@ export const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'何だ', '何だ',
'何も', '何も',
'如何した', '如何した',
'有る',
'在る',
'様', '様',
'確かに', '確かに',
'誰も', '誰も',
@@ -507,7 +510,6 @@ export function stripSubtitleAnnotationMetadata(
return { return {
...token, ...token,
isKnown: false,
isNPlusOneTarget: false, isNPlusOneTarget: false,
isNameMatch: false, isNameMatch: false,
jlptLevel: undefined, jlptLevel: undefined,