Decouple stats daemon and preserve final mine OSD status

- Run `subminer stats -b` as a dedicated daemon process, independent of the overlay app
- Stop the Anki progress spinner before showing the final `✓`/`x` mine result so it is not overwritten
- Keep grammar/noise subtitle tokens hoverable while stripping annotation metadata
2026-05-28 00:55:16 -07:00 · 2026-03-18 23:49:27 -07:00
parent 4d96ebf5c0
commit a954f62f55
32 changed files with 1879 additions and 78 deletions
@@ -130,6 +130,30 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
  assert.doesNotMatch(markup, /data-frequency-rank="12"|data-jlpt-level="N5"|word-jlpt-n5/);
 });

+test('serializeSubtitleMarkup keeps filtered tokens hoverable without annotation attrs', () => {
+  const payload: SubtitleData = {
+    text: 'は',
+    tokens: [
+      {
+        surface: 'は',
+        reading: 'は',
+        headword: 'は',
+        startPos: 0,
+        endPos: 1,
+        partOfSpeech: PartOfSpeech.particle,
+        pos1: '助詞',
+        isMerged: false,
+        isKnown: false,
+        isNPlusOneTarget: false,
+        isNameMatch: false,
+      },
+    ],
+  };
+
+  const markup = serializeSubtitleMarkup(payload, frequencyOptions);
+  assert.equal(markup, '<span class="word" data-reading="は" data-headword="は">は</span>');
+});
+
 test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
  const payload: SubtitleData = {
    text: '字幕',
@@ -1305,7 +1305,7 @@ test('tokenizeSubtitle ignores frequency lookup failures', async () => {
  assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
 });

-test('tokenizeSubtitle skips frequency rank when Yomitan token is enriched as particle by mecab pos1', async () => {
+test('tokenizeSubtitle keeps standalone particle token hoverable while clearing annotation metadata', async () => {
  const result = await tokenizeSubtitle(
    'は',
    makeDeps({
@@ -1350,9 +1350,33 @@ test('tokenizeSubtitle skips frequency rank when Yomitan token is enriched as pa
    }),
  );

-  assert.equal(result.tokens?.length, 1);
-  assert.equal(result.tokens?.[0]?.pos1, '助詞');
-  assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
+  assert.equal(result.text, 'は');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      reading: token.reading,
+      headword: token.headword,
+      pos1: token.pos1,
+      isKnown: token.isKnown,
+      isNPlusOneTarget: token.isNPlusOneTarget,
+      isNameMatch: token.isNameMatch,
+      jlptLevel: token.jlptLevel,
+      frequencyRank: token.frequencyRank,
+    })),
+    [
+      {
+        surface: 'は',
+        reading: 'は',
+        headword: 'は',
+        pos1: '助詞',
+        isKnown: false,
+        isNPlusOneTarget: false,
+        isNameMatch: false,
+        jlptLevel: undefined,
+        frequencyRank: undefined,
+      },
+    ],
+  );
 });

 test('tokenizeSubtitle keeps frequency rank when mecab tags classify token as content-bearing', async () => {
@@ -1460,7 +1484,7 @@ test('tokenizeSubtitle skips JLPT level for excluded demonstratives', async () =
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
 });

-test('tokenizeSubtitle excludes repeated kana interjections from annotation payloads entirely', async () => {
+test('tokenizeSubtitle keeps repeated kana interjections tokenized while clearing annotation metadata', async () => {
  const result = await tokenizeSubtitle(
    'ああ',
    makeDeps({
@@ -1491,7 +1515,29 @@ test('tokenizeSubtitle excludes repeated kana interjections from annotation payl
    }),
  );

-  assert.deepEqual(result, { text: 'ああ', tokens: null });
+  assert.equal(result.text, 'ああ');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      headword: token.headword,
+      reading: token.reading,
+      jlptLevel: token.jlptLevel,
+      frequencyRank: token.frequencyRank,
+      isKnown: token.isKnown,
+      isNPlusOneTarget: token.isNPlusOneTarget,
+    })),
+    [
+      {
+        surface: 'ああ',
+        headword: 'ああ',
+        reading: 'ああ',
+        jlptLevel: undefined,
+        frequencyRank: undefined,
+        isKnown: false,
+        isNPlusOneTarget: false,
+      },
+    ],
+  );
 });

 test('tokenizeSubtitle assigns JLPT level to Yomitan tokens', async () => {
@@ -2578,7 +2624,15 @@ test('tokenizeSubtitle keeps correct MeCab pos1 enrichment when Yomitan offsets
  const gaToken = result.tokens?.find((token) => token.surface === 'が');
  const desuToken = result.tokens?.find((token) => token.surface === 'です');
  assert.equal(gaToken?.pos1, '助詞');
+  assert.equal(gaToken?.isKnown, false);
+  assert.equal(gaToken?.isNPlusOneTarget, false);
+  assert.equal(gaToken?.jlptLevel, undefined);
+  assert.equal(gaToken?.frequencyRank, undefined);
  assert.equal(desuToken?.pos1, '助動詞');
+  assert.equal(desuToken?.isKnown, false);
+  assert.equal(desuToken?.isNPlusOneTarget, false);
+  assert.equal(desuToken?.jlptLevel, undefined);
+  assert.equal(desuToken?.frequencyRank, undefined);
  assert.equal(targets.length, 1);
  assert.equal(targets[0]?.surface, '仮面');
 });
@@ -3056,7 +3110,7 @@ test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and freque
  assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
 });

-test('tokenizeSubtitle excludes mecab-tagged interjections from annotation payloads entirely', async () => {
+test('tokenizeSubtitle keeps mecab-tagged interjections tokenized while clearing annotation metadata', async () => {
  const result = await tokenizeSubtitle(
    'ぐはっ',
    makeDepsFromYomitanTokens([{ surface: 'ぐはっ', reading: 'ぐはっ', headword: 'ぐはっ' }], {
@@ -3080,10 +3134,34 @@ test('tokenizeSubtitle excludes mecab-tagged interjections from annotation paylo
    }),
  );

-  assert.deepEqual(result, { text: 'ぐはっ', tokens: null });
+  assert.equal(result.text, 'ぐはっ');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      headword: token.headword,
+      reading: token.reading,
+      pos1: token.pos1,
+      jlptLevel: token.jlptLevel,
+      frequencyRank: token.frequencyRank,
+      isKnown: token.isKnown,
+      isNPlusOneTarget: token.isNPlusOneTarget,
+    })),
+    [
+      {
+        surface: 'ぐはっ',
+        headword: 'ぐはっ',
+        reading: 'ぐはっ',
+        pos1: '感動詞',
+        jlptLevel: undefined,
+        frequencyRank: undefined,
+        isKnown: false,
+        isNPlusOneTarget: false,
+      },
+    ],
+  );
 });

-test('tokenizeSubtitle keeps visible text while excluding interjections from mixed annotation payloads', async () => {
+test('tokenizeSubtitle keeps excluded interjections hoverable while clearing only their annotation metadata', async () => {
  const result = await tokenizeSubtitle(
    'ぐはっ 猫',
    makeDeps({
@@ -3147,8 +3225,261 @@ test('tokenizeSubtitle keeps visible text while excluding interjections from mix
    result.tokens?.map((token) => ({
      surface: token.surface,
      headword: token.headword,
+      frequencyRank: token.frequencyRank,
+      jlptLevel: token.jlptLevel,
    })),
-    [{ surface: '猫', headword: '猫' }],
+    [
+      { surface: 'ぐはっ', headword: 'ぐはっ', frequencyRank: undefined, jlptLevel: undefined },
+      { surface: '猫', headword: '猫', frequencyRank: 11, jlptLevel: 'N5' },
+    ],
+  );
+});
+
+test('tokenizeSubtitle keeps explanatory ending variants hoverable while clearing only their annotation metadata', async () => {
+  const result = await tokenizeSubtitle(
+    '猫んです',
+    makeDepsFromYomitanTokens(
+      [
+        { surface: '猫', reading: 'ねこ', headword: '猫' },
+        { surface: 'んです', reading: 'んです', headword: 'ん' },
+      ],
+      {
+        getFrequencyDictionaryEnabled: () => true,
+        getFrequencyRank: (text) => (text === '猫' ? 11 : 500),
+        getJlptLevel: (text) => (text === '猫' ? 'N5' : null),
+        tokenizeWithMecab: async () => [
+          {
+            headword: '猫',
+            surface: '猫',
+            reading: 'ネコ',
+            startPos: 0,
+            endPos: 1,
+            partOfSpeech: PartOfSpeech.noun,
+            pos1: '名詞',
+            pos2: '一般',
+            isMerged: false,
+            isKnown: false,
+            isNPlusOneTarget: false,
+          },
+          {
+            headword: 'ん',
+            surface: 'ん',
+            reading: 'ン',
+            startPos: 1,
+            endPos: 2,
+            partOfSpeech: PartOfSpeech.other,
+            pos1: '名詞',
+            pos2: '非自立',
+            isMerged: false,
+            isKnown: false,
+            isNPlusOneTarget: false,
+          },
+          {
+            headword: 'です',
+            surface: 'です',
+            reading: 'デス',
+            startPos: 2,
+            endPos: 4,
+            partOfSpeech: PartOfSpeech.bound_auxiliary,
+            pos1: '助動詞',
+            isMerged: false,
+            isKnown: false,
+            isNPlusOneTarget: false,
+          },
+        ],
+      },
+    ),
+  );
+
+  assert.equal(result.text, '猫んです');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      headword: token.headword,
+      jlptLevel: token.jlptLevel,
+      frequencyRank: token.frequencyRank,
+    })),
+    [
+      { surface: '猫', headword: '猫', jlptLevel: 'N5', frequencyRank: 11 },
+      { surface: 'んです', headword: 'ん', jlptLevel: undefined, frequencyRank: undefined },
+    ],
+  );
+});
+
+test('tokenizeSubtitle keeps standalone grammar-only tokens hoverable while clearing only their annotation metadata', async () => {
+  const result = await tokenizeSubtitle(
+    '私はこの猫です',
+    makeDeps({
+      getFrequencyDictionaryEnabled: () => true,
+      getFrequencyRank: (text) => (text === '私' ? 50 : text === '猫' ? 11 : 500),
+      getJlptLevel: (text) => (text === '私' ? 'N5' : text === '猫' ? 'N5' : null),
+      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
+      getYomitanParserWindow: () =>
+        ({
+          isDestroyed: () => false,
+          webContents: {
+            executeJavaScript: async (script: string) => {
+              if (script.includes('getTermFrequencies')) {
+                return [];
+              }
+
+              return [
+                {
+                  source: 'scanning-parser',
+                  index: 0,
+                  content: [
+                    [{ text: '私', reading: 'わたし', headwords: [[{ term: '私' }]] }],
+                    [{ text: 'は', reading: 'は', headwords: [[{ term: 'は' }]] }],
+                    [{ text: 'この', reading: 'この', headwords: [[{ term: 'この' }]] }],
+                    [{ text: '猫', reading: 'ねこ', headwords: [[{ term: '猫' }]] }],
+                    [{ text: 'です', reading: 'です', headwords: [[{ term: 'です' }]] }],
+                  ],
+                },
+              ];
+            },
+          },
+        }) as unknown as Electron.BrowserWindow,
+      tokenizeWithMecab: async () => [
+        {
+          headword: '私',
+          surface: '私',
+          reading: 'ワタシ',
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          pos1: '名詞',
+          pos2: '代名詞',
+          isMerged: true,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          headword: 'は',
+          surface: 'は',
+          reading: 'ハ',
+          startPos: 1,
+          endPos: 2,
+          partOfSpeech: PartOfSpeech.particle,
+          pos1: '助詞',
+          pos2: '係助詞',
+          isMerged: true,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          headword: 'この',
+          surface: 'この',
+          reading: 'コノ',
+          startPos: 2,
+          endPos: 4,
+          partOfSpeech: PartOfSpeech.other,
+          pos1: '連体詞',
+          isMerged: true,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          headword: '猫',
+          surface: '猫',
+          reading: 'ネコ',
+          startPos: 4,
+          endPos: 5,
+          partOfSpeech: PartOfSpeech.noun,
+          pos1: '名詞',
+          pos2: '一般',
+          isMerged: true,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          headword: 'です',
+          surface: 'です',
+          reading: 'デス',
+          startPos: 5,
+          endPos: 7,
+          partOfSpeech: PartOfSpeech.bound_auxiliary,
+          pos1: '助動詞',
+          isMerged: true,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+      ],
+    }),
+  );
+
+  assert.equal(result.text, '私はこの猫です');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      headword: token.headword,
+      frequencyRank: token.frequencyRank,
+      jlptLevel: token.jlptLevel,
+    })),
+    [
+      { surface: '私', headword: '私', frequencyRank: 50, jlptLevel: 'N5' },
+      { surface: 'は', headword: 'は', frequencyRank: undefined, jlptLevel: undefined },
+      { surface: 'この', headword: 'この', frequencyRank: undefined, jlptLevel: undefined },
+      { surface: '猫', headword: '猫', frequencyRank: 11, jlptLevel: 'N5' },
+      { surface: 'です', headword: 'です', frequencyRank: undefined, jlptLevel: undefined },
+    ],
+  );
+});
+
+test('tokenizeSubtitle keeps trailing quote-particle merged tokens hoverable while clearing only their annotation metadata', async () => {
+  const result = await tokenizeSubtitle(
+    'どうしてもって',
+    makeDepsFromYomitanTokens([{ surface: 'どうしてもって', reading: 'どうしてもって', headword: 'どうしても' }], {
+      getFrequencyDictionaryEnabled: () => true,
+      getFrequencyRank: (text) => (text === 'どうしても' ? 123 : null),
+      getJlptLevel: (text) => (text === 'どうしても' ? 'N3' : null),
+      tokenizeWithMecab: async () => [
+        {
+          headword: 'どうしても',
+          surface: 'どうしても',
+          reading: 'ドウシテモ',
+          startPos: 0,
+          endPos: 5,
+          partOfSpeech: PartOfSpeech.other,
+          pos1: '副詞',
+          pos2: '一般',
+          isMerged: false,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          headword: 'って',
+          surface: 'って',
+          reading: 'ッテ',
+          startPos: 5,
+          endPos: 7,
+          partOfSpeech: PartOfSpeech.particle,
+          pos1: '助詞',
+          pos2: '格助詞',
+          isMerged: false,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+      ],
+      getMinSentenceWordsForNPlusOne: () => 1,
+    }),
+  );
+
+  assert.equal(result.text, 'どうしてもって');
+  assert.deepEqual(
+    result.tokens?.map((token) => ({
+      surface: token.surface,
+      headword: token.headword,
+      jlptLevel: token.jlptLevel,
+      frequencyRank: token.frequencyRank,
+    })),
+    [
+      {
+        surface: 'どうしてもって',
+        headword: 'どうしても',
+        jlptLevel: undefined,
+        frequencyRank: undefined,
+      },
+    ],
  );
 });

@@ -178,7 +178,7 @@ async function applyAnnotationStage(
  );
 }

-async function filterSubtitleAnnotationTokens(tokens: MergedToken[]): Promise<MergedToken[]> {
+async function stripSubtitleAnnotationMetadata(tokens: MergedToken[]): Promise<MergedToken[]> {
  if (tokens.length === 0) {
    return tokens;
  }
@@ -188,9 +188,7 @@ async function filterSubtitleAnnotationTokens(tokens: MergedToken[]): Promise<Me
  }

  const annotationStage = await annotationStageModulePromise;
-  return tokens.filter(
-    (token) => !annotationStage.shouldExcludeTokenFromSubtitleAnnotations(token),
-  );
+  return tokens.map((token) => annotationStage.stripSubtitleAnnotationMetadata(token));
 }

 export function createTokenizerDepsRuntime(
@@ -721,12 +719,12 @@ export async function tokenizeSubtitle(

  const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
  if (yomitanTokens && yomitanTokens.length > 0) {
-    const filteredTokens = await filterSubtitleAnnotationTokens(
+    const annotatedTokens = await stripSubtitleAnnotationMetadata(
      await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
    );
    return {
      text: displayText,
-      tokens: filteredTokens.length > 0 ? filteredTokens : null,
+      tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
    };
  }

@@ -1,7 +1,12 @@
 import assert from 'node:assert/strict';
 import test from 'node:test';
 import { MergedToken, PartOfSpeech } from '../../../types';
-import { annotateTokens, AnnotationStageDeps } from './annotation-stage';
+import {
+  annotateTokens,
+  AnnotationStageDeps,
+  shouldExcludeTokenFromSubtitleAnnotations,
+  stripSubtitleAnnotationMetadata,
+} from './annotation-stage';

 function makeToken(overrides: Partial<MergedToken> = {}): MergedToken {
  return {
@@ -150,6 +155,170 @@ test('annotateTokens handles JLPT disabled and eligibility exclusion paths', ()
  assert.equal(excludedLookupCalls, 0);
 });

+test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory ending variants', () => {
+  const tokens = [
+    makeToken({
+      surface: 'んです',
+      headword: 'ん',
+      reading: 'ンデス',
+      pos1: '名詞|助動詞',
+      pos2: '非自立',
+    }),
+    makeToken({
+      surface: 'のだ',
+      headword: 'の',
+      reading: 'ノダ',
+      pos1: '名詞|助動詞',
+      pos2: '非自立',
+    }),
+    makeToken({
+      surface: 'んだ',
+      headword: 'ん',
+      reading: 'ンダ',
+      pos1: '名詞|助動詞',
+      pos2: '非自立',
+    }),
+    makeToken({
+      surface: 'のです',
+      headword: 'の',
+      reading: 'ノデス',
+      pos1: '名詞|助動詞',
+      pos2: '非自立',
+    }),
+    makeToken({
+      surface: 'なんです',
+      headword: 'だ',
+      reading: 'ナンデス',
+      pos1: '助動詞|名詞|助動詞',
+      pos2: '|非自立',
+    }),
+    makeToken({
+      surface: 'んでした',
+      headword: 'ん',
+      reading: 'ンデシタ',
+      pos1: '助動詞|助動詞|助動詞',
+    }),
+    makeToken({
+      surface: 'のでは',
+      headword: 'の',
+      reading: 'ノデハ',
+      pos1: '助詞|接続詞',
+    }),
+  ];
+
+  for (const token of tokens) {
+    assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
+  }
+});
+
+test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside explanatory ending family', () => {
+  const token = makeToken({
+    surface: '問題',
+    headword: '問題',
+    reading: 'モンダイ',
+    partOfSpeech: PartOfSpeech.noun,
+    pos1: '名詞',
+    pos2: '一般',
+  });
+
+  assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
+});
+
+test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
+  const tokens = [
+    makeToken({
+      surface: 'は',
+      headword: 'は',
+      reading: 'ハ',
+      partOfSpeech: PartOfSpeech.particle,
+      pos1: '助詞',
+    }),
+    makeToken({
+      surface: 'です',
+      headword: 'です',
+      reading: 'デス',
+      partOfSpeech: PartOfSpeech.bound_auxiliary,
+      pos1: '助動詞',
+    }),
+    makeToken({
+      surface: 'この',
+      headword: 'この',
+      reading: 'コノ',
+      partOfSpeech: PartOfSpeech.other,
+      pos1: '連体詞',
+    }),
+  ];
+
+  for (const token of tokens) {
+    assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
+  }
+});
+
+test('shouldExcludeTokenFromSubtitleAnnotations keeps mixed content tokens with trailing helpers', () => {
+  const token = makeToken({
+    surface: '行きます',
+    headword: '行く',
+    reading: 'イキマス',
+    partOfSpeech: PartOfSpeech.verb,
+    pos1: '動詞|助動詞',
+    pos2: '自立',
+  });
+
+  assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
+});
+
+test('shouldExcludeTokenFromSubtitleAnnotations excludes merged lexical tokens with trailing quote particles', () => {
+  const token = makeToken({
+    surface: 'どうしてもって',
+    headword: 'どうしても',
+    reading: 'ドウシテモッテ',
+    partOfSpeech: PartOfSpeech.other,
+    pos1: '副詞|助詞',
+    pos2: '一般|格助詞',
+  });
+
+  assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
+});
+
+test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
+  const token = makeToken({
+    surface: 'は',
+    headword: 'は',
+    reading: 'ハ',
+    partOfSpeech: PartOfSpeech.particle,
+    pos1: '助詞',
+    isKnown: true,
+    isNPlusOneTarget: true,
+    isNameMatch: true,
+    jlptLevel: 'N5',
+    frequencyRank: 12,
+  });
+
+  assert.deepEqual(stripSubtitleAnnotationMetadata(token), {
+    ...token,
+    isKnown: false,
+    isNPlusOneTarget: false,
+    isNameMatch: false,
+    jlptLevel: undefined,
+    frequencyRank: undefined,
+  });
+});
+
+test('stripSubtitleAnnotationMetadata leaves content tokens unchanged', () => {
+  const token = makeToken({
+    surface: '猫',
+    headword: '猫',
+    reading: 'ネコ',
+    partOfSpeech: PartOfSpeech.noun,
+    pos1: '名詞',
+    isKnown: true,
+    jlptLevel: 'N5',
+    frequencyRank: 42,
+  });
+
+  assert.strictEqual(stripSubtitleAnnotationMetadata(token), token);
+});
+
 test('annotateTokens prioritizes name matches over n+1, frequency, and JLPT when enabled', () => {
  let jlptLookupCalls = 0;
  const tokens = [
@@ -25,6 +25,45 @@ const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
  'ふう',
  'ほう',
 ]);
+const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
+const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
+  'だ',
+  'です',
+  'でした',
+  'だった',
+  'では',
+  'じゃ',
+  'でしょう',
+  'だろう',
+] as const;
+const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
+  '',
+  'か',
+  'ね',
+  'よ',
+  'な',
+  'よね',
+  'かな',
+  'かね',
+] as const;
+const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
+  SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
+    SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
+      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
+        (particle) => `${prefix}${core}${particle}`,
+      ),
+    ),
+  ),
+);
+const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
+  'って',
+  'ってよ',
+  'ってね',
+  'ってな',
+  'ってさ',
+  'ってか',
+  'ってば',
+]);

 const jlptLevelLookupCaches = new WeakMap<
  (text: string) => JlptLevel | null,
@@ -60,6 +99,7 @@ function normalizePos1Tag(pos1: string | undefined): string {
 }

 const SUBTITLE_ANNOTATION_EXCLUDED_POS1 = new Set(['感動詞']);
+const SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1 = new Set(['助詞', '助動詞', '連体詞']);

 function splitNormalizedTagParts(normalizedTag: string): string[] {
  if (!normalizedTag) {
@@ -84,7 +124,36 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin

 function isExcludedFromSubtitleAnnotationsByPos1(normalizedPos1: string): boolean {
  const parts = splitNormalizedTagParts(normalizedPos1);
-  return parts.some((part) => SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part));
+  if (parts.some((part) => SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part))) {
+    return true;
+  }
+
+  return parts.length > 0 && parts.every((part) => SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(part));
+}
+
+function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
+  const normalizedSurface = normalizeJlptTextForExclusion(token.surface);
+  const normalizedHeadword = normalizeJlptTextForExclusion(token.headword);
+  if (!normalizedSurface || !normalizedHeadword || !normalizedSurface.startsWith(normalizedHeadword)) {
+    return false;
+  }
+
+  const suffix = normalizedSurface.slice(normalizedHeadword.length);
+  if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(suffix)) {
+    return false;
+  }
+
+  const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
+  if (pos1Parts.length < 2) {
+    return false;
+  }
+
+  const [leadingPos1, ...trailingPos1] = pos1Parts;
+  if (!leadingPos1 || SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(leadingPos1)) {
+    return false;
+  }
+
+  return trailingPos1.length > 0 && trailingPos1.every((part) => part === '助詞');
 }

 function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
@@ -520,12 +589,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
 }

 function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
-  const candidates = [
-    resolveJlptLookupText(token),
-    token.surface,
-    token.headword,
-    token.reading,
-  ].filter(
+  const candidates = [token.surface, token.reading, resolveJlptLookupText(token)].filter(
    (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
  );

@@ -542,7 +606,9 @@ function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {

    if (
      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmedCandidate) ||
-      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate)
+      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate) ||
+      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmedCandidate) ||
+      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalizedCandidate)
    ) {
      return true;
    }
@@ -565,9 +631,28 @@ export function shouldExcludeTokenFromSubtitleAnnotations(token: MergedToken): b
    return true;
  }

+  if (isExcludedTrailingParticleMergedToken(token)) {
+    return true;
+  }
+
  return isExcludedFromSubtitleAnnotationsByTerm(token);
 }

+export function stripSubtitleAnnotationMetadata(token: MergedToken): MergedToken {
+  if (!shouldExcludeTokenFromSubtitleAnnotations(token)) {
+    return token;
+  }
+
+  return {
+    ...token,
+    isKnown: false,
+    isNPlusOneTarget: false,
+    isNameMatch: false,
+    jlptLevel: undefined,
+    frequencyRank: undefined,
+  };
+}
+
 function computeTokenKnownStatus(
  token: MergedToken,
  isKnownWord: (text: string) => boolean,