Overlay 2.0 (#12)

2026-05-04 12:41:30 -07:00 · 2026-03-01 02:36:51 -08:00
parent 45df3c466b
commit 44c7761c7c
397 changed files with 15139 additions and 7127 deletions
@@ -51,15 +51,20 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
 });

 test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
-  const lookupCalls: string[] = [];
  const tokens = [
-    makeToken({ surface: 'は', headword: 'は', partOfSpeech: PartOfSpeech.particle }),
+    makeToken({
+      surface: 'は',
+      headword: 'は',
+      partOfSpeech: PartOfSpeech.particle,
+      frequencyRank: 3,
+    }),
    makeToken({
      surface: 'です',
      headword: 'です',
      partOfSpeech: PartOfSpeech.bound_auxiliary,
      startPos: 1,
      endPos: 3,
+      frequencyRank: 4,
    }),
    makeToken({
      surface: 'の',
@@ -68,6 +73,7 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
      pos1: '助詞',
      startPos: 3,
      endPos: 4,
+      frequencyRank: 5,
    }),
    makeToken({
      surface: '猫',
@@ -75,24 +81,36 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
      partOfSpeech: PartOfSpeech.noun,
      startPos: 4,
      endPos: 5,
+      frequencyRank: 11,
    }),
  ];

-  const result = annotateTokens(
-    tokens,
-    makeDeps({
-      getFrequencyRank: (text) => {
-        lookupCalls.push(text);
-        return text === '猫' ? 11 : 999;
-      },
-    }),
-  );
+  const result = annotateTokens(tokens, makeDeps());

  assert.equal(result[0]?.frequencyRank, undefined);
  assert.equal(result[1]?.frequencyRank, undefined);
  assert.equal(result[2]?.frequencyRank, undefined);
  assert.equal(result[3]?.frequencyRank, 11);
-  assert.deepEqual(lookupCalls, ['猫']);
+});
+
+// An integer rank already attached to the token is expected to come back unchanged.
+test('annotateTokens preserves existing frequency rank when frequency is enabled', () => {
+  const rankedNoun = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });
+
+  const [annotated] = annotateTokens([rankedNoun], makeDeps());
+
+  assert.equal(annotated?.frequencyRank, 42);
+});
+
+// A non-finite rank (NaN here) is expected to be removed from the annotated token.
+test('annotateTokens drops invalid frequency rank values', () => {
+  const nanRankedNoun = makeToken({ surface: '猫', headword: '猫', frequencyRank: Number.NaN });
+  const [annotated] = annotateTokens([nanRankedNoun], makeDeps());
+  assert.equal(annotated?.frequencyRank, undefined);
+});
+
+// With frequencyEnabled set to false, even a valid incoming rank is expected to be cleared.
+test('annotateTokens clears frequency rank when frequency is disabled', () => {
+  const rankedNoun = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });
+  const [annotated] = annotateTokens([rankedNoun], makeDeps(), { frequencyEnabled: false });
+  assert.equal(annotated?.frequencyRank, undefined);
 });

 test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => {
@@ -157,3 +175,206 @@ test('annotateTokens N+1 handoff marks expected target when threshold is satisfi
  assert.equal(result[1]?.isNPlusOneTarget, true);
  assert.equal(result[2]?.isNPlusOneTarget, false);
 });
+
+// One noun plus a known particle and a known bound auxiliary: with a minimum of three
+// sentence words, the unknown noun is expected NOT to become the N+1 target.
+test('annotateTokens N+1 minimum sentence words counts only eligible word tokens', () => {
+  const noun = makeToken({ surface: '猫', headword: '猫', startPos: 0, endPos: 1 });
+  const particle = makeToken({
+    surface: 'が',
+    headword: 'が',
+    partOfSpeech: PartOfSpeech.particle,
+    pos1: '助詞',
+    startPos: 1,
+    endPos: 2,
+  });
+  const boundAuxiliary = makeToken({
+    surface: 'です',
+    headword: 'です',
+    partOfSpeech: PartOfSpeech.bound_auxiliary,
+    pos1: '助動詞',
+    startPos: 2,
+    endPos: 4,
+  });
+  const knownTexts = new Set<string>(['が', 'です']);
+
+  const [annotatedNoun, annotatedParticle, annotatedAuxiliary] = annotateTokens(
+    [noun, particle, boundAuxiliary],
+    makeDeps({
+      isKnownWord: (text) => knownTexts.has(text),
+    }),
+    { minSentenceWordsForNPlusOne: 3 },
+  );
+
+  assert.equal(annotatedNoun?.isKnown, false);
+  assert.equal(annotatedParticle?.isKnown, true);
+  assert.equal(annotatedAuxiliary?.isKnown, true);
+  assert.equal(annotatedNoun?.isNPlusOneTarget, false);
+});
+
+// A caller-supplied pos1 exclusion set ('名詞') is expected to strip the noun's frequency
+// rank and keep it from being an N+1 target, while the known verb keeps its rank.
+test('annotateTokens applies configured pos1 exclusions to both frequency and N+1', () => {
+  const excludedNoun = makeToken({
+    surface: '猫',
+    headword: '猫',
+    pos1: '名詞',
+    frequencyRank: 21,
+    startPos: 0,
+    endPos: 1,
+  });
+  const knownVerb = makeToken({
+    surface: '走る',
+    headword: '走る',
+    pos1: '動詞',
+    partOfSpeech: PartOfSpeech.verb,
+    startPos: 1,
+    endPos: 3,
+    frequencyRank: 22,
+  });
+
+  const [annotatedNoun, annotatedVerb] = annotateTokens(
+    [excludedNoun, knownVerb],
+    makeDeps({
+      isKnownWord: (text) => text === '走る',
+    }),
+    {
+      minSentenceWordsForNPlusOne: 1,
+      pos1Exclusions: new Set(['名詞']),
+    },
+  );
+
+  assert.equal(annotatedNoun?.frequencyRank, undefined);
+  assert.equal(annotatedVerb?.frequencyRank, 22);
+  assert.equal(annotatedNoun?.isNPlusOneTarget, false);
+  assert.equal(annotatedVerb?.isNPlusOneTarget, false);
+});
+
+// Passing an empty pos1 exclusion set overrides the defaults, so a '助詞' token is
+// expected to keep its rank and be eligible as the N+1 target.
+test('annotateTokens allows previously default-excluded pos1 when removed from effective set', () => {
+  const particleLikeToken = makeToken({
+    surface: 'は',
+    headword: 'は',
+    partOfSpeech: PartOfSpeech.other,
+    pos1: '助詞',
+    startPos: 0,
+    endPos: 1,
+    frequencyRank: 8,
+  });
+
+  const [annotated] = annotateTokens([particleLikeToken], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+    pos1Exclusions: new Set(),
+  });
+
+  assert.equal(annotated?.frequencyRank, 8);
+  assert.equal(annotated?.isNPlusOneTarget, true);
+});
+
+// With default options, a verb tagged pos2 '非自立' is expected to lose its frequency
+// rank and not be marked as the N+1 target.
+test('annotateTokens excludes default non-independent pos2 from frequency and N+1', () => {
+  const nonIndependentVerb = makeToken({
+    surface: 'になれば',
+    headword: 'なる',
+    partOfSpeech: PartOfSpeech.verb,
+    pos1: '動詞',
+    pos2: '非自立',
+    startPos: 0,
+    endPos: 4,
+    frequencyRank: 7,
+  });
+
+  const [annotated] = annotateTokens([nonIndependentVerb], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+  });
+
+  assert.equal(annotated?.frequencyRank, undefined);
+  assert.equal(annotated?.isNPlusOneTarget, false);
+});
+
+// A kana-only token with empty pos1/pos2 that looks like a sound effect is expected to
+// have its frequency rank dropped.
+test('annotateTokens excludes likely kana SFX tokens from frequency when POS tags are missing', () => {
+  const untaggedSfx = makeToken({
+    surface: 'ぐわっ',
+    reading: 'ぐわっ',
+    headword: 'ぐわっ',
+    pos1: '',
+    pos2: '',
+    frequencyRank: 12,
+    startPos: 0,
+    endPos: 3,
+  });
+
+  const [annotated] = annotateTokens([untaggedSfx], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+  });
+
+  assert.equal(annotated?.frequencyRank, undefined);
+});
+
+// Passing an empty pos2 exclusion set overrides the defaults, so a '非自立' verb is
+// expected to keep its rank and be eligible as the N+1 target.
+test('annotateTokens allows previously default-excluded pos2 when removed from effective set', () => {
+  const nonIndependentVerb = makeToken({
+    surface: 'になれば',
+    headword: 'なる',
+    partOfSpeech: PartOfSpeech.verb,
+    pos1: '動詞',
+    pos2: '非自立',
+    startPos: 0,
+    endPos: 4,
+    frequencyRank: 9,
+  });
+
+  const [annotated] = annotateTokens([nonIndependentVerb], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+    pos2Exclusions: new Set(),
+  });
+
+  assert.equal(annotated?.frequencyRank, 9);
+  assert.equal(annotated?.isNPlusOneTarget, true);
+});
+
+// Composite ('|'-joined) tags: the token is expected to stay eligible because at least
+// one pos1 component and one pos2 component are content-bearing.
+test('annotateTokens keeps composite tokens when any component pos tag is content-bearing', () => {
+  const compositeToken = makeToken({
+    surface: 'になれば',
+    headword: 'なる',
+    pos1: '助詞|動詞',
+    pos2: '格助詞|自立|接続助詞',
+    startPos: 0,
+    endPos: 4,
+    frequencyRank: 5,
+  });
+
+  const [annotated] = annotateTokens([compositeToken], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+  });
+
+  assert.equal(annotated?.frequencyRank, 5);
+  assert.equal(annotated?.isNPlusOneTarget, true);
+});
+
+// Composite ('|'-joined) tags whose pos1 components are all '助詞': the token is expected
+// to lose its frequency rank and not be marked as the N+1 target.
+test('annotateTokens excludes composite tokens when all component pos tags are excluded', () => {
+  const allParticleComposite = makeToken({
+    surface: 'けど',
+    headword: 'けど',
+    pos1: '助詞|助詞',
+    pos2: '接続助詞|終助詞',
+    startPos: 0,
+    endPos: 2,
+    frequencyRank: 6,
+  });
+
+  const [annotated] = annotateTokens([allParticleComposite], makeDeps(), {
+    minSentenceWordsForNPlusOne: 1,
+  });
+
+  assert.equal(annotated?.frequencyRank, undefined);
+  assert.equal(annotated?.isNPlusOneTarget, false);
+});
@@ -1,39 +1,38 @@
 import { markNPlusOneTargets } from '../../../token-merger';
 import {
-  FrequencyDictionaryLookup,
-  JlptLevel,
-  MergedToken,
-  NPlusOneMatchMode,
-  PartOfSpeech,
-} from '../../../types';
+  DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
+  resolveAnnotationPos1ExclusionSet,
+} from '../../../token-pos1-exclusions';
+import {
+  DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
+  resolveAnnotationPos2ExclusionSet,
+} from '../../../token-pos2-exclusions';
+import { JlptLevel, MergedToken, NPlusOneMatchMode, PartOfSpeech } from '../../../types';
 import { shouldIgnoreJlptByTerm, shouldIgnoreJlptForMecabPos1 } from '../jlpt-token-filter';

 const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
 const KATAKANA_CODEPOINT_START = 0x30a1;
 const KATAKANA_CODEPOINT_END = 0x30f6;
 const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
-const FREQUENCY_RANK_LOOKUP_CACHE_LIMIT = 2048;

 const jlptLevelLookupCaches = new WeakMap<
  (text: string) => JlptLevel | null,
  Map<string, JlptLevel | null>
 >();
-const frequencyRankLookupCaches = new WeakMap<
-  FrequencyDictionaryLookup,
-  Map<string, number | null>
->();

 export interface AnnotationStageDeps {
  isKnownWord: (text: string) => boolean;
  knownWordMatchMode: NPlusOneMatchMode;
  getJlptLevel: (text: string) => JlptLevel | null;
-  getFrequencyRank?: FrequencyDictionaryLookup;
 }

 export interface AnnotationStageOptions {
+  nPlusOneEnabled?: boolean;
  jlptEnabled?: boolean;
  frequencyEnabled?: boolean;
  minSentenceWordsForNPlusOne?: number;
+  pos1Exclusions?: ReadonlySet<string>;
+  pos2Exclusions?: ReadonlySet<string>;
 }

 function resolveKnownWordText(
@@ -59,106 +58,94 @@ function applyKnownWordMarking(
  });
 }

-function normalizeFrequencyLookupText(rawText: string): string {
-  return rawText.trim().toLowerCase();
+function normalizePos1Tag(pos1: string | undefined): string {
+  return typeof pos1 === 'string' ? pos1.trim() : '';
 }

-function getCachedFrequencyRank(
-  lookupText: string,
-  getFrequencyRank: FrequencyDictionaryLookup,
-): number | null {
-  const normalizedText = normalizeFrequencyLookupText(lookupText);
-  if (!normalizedText) {
-    return null;
+function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
+  if (!normalizedTag) {
+    return false;
  }
-
-  let cache = frequencyRankLookupCaches.get(getFrequencyRank);
-  if (!cache) {
-    cache = new Map<string, number | null>();
-    frequencyRankLookupCaches.set(getFrequencyRank, cache);
+  const parts = normalizedTag
+    .split('|')
+    .map((part) => part.trim())
+    .filter((part) => part.length > 0);
+  if (parts.length === 0) {
+    return false;
  }
-
-  if (cache.has(normalizedText)) {
-    return cache.get(normalizedText) ?? null;
-  }
-
-  let rank: number | null;
-  try {
-    rank = getFrequencyRank(normalizedText);
-  } catch {
-    rank = null;
-  }
-  if (rank !== null) {
-    if (!Number.isFinite(rank) || rank <= 0) {
-      rank = null;
-    }
-  }
-
-  cache.set(normalizedText, rank);
-  while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {
-    const firstKey = cache.keys().next().value;
-    if (firstKey !== undefined) {
-      cache.delete(firstKey);
-    }
-  }
-
-  return rank;
+  // Composite tags like "助詞|名詞" stay eligible unless every component is excluded.
+  return parts.every((part) => exclusions.has(part));
 }

-function resolveFrequencyLookupText(token: MergedToken): string {
-  if (token.headword && token.headword.length > 0) {
-    return token.headword;
+function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
+  if (options.pos1Exclusions) {
+    return options.pos1Exclusions;
  }
-  if (token.reading && token.reading.length > 0) {
-    return token.reading;
-  }
-  return token.surface;
+
+  return resolveAnnotationPos1ExclusionSet(DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG);
 }

-function getFrequencyLookupTextCandidates(token: MergedToken): string[] {
-  const lookupText = resolveFrequencyLookupText(token).trim();
-  return lookupText ? [lookupText] : [];
+function resolvePos2Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
+  if (options.pos2Exclusions) {
+    return options.pos2Exclusions;
+  }
+
+  return resolveAnnotationPos2ExclusionSet(DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG);
 }

-function isFrequencyExcludedByPos(token: MergedToken): boolean {
-  if (
-    token.partOfSpeech === PartOfSpeech.particle ||
-    token.partOfSpeech === PartOfSpeech.bound_auxiliary
-  ) {
+/** Returns the trimmed pos2 tag, or an empty string when the value is not a string. */
+function normalizePos2Tag(pos2: string | undefined): string {
+  if (typeof pos2 !== 'string') {
+    return '';
+  }
+  return pos2.trim();
+}
+
+function isFrequencyExcludedByPos(
+  token: MergedToken,
+  pos1Exclusions: ReadonlySet<string>,
+  pos2Exclusions: ReadonlySet<string>,
+): boolean {
+  const normalizedPos1 = normalizePos1Tag(token.pos1);
+  const hasPos1 = normalizedPos1.length > 0;
+  if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
    return true;
  }

-  return token.pos1 === '助詞' || token.pos1 === '助動詞';
+  const normalizedPos2 = normalizePos2Tag(token.pos2);
+  const hasPos2 = normalizedPos2.length > 0;
+  if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
+    return true;
+  }
+
+  if (hasPos1 || hasPos2) {
+    return false;
+  }
+
+  if (isLikelyFrequencyNoiseToken(token)) {
+    return true;
+  }
+
+  return (
+    token.partOfSpeech === PartOfSpeech.particle ||
+    token.partOfSpeech === PartOfSpeech.bound_auxiliary
+  );
 }

 function applyFrequencyMarking(
  tokens: MergedToken[],
-  getFrequencyRank: FrequencyDictionaryLookup,
+  pos1Exclusions: ReadonlySet<string>,
+  pos2Exclusions: ReadonlySet<string>,
 ): MergedToken[] {
  return tokens.map((token) => {
-    if (isFrequencyExcludedByPos(token)) {
+    if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
      return { ...token, frequencyRank: undefined };
    }

-    const lookupTexts = getFrequencyLookupTextCandidates(token);
-    if (lookupTexts.length === 0) {
-      return { ...token, frequencyRank: undefined };
-    }
-
-    let bestRank: number | null = null;
-    for (const lookupText of lookupTexts) {
-      const rank = getCachedFrequencyRank(lookupText, getFrequencyRank);
-      if (rank === null) {
-        continue;
-      }
-      if (bestRank === null || rank < bestRank) {
-        bestRank = rank;
-      }
+    if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
+      const rank = Math.max(1, Math.floor(token.frequencyRank));
+      return { ...token, frequencyRank: rank };
    }

    return {
      ...token,
-      frequencyRank: bestRank ?? undefined,
+      frequencyRank: undefined,
    };
  });
 }
@@ -282,6 +269,98 @@ function isRepeatedKanaSfx(text: string): boolean {
  return topCount >= Math.ceil(chars.length / 2);
 }

+/**
+ * Heuristic for short kana interjections that end in a small tsu ('っ').
+ *
+ * The text is passed through `normalizeJlptTextForExclusion` first; the normalized form
+ * must be 2-4 characters long, contain only characters accepted by `isKanaChar`, and
+ * end in 'っ'.
+ */
+function isTrailingSmallTsuKanaSfx(text: string): boolean {
+  const normalized = normalizeJlptTextForExclusion(text);
+  // An empty normalization result yields zero characters and fails the length check.
+  const chars = normalized ? [...normalized] : [];
+  const lengthInRange = chars.length >= 2 && chars.length <= 4;
+  return lengthInRange && chars.every(isKanaChar) && chars[chars.length - 1] === 'っ';
+}
+
+/**
+ * Heuristic for reduplicated kana words: after normalization the text must have an even
+ * length of at least four characters, contain only characters accepted by `isKanaChar`,
+ * and its second half must repeat its first half exactly.
+ */
+function isReduplicatedKanaSfx(text: string): boolean {
+  const normalized = normalizeJlptTextForExclusion(text);
+  if (!normalized) {
+    return false;
+  }
+
+  const chars = [...normalized];
+  const hasEvenLengthOfFourOrMore = chars.length >= 4 && chars.length % 2 === 0;
+  if (!hasEvenLengthOfFourOrMore || !chars.every(isKanaChar)) {
+    return false;
+  }
+
+  // Compare the halves position by position instead of joining them back into strings.
+  const half = chars.length / 2;
+  return chars.slice(0, half).every((char, offset) => char === chars[half + offset]);
+}
+
+/**
+ * Returns true when the normalized text consists only of characters accepted by
+ * `isKanaChar` and at least one character is immediately followed by itself.
+ */
+function hasAdjacentKanaRepeat(text: string): boolean {
+  const normalized = normalizeJlptTextForExclusion(text);
+  if (!normalized) {
+    return false;
+  }
+
+  const chars = [...normalized];
+  return (
+    chars.every(isKanaChar) &&
+    chars.some((char, index) => index > 0 && char === chars[index - 1])
+  );
+}
+
+/**
+ * Decides whether a token looks like noise for frequency purposes.
+ *
+ * The headword and the surface are each checked, in that order, in both trimmed and
+ * normalized form. A token is noise when `shouldIgnoreJlptByTerm` accepts either form,
+ * or when any of the kana sound-effect heuristics matches either form.
+ */
+function isLikelyFrequencyNoiseToken(token: MergedToken): boolean {
+  const sfxHeuristics = [hasAdjacentKanaRepeat, isReduplicatedKanaSfx, isTrailingSmallTsuKanaSfx];
+
+  return [token.headword, token.surface].some((candidate) => {
+    if (typeof candidate !== 'string' || candidate.length === 0) {
+      return false;
+    }
+
+    const trimmed = candidate.trim();
+    if (!trimmed) {
+      return false;
+    }
+
+    const normalized = normalizeJlptTextForExclusion(trimmed);
+    if (!normalized) {
+      return false;
+    }
+
+    // Each check sees the trimmed form first, then the normalized form.
+    const variants = [trimmed, normalized];
+    if (variants.some((variant) => shouldIgnoreJlptByTerm(variant))) {
+      return true;
+    }
+
+    return sfxHeuristics.some((looksLikeSfx) => variants.some((variant) => looksLikeSfx(variant)));
+  });
+}
+
 function isJlptEligibleToken(token: MergedToken): boolean {
  if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) {
    return false;
@@ -340,20 +419,24 @@ export function annotateTokens(
  deps: AnnotationStageDeps,
  options: AnnotationStageOptions = {},
 ): MergedToken[] {
-  const knownMarkedTokens = applyKnownWordMarking(
-    tokens,
-    deps.isKnownWord,
-    deps.knownWordMatchMode,
-  );
+  const pos1Exclusions = resolvePos1Exclusions(options);
+  const pos2Exclusions = resolvePos2Exclusions(options);
+  const nPlusOneEnabled = options.nPlusOneEnabled !== false;
+  const knownMarkedTokens = nPlusOneEnabled
+    ? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
+    : tokens.map((token) => ({
+        ...token,
+        isKnown: false,
+        isNPlusOneTarget: false,
+      }));

  const frequencyEnabled = options.frequencyEnabled !== false;
-  const frequencyMarkedTokens =
-    frequencyEnabled && deps.getFrequencyRank
-      ? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank)
-      : knownMarkedTokens.map((token) => ({
-          ...token,
-          frequencyRank: undefined,
-        }));
+  const frequencyMarkedTokens = frequencyEnabled
+    ? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
+    : knownMarkedTokens.map((token) => ({
+        ...token,
+        frequencyRank: undefined,
+      }));

  const jlptEnabled = options.jlptEnabled !== false;
  const jlptMarkedTokens = jlptEnabled
@@ -363,6 +446,14 @@ export function annotateTokens(
        jlptLevel: undefined,
      }));

+  if (!nPlusOneEnabled) {
+    return jlptMarkedTokens.map((token) => ({
+      ...token,
+      isKnown: false,
+      isNPlusOneTarget: false,
+    }));
+  }
+
  const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
  const sanitizedMinSentenceWordsForNPlusOne =
    minSentenceWordsForNPlusOne !== undefined &&
@@ -371,5 +462,10 @@ export function annotateTokens(
      ? minSentenceWordsForNPlusOne
      : 3;

-  return markNPlusOneTargets(jlptMarkedTokens, sanitizedMinSentenceWordsForNPlusOne);
+  return markNPlusOneTargets(
+    jlptMarkedTokens,
+    sanitizedMinSentenceWordsForNPlusOne,
+    pos1Exclusions,
+    pos2Exclusions,
+  );
 }
@@ -22,12 +22,13 @@ function makeToken(overrides: Partial<MergedToken>): MergedToken {
 test('enrichTokensWithMecabPos1 picks pos1 by best overlap when no surface match exists', () => {
  const tokens = [makeToken({ surface: 'grouped', startPos: 2, endPos: 7 })];
  const mecabTokens = [
-    makeToken({ surface: 'left', startPos: 0, endPos: 4, pos1: 'A' }),
-    makeToken({ surface: 'right', startPos: 2, endPos: 6, pos1: 'B' }),
+    makeToken({ surface: 'left', startPos: 0, endPos: 4, pos1: 'A', pos2: 'L2' }),
+    makeToken({ surface: 'right', startPos: 2, endPos: 6, pos1: 'B', pos2: '非自立' }),
  ];

  const enriched = enrichTokensWithMecabPos1(tokens, mecabTokens);
-  assert.equal(enriched[0]?.pos1, 'B');
+  assert.equal(enriched[0]?.pos1, 'A|B');
+  assert.equal(enriched[0]?.pos2, 'L2|非自立');
 });

 test('enrichTokensWithMecabPos1 fills missing pos1 using surface-sequence fallback', () => {
@@ -1,13 +1,45 @@
 import { MergedToken } from '../../../types';

-function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): string | undefined {
-  if (mecabTokens.length === 0) {
+type MecabPosMetadata = {
+  pos1: string;
+  pos2?: string;
+  pos3?: string;
+};
+
+/**
+ * Joins tag values into a single '|'-separated string.
+ *
+ * Values are trimmed; missing and blank entries are skipped; duplicates are dropped while
+ * keeping first-seen order. Returns undefined when no usable tag remains.
+ */
+function joinUniqueTags(values: Array<string | undefined>): string | undefined {
+  // A Set iterates in insertion order, so first-seen order is preserved.
+  const seen = new Set<string>();
+  for (const raw of values) {
+    const tag = raw?.trim();
+    if (tag) {
+      seen.add(tag);
+    }
+  }
+  return seen.size > 0 ? [...seen].join('|') : undefined;
+}
+
+function pickClosestMecabPosMetadata(
+  token: MergedToken,
+  mecabTokens: MergedToken[],
+): MecabPosMetadata | null {
+  if (mecabTokens.length === 0) {
+    return null;
+  }

  const tokenStart = token.startPos ?? 0;
  const tokenEnd = token.endPos ?? tokenStart + token.surface.length;
-  let bestSurfaceMatchPos1: string | undefined;
+  let bestSurfaceMatchToken: MergedToken | null = null;
  let bestSurfaceMatchDistance = Number.MAX_SAFE_INTEGER;
  let bestSurfaceMatchEndDistance = Number.MAX_SAFE_INTEGER;

@@ -31,19 +63,24 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
    ) {
      bestSurfaceMatchDistance = startDistance;
      bestSurfaceMatchEndDistance = endDistance;
-      bestSurfaceMatchPos1 = mecabToken.pos1;
+      bestSurfaceMatchToken = mecabToken;
    }
  }

-  if (bestSurfaceMatchPos1) {
-    return bestSurfaceMatchPos1;
+  if (bestSurfaceMatchToken) {
+    return {
+      pos1: bestSurfaceMatchToken.pos1 as string,
+      pos2: bestSurfaceMatchToken.pos2,
+      pos3: bestSurfaceMatchToken.pos3,
+    };
  }

-  let bestPos1: string | undefined;
+  let bestToken: MergedToken | null = null;
  let bestOverlap = 0;
  let bestSpan = 0;
  let bestStartDistance = Number.MAX_SAFE_INTEGER;
  let bestStart = Number.MAX_SAFE_INTEGER;
+  const overlappingTokens: MergedToken[] = [];

  for (const mecabToken of mecabTokens) {
    if (!mecabToken.pos1) {
@@ -58,6 +95,7 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
    if (overlap === 0) {
      continue;
    }
+    overlappingTokens.push(mecabToken);

    const span = mecabEnd - mecabStart;
    if (
@@ -71,11 +109,23 @@ function pickClosestMecabPos1(token: MergedToken, mecabTokens: MergedToken[]): s
      bestSpan = span;
      bestStartDistance = Math.abs(mecabStart - tokenStart);
      bestStart = mecabStart;
-      bestPos1 = mecabToken.pos1;
+      bestToken = mecabToken;
    }
  }

-  return bestOverlap > 0 ? bestPos1 : undefined;
+  if (bestOverlap === 0 || !bestToken) {
+    return null;
+  }
+
+  const overlapPos1 = joinUniqueTags(overlappingTokens.map((token) => token.pos1));
+  const overlapPos2 = joinUniqueTags(overlappingTokens.map((token) => token.pos2));
+  const overlapPos3 = joinUniqueTags(overlappingTokens.map((token) => token.pos3));
+
+  return {
+    pos1: overlapPos1 ?? (bestToken.pos1 as string),
+    pos2: overlapPos2 ?? bestToken.pos2,
+    pos3: overlapPos3 ?? bestToken.pos3,
+  };
 }

 function fillMissingPos1BySurfaceSequence(
@@ -101,7 +151,7 @@ function fillMissingPos1BySurfaceSequence(
      return token;
    }

-    let best: { pos1: string; index: number } | null = null;
+    let best: { token: MergedToken; index: number } | null = null;
    for (const candidate of indexedMecabTokens) {
      if (candidate.token.surface !== surface) {
        continue;
@@ -109,7 +159,7 @@ function fillMissingPos1BySurfaceSequence(
      if (candidate.index < cursor) {
        continue;
      }
-      best = { pos1: candidate.token.pos1 as string, index: candidate.index };
+      best = { token: candidate.token, index: candidate.index };
      break;
    }

@@ -118,7 +168,7 @@ function fillMissingPos1BySurfaceSequence(
        if (candidate.token.surface !== surface) {
          continue;
        }
-        best = { pos1: candidate.token.pos1 as string, index: candidate.index };
+        best = { token: candidate.token, index: candidate.index };
        break;
      }
    }
@@ -130,7 +180,9 @@ function fillMissingPos1BySurfaceSequence(
    cursor = best.index + 1;
    return {
      ...token,
-      pos1: best.pos1,
+      pos1: best.token.pos1,
+      pos2: best.token.pos2,
+      pos3: best.token.pos3,
    };
  });
 }
@@ -152,14 +204,16 @@ export function enrichTokensWithMecabPos1(
      return token;
    }

-    const pos1 = pickClosestMecabPos1(token, mecabTokens);
-    if (!pos1) {
+    const metadata = pickClosestMecabPosMetadata(token, mecabTokens);
+    if (!metadata) {
      return token;
    }

    return {
      ...token,
-      pos1,
+      pos1: metadata.pos1,
+      pos2: metadata.pos2,
+      pos3: metadata.pos3,
    };
  });

@@ -0,0 +1,149 @@
+import type { MergedToken } from '../../../types';
+import { createLogger } from '../../../logger';
+import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
+
+const logger = createLogger('main:tokenizer');
+const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER';
+
+interface WorkerRequest {
+  id: number;
+  tokens: MergedToken[];
+  mecabTokens: MergedToken[] | null;
+}
+
+interface WorkerResponse {
+  id?: unknown;
+  result?: unknown;
+  error?: unknown;
+}
+
+type PendingRequest = {
+  resolve: (value: MergedToken[]) => void;
+  reject: (reason?: unknown) => void;
+};
+
+/**
+ * Runs MeCab pos enrichment on a lazily started worker thread.
+ *
+ * Worker start-up is attempted at most once. Whenever no worker is available (disabled
+ * through the environment variable, start-up failed, still starting, or the worker has
+ * gone away), `enrichTokens` performs the enrichment synchronously on the calling thread.
+ */
+class ParserEnrichmentWorkerRuntime {
+  private worker: import('node:worker_threads').Worker | null = null;
+  private nextRequestId = 1;
+  private pending = new Map<number, PendingRequest>();
+  private initAttempted = false;
+
+  /**
+   * Enriches `tokens` with MeCab pos metadata.
+   *
+   * @param tokens Tokens to enrich.
+   * @param mecabTokens MeCab tokens to take pos metadata from, or null.
+   * @returns The enriched tokens. The promise rejects when the worker reports an error,
+   *   returns a non-array payload, fails, exits, or the request cannot be posted.
+   */
+  async enrichTokens(
+    tokens: MergedToken[],
+    mecabTokens: MergedToken[] | null,
+  ): Promise<MergedToken[]> {
+    const worker = await this.getWorker();
+    if (!worker) {
+      return enrichTokensWithMecabPos1(tokens, mecabTokens);
+    }
+
+    return new Promise<MergedToken[]>((resolve, reject) => {
+      const id = this.nextRequestId++;
+      this.pending.set(id, { resolve, reject });
+      const request: WorkerRequest = { id, tokens, mecabTokens };
+      try {
+        worker.postMessage(request);
+      } catch (error) {
+        // postMessage can throw synchronously (for example when the payload cannot be
+        // cloned); drop the bookkeeping entry so it does not linger in the map.
+        this.pending.delete(id);
+        reject(error);
+      }
+    });
+  }
+
+  /**
+   * Returns the running worker, starting it on the first call.
+   *
+   * Returns null when the worker is disabled via the environment variable, when a
+   * start-up attempt has already been made and no worker is currently set, or when
+   * loading `node:worker_threads`, resolving the worker script, or constructing the
+   * worker fails.
+   */
+  private async getWorker(): Promise<import('node:worker_threads').Worker | null> {
+    if (process.env[DISABLE_WORKER_ENV] === '1') {
+      return null;
+    }
+    if (this.worker) {
+      return this.worker;
+    }
+    if (this.initAttempted) {
+      return null;
+    }
+
+    // Set before the first await so concurrent callers do not start a second worker.
+    this.initAttempted = true;
+
+    let workerThreads: typeof import('node:worker_threads');
+    try {
+      workerThreads = await import('node:worker_threads');
+    } catch {
+      return null;
+    }
+
+    let workerPath = '';
+    try {
+      workerPath = require.resolve('./parser-enrichment-worker-thread.js');
+    } catch {
+      return null;
+    }
+
+    try {
+      const worker = new workerThreads.Worker(workerPath);
+      worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message));
+      worker.on('error', (error: Error) => this.handleWorkerFailure(error));
+      worker.on('exit', (code: number) => {
+        // Reject in-flight requests even on a clean exit; otherwise their promises
+        // would never settle and callers would wait forever instead of falling back.
+        if (code !== 0 || this.pending.size > 0) {
+          this.handleWorkerFailure(new Error(`parser enrichment worker exited with code ${code}`));
+          return;
+        }
+        this.worker = null;
+      });
+      this.worker = worker;
+      return worker;
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      logger.debug(`Failed to start parser enrichment worker: ${reason}`);
+      return null;
+    }
+  }
+
+  /**
+   * Settles the pending request matching `message.id`.
+   *
+   * Messages without a numeric id, or with an id that is not pending, are ignored.
+   * A non-empty string `error` or a non-array `result` rejects the request.
+   */
+  private handleWorkerMessage(message: WorkerResponse): void {
+    if (typeof message.id !== 'number') {
+      return;
+    }
+
+    const request = this.pending.get(message.id);
+    if (!request) {
+      return;
+    }
+    this.pending.delete(message.id);
+
+    if (typeof message.error === 'string' && message.error.length > 0) {
+      request.reject(new Error(message.error));
+      return;
+    }
+
+    if (!Array.isArray(message.result)) {
+      request.reject(new Error('Parser enrichment worker returned invalid payload'));
+      return;
+    }
+
+    request.resolve(message.result as MergedToken[]);
+  }
+
+  /**
+   * Rejects every pending request with `error` and detaches from the worker.
+   * Because the start-up attempt is never repeated, later calls run on the calling thread.
+   */
+  private handleWorkerFailure(error: Error): void {
+    logger.debug(
+      `Parser enrichment worker unavailable, falling back to main thread: ${error.message}`,
+    );
+    for (const pending of this.pending.values()) {
+      pending.reject(error);
+    }
+    this.pending.clear();
+
+    if (this.worker) {
+      this.worker.removeAllListeners();
+      this.worker = null;
+    }
+  }
+}
+
+// Shared worker runtime, created on first use.
+let runtime: ParserEnrichmentWorkerRuntime | null = null;
+
+/**
+ * Enriches tokens with MeCab pos metadata through the shared worker runtime.
+ *
+ * If the runtime rejects for any reason, the enrichment is recomputed synchronously with
+ * `enrichTokensWithMecabPos1`.
+ */
+export async function enrichTokensWithMecabPos1Async(
+  tokens: MergedToken[],
+  mecabTokens: MergedToken[] | null,
+): Promise<MergedToken[]> {
+  const activeRuntime = runtime ?? (runtime = new ParserEnrichmentWorkerRuntime());
+
+  return activeRuntime
+    .enrichTokens(tokens, mecabTokens)
+    .catch(() => enrichTokensWithMecabPos1(tokens, mecabTokens));
+}
@@ -0,0 +1,25 @@
+import { parentPort } from 'node:worker_threads';
+import type { MergedToken } from '../../../types';
+import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
+
+interface WorkerRequest {
+  id: number;
+  tokens: MergedToken[];
+  mecabTokens: MergedToken[] | null;
+}
+
+// This module only makes sense inside a worker thread; without a parent port there is
+// nobody to answer.
+const port = parentPort;
+if (port === null) {
+  throw new Error('parser-enrichment worker missing parent port');
+}
+
+// Answer every request with either { id, result } or { id, error }.
+port.on('message', (request: WorkerRequest) => {
+  try {
+    port.postMessage({
+      id: request.id,
+      result: enrichTokensWithMecabPos1(request.tokens, request.mecabTokens),
+    });
+  } catch (failure) {
+    const reason = failure instanceof Error ? failure.message : String(failure);
+    port.postMessage({ id: request.id, error: reason });
+  }
+});
@@ -0,0 +1,248 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+import {
+  requestYomitanTermFrequencies,
+  syncYomitanDefaultAnkiServer,
+} from './yomitan-parser-runtime';
+
+/**
+ * Builds a stub dependency bundle for the parser runtime.
+ *
+ * The returned parser window is never destroyed and forwards every
+ * `webContents.executeJavaScript` call to the supplied callback; all setters
+ * are no-ops and both promise getters return `null`.
+ */
+function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
+  const webContents = {
+    executeJavaScript: async (script: string) => await executeJavaScript(script),
+  };
+  const parserWindow = {
+    isDestroyed: () => false,
+    webContents,
+  };
+  const ignore = () => undefined;
+  const noPromise = () => null;
+
+  return {
+    getYomitanExt: () => ({ id: 'ext-id' }) as never,
+    getYomitanParserWindow: () => parserWindow as never,
+    setYomitanParserWindow: ignore,
+    getYomitanParserReadyPromise: noPromise,
+    setYomitanParserReadyPromise: ignore,
+    getYomitanParserInitPromise: noPromise,
+    setYomitanParserInitPromise: ignore,
+  };
+}
+
+test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
+  const infoMessages: string[] = [];
+  let capturedScript = '';
+  const deps = createDeps(async (script) => {
+    capturedScript = script;
+    return { updated: true };
+  });
+
+  const didUpdate = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
+    error: () => undefined,
+    info: (message) => infoMessages.push(message),
+  });
+
+  // The injected script has to read the full options and write them back.
+  assert.match(capturedScript, /optionsGetFull/);
+  assert.match(capturedScript, /setAllSettings/);
+  assert.equal(didUpdate, true);
+  assert.equal(infoMessages.length, 1);
+});
+
+test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
+  const silentLogger = {
+    error: () => undefined,
+    info: () => undefined,
+  };
+  const deps = createDeps(() => Promise.resolve({ updated: false }));
+
+  const didUpdate = await syncYomitanDefaultAnkiServer(
+    'http://127.0.0.1:8766',
+    deps,
+    silentLogger,
+  );
+
+  assert.equal(didUpdate, false);
+});
+
+test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
+  const errorMessages: string[] = [];
+  // Every script execution rejects, simulating a broken parser window.
+  const deps = createDeps(() => Promise.reject(new Error('execute failed')));
+
+  const didUpdate = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
+    error: (message) => errorMessages.push(message),
+    info: () => undefined,
+  });
+
+  assert.equal(didUpdate, false);
+  assert.equal(errorMessages.length, 1);
+});
+
+test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
+  let scriptRuns = 0;
+  const deps = createDeps(async () => {
+    scriptRuns += 1;
+    return { updated: true };
+  });
+  const silentLogger = {
+    error: () => undefined,
+    info: () => undefined,
+  };
+
+  // A whitespace-only URL must short-circuit before any script is executed.
+  const didUpdate = await syncYomitanDefaultAnkiServer('   ', deps, silentLogger);
+
+  assert.equal(scriptRuns, 0);
+  assert.equal(didUpdate, false);
+});
+
+test('requestYomitanTermFrequencies returns normalized frequency entries', async () => {
+  // Two valid entries plus one that must be dropped (frequency 0, no display value).
+  const backendPayload = [
+    {
+      term: '猫',
+      reading: 'ねこ',
+      dictionary: 'freq-dict',
+      dictionaryPriority: 0,
+      frequency: 77,
+      displayValue: '77',
+      displayValueParsed: true,
+    },
+    {
+      term: '鍛える',
+      reading: 'きたえる',
+      dictionary: 'freq-dict',
+      dictionaryPriority: 1,
+      frequency: 46961,
+      displayValue: '2847,46961',
+      displayValueParsed: true,
+    },
+    {
+      term: 'invalid',
+      dictionary: 'freq-dict',
+      frequency: 0,
+    },
+  ];
+  let capturedScript = '';
+  const deps = createDeps(async (script) => {
+    capturedScript = script;
+    return backendPayload;
+  });
+
+  const entries = await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
+    error: () => undefined,
+  });
+  const [first, second] = entries;
+
+  assert.equal(entries.length, 2);
+  assert.equal(first?.term, '猫');
+  assert.equal(first?.frequency, 77);
+  assert.equal(first?.dictionaryPriority, 0);
+  assert.equal(second?.term, '鍛える');
+  // The leading number of the "2847,46961" display value wins over `frequency`.
+  assert.equal(second?.frequency, 2847);
+  assert.match(capturedScript, /getTermFrequencies/);
+  assert.match(capturedScript, /optionsGetFull/);
+});
+
+test('requestYomitanTermFrequencies prefers primary rank from displayValue array pair', async () => {
+  const backendPayload = [
+    {
+      term: '無人',
+      reading: 'むじん',
+      dictionary: 'freq-dict',
+      dictionaryPriority: 0,
+      frequency: 157632,
+      displayValue: [7141, 157632],
+      displayValueParsed: true,
+    },
+  ];
+  const deps = createDeps(() => Promise.resolve(backendPayload));
+
+  const entries = await requestYomitanTermFrequencies(
+    [{ term: '無人', reading: 'むじん' }],
+    deps,
+    { error: () => undefined },
+  );
+
+  assert.equal(entries.length, 1);
+  const [only] = entries;
+  assert.equal(only?.term, '無人');
+  // The first element of the displayValue pair is taken as the rank.
+  assert.equal(only?.frequency, 7141);
+});
+
+test('requestYomitanTermFrequencies caches profile metadata between calls', async () => {
+  const optionsPayload = {
+    profileCurrent: 0,
+    profiles: [
+      {
+        options: {
+          scanning: { length: 40 },
+          dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
+        },
+      },
+    ],
+  };
+  const dogPayload = [
+    {
+      term: '犬',
+      reading: 'いぬ',
+      dictionary: 'freq-dict',
+      frequency: 12,
+      displayValue: '12',
+      displayValueParsed: true,
+    },
+  ];
+  const catPayload = [
+    {
+      term: '猫',
+      reading: 'ねこ',
+      dictionary: 'freq-dict',
+      frequency: 77,
+      displayValue: '77',
+      displayValueParsed: true,
+    },
+  ];
+
+  const executedScripts: string[] = [];
+  const deps = createDeps(async (script) => {
+    executedScripts.push(script);
+    // Route by script content: options request, 犬 lookup, anything else.
+    if (script.includes('optionsGetFull')) {
+      return optionsPayload;
+    }
+    return script.includes('"term":"犬"') ? dogPayload : catPayload;
+  });
+  const logger = { error: () => undefined };
+
+  await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, logger);
+  await requestYomitanTermFrequencies([{ term: '犬', reading: 'いぬ' }], deps, logger);
+
+  // Two different lookups, but the options were fetched only once.
+  const optionsRequests = executedScripts.filter((script) => script.includes('optionsGetFull'));
+  assert.equal(optionsRequests.length, 1);
+});
+
+test('requestYomitanTermFrequencies caches repeated term+reading lookups', async () => {
+  const optionsPayload = {
+    profileCurrent: 0,
+    profiles: [
+      {
+        options: {
+          scanning: { length: 40 },
+          dictionaries: [{ name: 'freq-dict', enabled: true, id: 0 }],
+        },
+      },
+    ],
+  };
+  const catPayload = [
+    {
+      term: '猫',
+      reading: 'ねこ',
+      dictionary: 'freq-dict',
+      frequency: 77,
+      displayValue: '77',
+      displayValueParsed: true,
+    },
+  ];
+
+  const executedScripts: string[] = [];
+  const deps = createDeps(async (script) => {
+    executedScripts.push(script);
+    return script.includes('optionsGetFull') ? optionsPayload : catPayload;
+  });
+  const logger = { error: () => undefined };
+  const lookup = [{ term: '猫', reading: 'ねこ' }];
+
+  await requestYomitanTermFrequencies(lookup, deps, logger);
+  await requestYomitanTermFrequencies(lookup, deps, logger);
+
+  // The second identical lookup must be answered from the cache.
+  const frequencyRequests = executedScripts.filter((script) =>
+    script.includes('getTermFrequencies'),
+  );
+  assert.equal(frequencyRequests.length, 1);
+});
@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';

+/** Minimal logger contract used by this module; only `error` is required. */
 interface LoggerLike {
  error: (message: string, ...args: unknown[]) => void;
+  // Optional: call sites in this module invoke it with optional chaining.
+  info?: (message: string, ...args: unknown[]) => void;
 }

 interface YomitanParserRuntimeDeps {
@@ -14,6 +15,395 @@ interface YomitanParserRuntimeDeps {
  setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
 }

+/** One normalized frequency entry, as produced by `toYomitanTermFrequency`. */
+export interface YomitanTermFrequency {
+  // Trimmed, non-empty term text.
+  term: string;
+  // Reading as reported by the backend, or null when it is not a string.
+  reading: string | null;
+  // Trimmed, non-empty dictionary name.
+  dictionary: string;
+  // Non-negative integer; Number.MAX_SAFE_INTEGER when no priority is known.
+  dictionaryPriority: number;
+  // Positive integer; a parsable display value takes precedence over the raw frequency.
+  frequency: number;
+  // Raw display value when it was a string, otherwise null.
+  displayValue: string | null;
+  // True only when the backend reported `displayValueParsed === true`.
+  displayValueParsed: boolean;
+}
+
+/** A term to look up, optionally qualified by its reading. */
+export interface YomitanTermReadingPair {
+  term: string;
+  // Null when no reading is supplied; blank readings are normalized to null before lookup.
+  reading: string | null;
+}
+
+/** Profile settings extracted from the Yomitan options and cached per parser window. */
+interface YomitanProfileMetadata {
+  // Non-negative index of the profile the settings were read from.
+  profileIndex: number;
+  // Scan length (>= 1) passed to parseText.
+  scanLength: number;
+  // Dictionary names, ordered by ascending priority.
+  dictionaries: string[];
+  // Dictionary name -> non-negative priority (lower sorts first).
+  dictionaryPriorityByName: Record<string, number>;
+}
+
+// Scan length used whenever the profile does not provide a finite numeric one.
+const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
+// Profile metadata resolved for each parser window; weakly keyed by the window.
+const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();
+// Frequency entries per parser window, keyed by makeTermReadingCacheKey(term, reading).
+const yomitanFrequencyCacheByWindow = new WeakMap<BrowserWindow, Map<string, YomitanTermFrequency[]>>();
+
+/** Type guard: true for any non-null value whose `typeof` is `'object'` (arrays included). */
+function isObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null;
+}
+
+/**
+ * Builds the cache key for a term/reading pair: the term and the reading
+ * (empty string when null) joined by a NUL character.
+ */
+function makeTermReadingCacheKey(term: string, reading: string | null): string {
+  return [term, reading ?? ''].join('\u0000');
+}
+
+/** Returns the frequency cache for `window`, creating and registering it on first use. */
+function getWindowFrequencyCache(window: BrowserWindow): Map<string, YomitanTermFrequency[]> {
+  const existing = yomitanFrequencyCacheByWindow.get(window);
+  if (existing !== undefined) {
+    return existing;
+  }
+
+  const created = new Map<string, YomitanTermFrequency[]>();
+  yomitanFrequencyCacheByWindow.set(window, created);
+  return created;
+}
+
+/** Forgets both the cached frequencies and the cached profile metadata for `window`. */
+function clearWindowCaches(window: BrowserWindow): void {
+  yomitanFrequencyCacheByWindow.delete(window);
+  yomitanProfileMetadataByWindow.delete(window);
+}
+/** Exported entry point for dropping the per-window caches; delegates to `clearWindowCaches`. */
+export function clearYomitanParserCachesForWindow(window: BrowserWindow): void {
+  clearWindowCaches(window);
+}
+
+/**
+ * Converts a finite number greater than zero to an integer by flooring it,
+ * never returning less than 1. Anything else yields null.
+ */
+function asPositiveInteger(value: unknown): number | null {
+  if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
+    // Fractions below 1 floor to 0, so clamp them up to 1.
+    return Math.max(1, Math.floor(value));
+  }
+  return null;
+}
+
+/**
+ * Parses the leading number of a frequency display string.
+ *
+ * The numeric prefix may contain commas. It is read as one thousands-grouped
+ * number only when it is well formed (first group of 1-3 digits, every later
+ * group exactly 3 digits, e.g. "1,234,567"). Otherwise the commas are taken
+ * to separate distinct values (e.g. "2847,46961") and only the first value is
+ * used.
+ *
+ * @param value - Raw display string; surrounding whitespace is ignored.
+ * @returns The parsed positive integer, or null when there is no leading
+ *   number or it is not greater than zero.
+ */
+function parsePositiveFrequencyString(value: string): number | null {
+  const trimmed = value.trim();
+  if (!trimmed) {
+    return null;
+  }
+
+  // Digit groups separated by single commas; a trailing comma is not part of
+  // the number, so "1,234," still reads as 1234.
+  const numericPrefix = trimmed.match(/^\d+(?:,\d+)*/)?.[0];
+  if (!numericPrefix) {
+    return null;
+  }
+
+  // Requiring a 1-3 digit first group keeps pairs such as "2847,469" from
+  // being glued together into 2847469.
+  const isThousandsGrouped = /^\d{1,3}(?:,\d{3})+$/.test(numericPrefix);
+  const digits = isThousandsGrouped
+    ? numericPrefix.replace(/,/g, '')
+    : (numericPrefix.split(',')[0] ?? '');
+  const parsed = Number.parseInt(digits, 10);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    return null;
+  }
+
+  return parsed;
+}
+
+/**
+ * Extracts a positive integer frequency from a number, a display string, or
+ * an array of such values (the first parsable element wins). Returns null
+ * when nothing usable is found.
+ */
+function parsePositiveFrequencyValue(value: unknown): number | null {
+  if (typeof value === 'string') {
+    return parsePositiveFrequencyString(value);
+  }
+
+  if (Array.isArray(value)) {
+    for (const candidate of value) {
+      const fromCandidate = parsePositiveFrequencyValue(candidate);
+      if (fromCandidate !== null) {
+        return fromCandidate;
+      }
+    }
+    return null;
+  }
+
+  // Numbers are handled here; every other type yields null.
+  return asPositiveInteger(value);
+}
+
+/**
+ * Validates and normalizes one raw frequency entry.
+ *
+ * Returns null unless the entry is an object with a non-blank `term`, a
+ * non-blank `dictionary` and a positive frequency. A parsable `displayValue`
+ * takes precedence over the raw `frequency` field.
+ */
+function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
+  if (!isObject(value)) {
+    return null;
+  }
+
+  const {
+    term: rawTerm,
+    dictionary: rawDictionary,
+    displayValue: rawDisplayValue,
+    reading: rawReading,
+    dictionaryPriority: rawPriority,
+  } = value;
+
+  const term = typeof rawTerm === 'string' ? rawTerm.trim() : '';
+  const dictionary = typeof rawDictionary === 'string' ? rawDictionary.trim() : '';
+  if (!term || !dictionary) {
+    return null;
+  }
+
+  const fromDisplayValue =
+    rawDisplayValue === null || rawDisplayValue === undefined
+      ? null
+      : parsePositiveFrequencyValue(rawDisplayValue);
+  const frequency = fromDisplayValue ?? parsePositiveFrequencyValue(value.frequency);
+  if (frequency === null) {
+    return null;
+  }
+
+  // Entries without a usable priority sort after every prioritized one.
+  let dictionaryPriority = Number.MAX_SAFE_INTEGER;
+  if (typeof rawPriority === 'number' && Number.isFinite(rawPriority)) {
+    dictionaryPriority = Math.max(0, Math.floor(rawPriority));
+  }
+
+  return {
+    term,
+    reading: typeof rawReading === 'string' ? rawReading : null,
+    dictionary,
+    dictionaryPriority,
+    frequency,
+    displayValue: typeof rawDisplayValue === 'string' ? rawDisplayValue : null,
+    displayValueParsed: value.displayValueParsed === true,
+  };
+}
+
+/**
+ * Cleans up a lookup list before it is sent to the backend.
+ *
+ * Terms are trimmed and blank ones dropped; readings are trimmed and blank or
+ * non-string readings become null; duplicate term/reading pairs are removed,
+ * keeping the first occurrence and the original order.
+ *
+ * @param termReadingList - Pairs as supplied by the caller.
+ * @returns A new, de-duplicated list of normalized pairs.
+ */
+function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
+  const normalized: YomitanTermReadingPair[] = [];
+  const seen = new Set<string>();
+
+  for (const pair of termReadingList) {
+    const term = typeof pair.term === 'string' ? pair.term.trim() : '';
+    if (!term) {
+      continue;
+    }
+    const reading =
+      typeof pair.reading === 'string' && pair.reading.trim().length > 0 ? pair.reading.trim() : null;
+    // Use the shared key builder so de-duplication here can never drift from
+    // the keys used by the frequency cache.
+    const key = makeTermReadingCacheKey(term, reading);
+    if (seen.has(key)) {
+      continue;
+    }
+    seen.add(key);
+    normalized.push({ term, reading });
+  }
+
+  return normalized;
+}
+
+/**
+ * Normalizes a metadata payload into `YomitanProfileMetadata`.
+ *
+ * Each field is read from the flat property first (`profileIndex`,
+ * `scanLength`, `dictionaries`, `dictionaryPriorityByName`) and otherwise
+ * derived from an options-style shape (`profileCurrent` and
+ * `profiles[profileIndex].options`).
+ *
+ * @param value - Raw value returned by the injected script.
+ * @returns The normalized metadata, or null when `value` is not an object.
+ */
+function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null {
+  if (!isObject(value)) {
+    return null;
+  }
+
+  // Profile index: floored and clamped to >= 0, defaulting to 0.
+  const profileIndexRaw = value.profileIndex ?? value.profileCurrent;
+  const profileIndex =
+    typeof profileIndexRaw === 'number' && Number.isFinite(profileIndexRaw)
+      ? Math.max(0, Math.floor(profileIndexRaw))
+      : 0;
+  // Scan length: floored and clamped to >= 1, defaulting to DEFAULT_YOMITAN_SCAN_LENGTH.
+  const scanLengthRaw =
+    value.scanLength ??
+    (Array.isArray(value.profiles) && isObject(value.profiles[profileIndex])
+      ? (value.profiles[profileIndex] as { options?: { scanning?: { length?: unknown } } }).options
+          ?.scanning?.length
+      : undefined);
+  const scanLength =
+    typeof scanLengthRaw === 'number' && Number.isFinite(scanLengthRaw)
+      ? Math.max(1, Math.floor(scanLengthRaw))
+      : DEFAULT_YOMITAN_SCAN_LENGTH;
+  const dictionariesRaw =
+    value.dictionaries ??
+    (Array.isArray(value.profiles) && isObject(value.profiles[profileIndex])
+      ? (value.profiles[profileIndex] as { options?: { dictionaries?: unknown[] } }).options
+          ?.dictionaries
+      : undefined);
+  // Dictionary entries may be plain names (priority = position) or objects
+  // (priority = numeric `id`, else position). Objects that are explicitly
+  // disabled, unnamed or blank are dropped. The result is the list of names
+  // sorted by ascending priority.
+  const dictionaries = Array.isArray(dictionariesRaw)
+    ? dictionariesRaw
+        .map((entry, index) => {
+          if (typeof entry === 'string') {
+            return { name: entry.trim(), priority: index };
+          }
+          if (!isObject(entry) || entry.enabled === false || typeof entry.name !== 'string') {
+            return null;
+          }
+          const normalizedName = entry.name.trim();
+          if (!normalizedName) {
+            return null;
+          }
+          const priorityRaw = (entry as { id?: unknown }).id;
+          const priority =
+            typeof priorityRaw === 'number' && Number.isFinite(priorityRaw)
+              ? Math.max(0, Math.floor(priorityRaw))
+              : index;
+          return { name: normalizedName, priority };
+        })
+        .filter((entry): entry is { name: string; priority: number } => entry !== null)
+        .sort((a, b) => a.priority - b.priority)
+        .map((entry) => entry.name)
+        .filter((entry) => entry.length > 0)
+    : [];
+  // Explicit priorities supplied by the payload win; only finite numbers with
+  // a non-blank name are accepted.
+  const dictionaryPriorityByNameRaw = value.dictionaryPriorityByName;
+  const dictionaryPriorityByName: Record<string, number> = {};
+  if (isObject(dictionaryPriorityByNameRaw)) {
+    for (const [name, priorityRaw] of Object.entries(dictionaryPriorityByNameRaw)) {
+      if (typeof priorityRaw !== 'number' || !Number.isFinite(priorityRaw)) {
+        continue;
+      }
+      const normalizedName = name.trim();
+      if (!normalizedName) {
+        continue;
+      }
+      dictionaryPriorityByName[normalizedName] = Math.max(0, Math.floor(priorityRaw));
+    }
+  }
+
+  // Dictionaries without an explicit priority fall back to their position in
+  // the sorted list.
+  for (let index = 0; index < dictionaries.length; index += 1) {
+    const dictionary = dictionaries[index];
+    if (!dictionary) {
+      continue;
+    }
+    if (dictionaryPriorityByName[dictionary] === undefined) {
+      dictionaryPriorityByName[dictionary] = index;
+    }
+  }
+
+  return {
+    profileIndex,
+    scanLength,
+    dictionaries,
+    dictionaryPriorityByName,
+  };
+}
+
+/**
+ * Normalizes raw backend entries and stamps each with the priority recorded
+ * for its dictionary in `dictionaryPriorityByName`. Entries that fail
+ * validation are dropped; entries whose dictionary has no recorded priority
+ * keep the priority they were parsed with.
+ */
+function normalizeFrequencyEntriesWithPriority(
+  rawResult: unknown[],
+  dictionaryPriorityByName: Record<string, number>,
+): YomitanTermFrequency[] {
+  return rawResult
+    .map((rawEntry) => toYomitanTermFrequency(rawEntry))
+    .filter((parsed): parsed is YomitanTermFrequency => parsed !== null)
+    .map((parsed) => {
+      const knownPriority = dictionaryPriorityByName[parsed.dictionary];
+      return {
+        ...parsed,
+        dictionaryPriority: knownPriority !== undefined ? knownPriority : parsed.dictionaryPriority,
+      };
+    });
+}
+
+/**
+ * Buckets entries by their term/reading cache key. Terms and readings are
+ * trimmed first, and a blank or missing reading is keyed as null.
+ */
+function groupFrequencyEntriesByPair(
+  entries: YomitanTermFrequency[],
+): Map<string, YomitanTermFrequency[]> {
+  const buckets = new Map<string, YomitanTermFrequency[]>();
+  for (const entry of entries) {
+    const trimmedReading = typeof entry.reading === 'string' ? entry.reading.trim() : '';
+    const key = makeTermReadingCacheKey(
+      entry.term.trim(),
+      trimmedReading.length > 0 ? trimmedReading : null,
+    );
+    const bucket = buckets.get(key);
+    if (bucket === undefined) {
+      buckets.set(key, [entry]);
+    } else {
+      bucket.push(entry);
+    }
+  }
+  return buckets;
+}
+
+/** Buckets entries by trimmed term, ignoring readings; blank terms are skipped. */
+function groupFrequencyEntriesByTerm(
+  entries: YomitanTermFrequency[],
+): Map<string, YomitanTermFrequency[]> {
+  const buckets = new Map<string, YomitanTermFrequency[]>();
+  for (const entry of entries) {
+    const term = entry.term.trim();
+    if (term.length === 0) {
+      continue;
+    }
+
+    const bucket = buckets.get(term);
+    if (bucket === undefined) {
+      buckets.set(term, [entry]);
+    } else {
+      bucket.push(entry);
+    }
+  }
+  return buckets;
+}
+
+/**
+ * Resolves the profile metadata for `parserWindow`, using the per-window
+ * cache when available.
+ *
+ * On a cache miss a script is executed in the parser window that asks the
+ * Yomitan backend for its full options ("optionsGetFull") and extracts the
+ * current profile index, its scan length and its enabled dictionaries. The
+ * normalized result is cached before it is returned.
+ *
+ * @param parserWindow - Window hosting the Yomitan extension page.
+ * @param logger - Receives an error message when the script fails.
+ * @returns The metadata, or null when the script fails or returns a
+ *   non-object value. Failures are not cached.
+ */
+async function requestYomitanProfileMetadata(
+  parserWindow: BrowserWindow,
+  logger: LoggerLike,
+): Promise<YomitanProfileMetadata | null> {
+  const cached = yomitanProfileMetadataByWindow.get(parserWindow);
+  if (cached) {
+    return cached;
+  }
+
+  // Runs inside the parser window. Only enabled dictionaries with a string
+  // name are kept; they are ordered by numeric `id`, and the priority of each
+  // is its position after sorting.
+  const script = `
+    (async () => {
+      const invoke = (action, params) =>
+        new Promise((resolve, reject) => {
+          chrome.runtime.sendMessage({ action, params }, (response) => {
+            if (chrome.runtime.lastError) {
+              reject(new Error(chrome.runtime.lastError.message));
+              return;
+            }
+            if (!response || typeof response !== "object") {
+              reject(new Error("Invalid response from Yomitan backend"));
+              return;
+            }
+            if (response.error) {
+              reject(new Error(response.error.message || "Yomitan backend error"));
+              return;
+            }
+            resolve(response.result);
+          });
+        });
+
+      const optionsFull = await invoke("optionsGetFull", undefined);
+      const profileIndex =
+        typeof optionsFull.profileCurrent === "number" && Number.isFinite(optionsFull.profileCurrent)
+          ? Math.max(0, Math.floor(optionsFull.profileCurrent))
+          : 0;
+      const scanLengthRaw = optionsFull.profiles?.[profileIndex]?.options?.scanning?.length;
+      const scanLength =
+        typeof scanLengthRaw === "number" && Number.isFinite(scanLengthRaw)
+          ? Math.max(1, Math.floor(scanLengthRaw))
+          : ${DEFAULT_YOMITAN_SCAN_LENGTH};
+      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
+      const dictionaryEntries = Array.isArray(dictionariesRaw)
+        ? dictionariesRaw
+            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
+            .map((entry, index) => ({
+              name: entry.name,
+              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.max(0, Math.floor(entry.id)) : index
+            }))
+            .sort((a, b) => a.id - b.id)
+        : [];
+      const dictionaries = dictionaryEntries.map((entry) => entry.name);
+      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
+        acc[entry.name] = index;
+        return acc;
+      }, {});
+
+      return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
+    })();
+  `;
+
+  try {
+    const rawMetadata = await parserWindow.webContents.executeJavaScript(script, true);
+    const metadata = toYomitanProfileMetadata(rawMetadata);
+    if (!metadata) {
+      return null;
+    }
+    // Cached until the entry for this window is cleared.
+    yomitanProfileMetadataByWindow.set(parserWindow, metadata);
+    return metadata;
+  } catch (err) {
+    logger.error('Yomitan parser metadata request failed:', (err as Error).message);
+    return null;
+  }
+}
+
 async function ensureYomitanParserWindow(
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
@@ -58,6 +448,7 @@ async function ensureYomitanParserWindow(
    );

    parserWindow.on('closed', () => {
+      clearWindowCaches(parserWindow);
      if (deps.getYomitanParserWindow() === parserWindow) {
        deps.setYomitanParserWindow(null);
        deps.setYomitanParserReadyPromise(null);
@@ -77,6 +468,7 @@ async function ensureYomitanParserWindow(
      if (!parserWindow.isDestroyed()) {
        parserWindow.destroy();
      }
+      clearWindowCaches(parserWindow);
      if (deps.getYomitanParserWindow() === parserWindow) {
        deps.setYomitanParserWindow(null);
        deps.setYomitanParserReadyPromise(null);
@@ -108,7 +500,40 @@ export async function requestYomitanParseResults(
    return null;
  }

-  const script = `
+  const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
+  const script =
+    metadata !== null
+      ? `
+    (async () => {
+      const invoke = (action, params) =>
+        new Promise((resolve, reject) => {
+          chrome.runtime.sendMessage({ action, params }, (response) => {
+            if (chrome.runtime.lastError) {
+              reject(new Error(chrome.runtime.lastError.message));
+              return;
+            }
+            if (!response || typeof response !== "object") {
+              reject(new Error("Invalid response from Yomitan backend"));
+              return;
+            }
+            if (response.error) {
+              reject(new Error(response.error.message || "Yomitan backend error"));
+              return;
+            }
+            resolve(response.result);
+          });
+        });
+
+      return await invoke("parseText", {
+        text: ${JSON.stringify(text)},
+        optionsContext: { index: ${metadata.profileIndex} },
+        scanLength: ${metadata.scanLength},
+        useInternalParser: true,
+        useMecabParser: true
+      });
+    })();
+  `
+      : `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
@@ -132,7 +557,7 @@ export async function requestYomitanParseResults(
      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex = optionsFull.profileCurrent;
      const scanLength =
-        optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? 40;
+        optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? ${DEFAULT_YOMITAN_SCAN_LENGTH};

      return await invoke("parseText", {
        text: ${JSON.stringify(text)},
@@ -152,3 +577,278 @@ export async function requestYomitanParseResults(
    return null;
  }
 }
+
+/**
+ * Looks up frequency entries for a list of term/reading pairs.
+ *
+ * Pairs are normalized and de-duplicated, answered from the per-window cache
+ * where possible, and only the missing pairs are requested from the Yomitan
+ * backend. When cached profile metadata lists at least one dictionary, the
+ * request uses it directly; otherwise a self-contained script resolves the
+ * enabled dictionaries itself before querying.
+ *
+ * For each requested pair the cache stores the entries matching both term and
+ * reading, falling back to all entries for the term. Entries returned for
+ * terms that were not requested are appended to the result but not cached.
+ * A failed request or a non-array payload caches nothing, so the same pairs
+ * are retried on the next call.
+ *
+ * @param termReadingList - Pairs to look up.
+ * @param deps - Accessors for the Yomitan extension and its parser window.
+ * @param logger - Receives an error message when a request fails.
+ * @returns The matching entries; empty when there is nothing to look up, the
+ *   extension is unavailable, or the parser window is not ready.
+ */
+export async function requestYomitanTermFrequencies(
+  termReadingList: YomitanTermReadingPair[],
+  deps: YomitanParserRuntimeDeps,
+  logger: LoggerLike,
+): Promise<YomitanTermFrequency[]> {
+  const normalizedTermReadingList = normalizeTermReadingList(termReadingList);
+  const yomitanExt = deps.getYomitanExt();
+  if (normalizedTermReadingList.length === 0 || !yomitanExt) {
+    return [];
+  }
+
+  const isReady = await ensureYomitanParserWindow(deps, logger);
+  const parserWindow = deps.getYomitanParserWindow();
+  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
+    return [];
+  }
+
+  const metadata = await requestYomitanProfileMetadata(parserWindow, logger);
+  const frequencyCache = getWindowFrequencyCache(parserWindow);
+  const missingTermReadingList: YomitanTermReadingPair[] = [];
+
+  // Collects whatever the cache currently holds for the requested pairs.
+  const buildCachedResult = (): YomitanTermFrequency[] => {
+    const result: YomitanTermFrequency[] = [];
+    for (const pair of normalizedTermReadingList) {
+      const key = makeTermReadingCacheKey(pair.term, pair.reading);
+      const cached = frequencyCache.get(key);
+      if (cached && cached.length > 0) {
+        result.push(...cached);
+      }
+    }
+    return result;
+  };
+
+  // Shared by both request paths: validates the payload, caches one entry list
+  // per missing pair and returns the combined result.
+  const cacheFetchedEntries = (
+    rawResult: unknown,
+    normalize: (rawEntries: unknown[]) => YomitanTermFrequency[],
+  ): YomitanTermFrequency[] => {
+    if (!Array.isArray(rawResult)) {
+      // Treat an unexpected payload like a failed request: caching "no
+      // entries" here would suppress these pairs until the window closes.
+      logger.error('Yomitan term frequency request returned a non-array payload');
+      return buildCachedResult();
+    }
+
+    const fetchedEntries = normalize(rawResult);
+    const groupedByPair = groupFrequencyEntriesByPair(fetchedEntries);
+    const groupedByTerm = groupFrequencyEntriesByTerm(fetchedEntries);
+    const missingTerms = new Set(missingTermReadingList.map((pair) => pair.term));
+
+    for (const pair of missingTermReadingList) {
+      const key = makeTermReadingCacheKey(pair.term, pair.reading);
+      // Prefer exact term+reading matches; otherwise accept any entry for the
+      // term. An empty list records that the lookup found nothing.
+      frequencyCache.set(key, groupedByPair.get(key) ?? groupedByTerm.get(pair.term) ?? []);
+    }
+
+    const cachedResult = buildCachedResult();
+    const unmatchedEntries = fetchedEntries.filter((entry) => !missingTerms.has(entry.term.trim()));
+    return [...cachedResult, ...unmatchedEntries];
+  };
+
+  for (const pair of normalizedTermReadingList) {
+    const key = makeTermReadingCacheKey(pair.term, pair.reading);
+    if (!frequencyCache.has(key)) {
+      missingTermReadingList.push(pair);
+    }
+  }
+
+  if (missingTermReadingList.length === 0) {
+    return buildCachedResult();
+  }
+
+  if (metadata && metadata.dictionaries.length > 0) {
+    // Fast path: the dictionary list is already known from cached metadata.
+    const script = `
+      (async () => {
+        const invoke = (action, params) =>
+          new Promise((resolve, reject) => {
+            chrome.runtime.sendMessage({ action, params }, (response) => {
+              if (chrome.runtime.lastError) {
+                reject(new Error(chrome.runtime.lastError.message));
+                return;
+              }
+              if (!response || typeof response !== "object") {
+                reject(new Error("Invalid response from Yomitan backend"));
+                return;
+              }
+              if (response.error) {
+                reject(new Error(response.error.message || "Yomitan backend error"));
+                return;
+              }
+              resolve(response.result);
+            });
+          });
+
+        return await invoke("getTermFrequencies", {
+          termReadingList: ${JSON.stringify(missingTermReadingList)},
+          dictionaries: ${JSON.stringify(metadata.dictionaries)}
+        });
+      })();
+    `;
+
+    try {
+      const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
+      return cacheFetchedEntries(rawResult, (rawEntries) =>
+        normalizeFrequencyEntriesWithPriority(rawEntries, metadata.dictionaryPriorityByName),
+      );
+    } catch (err) {
+      logger.error('Yomitan term frequency request failed:', (err as Error).message);
+      return buildCachedResult();
+    }
+  }
+
+  // Fallback path: resolve the enabled dictionaries inside the script and
+  // attach each entry's dictionary priority there.
+  const script = `
+    (async () => {
+      const invoke = (action, params) =>
+        new Promise((resolve, reject) => {
+          chrome.runtime.sendMessage({ action, params }, (response) => {
+            if (chrome.runtime.lastError) {
+              reject(new Error(chrome.runtime.lastError.message));
+              return;
+            }
+            if (!response || typeof response !== "object") {
+              reject(new Error("Invalid response from Yomitan backend"));
+              return;
+            }
+            if (response.error) {
+              reject(new Error(response.error.message || "Yomitan backend error"));
+              return;
+            }
+            resolve(response.result);
+          });
+        });
+
+      const optionsFull = await invoke("optionsGetFull", undefined);
+      const profileIndex = optionsFull.profileCurrent;
+      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
+      const dictionaryEntries = Array.isArray(dictionariesRaw)
+        ? dictionariesRaw
+            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
+            .map((entry, index) => ({
+              name: entry.name,
+              id: typeof entry.id === "number" && Number.isFinite(entry.id) ? Math.floor(entry.id) : index
+            }))
+            .sort((a, b) => a.id - b.id)
+        : [];
+      const dictionaries = dictionaryEntries.map((entry) => entry.name);
+      const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => {
+        acc[entry.name] = index;
+        return acc;
+      }, {});
+
+      if (dictionaries.length === 0) {
+        return [];
+      }
+
+      const rawFrequencies = await invoke("getTermFrequencies", {
+        termReadingList: ${JSON.stringify(missingTermReadingList)},
+        dictionaries
+      });
+
+      if (!Array.isArray(rawFrequencies)) {
+        return [];
+      }
+
+      return rawFrequencies
+        .filter((entry) => entry && typeof entry === "object")
+        .map((entry) => ({
+          ...entry,
+          dictionaryPriority:
+            typeof entry.dictionary === "string" && dictionaryPriorityByName[entry.dictionary] !== undefined
+              ? dictionaryPriorityByName[entry.dictionary]
+              : Number.MAX_SAFE_INTEGER
+        }));
+    })();
+  `;
+
+  try {
+    const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
+    return cacheFetchedEntries(rawResult, (rawEntries) =>
+      rawEntries
+        .map((entry) => toYomitanTermFrequency(entry))
+        .filter((entry): entry is YomitanTermFrequency => entry !== null),
+    );
+  } catch (err) {
+    logger.error('Yomitan term frequency request failed:', (err as Error).message);
+    return buildCachedResult();
+  }
+}
+
+/**
+ * Points the first Yomitan profile's Anki server at `serverUrl`, but only when
+ * that profile still has no server or the stock "http://127.0.0.1:8765".
+ *
+ * @param serverUrl - Target server URL; surrounding whitespace is ignored and
+ *   a blank value makes the call a no-op.
+ * @param deps - Accessors for the Yomitan parser window.
+ * @param logger - Receives an info message on update and an error message on
+ *   failure.
+ * @returns True only when the injected script reports that it changed the
+ *   setting; false for a blank URL, an unavailable parser window, no change,
+ *   or a script failure.
+ */
+export async function syncYomitanDefaultAnkiServer(
+  serverUrl: string,
+  deps: YomitanParserRuntimeDeps,
+  logger: LoggerLike,
+): Promise<boolean> {
+  const normalizedTargetServer = serverUrl.trim();
+  if (!normalizedTargetServer) {
+    return false;
+  }
+
+  const isReady = await ensureYomitanParserWindow(deps, logger);
+  const parserWindow = deps.getYomitanParserWindow();
+  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
+    return false;
+  }
+
+  // Runs inside the parser window: reads the full options, patches
+  // profiles[0].options.anki.server in place and writes the whole options
+  // object back via "setAllSettings". A server the user has customized is
+  // left untouched.
+  const script = `
+    (async () => {
+      const invoke = (action, params) =>
+        new Promise((resolve, reject) => {
+          chrome.runtime.sendMessage({ action, params }, (response) => {
+            if (chrome.runtime.lastError) {
+              reject(new Error(chrome.runtime.lastError.message));
+              return;
+            }
+            if (!response || typeof response !== "object") {
+              reject(new Error("Invalid response from Yomitan backend"));
+              return;
+            }
+            if (response.error) {
+              reject(new Error(response.error.message || "Yomitan backend error"));
+              return;
+            }
+            resolve(response.result);
+          });
+        });
+
+      const targetServer = ${JSON.stringify(normalizedTargetServer)};
+      const optionsFull = await invoke("optionsGetFull", undefined);
+      const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
+      if (profiles.length === 0) {
+        return { updated: false, reason: "no-profiles" };
+      }
+
+      const defaultProfile = profiles[0];
+      if (!defaultProfile || typeof defaultProfile !== "object") {
+        return { updated: false, reason: "invalid-default-profile" };
+      }
+
+      defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
+        ? defaultProfile.options
+        : {};
+      defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
+        ? defaultProfile.options.anki
+        : {};
+
+      const currentServerRaw = defaultProfile.options.anki.server;
+      const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
+      const canReplaceDefault =
+        currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
+      if (!canReplaceDefault || currentServer === targetServer) {
+        return { updated: false, reason: "no-change", currentServer, targetServer };
+      }
+
+      defaultProfile.options.anki.server = targetServer;
+      await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
+      return { updated: true, currentServer, targetServer };
+    })();
+  `;
+
+  try {
+    const result = await parserWindow.webContents.executeJavaScript(script, true);
+    // Anything other than an object with `updated === true` counts as "not updated".
+    const updated =
+      typeof result === 'object' &&
+      result !== null &&
+      (result as { updated?: unknown }).updated === true;
+    if (updated) {
+      logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
+      return true;
+    }
+    return false;
+  } catch (err) {
+    logger.error('Failed to sync Yomitan default profile Anki server:', (err as Error).message);
+    return false;
+  }
+}