Fix Yomitan token headword frequency matching and add frequency tests

2026-06-15 03:13:33 -07:00 · 2026-02-16 13:21:19 -08:00
parent e142d2dc3b
commit 0eb2868805
7 changed files with 1586 additions and 80 deletions
@@ -4,6 +4,8 @@
  "description": "All-in-one sentence mining overlay with AnkiConnect and dictionary integration",
  "main": "dist/main.js",
  "scripts": {
    "get-frequency": "bun run scripts/get_frequency.ts",
    "get-frequency:electron": "bun build scripts/get_frequency.ts --format=cjs --target=node --outfile dist/scripts/get_frequency.js --external electron && electron dist/scripts/get_frequency.js",
    "build": "tsc && pnpm run build:renderer && cp src/renderer/index.html src/renderer/style.css dist/renderer/ && bash scripts/build-macos-helper.sh",
    "build:renderer": "esbuild src/renderer/renderer.ts --bundle --platform=browser --format=esm --target=es2022 --outfile=dist/renderer/renderer.js --sourcemap",
    "check:main-lines": "bash scripts/check-main-lines.sh",
@@ -0,0 +1,907 @@
 import fs from "node:fs";
 import path from "node:path";
 import process from "node:process";
 import { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "../src/core/services/tokenizer-service.js";
 import { createFrequencyDictionaryLookupService } from "../src/core/services/frequency-dictionary-service.js";
 import { MecabTokenizer } from "../src/mecab-tokenizer.js";
 import type { MergedToken, FrequencyDictionaryLookup } from "../src/types.js";
 /** Options resolved from the command line for a single get-frequency run. */
 interface CliOptions {
  // --- Input and JSON output ---
  /** Text to tokenize (positional arguments joined by spaces, or stdin). */
  input: string;
  /** Root path handed to the frequency dictionary lookup service. */
  dictionaryPath: string;
  /** Pretty-print the JSON output. */
  emitPretty: boolean;
  /** Include per-token frequency lookup diagnostics in the JSON output. */
  emitVerbose: boolean;

  // --- Tokenizer backends ---
  /** MeCab binary passed to the tokenizer; reported as "mecab" when unset. */
  mecabCommand?: string;
  /** MeCab dictionary directory passed to the tokenizer. */
  mecabDictionaryPath?: string;
  /** Skip Yomitan initialization entirely. */
  forceMecabOnly?: boolean;
  /** Preferred Yomitan extension directory. */
  yomitanExtensionPath?: string;
  /** Electron userData directory to use for Yomitan state. */
  yomitanUserDataPath?: string;

  // --- Terminal colorization ---
  /** Also print an ANSI-colorized copy of the tokenized line. */
  emitColoredLine: boolean;
  /** One color for every in-range rank, or five bands. */
  colorMode: "single" | "banded";
  /** Ranks above this threshold get no frequency color. */
  colorTopX: number;
  /** Color used in "single" mode. */
  colorSingle: string;
  /** Colors used in "banded" mode, band 1 covering the lowest ranks. */
  colorBand1: string;
  colorBand2: string;
  colorBand3: string;
  colorBand4: string;
  colorBand5: string;
  /** Color for tokens flagged as known. */
  colorKnown: string;
  /** Color for tokens flagged as the N+1 target. */
  colorNPlusOne: string;
 }
 /**
  * Parses get-frequency command-line arguments into a `CliOptions` object.
  *
  * Value flags accept both `--flag value` and `--flag=value`; in either form an
  * empty value is rejected. Boolean switches must match exactly, except that
  * `--colorized-line=<anything>` is still accepted for backward compatibility.
  * Arguments that do not start with `-` are joined with spaces to form the
  * input text; when no input text is given, stdin is read instead.
  *
  * `--help` / `-h` prints usage and exits the process with status 0.
  *
  * @param argv Arguments after the script name (e.g. `process.argv.slice(2)`).
  * @returns The resolved options, with defaults applied for omitted flags.
  * @throws Error on an unknown flag, a missing or invalid flag value, or when
  *   neither arguments nor stdin provide any input text.
  */
 function parseCliArgs(argv: string[]): CliOptions {
  const options: Omit<CliOptions, "input"> = {
    dictionaryPath: path.join(process.cwd(), "vendor", "jiten_freq_global"),
    emitPretty: false,
    emitVerbose: false,
    forceMecabOnly: false,
    yomitanExtensionPath: undefined,
    yomitanUserDataPath: undefined,
    emitColoredLine: false,
    colorMode: "single",
    colorTopX: 1000,
    colorSingle: "#f5a97f",
    colorBand1: "#ed8796",
    colorBand2: "#f5a97f",
    colorBand3: "#f9e2af",
    colorBand4: "#a6e3a1",
    colorBand5: "#8aadf4",
    colorKnown: "#a6da95",
    colorNPlusOne: "#c6a0f6",
    mecabCommand: undefined,
    mecabDictionaryPath: undefined,
  };

  const parseColorMode = (value: string): "single" | "banded" => {
    if (value !== "single" && value !== "banded") {
      throw new Error("--color-mode must be 'single' or 'banded'");
    }
    return value;
  };

  // Number.parseInt ignores trailing characters ("10abc" -> 10), so the whole
  // value is validated as digits before it is converted.
  const parsePositiveInteger = (value: string): number => {
    const trimmed = value.trim();
    const parsed = /^\+?\d+$/.test(trimmed)
      ? Number.parseInt(trimmed, 10)
      : Number.NaN;
    if (!Number.isFinite(parsed) || parsed <= 0) {
      throw new Error("--color-top-x must be a positive integer");
    }
    return parsed;
  };

  // Flags that take a value, keyed by their exact name.
  const valueFlags = new Map<string, (value: string) => void>([
    ["--dictionary", (value) => { options.dictionaryPath = path.resolve(value); }],
    ["--mecab-command", (value) => { options.mecabCommand = value; }],
    ["--mecab-dictionary", (value) => { options.mecabDictionaryPath = value; }],
    ["--yomitan-extension", (value) => { options.yomitanExtensionPath = path.resolve(value); }],
    ["--yomitan-user-data", (value) => { options.yomitanUserDataPath = path.resolve(value); }],
    ["--color-mode", (value) => { options.colorMode = parseColorMode(value); }],
    ["--color-top-x", (value) => { options.colorTopX = parsePositiveInteger(value); }],
    ["--color-single", (value) => { options.colorSingle = value; }],
    ["--color-band-1", (value) => { options.colorBand1 = value; }],
    ["--color-band-2", (value) => { options.colorBand2 = value; }],
    ["--color-band-3", (value) => { options.colorBand3 = value; }],
    ["--color-band-4", (value) => { options.colorBand4 = value; }],
    ["--color-band-5", (value) => { options.colorBand5 = value; }],
    ["--color-known", (value) => { options.colorKnown = value; }],
    ["--color-n-plus-one", (value) => { options.colorNPlusOne = value; }],
  ]);

  // Boolean switches, matched exactly.
  const switchFlags = new Map<string, () => void>([
    ["--pretty", () => { options.emitPretty = true; }],
    ["--verbose", () => { options.emitVerbose = true; }],
    ["--force-mecab", () => { options.forceMecabOnly = true; }],
    ["--colorized-line", () => { options.emitColoredLine = true; }],
  ]);

  const inputParts: string[] = [];
  const queue = [...argv];
  while (queue.length > 0) {
    const arg = queue.shift();
    if (arg === undefined) {
      break;
    }
    if (arg === "--help" || arg === "-h") {
      printUsage();
      process.exit(0);
    }
    if (!arg.startsWith("-")) {
      // Positional text; an empty-string argument no longer aborts parsing.
      inputParts.push(arg);
      continue;
    }
    const enableSwitch = switchFlags.get(arg);
    if (enableSwitch) {
      enableSwitch();
      continue;
    }
    const separatorIndex = arg.indexOf("=");
    const name = separatorIndex === -1 ? arg : arg.slice(0, separatorIndex);
    if (separatorIndex !== -1 && name === "--colorized-line") {
      // Historical behavior: any "=value" suffix simply enables the switch.
      options.emitColoredLine = true;
      continue;
    }
    const applyValue = valueFlags.get(name);
    if (!applyValue) {
      throw new Error(`Unknown flag: ${arg}`);
    }
    const value =
      separatorIndex === -1 ? queue.shift() : arg.slice(separatorIndex + 1);
    if (!value) {
      throw new Error(`Missing value for ${name}`);
    }
    applyValue(value);
  }

  let input = inputParts.join(" ").trim();
  if (!input) {
    // File descriptor 0 is stdin.
    input = fs.readFileSync(0, "utf8").trim();
    if (!input) {
      throw new Error(
        "Please provide input text as arguments or via stdin.",
      );
    }
  }
  return { input, ...options };
 }
 /**
  * Writes the command-line help text to stdout.
  *
  * The text is emitted verbatim from the template literal below, so any change
  * to its whitespace changes the program's output.
  *
  * NOTE(review): the synopsis line only lists --pretty, --verbose, --dictionary,
  * --mecab-command and --mecab-dictionary; the remaining flags appear only in
  * the option list underneath it.
  */
 function printUsage(): void {
  process.stdout.write(`Usage:
  pnpm run get-frequency [--pretty] [--verbose] [--dictionary <path>] [--mecab-command <path>] [--mecab-dictionary <path>] <text>
  --pretty               Pretty-print JSON output.
  --verbose               Include merged-frequency diagnostics and lookup terms.
  --force-mecab          Skip Yomitan parser initialization and force MeCab fallback.
  --yomitan-extension <path> Optional path to a Yomitan extension directory.
  --yomitan-user-data <path> Optional Electron userData directory for Yomitan state.
  --colorized-line        Output a terminal-colorized line based on token classification.
  --color-mode <single|banded> Frequency coloring mode (default: single).
  --color-top-x <n>      Frequency color applies when rank <= n (default: 1000).
  --color-single <#hex>  Frequency single-mode color (default: #f5a97f).
  --color-band-1 <#hex>  Frequency band-1 color.
  --color-band-2 <#hex>  Frequency band-2 color.
  --color-band-3 <#hex>  Frequency band-3 color.
  --color-band-4 <#hex>  Frequency band-4 color.
  --color-band-5 <#hex>  Frequency band-5 color.
  --color-known <#hex>    Known-word color (default: #a6da95).
  --color-n-plus-one <#hex> N+1 target color (default: #c6a0f6).
  --dictionary <path>    Frequency dictionary root path (default: ./vendor/jiten_freq_global)
  --mecab-command <path>  Optional MeCab binary path (default: mecab)
  --mecab-dictionary <path> Optional MeCab dictionary directory (default: system default)
  -h, --help            Show usage.
 \n`);
 }
 /** A lookup term paired with the rank the frequency dictionary returned for it. */
 interface FrequencyCandidate {
  /** Text that was looked up. */
  term: string;
  /** Rank returned for `term`; lower values win when candidates are compared. */
  rank: number;
 }
 /**
  * Collects the distinct texts to try against the frequency dictionary for a
  * token.
  *
  * Candidates are taken, in this order, from the token's optional
  * `frequencyLookupTerms` array, then its headword, reading and surface. Each
  * text is trimmed; empty results are dropped and later duplicates are ignored,
  * so the first occurrence decides the position in the returned list.
  *
  * @param token Token whose lookup texts are gathered.
  * @returns Trimmed, de-duplicated lookup texts in priority order.
  */
 function getFrequencyLookupTextCandidates(token: MergedToken): string[] {
  const declaredTerms = (
    token as MergedToken & { frequencyLookupTerms?: string[] }
  ).frequencyLookupTerms;
  const rawTexts: Array<string | undefined> = [
    ...(Array.isArray(declaredTerms) ? declaredTerms : []),
    token.headword,
    token.reading,
    token.surface,
  ];
  // A Set iterates in insertion order, which keeps the first occurrence of
  // every text and drops the repeats.
  const distinctTexts = new Set<string>();
  for (const raw of rawTexts) {
    const cleaned = raw ? raw.trim() : "";
    if (cleaned) {
      distinctTexts.add(cleaned);
    }
  }
  return [...distinctTexts];
 }
 /**
  * Looks up every candidate text of a token and returns the one with the
  * lowest rank.
  *
  * Lookup results that are not finite numbers greater than zero are ignored.
  * When several candidates share the lowest rank, the earliest one wins.
  *
  * @param token Token whose lookup candidates are ranked.
  * @param getFrequencyRank Lookup function mapping a term to its rank.
  * @returns The best candidate, or `null` when no candidate has a usable rank.
  */
 function getBestFrequencyLookupCandidate(
  token: MergedToken,
  getFrequencyRank: FrequencyDictionaryLookup,
 ): FrequencyCandidate | null {
  return getFrequencyLookupTextCandidates(token).reduce<FrequencyCandidate | null>(
    (winner, term) => {
      const rank = getFrequencyRank(term);
      if (
        typeof rank === "number" &&
        Number.isFinite(rank) &&
        rank > 0 &&
        (winner === null || rank < winner.rank)
      ) {
        return { term, rank };
      }
      return winner;
    },
    null,
  );
 }
 /**
  * Projects a token onto the fixed set of fields emitted in the JSON output.
  *
  * @param token Token to project.
  * @returns A plain object holding only the reported fields.
  */
 function simplifyToken(token: MergedToken): Record<string, unknown> {
  const {
    surface,
    reading,
    headword,
    startPos,
    endPos,
    partOfSpeech,
    isMerged,
    isKnown,
    isNPlusOneTarget,
    frequencyRank,
    jlptLevel,
  } = token;
  return {
    surface,
    reading,
    headword,
    startPos,
    endPos,
    partOfSpeech,
    isMerged,
    isKnown,
    isNPlusOneTarget,
    frequencyRank,
    jlptLevel,
  };
 }
 /**
  * Builds the verbose JSON representation of a token: the fields reported by
  * `simplifyToken` plus frequency lookup diagnostics.
  *
  * The extra fields are the token's own `frequencyLookupTerms` (omitted when
  * absent or empty), every lookup candidate that produced a finite rank above
  * zero, and the term and rank of the best such candidate (`null` when none).
  *
  * @param token Token to describe.
  * @param getFrequencyRank Lookup function mapping a term to its rank.
  * @returns A plain object for JSON serialization.
  */
 function simplifyTokenWithVerbose(
  token: MergedToken,
  getFrequencyRank: FrequencyDictionaryLookup,
 ): Record<string, unknown> {
  const declaredTerms = (
    token as MergedToken & { frequencyLookupTerms?: string[] }
  ).frequencyLookupTerms;
  const hasDeclaredTerms =
    Array.isArray(declaredTerms) && declaredTerms.length > 0;

  const rankedCandidates: Array<{ term: string; rank: number }> = [];
  for (const term of getFrequencyLookupTextCandidates(token)) {
    const rank = getFrequencyRank(term);
    if (typeof rank === "number" && Number.isFinite(rank) && rank > 0) {
      rankedCandidates.push({ term, rank });
    }
  }

  const best = getBestFrequencyLookupCandidate(token, getFrequencyRank);

  return {
    ...simplifyToken(token),
    frequencyLookupTerms: hasDeclaredTerms ? declaredTerms : undefined,
    frequencyCandidates: rankedCandidates,
    frequencyBestLookupTerm: best?.term ?? null,
    frequencyBestLookupRank: best?.rank ?? null,
  };
 }
 /** Mutable holder for the Yomitan objects created while loading the extension. */
 interface YomitanRuntimeState {
  /** True once the extension loader returned an extension. */
  available: boolean;
  /** Explanation of why the extension is unavailable, when one was recorded. */
  note?: string;
  /** Extension handle returned by or reported through the loader. */
  yomitanExt: unknown | null;
  /** Parser window stored through the loader/tokenizer callbacks. */
  parserWindow: unknown | null;
  /** Promise stored through the parser-ready callback. */
  parserReadyPromise: Promise<void> | null;
  /** Promise stored through the parser-init callback. */
  parserInitPromise: Promise<boolean> | null;
 }
 /**
  * Tries to load the Yomitan extension inside an Electron runtime and returns
  * the resulting state.
  *
  * The function never rejects on load problems: every failure is reported
  * through `available: false` plus a `note`. When the `electron` module itself
  * cannot be imported, the import error message is appended to the note instead
  * of being discarded.
  *
  * @param userDataPath Directory forwarded to the extension loader as
  *   `userDataPath`.
  * @returns The populated state; `available` is true only when the loader
  *   returned an extension.
  */
 async function createYomitanRuntimeState(
  userDataPath: string,
 ): Promise<YomitanRuntimeState> {
  const state: YomitanRuntimeState = {
    yomitanExt: null,
    parserWindow: null,
    parserReadyPromise: null,
    parserInitPromise: null,
    available: false,
  };
  const electronImport = await import("electron").catch((error: unknown) => {
    state.note = error instanceof Error ? error.message : "unknown error";
    return null;
  });
  if (!electronImport || !electronImport.app || !electronImport.app.whenReady) {
    const reason = "electron runtime not available in this process";
    // Keep the import failure (if any) so the root cause stays visible.
    state.note = state.note ? `${reason}: ${state.note}` : reason;
    return state;
  }
  try {
    await electronImport.app.whenReady();
    // The loader is imported lazily, only once an Electron app object exists.
    const loadYomitanExtensionService = (
      await import(
        "../src/core/services/yomitan-extension-loader-service.js"
      )
    ).loadYomitanExtensionService as (
      options: {
        userDataPath: string;
        getYomitanParserWindow: () => unknown;
        setYomitanParserWindow: (window: unknown) => void;
        setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
        setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
        setYomitanExtension: (extension: unknown) => void;
      },
    ) => Promise<unknown>;
    const extension = await loadYomitanExtensionService({
      userDataPath,
      getYomitanParserWindow: () => state.parserWindow,
      setYomitanParserWindow: (window) => {
        state.parserWindow = window;
      },
      setYomitanParserReadyPromise: (promise) => {
        state.parserReadyPromise = promise;
      },
      setYomitanParserInitPromise: (promise) => {
        state.parserInitPromise = promise;
      },
      setYomitanExtension: (loadedExtension) => {
        state.yomitanExt = loadedExtension;
      },
    });
    if (!extension) {
      state.note = "yomitan extension is not available";
      return state;
    }
    state.yomitanExt = extension;
    state.available = true;
    return state;
  } catch (error) {
    state.note =
      error instanceof Error
        ? error.message
        : "failed to initialize yomitan extension";
    return state;
  }
 }
 /**
  * Probes for a Yomitan extension directory and then builds the runtime state.
  *
  * The explicit `extensionPath` (when given) is probed first, followed by
  * `./vendor/yomitan` under the current working directory. The first directory
  * containing a `manifest.json` triggers a load attempt, and its outcome is
  * returned whether or not it succeeded. When no directory matches, a load is
  * still attempted.
  *
  * NOTE(review): the matched directory is only used for the existence probe
  * and for the fallback note; it is not passed to `createYomitanRuntimeState`.
  * Confirm how the loader locates the extension and whether `extensionPath`
  * is meant to influence it.
  *
  * @param userDataPath Directory forwarded to `createYomitanRuntimeState`.
  * @param extensionPath Optional preferred extension directory.
  * @returns The state produced by the load attempt.
  */
 async function createYomitanRuntimeStateWithSearch(
  userDataPath: string,
  extensionPath?: string,
 ): Promise<YomitanRuntimeState> {
  const vendoredPath = path.resolve(process.cwd(), "vendor", "yomitan");
  const searchRoots = extensionPath
    ? [path.resolve(extensionPath), vendoredPath]
    : [vendoredPath];
  for (const root of searchRoots) {
    try {
      if (!fs.existsSync(path.join(root, "manifest.json"))) {
        continue;
      }
      const state = await createYomitanRuntimeState(userDataPath);
      if (!state.available && !state.note) {
        state.note = `Failed to load yomitan extension at ${root}`;
      }
      return state;
    } catch {
      // Any failure while probing or loading moves on to the next directory.
      continue;
    }
  }
  return createYomitanRuntimeState(userDataPath);
 }
 /**
  * Creates the frequency rank lookup backed by the dictionary at
  * `dictionaryPath`.
  *
  * Messages logged by the service are suppressed unless the environment
  * variable `DEBUG_FREQUENCY` is "1", in which case they go to stderr so that
  * stdout stays pure JSON.
  *
  * @param dictionaryPath Single search path handed to the lookup service.
  * @returns The lookup produced by the frequency dictionary service.
  */
 async function getFrequencyLookup(dictionaryPath: string): Promise<FrequencyDictionaryLookup> {
  const lookup = await createFrequencyDictionaryLookupService({
    searchPaths: [dictionaryPath],
    log: (message) => {
      if (process.env.DEBUG_FREQUENCY !== "1") {
        return;
      }
      console.error(message);
    },
  });
  return lookup;
 }
 // ANSI sequence that resets all terminal attributes.
 const ANSI_RESET = "\u001b[0m";
 // Start of a 24-bit foreground color sequence; ";R;G;Bm" is appended to it.
 const ANSI_FG_PREFIX = "\u001b[38;2";
 // "#" followed by exactly three or exactly six hexadecimal digits.
 const HEX_COLOR_PATTERN = /^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{6})$/;

 /**
  * Parses a hex color into its red, green and blue components.
  *
  * Surrounding whitespace is ignored and the leading "#" is optional. Both the
  * three-digit shorthand and the six-digit form are accepted.
  *
  * @param input Color text such as "#f5a97f", "f5a97f" or "#abc".
  * @returns `[r, g, b]` with each component in 0-255, or `null` when the text
  *   is not a valid hex color.
  */
 function parseHexRgb(input: string): [number, number, number] | null {
  const digits = input.trim().replace(/^#/, "");
  if (!HEX_COLOR_PATTERN.test(`#${digits}`)) {
    return null;
  }
  // Expand "abc" to "aabbcc" so both forms are read the same way.
  const sixDigits =
    digits.length === 3
      ? Array.from(digits, (digit) => digit + digit).join("")
      : digits;
  const channelAt = (offset: number): number =>
    Number.parseInt(sixDigits.slice(offset, offset + 2), 16);
  return [channelAt(0), channelAt(2), channelAt(4)];
 }

 /**
  * Wraps text in a 24-bit ANSI foreground color sequence followed by a reset.
  *
  * @param text Text to colorize.
  * @param color Hex color accepted by `parseHexRgb`.
  * @returns The colorized text, or `text` unchanged when `color` is invalid.
  */
 function wrapWithForeground(text: string, color: string): string {
  const channels = parseHexRgb(color);
  if (channels === null) {
    return text;
  }
  const [red, green, blue] = channels;
  return `${ANSI_FG_PREFIX};${red};${green};${blue}m${text}${ANSI_RESET}`;
 }
 /**
  * Picks the frequency color for a rank.
  *
  * Rank and threshold are floored and clamped to at least 1. Ranks above the
  * threshold get no color (empty string). In "single" mode every remaining
  * rank gets `colorSingle`; in "banded" mode the range 1..threshold is split
  * into equal bands, one per entry of `bandedColors`, with the first entry
  * covering the lowest ranks.
  *
  * @param rank Frequency rank of the token.
  * @param colorTopX Highest rank that still receives a color.
  * @param colorMode "single" or "banded".
  * @param colorSingle Color used in "single" mode.
  * @param bandedColors Colors used in "banded" mode.
  * @returns The chosen color, or "" when the rank is above the threshold.
  */
 function getBandColor(
  rank: number,
  colorTopX: number,
  colorMode: "single" | "banded",
  colorSingle: string,
  bandedColors: [string, string, string, string, string],
 ): string {
  const limit = Math.max(1, Math.floor(colorTopX));
  const position = Math.max(1, Math.floor(rank));
  if (position > limit) {
    return "";
  }
  if (colorMode === "single") {
    return colorSingle;
  }
  const bandCount = bandedColors.length;
  const rawBand = Math.ceil((position / limit) * bandCount);
  // Clamp the one-based band number into 1..bandCount.
  const bandNumber =
    rawBand < 1 ? 1 : rawBand > bandCount ? bandCount : rawBand;
  return bandedColors[bandNumber - 1];
 }
 /**
  * Chooses the display color for a token.
  *
  * Precedence: the N+1 target color, then the known-word color, then the
  * frequency color derived from the token's rank. A token with none of these
  * gets no color.
  *
  * @param token Token to classify.
  * @param args Options supplying the colors and frequency settings.
  * @returns A color string, or "" when the token should stay uncolored.
  */
 function getTokenColor(token: MergedToken, args: CliOptions): string {
  if (token.isNPlusOneTarget) {
    return args.colorNPlusOne;
  }
  if (token.isKnown) {
    return args.colorKnown;
  }
  const rank = token.frequencyRank;
  if (typeof rank !== "number" || !Number.isFinite(rank)) {
    return "";
  }
  const { colorBand1, colorBand2, colorBand3, colorBand4, colorBand5 } = args;
  return getBandColor(
    rank,
    args.colorTopX,
    args.colorMode,
    args.colorSingle,
    [colorBand1, colorBand2, colorBand3, colorBand4, colorBand5],
  );
 }
 /**
  * Renders `text` with each token span wrapped in its ANSI foreground color.
  *
  * Tokens are processed in order of start position (ties broken by end
  * position). Text between token spans is copied through unchanged. Spans are
  * clamped to the text and to the part that has not been written yet, so
  * overlapping or nested tokens never duplicate characters or move the output
  * position backwards; a token left with an empty span is skipped.
  *
  * NOTE(review): `startPos`/`endPos` are used directly as indices into `text`;
  * confirm the tokenizer reports them in the same units as `String.slice`.
  *
  * @param text The tokenized line.
  * @param tokens Tokens carrying positions and classification flags.
  * @param args Options; nothing is colored unless `emitColoredLine` is set.
  * @returns The line with color sequences inserted, or `text` unchanged when
  *   coloring is disabled or there are no tokens.
  */
 function renderColoredLine(
  text: string,
  tokens: MergedToken[],
  args: CliOptions,
 ): string {
  if (!args.emitColoredLine || tokens.length === 0) {
    return text;
  }
  // Sort a copy so the caller's token order is left untouched.
  const ordered = [...tokens].sort((a, b) => {
    const aStart = a.startPos ?? 0;
    const bStart = b.startPos ?? 0;
    if (aStart !== bStart) {
      return aStart - bStart;
    }
    return (a.endPos ?? a.surface.length) - (b.endPos ?? b.surface.length);
  });
  let cursor = 0;
  let output = "";
  for (const token of ordered) {
    const start = token.startPos ?? 0;
    const end =
      token.endPos ??
      (token.startPos ? token.startPos + token.surface.length : token.surface.length);
    if (start < 0 || end < 0 || end < start) {
      continue;
    }
    // Never start before the text that has already been emitted.
    const spanStart = Math.max(cursor, Math.min(start, text.length));
    const spanEnd = Math.min(Math.max(spanStart, end), text.length);
    if (spanEnd <= spanStart) {
      // Nothing of this token is left to draw (zero width or fully covered).
      continue;
    }
    output += text.slice(cursor, spanStart);
    const tokenText = text.slice(spanStart, spanEnd);
    const color = getTokenColor(token, args);
    output += color ? wrapWithForeground(tokenText, color) : tokenText;
    cursor = spanEnd;
  }
  return output + text.slice(cursor);
 }
 /**
  * Script entry point: tokenizes the input text and prints the result as JSON.
  *
  * Steps:
  *  1. Parse CLI options and build the frequency lookup.
  *  2. Require a working MeCab tokenizer (throws otherwise).
  *  3. Unless `--force-mecab` is given, try to initialize Yomitan; failures are
  *     reported in the diagnostics rather than thrown.
  *  4. Tokenize through the tokenizer service and write one JSON document to
  *     stdout, followed by the colorized line when `--colorized-line` is set.
  *
  * @throws Error when MeCab is not available, plus anything thrown by argument
  *   parsing or the services that are called.
  */
 async function main(): Promise<void> {
  const args = parseCliArgs(process.argv.slice(2));
  const getFrequencyRank = await getFrequencyLookup(args.dictionaryPath);
  const mecabTokenizer = new MecabTokenizer({
    mecabCommand: args.mecabCommand,
    dictionaryPath: args.mecabDictionaryPath,
  });
  const isMecabAvailable = await mecabTokenizer.checkAvailability();
  if (!isMecabAvailable) {
    throw new Error(
      "MeCab is not available on this system. Install/run environment with MeCab to tokenize input.",
    );
  }

  const electronModule = await import("electron").catch(() => null);
  // The import can resolve without exposing an `app` object, so `setPath` is
  // guarded the same way `getPath` is below.
  if (args.yomitanUserDataPath && electronModule?.app?.setPath) {
    electronModule.app.setPath("userData", args.yomitanUserDataPath);
  }
  const yomitanState = args.forceMecabOnly
    ? null
    : await createYomitanRuntimeStateWithSearch(
        electronModule?.app?.getPath
          ? electronModule.app.getPath("userData")
          : process.cwd(),
        args.yomitanExtensionPath,
      );
  const hasYomitan = Boolean(yomitanState?.available && yomitanState?.yomitanExt);
  // Non-null only when Yomitan is usable; the accessors below are no-ops and
  // return null otherwise.
  const activeYomitan = hasYomitan ? yomitanState : null;

  const deps = createTokenizerDepsRuntimeService({
    getYomitanExt: () =>
      (activeYomitan ? activeYomitan.yomitanExt : null) as never,
    getYomitanParserWindow: () =>
      (activeYomitan ? activeYomitan.parserWindow : null) as never,
    setYomitanParserWindow: (window) => {
      if (activeYomitan) {
        activeYomitan.parserWindow = window;
      }
    },
    getYomitanParserReadyPromise: () =>
      (activeYomitan ? activeYomitan.parserReadyPromise : null) as never,
    setYomitanParserReadyPromise: (promise) => {
      if (activeYomitan) {
        activeYomitan.parserReadyPromise = promise;
      }
    },
    getYomitanParserInitPromise: () =>
      (activeYomitan ? activeYomitan.parserInitPromise : null) as never,
    setYomitanParserInitPromise: (promise) => {
      if (activeYomitan) {
        activeYomitan.parserInitPromise = promise;
      }
    },
    isKnownWord: () => false,
    getKnownWordMatchMode: () => "headword",
    getJlptLevel: () => null,
    getFrequencyDictionaryEnabled: () => true,
    getFrequencyRank,
    getMecabTokenizer: () => ({
      tokenize: (text: string) => mecabTokenizer.tokenize(text),
    }),
  });

  const subtitleData = await tokenizeSubtitleService(args.input, deps);
  const tokenCount = subtitleData.tokens?.length ?? 0;
  const mergedCount =
    subtitleData.tokens?.filter((token) => token.isMerged).length ?? 0;
  const tokens =
    subtitleData.tokens?.map((token) =>
      args.emitVerbose
        ? simplifyTokenWithVerbose(token, getFrequencyRank)
        : simplifyToken(token),
    ) ?? null;

  // Built up front so the diagnostics object is fully typed when created.
  const mecabStatus =
    tokens === null
      ? {
          status: "no-tokens",
          note:
            "MeCab returned no parseable tokens. This is often caused by a missing/invalid MeCab dictionary path.",
        }
      : { status: "ok" };
  const diagnostics = {
    yomitan: {
      available: Boolean(yomitanState?.available),
      loaded: hasYomitan,
      forceMecabOnly: args.forceMecabOnly,
      note: yomitanState?.note ?? null,
    },
    mecab: {
      command: args.mecabCommand ?? "mecab",
      dictionaryPath: args.mecabDictionaryPath ?? null,
      available: isMecabAvailable,
      ...mecabStatus,
    },
    tokenizer: {
      sourceHint:
        tokenCount === 0
          ? "none"
          : hasYomitan ? "yomitan-merged" : "mecab-merge",
      mergedTokenCount: mergedCount,
      totalTokenCount: tokenCount,
    },
  };

  const output = {
    input: args.input,
    tokenizerText: subtitleData.text,
    tokens,
    diagnostics,
  };
  const json = JSON.stringify(output, null, args.emitPretty ? 2 : undefined);
  process.stdout.write(`${json}\n`);
  if (args.emitColoredLine && subtitleData.tokens) {
    const coloredLine = renderColoredLine(subtitleData.text, subtitleData.tokens, args);
    process.stdout.write(`${coloredLine}\n`);
  }
 }
 // Run the script; any failure is reported on stderr and exits with status 1.
 main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error, so fall back to its string
  // form instead of printing "undefined".
  const message = error instanceof Error ? error.message : String(error);
  console.error(`Error: ${message}`);
  process.exit(1);
 });
@@ -228,6 +228,223 @@ test("tokenizeSubtitleService applies frequency dictionary ranks", async () => {
  assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
 });
 // One parsed segment carries two headwords; the expected rank is the lower of
 // the two headword ranks (40 for "猫" rather than 1200 for "猫です").
 test("tokenizeSubtitleService uses all Yomitan headword candidates for frequency lookup", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "猫です",
            reading: "ねこです",
            headwords: [[{ term: "猫です" }], [{ term: "猫" }]],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([
    ["猫", 40],
    ["猫です", 1200],
  ]);

  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 40);
 });
 // The surface "猫" has rank 1200 but the headword "ネコ" has rank 8; the
 // expected token rank is the headword's.
 test("tokenizeSubtitleService prefers exact headword frequency over surface/reading when available", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "猫",
            reading: "ねこ",
            headwords: [[{ term: "ネコ" }]],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([
    ["猫", 1200],
    ["ネコ", 8],
  ]);

  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 8);
 });
 // Only the reading "ねこ" has a rank; the headword candidate has none, so the
 // token is expected to end up without a frequency rank.
 test("tokenizeSubtitleService keeps no frequency when only reading matches and headword candidates miss", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "猫",
            reading: "ねこ",
            headwords: [[{ term: "猫です" }]],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([["ねこ", 77]]);

  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, undefined);
 });
 // The first headword resolves to NaN and must be skipped; the second headword
 // supplies the expected rank of 500.
 test("tokenizeSubtitleService ignores invalid frequency ranks and takes best valid headword candidate", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "猫です",
            reading: "ねこです",
            headwords: [[{ term: "猫" }], [{ term: "猫です" }]],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([
    ["猫", Number.NaN],
    ["猫です", 500],
  ]);

  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 500);
 });
 // Both headwords have a rank (40 and 120); the lower rank, 40, is expected.
 test("tokenizeSubtitleService handles real-word frequency candidates and prefers most frequent term", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "昨日",
            reading: "きのう",
            headwords: [[{ term: "昨日" }], [{ term: "きのう" }]],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([
    ["きのう", 120],
    ["昨日", 40],
  ]);

  const result = await tokenizeSubtitleService(
    "昨日",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 40);
 });
 // Three headwords: one resolves to -1 and must be ignored; of the remaining
 // ranks (88 and 9000) the lower one, 88, is expected.
 test("tokenizeSubtitleService ignores candidates with no dictionary rank when higher-frequency candidate exists", async () => {
  const scanningParserOutput = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          {
            text: "猫",
            reading: "ねこ",
            headwords: [
              [{ term: "猫" }],
              [{ term: "猫です" }],
              [{ term: "unknown-term" }],
            ],
          },
        ],
      ],
    },
  ];
  const ranksByTerm = new Map<string, number>([
    ["unknown-term", -1],
    ["猫", 88],
    ["猫です", 9000],
  ]);

  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => ranksByTerm.get(text) ?? null,
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => scanningParserOutput(),
        },
      } as unknown as Electron.BrowserWindow),
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 88);
 });
 test("tokenizeSubtitleService ignores frequency lookup failures", async () => {
  const result = await tokenizeSubtitleService(
    "猫",
@@ -557,10 +774,147 @@ test("tokenizeSubtitleService uses Yomitan parser result when available", async
  );
  assert.equal(result.text, "猫です");
-  assert.equal(result.tokens?.length, 1);
+  assert.equal(result.tokens?.length, 2);
-  assert.equal(result.tokens?.[0]?.surface, "猫です");
+  assert.equal(result.tokens?.[0]?.surface, "猫");
-  assert.equal(result.tokens?.[0]?.reading, "ねこです");
+  assert.equal(result.tokens?.[0]?.reading, "ねこ");
  assert.equal(result.tokens?.[0]?.isKnown, false);
  assert.equal(result.tokens?.[1]?.surface, "です");
  assert.equal(result.tokens?.[1]?.reading, "です");
  assert.equal(result.tokens?.[1]?.isKnown, false);
 });
 test("tokenizeSubtitleService prefers mecab parser tokens when scanning parser returns one token", async () => {
  // One parse line holding a single segment whose only headword equals its text.
  const line = (text: string, reading: string) => [
    { text, reading, headwords: [[{ term: text }]] },
  ];
  // The scanning parser leaves the sentence unsplit; the mecab source splits it into five lines.
  const runFakeParser = async () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [line("俺は小園にいきたい", "おれは小園にいきたい")],
    },
    {
      source: "mecab",
      index: 0,
      content: [
        line("俺", "おれ"),
        line("は", "は"),
        line("小園", "おうえん"),
        line("に", "に"),
        line("いきたい", "いきたい"),
      ],
    },
  ];
  const makeParserWindow = () =>
    ({
      isDestroyed: () => false,
      webContents: { executeJavaScript: runFakeParser },
    }) as unknown as Electron.BrowserWindow;
  // Terms not listed here answer null.
  const stubRanks = new Map<string, number>([
    ["小園", 25],
    ["いきたい", 1500],
  ]);
  const result = await tokenizeSubtitleService(
    "俺は小園にいきたい",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: makeParserWindow,
      getFrequencyDictionaryEnabled: () => true,
      tokenizeWithMecab: async () => null,
      getFrequencyRank: (text) => stubRanks.get(text) ?? null,
    }),
  );
  // The five-way mecab split is expected to win over the single scanning token.
  assert.equal(result.tokens?.length, 5);
  const joinedSurfaces = result.tokens?.map((token) => token.surface).join(",");
  assert.equal(joinedSurfaces, "俺,は,小園,に,いきたい");
  const third = result.tokens?.[2];
  assert.equal(third?.surface, "小園");
  assert.equal(third?.frequencyRank, 25);
 });
 test("tokenizeSubtitleService keeps scanning parser tokens when they are already split", async () => {
  // One parse line holding a single segment whose only headword equals its text.
  const line = (text: string, reading: string) => [
    { text, reading, headwords: [[{ term: text }]] },
  ];
  // The scanning parser already yields three lines; the mecab source splits further into five.
  const runFakeParser = async () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        line("小園", "おうえん"),
        line("に", "に"),
        line("行きたい", "いきたい"),
      ],
    },
    {
      source: "mecab",
      index: 0,
      content: [
        line("小", "お"),
        line("園", "えん"),
        line("に", "に"),
        line("行き", "いき"),
        line("たい", "たい"),
      ],
    },
  ];
  const makeParserWindow = () =>
    ({
      isDestroyed: () => false,
      webContents: { executeJavaScript: runFakeParser },
    }) as unknown as Electron.BrowserWindow;
  const result = await tokenizeSubtitleService(
    "小園に行きたい",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: makeParserWindow,
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => {
        if (text === "小園") {
          return 20;
        }
        return null;
      },
      tokenizeWithMecab: async () => null,
    }),
  );
  // The scanning split must be kept as-is, with a rank only on the one term the stub knows.
  assert.equal(result.tokens?.length, 3);
  const joinedSurfaces = result.tokens?.map((token) => token.surface).join(",");
  assert.equal(joinedSurfaces, "小園,に,行きたい");
  assert.equal(result.tokens?.[0]?.frequencyRank, 20);
  assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
  assert.equal(result.tokens?.[2]?.frequencyRank, undefined);
 });
 test("tokenizeSubtitleService still assigns frequency to non-known Yomitan tokens", async () => {
  // One parse line holding a single segment whose only headword equals its text.
  const line = (text: string, reading: string) => [
    { text, reading, headwords: [[{ term: text }]] },
  ];
  const runFakeParser = async () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [line("小園", "おうえん"), line("に", "に")],
    },
  ];
  const makeParserWindow = () =>
    ({
      isDestroyed: () => false,
      webContents: { executeJavaScript: runFakeParser },
    }) as unknown as Electron.BrowserWindow;
  // Terms not listed here answer null.
  const stubRanks = new Map<string, number>([
    ["小園", 75],
    ["に", 3000],
  ]);
  const result = await tokenizeSubtitleService(
    "小園に",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: makeParserWindow,
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => stubRanks.get(text) ?? null,
      isKnownWord: (text) => text === "小園",
    }),
  );
  assert.equal(result.tokens?.length, 2);
  // The known word keeps its rank.
  const first = result.tokens?.[0];
  assert.equal(first?.isKnown, true);
  assert.equal(first?.frequencyRank, 75);
  // "に" is expected to come back without a rank even though the stub would answer 3000 for it.
  const second = result.tokens?.[1];
  assert.equal(second?.isKnown, false);
  assert.equal(second?.frequencyRank, undefined);
 });
 test("tokenizeSubtitleService marks tokens as known using callback", async () => {
@@ -589,6 +943,63 @@ test("tokenizeSubtitleService marks tokens as known using callback", async () =>
  assert.equal(result.tokens?.[0]?.isKnown, true);
 });
 test("tokenizeSubtitleService still assigns frequency rank to non-known tokens", async () => {
  // Unmerged, not-yet-known noun token; the katakana string fills all three reading fields.
  const nounToken = (
    surface: string,
    katakana: string,
    startPos: number,
    endPos: number,
  ) => ({
    surface,
    reading: katakana,
    partOfSpeech: PartOfSpeech.noun,
    pos1: "",
    pos2: "",
    pos3: "",
    pos4: "",
    inflectionType: "",
    inflectionForm: "",
    headword: surface,
    katakanaReading: katakana,
    pronunciation: katakana,
    startPos,
    endPos,
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  });
  // Terms not listed here answer null.
  const stubRanks = new Map<string, number>([
    ["既知", 20],
    ["未知", 30],
  ]);
  const result = await tokenizeSubtitleService(
    "既知未知",
    makeDeps({
      tokenizeWithMecab: async () => [
        nounToken("既知", "キチ", 0, 2),
        nounToken("未知", "ミチ", 2, 4),
      ],
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: (text) => stubRanks.get(text) ?? null,
      isKnownWord: (text) => text === "既知",
    }),
  );
  assert.equal(result.tokens?.length, 2);
  // Both the known and the unknown token are expected to carry their stubbed rank.
  const known = result.tokens?.[0];
  assert.equal(known?.isKnown, true);
  assert.equal(known?.frequencyRank, 20);
  const unknown = result.tokens?.[1];
  assert.equal(unknown?.isKnown, false);
  assert.equal(unknown?.frequencyRank, 30);
 });
 test("tokenizeSubtitleService selects one N+1 target token", async () => {
  const result = await tokenizeSubtitleService(
    "猫です",
@@ -1,4 +1,4 @@
-import { BrowserWindow, Extension, session } from "electron";
+import type { BrowserWindow, Extension } from "electron";
 import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
 import {
  JlptLevel,
@@ -252,20 +252,67 @@ function resolveFrequencyLookupText(token: MergedToken): string {
  return token.surface;
 }
 /**
  * Lists the distinct, trimmed strings to try when looking up a token's frequency rank.
  *
  * Any `frequencyLookupTerms` attached to the token come first, in their given order,
  * followed by the text from `resolveFrequencyLookupText`. Empty or whitespace-only
  * entries are dropped and repeats keep only their first position.
  */
 function getFrequencyLookupTextCandidates(token: MergedToken): string[] {
  // A Set iterates in insertion order, so it dedupes while keeping first-seen order.
  const orderedTerms = new Set<string>();
  const collect = (raw: string | undefined): void => {
    const cleaned = raw ? raw.trim() : "";
    if (cleaned) {
      orderedTerms.add(cleaned);
    }
  };
  const { frequencyLookupTerms: attachedTerms } = token as MergedToken & {
    frequencyLookupTerms?: string[];
  };
  if (Array.isArray(attachedTerms)) {
    for (const attached of attachedTerms) {
      collect(attached);
    }
  }
  collect(resolveFrequencyLookupText(token));
  return Array.from(orderedTerms);
 }
 /**
  * Returns a copy of every token with `frequencyRank` filled in.
  *
  * Each lookup candidate from `getFrequencyLookupTextCandidates` is resolved through
  * `getCachedFrequencyRank`; the numerically smallest non-null rank is kept. A token
  * with no candidates, or whose candidates all resolve to null, gets `undefined`.
  */
 function applyFrequencyMarking(
  tokens: MergedToken[],
  getFrequencyRank: FrequencyDictionaryLookup,
 ): MergedToken[] {
  return tokens.map((token) => {
-    const lookupText = resolveFrequencyLookupText(token);
+    const lookupTexts = getFrequencyLookupTextCandidates(token);
-    if (!lookupText) {
+    if (lookupTexts.length === 0) {
      return { ...token, frequencyRank: undefined };
    }
-    const rank = getCachedFrequencyRank(lookupText, getFrequencyRank);
+    let bestRank: number | null = null;
    for (const lookupText of lookupTexts) {
      const rank = getCachedFrequencyRank(lookupText, getFrequencyRank);
      if (rank === null) {
        continue;
      }
      if (bestRank === null || rank < bestRank) {
        bestRank = rank;
      }
    }
    return {
      ...token,
-      frequencyRank: rank ?? undefined,
+      frequencyRank: bestRank ?? undefined,
    };
  });
 }
@@ -397,7 +444,7 @@ function isYomitanParseResultItem(
  if (!isObject(value)) {
    return false;
  }
-  if ((value as YomitanParseResultItem).source !== "scanning-parser") {
+  if (!isString((value as YomitanParseResultItem).source)) {
    return false;
  }
  if (!Array.isArray((value as YomitanParseResultItem).content)) {
@@ -452,6 +499,27 @@ function extractYomitanHeadword(segment: YomitanParseSegment): string {
  return "";
 }
 /**
  * Gathers every non-blank headword term of a parse segment, trimmed, in the order
  * the groups and their entries appear. Duplicates are kept. Returns an empty list
  * when `segment.headwords` does not pass `isYomitanHeadwordRows`.
  */
 function extractYomitanHeadwords(segment: YomitanParseSegment): string[] {
  const rows = segment.headwords;
  if (!isYomitanHeadwordRows(rows)) {
    return [];
  }
  const terms: string[] = [];
  for (const row of rows) {
    for (const entry of row) {
      if (!isString(entry.term)) {
        continue;
      }
      const cleaned = entry.term.trim();
      if (cleaned.length === 0) {
        continue;
      }
      terms.push(cleaned);
    }
  }
  return terms;
 }
 function applyJlptMarking(
  tokens: MergedToken[],
  getJlptLevel: (text: string) => JlptLevel | null,
@@ -475,29 +543,27 @@ function applyJlptMarking(
  });
 }
-function mapYomitanParseResultsToMergedTokens(
+interface YomitanParseCandidate {
-  parseResults: unknown,
+  source: string;
  index: number;
  tokens: MergedToken[];
 }
 function mapYomitanParseResultItemToMergedTokens(
  parseResult: YomitanParseResultItem,
  isKnownWord: (text: string) => boolean,
  knownWordMatchMode: NPlusOneMatchMode,
-): MergedToken[] | null {
+): YomitanParseCandidate | null {
-  if (!Array.isArray(parseResults) || parseResults.length === 0) {
+  const content = parseResult.content;
  if (!Array.isArray(content) || content.length === 0) {
    return null;
  }
-  const scanningItems = parseResults.filter(
+  const source = String(parseResult.source ?? "");
-    (item): item is YomitanParseResultItem => isYomitanParseResultItem(item),
+  const index =
-  );
+    typeof parseResult.index === "number" && Number.isInteger(parseResult.index)
-
+      ? parseResult.index
-  if (scanningItems.length === 0) {
+      : 0;
    return null;
  }
  const primaryItem =
    scanningItems.find((item) => item.index === 0) || scanningItems[0];
  const content = primaryItem.content;
  if (!Array.isArray(content)) {
    return null;
  }
  const tokens: MergedToken[] = [];
  let charOffset = 0;
@@ -509,60 +575,117 @@ function mapYomitanParseResultsToMergedTokens(
    }
    validLineCount += 1;
    let surface = "";
    let reading = "";
    let headword = "";
    for (const segment of line) {
      const segmentText = segment.text;
      if (!segmentText || segmentText.length === 0) {
        continue;
      }
-      surface += segmentText;
+      const start = charOffset;
      const end = start + segmentText.length;
      charOffset = end;
-      if (typeof segment.reading === "string") {
+      const headword = extractYomitanHeadword(segment) || segmentText;
-        reading += segment.reading;
+      const frequencyLookupTerms = extractYomitanHeadwords(segment);
      }
-      if (!headword) {
+      tokens.push({
-        headword = extractYomitanHeadword(segment);
+        surface: segmentText,
-      }
+        reading: typeof segment.reading === "string" ? segment.reading : "",
        headword,
        startPos: start,
        endPos: end,
        partOfSpeech: PartOfSpeech.other,
        pos1: "",
        isMerged: true,
        isNPlusOneTarget: false,
        isKnown: (() => {
          const matchText = resolveKnownWordText(
            segmentText,
            headword,
            knownWordMatchMode,
          );
          return matchText ? isKnownWord(matchText) : false;
        })(),
        frequencyLookupTerms:
          frequencyLookupTerms.length > 0 ? frequencyLookupTerms : undefined,
      });
    }
    if (!surface) {
      continue;
    }
    const start = charOffset;
    const end = start + surface.length;
    charOffset = end;
    tokens.push({
      surface,
      reading,
      headword: headword || surface,
      startPos: start,
      endPos: end,
      partOfSpeech: PartOfSpeech.other,
      pos1: "",
      isMerged: true,
      isNPlusOneTarget: false,
      isKnown: (() => {
        const matchText = resolveKnownWordText(
          surface,
          headword,
          knownWordMatchMode,
        );
        return matchText ? isKnownWord(matchText) : false;
      })(),
    });
  }
-  if (validLineCount === 0) {
+  if (validLineCount === 0 || tokens.length === 0) {
    return null;
  }
-  return tokens.length > 0 ? tokens : null;
+
  return { source, index, tokens };
 }
 /**
  * Decides which parse candidate's tokens to use.
  *
  * - With no candidates, returns null.
  * - With no "scanning-parser" candidate, returns the tokens of the candidate
  *   (from any source) that has the most tokens.
  * - Otherwise the scanning-parser candidate with the most tokens wins outright if
  *   it has more than one token. If not, a "mecab" candidate replaces it only when
  *   that candidate has strictly more tokens.
  *
  * Whenever token counts tie, the earlier candidate in `candidates` is kept.
  */
 function selectBestYomitanParseCandidate(
  candidates: YomitanParseCandidate[],
 ): MergedToken[] | null {
  // Earliest entry with the highest token count; null for an empty pool.
  const longestOf = (
    pool: YomitanParseCandidate[],
  ): YomitanParseCandidate | null => {
    let winner: YomitanParseCandidate | null = null;
    for (const entry of pool) {
      if (winner === null || entry.tokens.length > winner.tokens.length) {
        winner = entry;
      }
    }
    return winner;
  };
  const fromSource = (wanted: string): YomitanParseCandidate[] =>
    candidates.filter((entry) => entry.source === wanted);
  const scanningWinner = longestOf(fromSource("scanning-parser"));
  if (scanningWinner === null) {
    const overallWinner = longestOf(candidates);
    return overallWinner ? overallWinner.tokens : null;
  }
  if (scanningWinner.tokens.length > 1) {
    return scanningWinner.tokens;
  }
  // The scanning parser produced at most one token: accept mecab only if it splits further.
  const mecabWinner = longestOf(fromSource("mecab"));
  if (
    mecabWinner !== null &&
    mecabWinner.tokens.length > scanningWinner.tokens.length
  ) {
    return mecabWinner.tokens;
  }
  return scanningWinner.tokens;
 }
 /**
  * Turns a raw parser reply into merged tokens.
  *
  * Entries that fail `isYomitanParseResultItem` are skipped. Each remaining entry is
  * converted with `mapYomitanParseResultItemToMergedTokens`, and entries that convert
  * to null are dropped. `selectBestYomitanParseCandidate` then picks among what is left.
  * Returns null when `parseResults` is not a non-empty array or nothing is selected.
  */
 function mapYomitanParseResultsToMergedTokens(
  parseResults: unknown,
  isKnownWord: (text: string) => boolean,
  knownWordMatchMode: NPlusOneMatchMode,
 ): MergedToken[] | null {
  if (!Array.isArray(parseResults) || parseResults.length === 0) {
    return null;
  }
  const usable: YomitanParseCandidate[] = [];
  for (const entry of parseResults) {
    if (!isYomitanParseResultItem(entry)) {
      continue;
    }
    const mapped = mapYomitanParseResultItemToMergedTokens(
      entry,
      isKnownWord,
      knownWordMatchMode,
    );
    if (mapped !== null) {
      usable.push(mapped);
    }
  }
  return selectBestYomitanParseCandidate(usable);
 }
 function pickClosestMecabPos1(
@@ -664,6 +787,7 @@ async function enrichYomitanPos1(
 async function ensureYomitanParserWindow(
  deps: TokenizerServiceDeps,
 ): Promise<boolean> {
  const electron = await import("electron");
  const yomitanExt = deps.getYomitanExt();
  if (!yomitanExt) {
    return false;
@@ -680,6 +804,7 @@ async function ensureYomitanParserWindow(
  }
  const initPromise = (async () => {
    const { BrowserWindow, session } = electron;
    const parserWindow = new BrowserWindow({
      show: false,
      width: 800,
@@ -786,7 +911,7 @@ async function parseWithYomitanInternalParser(
        optionsContext: { index: profileIndex },
        scanLength,
        useInternalParser: true,
-        useMecabParser: false
+        useMecabParser: true
      });
    })();
  `;
@@ -86,14 +86,29 @@ export function parseMecabLine(line: string): Token | null {
  };
 }
 /** Optional overrides accepted by the `MecabTokenizer` constructor. */
 export interface MecabTokenizerOptions {
  /** MeCab command name or path; the constructor trims it and falls back to "mecab" when it is missing or blank. */
  mecabCommand?: string;
  /** Dictionary path; trimmed by the constructor and, when non-empty, passed to MeCab as the `-d` argument. */
  dictionaryPath?: string;
 }
 export class MecabTokenizer {
  private mecabPath: string | null = null;
  private mecabCommand: string;
  private dictionaryPath: string | null;
  private available: boolean = false;
  private enabled: boolean = true;
  constructor(options: MecabTokenizerOptions = {}) {
    this.mecabCommand = options.mecabCommand?.trim() || "mecab";
    this.dictionaryPath = options.dictionaryPath?.trim() || null;
  }
  async checkAvailability(): Promise<boolean> {
    try {
-      const result = execSync("which mecab", { encoding: "utf-8" }).trim();
+      const command = this.mecabCommand;
      const result = command.includes("/")
        ? command
        : execSync(`which ${command}`, { encoding: "utf-8" }).trim();
      if (result) {
        this.mecabPath = result;
        this.available = true;
@@ -114,7 +129,11 @@ export class MecabTokenizer {
    }
    return new Promise((resolve) => {
-      const mecab = spawn("mecab", [], {
+      const mecabArgs: string[] = [];
      if (this.dictionaryPath) {
        mecabArgs.push("-d", this.dictionaryPath);
      }
      const mecab = spawn(this.mecabPath ?? this.mecabCommand, mecabArgs, {
        stdio: ["pipe", "pipe", "pipe"],
      });
@@ -149,6 +168,21 @@ export class MecabTokenizer {
          }
        }
        if (tokens.length === 0 && text.trim().length > 0) {
          const trimmedStdout = stdout.trim();
          const trimmedStderr = stderr.trim();
          if (trimmedStdout) {
            log.warn(
              "MeCab returned no parseable tokens.",
              `command=${this.mecabPath ?? this.mecabCommand}`,
              `stdout=${trimmedStdout.slice(0, 1024)}`,
            );
          }
          if (trimmedStderr) {
            log.warn("MeCab stderr while tokenizing:", trimmedStderr);
          }
        }
        resolve(tokens);
      });
@@ -23,15 +23,35 @@ function createToken(overrides: Partial<MergedToken>): MergedToken {
 }
 /**
  * Test helper that pulls the declaration body of a CSS rule out of `cssText`.
  *
  * Every `selectors { body }` rule matched by the regex is examined, and its
  * comma-separated selector list is compared entry by entry against `selector`.
  * A rule listing only that selector is returned immediately. Failing that, the
  * first grouped rule containing it is returned. With no match the result is "".
  */
 function extractClassBlock(cssText: string, selector: string): string {
-  const start = cssText.indexOf(selector);
+  const ruleRegex = /([^{}]+)\{([^}]*)\}/g;
-  if (start < 0) return "";
+  let match: RegExpExecArray | null = null;
  let fallbackBlock = "";
-  const openBrace = cssText.indexOf("{", start);
+  while ((match = ruleRegex.exec(cssText)) !== null) {
-  if (openBrace < 0) return "";
+    const selectorsBlock = match[1]?.trim() ?? "";
-  const closeBrace = cssText.indexOf("}", openBrace);
+    const selectorBlock = match[2] ?? "";
  if (closeBrace < 0) return "";
-  return cssText.slice(openBrace + 1, closeBrace);
+    const selectors = selectorsBlock
      .split(",")
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);
    if (selectors.includes(selector)) {
      if (selectors.length === 1) {
        return selectorBlock;
      }
      if (!fallbackBlock) {
        fallbackBlock = selectorBlock;
      }
    }
  }
  if (fallbackBlock) {
    return fallbackBlock;
  }
  return "";
 }
 test("computeWordClass preserves known and n+1 classes while adding JLPT classes", () => {
@@ -173,10 +193,16 @@ test("computeWordClass uses configured band count for banded mode", () => {
    topX: 4,
    mode: "banded",
    singleColor: "#000000",
-    bandedColors: ["#111111", "#222222", "#333333"] as any,
+    bandedColors: [
      "#111111",
      "#222222",
      "#333333",
      "#444444",
      "#555555",
    ],
  } as any);
-  assert.equal(actual, "word word-frequency-band-1");
+  assert.equal(actual, "word word-frequency-band-3");
 });
 test("computeWordClass skips frequency class when rank is out of topX", () => {
@@ -56,6 +56,7 @@ export interface MergedToken {
  isNPlusOneTarget: boolean;
  jlptLevel?: JlptLevel;
  frequencyRank?: number;
  frequencyLookupTerms?: string[];
 }
 export type FrequencyDictionaryLookup = (term: string) => number | null;