Merge pull request #2 from ksyasuda/add-jlpt-tagging

Add opt-in JLPT tagging flow
This commit is contained in:
2026-02-15 17:30:22 -08:00
committed by GitHub
32 changed files with 1160 additions and 126 deletions

View File

@@ -1,63 +0,0 @@
name: Docs
on:
push:
branches: [main]
paths:
- 'docs/**'
- '.github/workflows/docs.yml'
- 'package.json'
- 'pnpm-lock.yaml'
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup pnpm
uses: pnpm/action-setup@v4
with:
version: 9
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: 20
cache: pnpm
- name: Install dependencies
run: pnpm install --frozen-lockfile
- name: Build docs
run: pnpm run docs:build
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: docs/.vitepress/dist
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
needs: build
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

3
.gitmodules vendored
View File

@@ -2,3 +2,6 @@
path = vendor/texthooker-ui path = vendor/texthooker-ui
url = https://github.com/ksyasuda/texthooker-ui.git url = https://github.com/ksyasuda/texthooker-ui.git
branch = subminer branch = subminer
[submodule "vendor/yomitan-jlpt-vocab"]
path = vendor/yomitan-jlpt-vocab
url = https://github.com/stephenmk/yomitan-jlpt-vocab

View File

@@ -46,12 +46,19 @@ The `subminer` wrapper uses a [Bun](https://bun.sh) shebang, so `bun` must be on
### From Source ### From Source
```bash ```bash
git clone https://github.com/ksyasuda/SubMiner.git git clone --recurse-submodules https://github.com/ksyasuda/SubMiner.git
cd SubMiner cd SubMiner
make build make build
make install make install
``` ```
If you already cloned without submodules:
```bash
cd SubMiner
git submodule update --init --recursive
```
For macOS builds, signing, and platform-specific details, see [docs/installation.md](docs/installation.md). For macOS builds, signing, and platform-specific details, see [docs/installation.md](docs/installation.md).
## Quick Start ## Quick Start

View File

@@ -3,7 +3,7 @@ id: TASK-23
title: >- title: >-
Add opt-in JLPT level tagging by bundling and querying local Yomitan Add opt-in JLPT level tagging by bundling and querying local Yomitan
dictionary dictionary
status: To Do status: In Progress
assignee: [] assignee: []
created_date: '2026-02-13 16:42' created_date: '2026-02-13 16:42'
labels: [] labels: []
@@ -19,13 +19,13 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 Add an opt-in setting/feature flag so JLPT tagging is disabled by default and can be enabled per user/session as requested. - [x] #1 Add an opt-in setting/feature flag so JLPT tagging is disabled by default and can be enabled per user/session as requested.
- [ ] #2 Bundle the existing JLPT Yomitan extension package/data into the project so lookups can be performed offline from local files. - [x] #2 Bundle the existing JLPT Yomitan extension package/data into the project so lookups can be performed offline from local files.
- [ ] #3 Implement token-level dictionary lookup against the bundled JLPT dictionary file to determine presence and JLPT level for words in subtitle lines. - [x] #3 Implement token-level dictionary lookup against the bundled JLPT dictionary file to determine presence and JLPT level for words in subtitle lines.
- [ ] #4 Render a colored underline under each token determined to have a JLPT level; the underline must match token width/length and not affect layout or disrupt line rendering. - [x] #4 Render a colored underline under each token determined to have a JLPT level; the underline must match token width/length and not affect layout or disrupt line rendering.
- [ ] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes. - [x] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes.
- [ ] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior. - [x] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior.
- [ ] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior. - [x] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior.
- [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path. - [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path.
- [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data. - [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data.
- [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy. - [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy.
@@ -34,5 +34,8 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words
## Definition of Done ## Definition of Done
<!-- DOD:BEGIN --> <!-- DOD:BEGIN -->
- [ ] #1 Feature has a clear toggle and persistence of preference if applicable. - [ ] #1 Feature has a clear toggle and persistence of preference if applicable.
- [ ] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility. - [x] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility.
<!-- DOD:END --> <!-- DOD:END -->
## Note
- Full performance/limits documentation and dictionary source/version/perf notes are deferred and tracked separately.

View File

@@ -1,7 +1,7 @@
--- ---
id: TASK-23.1 id: TASK-23.1
title: Implement JLPT token lookup service for subtitle words title: Implement JLPT token lookup service for subtitle words
status: To Do status: In Progress
assignee: [] assignee: []
created_date: '2026-02-13 16:42' created_date: '2026-02-13 16:42'
labels: [] labels: []
@@ -18,14 +18,17 @@ Create a lookup layer that parses/queries the bundled JLPT dictionary file and r
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically. - [x] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically.
- [ ] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing. - [x] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing.
- [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates. - [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates.
- [ ] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines. - [x] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines.
- [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics. - [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics.
<!-- AC:END --> <!-- AC:END -->
## Note
- Full performance and malformed-format limitation documentation is deferred per request and will be handled in a separate pass if needed.
## Definition of Done ## Definition of Done
<!-- DOD:BEGIN --> <!-- DOD:BEGIN -->
- [ ] #1 Lookup service returns JLPT level with deterministic output for test fixtures. - [x] #1 Lookup service returns JLPT level with deterministic output for test fixtures.
<!-- DOD:END --> <!-- DOD:END -->

View File

@@ -1,7 +1,7 @@
--- ---
id: TASK-23.2 id: TASK-23.2
title: Bundle JLPT Yomitan dictionary assets for offline local lookup title: Bundle JLPT Yomitan dictionary assets for offline local lookup
status: To Do status: In Progress
assignee: [] assignee: []
created_date: '2026-02-13 16:42' created_date: '2026-02-13 16:42'
labels: [] labels: []
@@ -18,13 +18,16 @@ Package and include the JLPT Yomitan extension dictionary assets in SubMiner so
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location. - [x] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location.
- [ ] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime. - [x] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime.
- [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible. - [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible.
- [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs. - [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs.
<!-- AC:END --> <!-- AC:END -->
## Note
- Full dictionary source/version/performance notes are intentionally deferred for now (out of scope in this pass).
## Definition of Done ## Definition of Done
<!-- DOD:BEGIN --> <!-- DOD:BEGIN -->
- [ ] #1 Dictionary data is bundled and consumable during development and packaged app runs. - [x] #1 Dictionary data is bundled and consumable during development and packaged app runs.
<!-- DOD:END --> <!-- DOD:END -->

View File

@@ -1,7 +1,7 @@
--- ---
id: TASK-23.3 id: TASK-23.3
title: Render JLPT token underlines with level-based colors in subtitle lines title: Render JLPT token underlines with level-based colors in subtitle lines
status: To Do status: Done
assignee: [] assignee: []
created_date: '2026-02-13 16:42' created_date: '2026-02-13 16:42'
labels: [] labels: []
@@ -18,14 +18,14 @@ Render JLPT-aware token annotations as token-length colored underlines in the su
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 For each token with JLPT level, renderer draws an underline matching token width/length. - [x] #1 For each token with JLPT level, renderer draws an underline matching token width/length.
- [ ] #2 Underlines use distinct colors by JLPT level (e.g., N5/N4/N3/N2/N1) and mapping is consistent/documented. - [x] #2 Underlines use distinct colors by JLPT level (e.g., N5/N4/N3/N2/N1) and mapping is consistent/documented.
- [ ] #3 Non-tagged tokens remain visually unchanged. - [x] #3 Non-tagged tokens remain visually unchanged.
- [ ] #4 Rendering does not alter line height/selection behavior or break wrapping behavior. - [x] #4 Rendering does not alter line height/selection behavior or break wrapping behavior.
- [ ] #5 Feature degrades gracefully when level data is missing or lookup is unavailable. - [x] #5 Feature degrades gracefully when level data is missing or lookup is unavailable.
<!-- AC:END --> <!-- AC:END -->
## Definition of Done ## Definition of Done
<!-- DOD:BEGIN --> <!-- DOD:BEGIN -->
- [ ] #1 Visual output validated for all mapped JLPT levels with no legibility/layout regressions. - [x] #1 Visual output validated for all mapped JLPT levels with no legibility/layout regressions.
<!-- DOD:END --> <!-- DOD:END -->

View File

@@ -1,7 +1,7 @@
--- ---
id: TASK-23.4 id: TASK-23.4
title: Add opt-in control and end-to-end flow + tests for JLPT tagging title: Add opt-in control and end-to-end flow + tests for JLPT tagging
status: To Do status: In Progress
assignee: [] assignee: []
created_date: '2026-02-13 16:42' created_date: '2026-02-13 16:42'
labels: [] labels: []
@@ -18,12 +18,15 @@ Add user/config setting to enable JLPT tagging, wire the feature toggle through
## Acceptance Criteria ## Acceptance Criteria
<!-- AC:BEGIN --> <!-- AC:BEGIN -->
- [ ] #1 JLPT tagging is opt-in and defaults to disabled. - [x] #1 JLPT tagging is opt-in and defaults to disabled.
- [ ] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing. - [x] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing.
- [ ] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering. - [x] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering.
- [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state. - [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state.
<!-- AC:END --> <!-- AC:END -->
## Note
- Full end-to-end + disabled-state test coverage remains pending as an explicit follow-up item.
## Definition of Done ## Definition of Done
<!-- DOD:BEGIN --> <!-- DOD:BEGIN -->
- [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified. - [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified.

View File

@@ -149,6 +149,7 @@
// Primary and secondary subtitle styling. // Primary and secondary subtitle styling.
// ========================================== // ==========================================
"subtitleStyle": { "subtitleStyle": {
"enableJlpt": false,
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", "fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
"fontSize": 35, "fontSize": 35,
"fontColor": "#cad3f5", "fontColor": "#cad3f5",
@@ -157,6 +158,13 @@
"backgroundColor": "rgba(54, 58, 79, 0.5)", "backgroundColor": "rgba(54, 58, 79, 0.5)",
"nPlusOneColor": "#c6a0f6", "nPlusOneColor": "#c6a0f6",
"knownWordColor": "#a6da95", "knownWordColor": "#a6da95",
"jlptColors": {
"N1": "#ed8796",
"N2": "#f5a97f",
"N3": "#f9e2af",
"N4": "#a6e3a1",
"N5": "#8aadf4"
},
"secondary": { "secondary": {
"fontSize": 24, "fontSize": 24,
"fontColor": "#ffffff", "fontColor": "#ffffff",

View File

@@ -552,12 +552,26 @@ See `config.example.jsonc` for detailed configuration options.
| `fontWeight` | string | CSS font-weight, e.g. `"bold"`, `"normal"`, `"600"` (default: `"normal"`) | | `fontWeight` | string | CSS font-weight, e.g. `"bold"`, `"normal"`, `"600"` (default: `"normal"`) |
| `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) | | `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) |
| `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgba(54, 58, 79, 0.5)"`) | | `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgba(54, 58, 79, 0.5)"`) |
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
| `nPlusOneColor` | string | Existing n+1 highlight color (default: `#c6a0f6`) |
| `knownWordColor` | string | Existing known-word highlight color (default: `#a6da95`) |
| `jlptColors` | object | JLPT level underline colors object (`N1`..`N5`) |
| `secondary` | object | Override any of the above for secondary subtitles (optional) | | `secondary` | object | Override any of the above for secondary subtitles (optional) |
Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults. Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults.
**See `config.example.jsonc`** for the complete list of subtitle style configuration options. **See `config.example.jsonc`** for the complete list of subtitle style configuration options.
`jlptColors` keys are:
| Key | Default | Description |
| ---- | --------- | ---------------------------------------- |
| `N1` | `#ed8796` | JLPT N1 underline color |
| `N2` | `#f5a97f` | JLPT N2 underline color |
| `N3` | `#f9e2af` | JLPT N3 underline color |
| `N4` | `#a6e3a1` | JLPT N4 underline color |
| `N5` | `#8aadf4` | JLPT N5 underline color |
### Texthooker ### Texthooker
Control whether the browser opens automatically when texthooker starts: Control whether the browser opens automatically when texthooker starts:

View File

@@ -149,6 +149,7 @@
// Primary and secondary subtitle styling. // Primary and secondary subtitle styling.
// ========================================== // ==========================================
"subtitleStyle": { "subtitleStyle": {
"enableJlpt": false,
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", "fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
"fontSize": 35, "fontSize": 35,
"fontColor": "#cad3f5", "fontColor": "#cad3f5",
@@ -157,6 +158,13 @@
"backgroundColor": "rgba(54, 58, 79, 0.5)", "backgroundColor": "rgba(54, 58, 79, 0.5)",
"nPlusOneColor": "#c6a0f6", "nPlusOneColor": "#c6a0f6",
"knownWordColor": "#a6da95", "knownWordColor": "#a6da95",
"jlptColors": {
"N1": "#ed8796",
"N2": "#f5a97f",
"N3": "#f9e2af",
"N4": "#a6e3a1",
"N5": "#8aadf4"
},
"secondary": { "secondary": {
"fontSize": 24, "fontSize": 24,
"fontColor": "#ffffff", "fontColor": "#ffffff",

View File

@@ -97,6 +97,10 @@
"from": "vendor/yomitan", "from": "vendor/yomitan",
"to": "yomitan" "to": "yomitan"
}, },
{
"from": "vendor/yomitan-jlpt-vocab",
"to": "yomitan-jlpt-vocab"
},
{ {
"from": "assets", "from": "assets",
"to": "assets" "to": "assets"

View File

@@ -174,6 +174,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
ffmpeg_path: "", ffmpeg_path: "",
}, },
subtitleStyle: { subtitleStyle: {
enableJlpt: false,
fontFamily: fontFamily:
"Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
fontSize: 35, fontSize: 35,
@@ -183,6 +184,13 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
backgroundColor: "rgba(54, 58, 79, 0.5)", backgroundColor: "rgba(54, 58, 79, 0.5)",
nPlusOneColor: "#c6a0f6", nPlusOneColor: "#c6a0f6",
knownWordColor: "#a6da95", knownWordColor: "#a6da95",
jlptColors: {
N1: "#ed8796",
N2: "#f5a97f",
N3: "#f9e2af",
N4: "#a6e3a1",
N5: "#8aadf4",
},
secondary: { secondary: {
fontSize: 24, fontSize: 24,
fontColor: "#ffffff", fontColor: "#ffffff",
@@ -280,6 +288,13 @@ export const CONFIG_OPTION_REGISTRY: ConfigOptionRegistryEntry[] = [
defaultValue: DEFAULT_CONFIG.websocket.port, defaultValue: DEFAULT_CONFIG.websocket.port,
description: "Built-in subtitle websocket server port.", description: "Built-in subtitle websocket server port.",
}, },
{
path: "subtitleStyle.enableJlpt",
kind: "boolean",
defaultValue: DEFAULT_CONFIG.subtitleStyle.enableJlpt,
description: "Enable JLPT vocabulary level underlines. "
+ "When disabled, JLPT tagging lookup and underlines are skipped.",
},
{ {
path: "ankiConnect.enabled", path: "ankiConnect.enabled",
kind: "boolean", kind: "boolean",

View File

@@ -442,6 +442,18 @@ export class ConfigService {
: {}), : {}),
}, },
}; };
const enableJlpt = asBoolean((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt);
if (enableJlpt !== undefined) {
resolved.subtitleStyle.enableJlpt = enableJlpt;
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
warn(
"subtitleStyle.enableJlpt",
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
resolved.subtitleStyle.enableJlpt,
"Expected boolean.",
);
}
} }
if (isObject(src.ankiConnect)) { if (isObject(src.ankiConnect)) {

View File

@@ -37,6 +37,7 @@ export {
} from "./runtime-config-service"; } from "./runtime-config-service";
export { openYomitanSettingsWindow } from "./yomitan-settings-service"; export { openYomitanSettingsWindow } from "./yomitan-settings-service";
export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service"; export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
export { createJlptVocabularyLookupService } from "./jlpt-vocab-service";
export { loadYomitanExtensionService } from "./yomitan-extension-loader-service"; export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
export { export {
getJimakuLanguagePreferenceService, getJimakuLanguagePreferenceService,

View File

@@ -0,0 +1,29 @@
// Token-level lexical terms that must never receive a JLPT underline,
// regardless of part of speech. This acts as a safety layer for cases the
// POS-based filter cannot catch (demonstratives, repeated-kana interjections).
const EXCLUDED_TERM_LIST: readonly string[] = [
  // ko-so-a-do demonstrative determiners.
  "この",
  "その",
  "あの",
  "どの",
  // ko-so-a-do demonstrative pronouns.
  "これ",
  "それ",
  "あれ",
  "どれ",
  // ko-so-a-do place words.
  "ここ",
  "そこ",
  "あそこ",
  "どこ",
  // Generic nominalizer.
  "こと",
  // Repeated-kana interjections / sound effects.
  "ああ",
  "ええ",
  "うう",
  "おお",
  "はは",
  "へえ",
  "ふう",
  "ほう",
];

// Exposed as a Set for O(1) membership checks.
export const JLPT_EXCLUDED_TERMS = new Set(EXCLUDED_TERM_LIST);

// Returns true when `term` is on the exclusion list above.
export function shouldIgnoreJlptByTerm(term: string): boolean {
  return JLPT_EXCLUDED_TERMS.has(term);
}

View File

@@ -0,0 +1,45 @@
// MeCab POS1 categories excluded from JLPT-level token tagging, each with a
// human-readable justification. These are typically functional or non-lexical
// words rather than target vocabulary.
export type JlptIgnoredPos1Entry = {
  pos1: string;
  reason: string;
};

export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
  {
    pos1: "助詞",
    reason: "Particles (ko/kara/nagara etc.): mostly grammatical glue, not independent vocabulary.",
  },
  {
    pos1: "助動詞",
    reason: "Auxiliary verbs (past tense, politeness, modality): grammar helpers.",
  },
  {
    pos1: "記号",
    reason: "Symbols/punctuation and symbols-like tokens.",
  },
  {
    pos1: "補助記号",
    reason: "Auxiliary symbols (e.g. bracket-like or markup tokens).",
  },
  {
    pos1: "連体詞",
    reason: "Adnominal forms (e.g. demonstratives like \"この\").",
  },
  {
    pos1: "感動詞",
    reason: "Interjections/onomatopoeia-style exclamations.",
  },
  {
    pos1: "接続詞",
    reason: "Conjunctions that connect clauses, usually not target vocab items.",
  },
  {
    pos1: "接頭詞",
    reason: "Prefixes/prefix-like grammatical elements.",
  },
] as const satisfies readonly JlptIgnoredPos1Entry[];

// Flat list of just the POS1 names, derived from the entries above so the
// two can never drift apart.
export const JLPT_IGNORED_MECAB_POS1 = JLPT_IGNORED_MECAB_POS1_ENTRIES.map(
  ({ pos1 }) => pos1,
);

View File

@@ -0,0 +1,23 @@
// Type-only names must be imported/re-exported with `import type`/`export type`
// so the file transpiles correctly under isolatedModules/verbatimModuleSyntax
// (a plain value re-export of a type fails single-file transpilation).
import type { JlptIgnoredPos1Entry } from "./jlpt-ignored-mecab-pos1";
import {
  JLPT_IGNORED_MECAB_POS1,
  JLPT_IGNORED_MECAB_POS1_ENTRIES,
} from "./jlpt-ignored-mecab-pos1";

export type { JlptIgnoredPos1Entry };
export { JLPT_IGNORED_MECAB_POS1_ENTRIES };

// Data-driven MeCab POS names (pos1) used for JLPT filtering.
export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
  JLPT_IGNORED_MECAB_POS1;

// Set view of the list for O(1) membership checks in the hot tagging path.
const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
  JLPT_IGNORED_MECAB_POS1_LIST,
);

// Returns the full entry list (pos1 + reason) for diagnostics/documentation.
export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
  return JLPT_IGNORED_MECAB_POS1_ENTRIES;
}

// True when a MeCab pos1 category is excluded from JLPT tagging.
export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
  return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
}

View File

@@ -0,0 +1,168 @@
import * as fs from "fs";
import * as path from "path";
import type { JlptLevel } from "../../types";
// Options for building the JLPT vocabulary lookup service.
export interface JlptVocabLookupOptions {
  // Candidate dictionary directories, probed in order; the first directory
  // that yields at least one entry wins (see createJlptVocabularyLookupService).
  searchPaths: string[];
  // Sink for diagnostic messages about dictionary discovery and loading.
  log: (message: string) => void;
}

// One Yomitan term-meta bank file per JLPT level, as shipped by the bundled
// yomitan-jlpt-vocab dictionary (see the vendor/yomitan-jlpt-vocab submodule).
const JLPT_BANK_FILES: { level: JlptLevel; filename: string }[] = [
  { level: "N1", filename: "term_meta_bank_1.json" },
  { level: "N2", filename: "term_meta_bank_2.json" },
  { level: "N3", filename: "term_meta_bank_3.json" },
  { level: "N4", filename: "term_meta_bank_4.json" },
  { level: "N5", filename: "term_meta_bank_5.json" },
];

// Duplicate-term tie-break: higher number wins, so N1 (hardest) takes
// precedence over easier levels when a term appears in multiple banks
// (applied in addEntriesToMap).
const JLPT_LEVEL_PRECEDENCE: Record<JlptLevel, number> = {
  N1: 5,
  N2: 4,
  N3: 3,
  N4: 2,
  N5: 1,
};
// Fallback lookup returned when no usable dictionary was found; it matches
// nothing so callers never need to special-case the "no dictionary" state.
const NOOP_LOOKUP = function (): null {
  return null;
};

// Canonical form of a term before map insertion or lookup (whitespace-trimmed).
const normalizeJlptTerm = (value: string): string => value.trim();
// True when a raw term-meta entry carries a `frequency` object with its own
// `displayValue` property — the marker the JLPT banks use for real entries.
// Prototype-inherited properties deliberately do not count.
function hasFrequencyDisplayValue(meta: unknown): boolean {
  if (typeof meta !== "object" || meta === null) {
    return false;
  }
  const { frequency } = meta as { frequency?: unknown };
  if (typeof frequency !== "object" || frequency === null) {
    return false;
  }
  return Object.prototype.hasOwnProperty.call(
    frequency as Record<string, unknown>,
    "displayValue",
  );
}
// Folds one bank's raw JSON entries into the shared term → level map.
// Entries are Yomitan term-meta tuples [term, id, meta]; anything malformed
// is skipped silently. On duplicate terms the harder level (higher
// precedence) wins, and a skipped update is logged so overlaps stay visible.
function addEntriesToMap(
  rawEntries: unknown,
  level: JlptLevel,
  terms: Map<string, JlptLevel>,
  log: (message: string) => void,
): void {
  if (!Array.isArray(rawEntries)) {
    return;
  }
  for (const rawEntry of rawEntries) {
    if (!Array.isArray(rawEntry)) {
      continue;
    }
    const term = rawEntry[0] as unknown;
    const meta = rawEntry[2] as unknown;
    if (typeof term !== "string") {
      continue;
    }
    const normalizedTerm = normalizeJlptTerm(term);
    // Reject empty terms and entries without a frequency displayValue marker.
    if (!normalizedTerm || !hasFrequencyDisplayValue(meta)) {
      continue;
    }
    const existingLevel = terms.get(normalizedTerm);
    const incomingWins =
      existingLevel === undefined ||
      JLPT_LEVEL_PRECEDENCE[level] > JLPT_LEVEL_PRECEDENCE[existingLevel];
    if (incomingWins) {
      terms.set(normalizedTerm, level);
    } else {
      log(
        `JLPT dictionary already has ${normalizedTerm} as ${existingLevel}; keeping that level instead of ${level}`,
      );
    }
  }
}
// Reads every known JLPT bank file under `dictionaryPath` and merges the
// entries into a single term → level map. Missing, unreadable, or malformed
// bank files are skipped; an empty map means the directory held no usable data.
function collectDictionaryFromPath(
  dictionaryPath: string,
  log: (message: string) => void,
): Map<string, JlptLevel> {
  const terms = new Map<string, JlptLevel>();
  for (const { level, filename } of JLPT_BANK_FILES) {
    const bankPath = path.join(dictionaryPath, filename);
    if (!fs.existsSync(bankPath)) {
      continue;
    }
    let rawEntries: unknown;
    try {
      // Read + parse in one guarded step; either failure skips this bank.
      rawEntries = JSON.parse(fs.readFileSync(bankPath, "utf-8")) as unknown;
    } catch {
      continue;
    }
    addEntriesToMap(rawEntries, level, terms, log);
  }
  return terms;
}
/**
 * Builds the JLPT term → level lookup function.
 *
 * Probes `options.searchPaths` in order; the first directory that yields at
 * least one dictionary entry backs the returned lookup closure. When no
 * usable dictionary is found, a no-op lookup (always null) is returned so
 * callers never need to special-case the missing-dictionary state.
 *
 * Cleanups vs. the original: `foundBankCount` was dead (it was only
 * incremented immediately before an early return, so the post-loop
 * `foundBankCount === 0` check was always true) and `attemptedPaths` merely
 * duplicated `options.searchPaths`; both are removed with identical log
 * output preserved.
 */
export async function createJlptVocabularyLookupService(
  options: JlptVocabLookupOptions,
): Promise<(term: string) => JlptLevel | null> {
  let foundDirectoryCount = 0;
  for (const dictionaryPath of options.searchPaths) {
    if (
      !fs.existsSync(dictionaryPath) ||
      !fs.statSync(dictionaryPath).isDirectory()
    ) {
      continue;
    }
    foundDirectoryCount += 1;
    const terms = collectDictionaryFromPath(dictionaryPath, options.log);
    if (terms.size > 0) {
      options.log(
        `JLPT dictionary loaded from ${dictionaryPath} (${terms.size} entries)`,
      );
      // Capture the loaded map in the lookup closure.
      return (term: string): JlptLevel | null => {
        if (!term) return null;
        const normalized = normalizeJlptTerm(term);
        return normalized ? terms.get(normalized) ?? null : null;
      };
    }
    options.log(
      `JLPT dictionary directory exists but contains no readable term_meta_bank_*.json files: ${dictionaryPath}`,
    );
  }
  options.log(
    `JLPT dictionary not found. Searched ${options.searchPaths.length} candidate path(s): ${options.searchPaths.join(", ")}`,
  );
  if (foundDirectoryCount > 0) {
    options.log(
      "JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.",
    );
  }
  // No-op fallback: never matches.
  return (): null => null;
}

View File

@@ -92,6 +92,7 @@ export async function runAppReadyRuntimeService(
): Promise<void> { ): Promise<void> {
deps.loadSubtitlePosition(); deps.loadSubtitlePosition();
deps.resolveKeybindings(); deps.resolveKeybindings();
await deps.createMecabTokenizerAndCheck();
deps.createMpvClient(); deps.createMpvClient();
deps.reloadConfig(); deps.reloadConfig();
@@ -117,7 +118,6 @@ export async function runAppReadyRuntimeService(
deps.log("mpv_websocket detected, skipping built-in WebSocket server"); deps.log("mpv_websocket detected, skipping built-in WebSocket server");
} }
await deps.createMecabTokenizerAndCheck();
deps.createSubtitleTimingTracker(); deps.createSubtitleTimingTracker();
await deps.loadYomitanExtension(); await deps.loadYomitanExtension();

View File

@@ -21,6 +21,7 @@ function makeDeps(
setYomitanParserInitPromise: () => {}, setYomitanParserInitPromise: () => {},
isKnownWord: () => false, isKnownWord: () => false,
getKnownWordMatchMode: () => "headword", getKnownWordMatchMode: () => "headword",
getJlptLevel: () => null,
tokenizeWithMecab: async () => null, tokenizeWithMecab: async () => null,
...overrides, ...overrides,
}; };
@@ -43,10 +44,171 @@ function makeDepsFromMecabTokenizer(
getMecabTokenizer: () => ({ getMecabTokenizer: () => ({
tokenize, tokenize,
}), }),
getJlptLevel: () => null,
...overrides, ...overrides,
}); });
} }
// Yomitan-parser happy path: a token matched by getJlptLevel ("猫" → N5)
// carries that level through to the resulting token.
test("tokenizeSubtitleService assigns JLPT level to parsed Yomitan tokens", async () => {
  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          // Stubbed Yomitan parse result: one line with two segments.
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "猫",
                    reading: "ねこ",
                    headwords: [[{ term: "猫" }]],
                  },
                  {
                    text: "です",
                    reading: "です",
                    headwords: [[{ term: "です" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "猫" ? "N5" : null),
    }),
  );
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N5");
});

// Term-based exclusion: demonstratives such as "この" get no JLPT level even
// though the lookup would return one for them.
test("tokenizeSubtitleService skips JLPT level for excluded demonstratives", async () => {
  const result = await tokenizeSubtitleService(
    "この",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "この",
                    reading: "この",
                    headwords: [[{ term: "この" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "この" ? "N5" : null),
    }),
  );
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});

// Term-based exclusion: repeated-kana interjections/SFX ("ああ") get no
// JLPT level even when the lookup would match.
test("tokenizeSubtitleService skips JLPT level for repeated kana SFX", async () => {
  const result = await tokenizeSubtitleService(
    "ああ",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "ああ",
                    reading: "ああ",
                    headwords: [[{ term: "ああ" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "ああ" ? "N5" : null),
    }),
  );
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});

// MeCab fallback path: a lookup hit ("猫" → N4) is attached to the mecab token.
test("tokenizeSubtitleService assigns JLPT level to mecab tokens", async () => {
  const result = await tokenizeSubtitleService(
    "猫です",
    makeDepsFromMecabTokenizer(async () => [
      {
        word: "猫",
        partOfSpeech: PartOfSpeech.noun,
        pos1: "",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "猫",
        katakanaReading: "ネコ",
        pronunciation: "ネコ",
      },
    ], {
      getJlptLevel: (text) => (text === "猫" ? "N4" : null),
    }),
  );
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N4");
});

// MeCab POS filtering: particles (pos1 "助詞") are ineligible for tagging,
// so the token keeps its POS data but gets no JLPT level.
test("tokenizeSubtitleService skips JLPT level for mecab tokens marked as ineligible", async () => {
  const result = await tokenizeSubtitleService(
    "は",
    makeDepsFromMecabTokenizer(async () => [
      {
        word: "は",
        partOfSpeech: PartOfSpeech.particle,
        pos1: "助詞",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "は",
        katakanaReading: "ハ",
        pronunciation: "ハ",
      },
    ], {
      getJlptLevel: (text) => (text === "は" ? "N5" : null),
    }),
  );
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.pos1, "助詞");
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
test("tokenizeSubtitleService returns null tokens for empty normalized text", async () => { test("tokenizeSubtitleService returns null tokens for empty normalized text", async () => {
const result = await tokenizeSubtitleService(" \\n ", makeDeps()); const result = await tokenizeSubtitleService(" \\n ", makeDeps());
assert.deepEqual(result, { text: " \\n ", tokens: null }); assert.deepEqual(result, { text: " \\n ", tokens: null });

View File

@@ -1,20 +1,23 @@
import { BrowserWindow, Extension, session } from "electron"; import { BrowserWindow, Extension, session } from "electron";
import { markNPlusOneTargets, mergeTokens } from "../../token-merger"; import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
import { import {
JlptLevel,
MergedToken, MergedToken,
NPlusOneMatchMode, NPlusOneMatchMode,
PartOfSpeech, PartOfSpeech,
SubtitleData, SubtitleData,
Token, Token,
} from "../../types"; } from "../../types";
import { shouldIgnoreJlptForMecabPos1 } from "./jlpt-token-filter-config";
import { shouldIgnoreJlptByTerm } from "./jlpt-excluded-terms";
interface YomitanParseHeadword { interface YomitanParseHeadword {
term?: unknown; term?: unknown;
} }
interface YomitanParseSegment { interface YomitanParseSegment {
text?: unknown; text?: string;
reading?: unknown; reading?: string;
headwords?: unknown; headwords?: unknown;
} }
@@ -24,6 +27,20 @@ interface YomitanParseResultItem {
content?: unknown; content?: unknown;
} }
type YomitanParseLine = YomitanParseSegment[];

// Katakana in U+30A1–U+30F6 maps onto hiragana by subtracting this offset.
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;

/** Narrows an unknown value to a non-null, non-function object. */
function isObject(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object";
}

/** Narrows an unknown value to a string. */
function isString(value: unknown): value is string {
  return typeof value === "string";
}
export interface TokenizerServiceDeps { export interface TokenizerServiceDeps {
getYomitanExt: () => Extension | null; getYomitanExt: () => Extension | null;
getYomitanParserWindow: () => BrowserWindow | null; getYomitanParserWindow: () => BrowserWindow | null;
@@ -34,6 +51,8 @@ export interface TokenizerServiceDeps {
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void; setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
isKnownWord: (text: string) => boolean; isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode; getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getJlptEnabled?: () => boolean;
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>; tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
} }
@@ -51,6 +70,8 @@ export interface TokenizerDepsRuntimeOptions {
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void; setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
isKnownWord: (text: string) => boolean; isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode; getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getJlptEnabled?: () => boolean;
getMecabTokenizer: () => MecabTokenizerLike | null; getMecabTokenizer: () => MecabTokenizerLike | null;
} }
@@ -67,6 +88,8 @@ export function createTokenizerDepsRuntimeService(
setYomitanParserInitPromise: options.setYomitanParserInitPromise, setYomitanParserInitPromise: options.setYomitanParserInitPromise,
isKnownWord: options.isKnownWord, isKnownWord: options.isKnownWord,
getKnownWordMatchMode: options.getKnownWordMatchMode, getKnownWordMatchMode: options.getKnownWordMatchMode,
getJlptLevel: options.getJlptLevel,
getJlptEnabled: options.getJlptEnabled,
tokenizeWithMecab: async (text) => { tokenizeWithMecab: async (text) => {
const mecabTokenizer = options.getMecabTokenizer(); const mecabTokenizer = options.getMecabTokenizer();
if (!mecabTokenizer) { if (!mecabTokenizer) {
@@ -112,19 +135,205 @@ function applyKnownWordMarking(
}); });
} }
/**
 * Picks the text used for JLPT dictionary lookups: the dictionary headword
 * when present, then the reading, falling back to the raw surface form.
 */
function resolveJlptLookupText(token: MergedToken): string {
  return token.headword || token.reading || token.surface;
}
/**
 * Normalizes text before consulting the JLPT exclusion list: trims
 * surrounding whitespace and folds katakana (U+30A1–U+30F6) to hiragana so
 * exclusion entries only need to be listed once, in hiragana.
 */
function normalizeJlptTextForExclusion(text: string): string {
  const trimmed = text.trim();
  if (!trimmed) {
    return "";
  }
  return [...trimmed]
    .map((char) => {
      const code = char.codePointAt(0);
      // Katakana block folds to hiragana by subtracting 0x60.
      if (code !== undefined && code >= 0x30a1 && code <= 0x30f6) {
        return String.fromCodePoint(code - 0x60);
      }
      return char;
    })
    .join("");
}
// Kana ranges recognised by the SFX filter: hiragana (U+3041–U+3096),
// kana marks (U+309B–U+309F), katakana (U+30A0–U+30FA) and katakana
// iteration marks (U+30FD–U+30FF). Note the prolonged sound mark U+30FC
// falls outside these ranges and is therefore NOT treated as kana here.
const KANA_CHAR_PATTERN =
  /^[\u3041-\u3096\u309b-\u309f\u30a0-\u30fa\u30fd-\u30ff]/u;

/** True when the first code point of `char` is in the kana ranges above. */
function isKanaChar(char: string): boolean {
  return KANA_CHAR_PATTERN.test(char);
}

/**
 * Detects repeated-kana speech-like tokens (e.g. 「ああああ」, 「ははは」, 「うーん」 style patterns)
 * so they are not JLPT-labeled when they are mostly expressive particles/sfx.
 */
function isRepeatedKanaSfx(text: string): boolean {
  const chars = [...text.trim()];
  if (chars.length === 0 || !chars.every(isKanaChar)) {
    return false;
  }

  const counts = new Map<string, number>();
  for (const char of chars) {
    counts.set(char, (counts.get(char) ?? 0) + 1);
  }
  const hasAdjacentRepeat = chars.some(
    (char, i) => i > 0 && char === chars[i - 1],
  );
  const topCount = Math.max(...counts.values());

  if (chars.length <= 2) {
    // One or two characters: any repetition at all counts as SFX.
    return hasAdjacentRepeat || topCount >= 2;
  }
  if (hasAdjacentRepeat) {
    return true;
  }
  // No adjacent repeat: still SFX when one character dominates, i.e. fills
  // at least half of the positions (rounded up).
  return topCount >= Math.ceil(chars.length / 2);
}
/**
 * Decides whether a token may receive a JLPT level tag.
 *
 * A token is ineligible when:
 * - its MeCab pos1 is on the configured ignore list (e.g. particles), or
 * - any text candidate (preferred lookup text, surface, reading, headword)
 *   is on the per-term exclusion list or looks like repeated-kana SFX.
 * Each candidate is checked both raw (trimmed) and katakana-folded.
 */
function isJlptEligibleToken(token: MergedToken): boolean {
  if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) {
    return false;
  }
  const candidates = [
    resolveJlptLookupText(token),
    token.surface,
    token.reading,
    token.headword,
  ].filter(
    (candidate): candidate is string =>
      typeof candidate === "string" && candidate.length > 0,
  );
  return !candidates.some((candidate) => {
    const normalizedCandidate = normalizeJlptTextForExclusion(candidate);
    if (!normalizedCandidate) {
      // Whitespace-only candidate: nothing to check.
      return false;
    }
    const trimmedCandidate = candidate.trim();
    return (
      shouldIgnoreJlptByTerm(trimmedCandidate) ||
      shouldIgnoreJlptByTerm(normalizedCandidate) ||
      isRepeatedKanaSfx(candidate) ||
      isRepeatedKanaSfx(normalizedCandidate)
    );
  });
}
/**
 * Type guard for one entry of Yomitan's parse output: an object whose
 * `source` is exactly "scanning-parser" and whose `content` is an array.
 */
function isYomitanParseResultItem(
  value: unknown,
): value is YomitanParseResultItem {
  if (!value || typeof value !== "object") {
    return false;
  }
  const candidate = value as YomitanParseResultItem;
  return (
    candidate.source === "scanning-parser" && Array.isArray(candidate.content)
  );
}
/**
 * Type guard for one parsed line: an array whose every entry is an object
 * carrying a string `text` field (the shape Yomitan's scanning parser emits).
 */
function isYomitanParseLine(value: unknown): value is YomitanParseLine {
  return (
    Array.isArray(value) &&
    value.every(
      (segment) =>
        segment !== null &&
        typeof segment === "object" &&
        typeof (segment as YomitanParseSegment).text === "string",
    )
  );
}
/**
 * Type guard for the headwords payload: an array of groups where every item
 * of every group is an object with a string `term`.
 */
function isYomitanHeadwordRows(
  value: unknown,
): value is YomitanParseHeadword[][] {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.every(
    (group) =>
      Array.isArray(group) &&
      group.every(
        (item) =>
          item !== null &&
          typeof item === "object" &&
          typeof (item as YomitanParseHeadword).term === "string",
      ),
  );
}
function extractYomitanHeadword(segment: YomitanParseSegment): string { function extractYomitanHeadword(segment: YomitanParseSegment): string {
const headwords = segment.headwords; const headwords = segment.headwords;
if (!Array.isArray(headwords) || headwords.length === 0) { if (!isYomitanHeadwordRows(headwords)) {
return ""; return "";
} }
const firstGroup = headwords[0]; for (const group of headwords) {
if (!Array.isArray(firstGroup) || firstGroup.length === 0) { if (group.length > 0) {
return ""; const firstHeadword = group[0] as YomitanParseHeadword;
if (isString(firstHeadword?.term)) {
return firstHeadword.term;
}
}
} }
const firstHeadword = firstGroup[0] as YomitanParseHeadword; return "";
return typeof firstHeadword?.term === "string" ? firstHeadword.term : ""; }
function applyJlptMarking(
tokens: MergedToken[],
getJlptLevel: (text: string) => JlptLevel | null,
): MergedToken[] {
return tokens.map((token) => {
if (!isJlptEligibleToken(token)) {
return { ...token, jlptLevel: undefined };
}
const primaryLevel = getJlptLevel(resolveJlptLookupText(token));
const fallbackLevel = getJlptLevel(token.surface);
return {
...token,
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
};
});
} }
function mapYomitanParseResultsToMergedTokens( function mapYomitanParseResultsToMergedTokens(
@@ -136,14 +345,9 @@ function mapYomitanParseResultsToMergedTokens(
return null; return null;
} }
const scanningItems = parseResults.filter((item) => { const scanningItems = parseResults.filter(
const resultItem = item as YomitanParseResultItem; (item): item is YomitanParseResultItem => isYomitanParseResultItem(item),
return ( );
resultItem &&
resultItem.source === "scanning-parser" &&
Array.isArray(resultItem.content)
);
}) as YomitanParseResultItem[];
if (scanningItems.length === 0) { if (scanningItems.length === 0) {
return null; return null;
@@ -158,24 +362,21 @@ function mapYomitanParseResultsToMergedTokens(
const tokens: MergedToken[] = []; const tokens: MergedToken[] = [];
let charOffset = 0; let charOffset = 0;
let validLineCount = 0;
for (const line of content) { for (const line of content) {
if (!Array.isArray(line)) { if (!isYomitanParseLine(line)) {
continue; continue;
} }
validLineCount += 1;
let surface = ""; let surface = "";
let reading = ""; let reading = "";
let headword = ""; let headword = "";
for (const rawSegment of line) { for (const segment of line) {
const segment = rawSegment as YomitanParseSegment;
if (!segment || typeof segment !== "object") {
continue;
}
const segmentText = segment.text; const segmentText = segment.text;
if (typeof segmentText !== "string" || segmentText.length === 0) { if (!segmentText || segmentText.length === 0) {
continue; continue;
} }
@@ -205,6 +406,7 @@ function mapYomitanParseResultsToMergedTokens(
startPos: start, startPos: start,
endPos: end, endPos: end,
partOfSpeech: PartOfSpeech.other, partOfSpeech: PartOfSpeech.other,
pos1: "",
isMerged: true, isMerged: true,
isNPlusOneTarget: false, isNPlusOneTarget: false,
isKnown: (() => { isKnown: (() => {
@@ -218,9 +420,108 @@ function mapYomitanParseResultsToMergedTokens(
}); });
} }
if (validLineCount === 0) {
return null;
}
return tokens.length > 0 ? tokens : null; return tokens.length > 0 ? tokens : null;
} }
/**
 * Returns the pos1 tag of the MeCab token whose character span best matches
 * `token`'s span, or undefined when no MeCab token with a pos1 overlaps it.
 *
 * Best match = greatest overlap; ties are broken by the longer MeCab token
 * span, then by the earlier start position.
 */
function pickClosestMecabPos1(
  token: MergedToken,
  mecabTokens: MergedToken[],
): string | undefined {
  const tokenStart = token.startPos ?? 0;
  const tokenEnd = token.endPos ?? tokenStart + token.surface.length;

  let best:
    | { pos1: string; overlap: number; span: number; start: number }
    | null = null;

  for (const mecabToken of mecabTokens) {
    if (!mecabToken.pos1) {
      continue;
    }
    const start = mecabToken.startPos ?? 0;
    const end = mecabToken.endPos ?? start + mecabToken.surface.length;
    const overlap = Math.min(tokenEnd, end) - Math.max(tokenStart, start);
    if (overlap <= 0) {
      continue;
    }
    const span = end - start;
    const beatsCurrent =
      !best ||
      overlap > best.overlap ||
      (overlap === best.overlap &&
        (span > best.span || (span === best.span && start < best.start)));
    if (beatsCurrent) {
      best = { pos1: mecabToken.pos1, overlap, span, start };
    }
  }

  return best?.pos1;
}
/**
 * Backfills missing pos1 tags on Yomitan tokens using a MeCab pass over the
 * same text. Best-effort: if MeCab throws or returns nothing, the Yomitan
 * tokens are returned untouched (with a warning) rather than dropped.
 */
async function enrichYomitanPos1(
  tokens: MergedToken[],
  deps: TokenizerServiceDeps,
  text: string,
): Promise<MergedToken[]> {
  if (!tokens || tokens.length === 0) {
    return tokens;
  }

  let mecabTokens: MergedToken[] | null = null;
  try {
    mecabTokens = await deps.tokenizeWithMecab(text);
  } catch (err) {
    console.warn(
      "Failed to enrich Yomitan tokens with MeCab POS:",
      (err as Error).message,
      `tokenCount=${tokens.length}`,
      `textLength=${text.length}`,
    );
    return tokens;
  }

  if (!mecabTokens || mecabTokens.length === 0) {
    console.warn(
      "MeCab enrichment returned no tokens; preserving Yomitan token output.",
      `tokenCount=${tokens.length}`,
      `textLength=${text.length}`,
    );
    return tokens;
  }

  const resolved = mecabTokens;
  return tokens.map((token) => {
    // Only fill in pos1 where Yomitan left it blank.
    if (token.pos1) {
      return token;
    }
    const pos1 = pickClosestMecabPos1(token, resolved);
    return pos1 ? { ...token, pos1 } : token;
  });
}
async function ensureYomitanParserWindow( async function ensureYomitanParserWindow(
deps: TokenizerServiceDeps, deps: TokenizerServiceDeps,
): Promise<boolean> { ): Promise<boolean> {
@@ -356,11 +657,16 @@ async function parseWithYomitanInternalParser(
script, script,
true, true,
); );
return mapYomitanParseResultsToMergedTokens( const yomitanTokens = mapYomitanParseResultsToMergedTokens(
parseResults, parseResults,
deps.isKnownWord, deps.isKnownWord,
deps.getKnownWordMatchMode(), deps.getKnownWordMatchMode(),
); );
if (!yomitanTokens || yomitanTokens.length === 0) {
return null;
}
return enrichYomitanPos1(yomitanTokens, deps, text);
} catch (err) { } catch (err) {
console.error("Yomitan parser request failed:", (err as Error).message); console.error("Yomitan parser request failed:", (err as Error).message);
return null; return null;
@@ -385,6 +691,7 @@ export async function tokenizeSubtitleService(
.replace(/\n/g, " ") .replace(/\n/g, " ")
.replace(/\s+/g, " ") .replace(/\s+/g, " ")
.trim(); .trim();
const jlptEnabled = deps.getJlptEnabled?.() !== false;
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps); const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
if (yomitanTokens && yomitanTokens.length > 0) { if (yomitanTokens && yomitanTokens.length > 0) {
@@ -393,7 +700,10 @@ export async function tokenizeSubtitleService(
deps.isKnownWord, deps.isKnownWord,
deps.getKnownWordMatchMode(), deps.getKnownWordMatchMode(),
); );
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) }; const jlptMarkedTokens = jlptEnabled
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
} }
try { try {
@@ -404,7 +714,10 @@ export async function tokenizeSubtitleService(
deps.isKnownWord, deps.isKnownWord,
deps.getKnownWordMatchMode(), deps.getKnownWordMatchMode(),
); );
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) }; const jlptMarkedTokens = jlptEnabled
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
} }
} catch (err) { } catch (err) {
console.error("Tokenization error:", (err as Error).message); console.error("Tokenization error:", (err as Error).message);

View File

@@ -59,6 +59,7 @@ export async function loadYomitanExtensionService(
deps: YomitanExtensionLoaderDeps, deps: YomitanExtensionLoaderDeps,
): Promise<Extension | null> { ): Promise<Extension | null> {
const searchPaths = [ const searchPaths = [
path.join(__dirname, "..", "..", "vendor", "yomitan"),
path.join(__dirname, "..", "..", "..", "vendor", "yomitan"), path.join(__dirname, "..", "..", "..", "vendor", "yomitan"),
path.join(process.resourcesPath, "yomitan"), path.join(process.resourcesPath, "yomitan"),
"/usr/share/SubMiner/yomitan", "/usr/share/SubMiner/yomitan",

View File

@@ -95,6 +95,7 @@ import {
createOverlayContentMeasurementStoreService, createOverlayContentMeasurementStoreService,
createOverlayWindowService, createOverlayWindowService,
createTokenizerDepsRuntimeService, createTokenizerDepsRuntimeService,
createJlptVocabularyLookupService,
cycleSecondarySubModeService, cycleSecondarySubModeService,
enforceOverlayLayerOrderService, enforceOverlayLayerOrderService,
ensureOverlayWindowLevelService, ensureOverlayWindowLevelService,
@@ -227,6 +228,8 @@ const isDev =
process.argv.includes("--dev") || process.argv.includes("--debug"); process.argv.includes("--dev") || process.argv.includes("--debug");
const texthookerService = new TexthookerService(); const texthookerService = new TexthookerService();
const subtitleWsService = new SubtitleWebSocketService(); const subtitleWsService = new SubtitleWebSocketService();
let jlptDictionaryLookupInitialized = false;
let jlptDictionaryLookupInitialization: Promise<void> | null = null;
const appLogger = { const appLogger = {
logInfo: (message: string) => { logInfo: (message: string) => {
console.log(message); console.log(message);
@@ -464,6 +467,73 @@ function loadSubtitlePosition(): SubtitlePosition | null {
return appState.subtitlePosition; return appState.subtitlePosition;
} }
/**
 * Builds the ordered, de-duplicated list of directories to probe for the
 * bundled JLPT vocabulary dictionaries: dev checkouts, packaged Electron
 * resource layouts, user config directories, and finally the CWD.
 */
function getJlptDictionarySearchPaths(): string[] {
  const homeDir = os.homedir();
  const dictionaryRoots = [
    // Development/runtime source trees where the repo is checked out.
    path.join(__dirname, "..", "..", "vendor", "yomitan-jlpt-vocab"),
    path.join(app.getAppPath(), "vendor", "yomitan-jlpt-vocab"),
    // Packaged app resources (Electron build output layout).
    path.join(process.resourcesPath, "yomitan-jlpt-vocab"),
    path.join(process.resourcesPath, "app.asar", "vendor", "yomitan-jlpt-vocab"),
    // User override/config directories for manually installed dictionaries.
    USER_DATA_PATH,
    app.getPath("userData"),
    path.join(homeDir, ".config", "SubMiner"),
    path.join(homeDir, ".config", "subminer"),
    path.join(homeDir, "Library", "Application Support", "SubMiner"),
    path.join(homeDir, "Library", "Application Support", "subminer"),
    // Last-resort fallback: current working directory (local CLI/test runs).
    process.cwd(),
  ];

  // Probe each root as-is plus two nested layouts; the Set keeps the first
  // occurrence of each path so the probe order stays stable.
  const searchPaths = dictionaryRoots.flatMap((dictionaryRoot) => [
    dictionaryRoot,
    path.join(dictionaryRoot, "vendor", "yomitan-jlpt-vocab"),
    path.join(dictionaryRoot, "yomitan-jlpt-vocab"),
  ]);
  return [...new Set(searchPaths)];
}
/**
 * Loads the JLPT vocabulary dictionaries and installs the resulting lookup
 * function on app state so the tokenizer can tag tokens with JLPT levels.
 */
async function initializeJlptDictionaryLookup(): Promise<void> {
  appState.jlptLevelLookup = await createJlptVocabularyLookupService({
    searchPaths: getJlptDictionarySearchPaths(),
    log: (message) => {
      console.log(`[JLPT] ${message}`);
    },
  });
}

/**
 * Lazily initializes the JLPT lookup at most once per process, and only
 * when `subtitleStyle.enableJlpt` is set. Concurrent callers await the same
 * in-flight promise; on failure the cached promise is cleared so a later
 * call can retry, and the error propagates to the current caller.
 */
async function ensureJlptDictionaryLookup(): Promise<void> {
  // Feature is opt-in; skip all work when disabled in config.
  if (!getResolvedConfig().subtitleStyle.enableJlpt) {
    return;
  }
  if (jlptDictionaryLookupInitialized) {
    return;
  }
  if (!jlptDictionaryLookupInitialization) {
    jlptDictionaryLookupInitialization = initializeJlptDictionaryLookup()
      .then(() => {
        jlptDictionaryLookupInitialized = true;
      })
      .catch((error) => {
        // Reset so the next subtitle retries initialization after a failure.
        jlptDictionaryLookupInitialization = null;
        throw error;
      });
  }
  await jlptDictionaryLookupInitialization;
}
function saveSubtitlePosition(position: SubtitlePosition): void { function saveSubtitlePosition(position: SubtitlePosition): void {
appState.subtitlePosition = position; appState.subtitlePosition = position;
saveSubtitlePositionService({ saveSubtitlePositionService({
@@ -804,6 +874,7 @@ function updateMpvSubtitleRenderMetrics(
} }
async function tokenizeSubtitle(text: string): Promise<SubtitleData> { async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
await ensureJlptDictionaryLookup();
return tokenizeSubtitleService( return tokenizeSubtitleService(
text, text,
createTokenizerDepsRuntimeService({ createTokenizerDepsRuntimeService({
@@ -825,6 +896,9 @@ async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
getKnownWordMatchMode: () => getKnownWordMatchMode: () =>
appState.ankiIntegration?.getKnownWordMatchMode() ?? appState.ankiIntegration?.getKnownWordMatchMode() ??
getResolvedConfig().ankiConnect.nPlusOne.matchMode, getResolvedConfig().ankiConnect.nPlusOne.matchMode,
getJlptLevel: (text) => appState.jlptLevelLookup(text),
getJlptEnabled: () =>
getResolvedConfig().subtitleStyle.enableJlpt,
getMecabTokenizer: () => appState.mecabTokenizer, getMecabTokenizer: () => appState.mecabTokenizer,
}), }),
); );
@@ -1345,6 +1419,7 @@ registerIpcRuntimeServices({
...resolvedConfig.subtitleStyle, ...resolvedConfig.subtitleStyle,
nPlusOneColor: resolvedConfig.ankiConnect.nPlusOne.nPlusOne, nPlusOneColor: resolvedConfig.ankiConnect.nPlusOne.nPlusOne,
knownWordColor: resolvedConfig.ankiConnect.nPlusOne.knownWord, knownWordColor: resolvedConfig.ankiConnect.nPlusOne.knownWord,
enableJlpt: resolvedConfig.subtitleStyle.enableJlpt,
}; };
}, },
saveSubtitlePosition: (position: unknown) => saveSubtitlePosition: (position: unknown) =>

View File

@@ -6,6 +6,7 @@ import type {
SecondarySubMode, SecondarySubMode,
SubtitlePosition, SubtitlePosition,
KikuFieldGroupingChoice, KikuFieldGroupingChoice,
JlptLevel,
} from "../types"; } from "../types";
import type { CliArgs } from "../cli/args"; import type { CliArgs } from "../cli/args";
import type { SubtitleTimingTracker } from "../subtitle-timing-tracker"; import type { SubtitleTimingTracker } from "../subtitle-timing-tracker";
@@ -53,6 +54,7 @@ export interface AppState {
backendOverride: string | null; backendOverride: string | null;
autoStartOverlay: boolean; autoStartOverlay: boolean;
texthookerOnlyMode: boolean; texthookerOnlyMode: boolean;
jlptLevelLookup: (term: string) => JlptLevel | null;
} }
export interface AppStateInitialValues { export interface AppStateInitialValues {
@@ -112,6 +114,7 @@ export function createAppState(values: AppStateInitialValues): AppState {
backendOverride: values.backendOverride ?? null, backendOverride: values.backendOverride ?? null,
autoStartOverlay: values.autoStartOverlay ?? false, autoStartOverlay: values.autoStartOverlay ?? false,
texthookerOnlyMode: values.texthookerOnlyMode ?? false, texthookerOnlyMode: values.texthookerOnlyMode ?? false,
jlptLevelLookup: () => null,
}; };
} }

View File

@@ -71,6 +71,11 @@ export type RendererState = {
knownWordColor: string; knownWordColor: string;
nPlusOneColor: string; nPlusOneColor: string;
jlptN1Color: string;
jlptN2Color: string;
jlptN3Color: string;
jlptN4Color: string;
jlptN5Color: string;
keybindingsMap: Map<string, (string | number)[]>; keybindingsMap: Map<string, (string | number)[]>;
chordPending: boolean; chordPending: boolean;
@@ -130,6 +135,11 @@ export function createRendererState(): RendererState {
knownWordColor: "#a6da95", knownWordColor: "#a6da95",
nPlusOneColor: "#c6a0f6", nPlusOneColor: "#c6a0f6",
jlptN1Color: "#ed8796",
jlptN2Color: "#f5a97f",
jlptN3Color: "#f9e2af",
jlptN4Color: "#a6e3a1",
jlptN5Color: "#8aadf4",
keybindingsMap: new Map(), keybindingsMap: new Map(),
chordPending: false, chordPending: false,

View File

@@ -250,6 +250,11 @@ body {
color: #cad3f5; color: #cad3f5;
--subtitle-known-word-color: #a6da95; --subtitle-known-word-color: #a6da95;
--subtitle-n-plus-one-color: #c6a0f6; --subtitle-n-plus-one-color: #c6a0f6;
--subtitle-jlpt-n1-color: #ed8796;
--subtitle-jlpt-n2-color: #f5a97f;
--subtitle-jlpt-n3-color: #f9e2af;
--subtitle-jlpt-n4-color: #a6e3a1;
--subtitle-jlpt-n5-color: #8aadf4;
text-shadow: text-shadow:
2px 2px 4px rgba(0, 0, 0, 0.8), 2px 2px 4px rgba(0, 0, 0, 0.8),
-1px -1px 2px rgba(0, 0, 0, 0.5); -1px -1px 2px rgba(0, 0, 0, 0.5);
@@ -296,6 +301,51 @@ body.settings-modal-open #subtitleContainer {
text-shadow: 0 0 6px rgba(198, 160, 246, 0.35); text-shadow: 0 0 6px rgba(198, 160, 246, 0.35);
} }
#subtitleRoot .word.word-jlpt-n1 {
color: inherit;
text-decoration-line: underline;
text-decoration-thickness: 2px;
text-underline-offset: 4px;
text-decoration-color: var(--subtitle-jlpt-n1-color, #ed8796);
text-decoration-style: solid;
}
#subtitleRoot .word.word-jlpt-n2 {
color: inherit;
text-decoration-line: underline;
text-decoration-thickness: 2px;
text-underline-offset: 4px;
text-decoration-color: var(--subtitle-jlpt-n2-color, #f5a97f);
text-decoration-style: solid;
}
#subtitleRoot .word.word-jlpt-n3 {
color: inherit;
text-decoration-line: underline;
text-decoration-thickness: 2px;
text-underline-offset: 4px;
text-decoration-color: var(--subtitle-jlpt-n3-color, #f9e2af);
text-decoration-style: solid;
}
#subtitleRoot .word.word-jlpt-n4 {
color: inherit;
text-decoration-line: underline;
text-decoration-thickness: 2px;
text-underline-offset: 4px;
text-decoration-color: var(--subtitle-jlpt-n4-color, #a6e3a1);
text-decoration-style: solid;
}
#subtitleRoot .word.word-jlpt-n5 {
color: inherit;
text-decoration-line: underline;
text-decoration-thickness: 2px;
text-underline-offset: 4px;
text-decoration-color: var(--subtitle-jlpt-n5-color, #8aadf4);
text-decoration-style: solid;
}
#subtitleRoot .word:hover { #subtitleRoot .word:hover {
background: rgba(255, 255, 255, 0.2); background: rgba(255, 255, 255, 0.2);
border-radius: 3px; border-radius: 3px;

View File

@@ -0,0 +1,80 @@
import test from "node:test";
import assert from "node:assert/strict";
import fs from "node:fs";
import path from "node:path";
import type { MergedToken } from "../types";
import { PartOfSpeech } from "../types.js";
import { computeWordClass } from "./subtitle-render.js";
/**
 * Builds a minimal MergedToken for tests: every required field is defaulted
 * and selected fields can be overridden.
 */
function createToken(overrides: Partial<MergedToken>): MergedToken {
  const base: MergedToken = {
    surface: "",
    reading: "",
    headword: "",
    startPos: 0,
    endPos: 0,
    partOfSpeech: PartOfSpeech.other,
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  return { ...base, ...overrides };
}
/**
 * Extracts the declaration body (text between the braces) of the
 * `.word-jlpt-n{level}` rule from a stylesheet string. Returns "" when the
 * selector, opening brace, or closing brace cannot be found.
 */
function extractClassBlock(cssText: string, level: number): string {
  const selector = `#subtitleRoot .word.word-jlpt-n${level}`;
  const selectorIndex = cssText.indexOf(selector);
  if (selectorIndex < 0) {
    return "";
  }
  const bodyStart = cssText.indexOf("{", selectorIndex);
  if (bodyStart < 0) {
    return "";
  }
  const bodyEnd = cssText.indexOf("}", bodyStart);
  if (bodyEnd < 0) {
    return "";
  }
  return cssText.slice(bodyStart + 1, bodyEnd);
}
test("computeWordClass preserves known and n+1 classes while adding JLPT classes", () => {
const knownJlpt = createToken({
isKnown: true,
jlptLevel: "N1",
surface: "猫",
});
const nPlusOneJlpt = createToken({
isNPlusOneTarget: true,
jlptLevel: "N2",
surface: "犬",
});
assert.equal(computeWordClass(knownJlpt), "word word-known word-jlpt-n1");
assert.equal(
computeWordClass(nPlusOneJlpt),
"word word-n-plus-one word-jlpt-n2",
);
});
test("JLPT CSS rules use underline-only styling in renderer stylesheet", () => {
const distCssPath = path.join(process.cwd(), "dist", "renderer", "style.css");
const srcCssPath = path.join(process.cwd(), "src", "renderer", "style.css");
const cssPath = fs.existsSync(distCssPath)
? distCssPath
: srcCssPath;
if (!fs.existsSync(cssPath)) {
assert.fail(
"JLPT CSS file missing. Run `pnpm run build` first, or ensure src/renderer/style.css exists.",
);
}
const cssText = fs.readFileSync(cssPath, "utf-8");
for (let level = 1; level <= 5; level += 1) {
const block = extractClassBlock(cssText, level);
assert.ok(block.length > 0, `word-jlpt-n${level} class should exist`);
assert.match(block, /text-decoration-line:\s*underline;/);
assert.match(block, /text-decoration-thickness:\s*2px;/);
assert.match(block, /text-underline-offset:\s*4px;/);
assert.match(block, /color:\s*inherit;/);
}
});

View File

@@ -15,6 +15,15 @@ function normalizeSubtitle(text: string, trim = true): string {
return trim ? normalized.trim() : normalized; return trim ? normalized.trim() : normalized;
} }
// Accepts #RGB, #RGBA, #RRGGBB and #RRGGBBAA (case-insensitive hex digits).
const HEX_COLOR_PATTERN =
  /^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})$/;

/**
 * Returns `value` trimmed when it is a valid CSS hex color string;
 * otherwise returns `fallback`. Guards against malformed colors arriving
 * in style payloads.
 */
function sanitizeHexColor(value: unknown, fallback: string): string {
  if (typeof value !== "string") {
    return fallback;
  }
  const trimmed = value.trim();
  return HEX_COLOR_PATTERN.test(trimmed) ? trimmed : fallback;
}
function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void { function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
const fragment = document.createDocumentFragment(); const fragment = document.createDocumentFragment();
@@ -50,16 +59,20 @@ function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
root.appendChild(fragment); root.appendChild(fragment);
} }
function computeWordClass(token: MergedToken): string { export function computeWordClass(token: MergedToken): string {
const classes = ["word"];
if (token.isNPlusOneTarget) { if (token.isNPlusOneTarget) {
return "word word-n-plus-one"; classes.push("word-n-plus-one");
} else if (token.isKnown) {
classes.push("word-known");
} }
if (token.isKnown) { if (token.jlptLevel) {
return "word word-known"; classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
} }
return "word"; return classes.join(" ");
} }
function renderCharacterLevel(root: HTMLElement, text: string): void { function renderCharacterLevel(root: HTMLElement, text: string): void {
@@ -189,6 +202,22 @@ export function createSubtitleRenderer(ctx: RendererContext) {
style.knownWordColor ?? ctx.state.knownWordColor ?? "#a6da95"; style.knownWordColor ?? ctx.state.knownWordColor ?? "#a6da95";
const nPlusOneColor = const nPlusOneColor =
style.nPlusOneColor ?? ctx.state.nPlusOneColor ?? "#c6a0f6"; style.nPlusOneColor ?? ctx.state.nPlusOneColor ?? "#c6a0f6";
const jlptColors = {
N1: ctx.state.jlptN1Color ?? "#ed8796",
N2: ctx.state.jlptN2Color ?? "#f5a97f",
N3: ctx.state.jlptN3Color ?? "#f9e2af",
N4: ctx.state.jlptN4Color ?? "#a6e3a1",
N5: ctx.state.jlptN5Color ?? "#8aadf4",
...(style.jlptColors
? {
N1: sanitizeHexColor(style.jlptColors?.N1, ctx.state.jlptN1Color),
N2: sanitizeHexColor(style.jlptColors?.N2, ctx.state.jlptN2Color),
N3: sanitizeHexColor(style.jlptColors?.N3, ctx.state.jlptN3Color),
N4: sanitizeHexColor(style.jlptColors?.N4, ctx.state.jlptN4Color),
N5: sanitizeHexColor(style.jlptColors?.N5, ctx.state.jlptN5Color),
}
: {}),
};
ctx.state.knownWordColor = knownWordColor; ctx.state.knownWordColor = knownWordColor;
ctx.state.nPlusOneColor = nPlusOneColor; ctx.state.nPlusOneColor = nPlusOneColor;
@@ -197,6 +226,16 @@ export function createSubtitleRenderer(ctx: RendererContext) {
knownWordColor, knownWordColor,
); );
ctx.dom.subtitleRoot.style.setProperty("--subtitle-n-plus-one-color", nPlusOneColor); ctx.dom.subtitleRoot.style.setProperty("--subtitle-n-plus-one-color", nPlusOneColor);
ctx.state.jlptN1Color = jlptColors.N1;
ctx.state.jlptN2Color = jlptColors.N2;
ctx.state.jlptN3Color = jlptColors.N3;
ctx.state.jlptN4Color = jlptColors.N4;
ctx.state.jlptN5Color = jlptColors.N5;
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n1-color", jlptColors.N1);
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n2-color", jlptColors.N2);
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n3-color", jlptColors.N3);
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n4-color", jlptColors.N4);
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n5-color", jlptColors.N5);
const secondaryStyle = style.secondary; const secondaryStyle = style.secondary;
if (!secondaryStyle) return; if (!secondaryStyle) return;

View File

@@ -223,6 +223,7 @@ export function mergeTokens(
startPos: prev.startPos, startPos: prev.startPos,
endPos: end, endPos: end,
partOfSpeech: prev.partOfSpeech, partOfSpeech: prev.partOfSpeech,
pos1: prev.pos1 ?? token.pos1,
pos2: prev.pos2 ?? token.pos2, pos2: prev.pos2 ?? token.pos2,
pos3: prev.pos3 ?? token.pos3, pos3: prev.pos3 ?? token.pos3,
isMerged: true, isMerged: true,
@@ -245,6 +246,7 @@ export function mergeTokens(
startPos: start, startPos: start,
endPos: end, endPos: end,
partOfSpeech: token.partOfSpeech, partOfSpeech: token.partOfSpeech,
pos1: token.pos1,
pos2: token.pos2, pos2: token.pos2,
pos3: token.pos3, pos3: token.pos3,
isMerged: false, isMerged: false,

View File

@@ -48,13 +48,17 @@ export interface MergedToken {
startPos: number; startPos: number;
endPos: number; endPos: number;
partOfSpeech: PartOfSpeech; partOfSpeech: PartOfSpeech;
pos1?: string;
pos2?: string; pos2?: string;
pos3?: string; pos3?: string;
isMerged: boolean; isMerged: boolean;
isKnown: boolean; isKnown: boolean;
isNPlusOneTarget: boolean; isNPlusOneTarget: boolean;
jlptLevel?: JlptLevel;
} }
export type JlptLevel = "N1" | "N2" | "N3" | "N4" | "N5";
export interface WindowGeometry { export interface WindowGeometry {
x: number; x: number;
y: number; y: number;
@@ -262,6 +266,7 @@ export interface AnkiConnectConfig {
} }
export interface SubtitleStyleConfig { export interface SubtitleStyleConfig {
enableJlpt?: boolean;
fontFamily?: string; fontFamily?: string;
fontSize?: number; fontSize?: number;
fontColor?: string; fontColor?: string;
@@ -270,6 +275,13 @@ export interface SubtitleStyleConfig {
backgroundColor?: string; backgroundColor?: string;
nPlusOneColor?: string; nPlusOneColor?: string;
knownWordColor?: string; knownWordColor?: string;
jlptColors?: {
N1: string;
N2: string;
N3: string;
N4: string;
N5: string;
};
secondary?: { secondary?: {
fontFamily?: string; fontFamily?: string;
fontSize?: number; fontSize?: number;

1
vendor/yomitan-jlpt-vocab vendored Submodule