From 2211c086c062a8454f97591b52c95ffa80338433 Mon Sep 17 00:00:00 2001
From: sudacode <suda@sudacode.com>
Date: Sun, 15 Feb 2026 21:00:00 -0800
Subject: [PATCH] refactor: consolidate JLPT token filter utilities

---
 src/core/services/index.ts                    | 10 ++++
 src/core/services/jlpt-excluded-terms.ts      | 29 -----------
 src/core/services/jlpt-token-filter-config.ts | 23 ---------
 ...red-mecab-pos1.ts => jlpt-token-filter.ts} | 49 ++++++++++++++++++-
 src/core/services/tokenizer-service.ts        |  6 ++-
 5 files changed, 61 insertions(+), 56 deletions(-)
 delete mode 100644 src/core/services/jlpt-excluded-terms.ts
 delete mode 100644 src/core/services/jlpt-token-filter-config.ts
 rename src/core/services/{jlpt-ignored-mecab-pos1.ts => jlpt-token-filter.ts} (56%)
diff --git a/src/core/services/index.ts b/src/core/services/index.ts
index bbf444b..1ce9d73 100644
--- a/src/core/services/index.ts
+++ b/src/core/services/index.ts
@@ -38,6 +38,16 @@ export {
 export { openYomitanSettingsWindow } from "./yomitan-settings-service";
 export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
 export { createJlptVocabularyLookupService } from "./jlpt-vocab-service";
+export {
+  getIgnoredPos1Entries,
+  type JlptIgnoredPos1Entry, // type-only: erased from the emitted re-export
+  JLPT_EXCLUDED_TERMS,
+  JLPT_IGNORED_MECAB_POS1,
+  JLPT_IGNORED_MECAB_POS1_ENTRIES,
+  JLPT_IGNORED_MECAB_POS1_LIST,
+  shouldIgnoreJlptByTerm,
+  shouldIgnoreJlptForMecabPos1,
+} from "./jlpt-token-filter";
 export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
 export {
   getJimakuLanguagePreferenceService,
diff --git a/src/core/services/jlpt-excluded-terms.ts b/src/core/services/jlpt-excluded-terms.ts
deleted file mode 100644
index 1139300..0000000
--- a/src/core/services/jlpt-excluded-terms.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-// Token-level lexical terms excluded from JLPT highlighting.
-// These are not tied to POS and act as a safety layer for non-dictionary cases.
-export const JLPT_EXCLUDED_TERMS = new Set([
-  "この",
-  "その",
-  "あの",
-  "どの",
-  "これ",
-  "それ",
-  "あれ",
-  "どれ",
-  "ここ",
-  "そこ",
-  "あそこ",
-  "どこ",
-  "こと",
-  "ああ",
-  "ええ",
-  "うう",
-  "おお",
-  "はは",
-  "へえ",
-  "ふう",
-  "ほう",
-]);
-
-export function shouldIgnoreJlptByTerm(term: string): boolean {
-  return JLPT_EXCLUDED_TERMS.has(term);
-}
diff --git a/src/core/services/jlpt-token-filter-config.ts b/src/core/services/jlpt-token-filter-config.ts
deleted file mode 100644
index 7ef63c7..0000000
--- a/src/core/services/jlpt-token-filter-config.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import {
-  JlptIgnoredPos1Entry,
-  JLPT_IGNORED_MECAB_POS1,
-  JLPT_IGNORED_MECAB_POS1_ENTRIES,
-} from "./jlpt-ignored-mecab-pos1";
-
-export { JLPT_IGNORED_MECAB_POS1_ENTRIES, JlptIgnoredPos1Entry };
-
-// Data-driven MeCab POS names (pos1) used for JLPT filtering.
-export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
-  JLPT_IGNORED_MECAB_POS1;
-
-const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
-  JLPT_IGNORED_MECAB_POS1_LIST,
-);
-
-export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
-  return JLPT_IGNORED_MECAB_POS1_ENTRIES;
-}
-
-export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
-  return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
-}
diff --git a/src/core/services/jlpt-ignored-mecab-pos1.ts b/src/core/services/jlpt-token-filter.ts
similarity index 56%
rename from src/core/services/jlpt-ignored-mecab-pos1.ts
rename to src/core/services/jlpt-token-filter.ts
index 6d8b198..f340421 100644
--- a/src/core/services/jlpt-ignored-mecab-pos1.ts
+++ b/src/core/services/jlpt-token-filter.ts
@@ -1,10 +1,40 @@
-// MeCab POS1 categories that should be excluded from JLPT-level token tagging.
-// These are filtered out because they are typically functional or non-lexical words.
 export type JlptIgnoredPos1Entry = {
   pos1: string;
   reason: string;
 };
 
+// Token-level lexical terms excluded from JLPT highlighting.
+// These are not tied to POS and act as a safety layer for non-dictionary cases.
+export const JLPT_EXCLUDED_TERMS = new Set([
+  "この",
+  "その",
+  "あの",
+  "どの",
+  "これ",
+  "それ",
+  "あれ",
+  "どれ",
+  "ここ",
+  "そこ",
+  "あそこ",
+  "どこ",
+  "こと",
+  "ああ",
+  "ええ",
+  "うう",
+  "おお",
+  "はは",
+  "へえ",
+  "ふう",
+  "ほう",
+]);
+
+export function shouldIgnoreJlptByTerm(term: string): boolean {
+  return JLPT_EXCLUDED_TERMS.has(term);
+}
+
+// MeCab POS1 categories that should be excluded from JLPT-level token tagging.
+// These are filtered out because they are typically functional or non-lexical words.
 export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
   {
     pos1: "助詞",
@@ -43,3 +73,23 @@ export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
 export const JLPT_IGNORED_MECAB_POS1 = JLPT_IGNORED_MECAB_POS1_ENTRIES.map(
   (entry) => entry.pos1,
 );
+
+// Data-driven MeCab POS names (pos1) used for JLPT filtering.
+export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
+  JLPT_IGNORED_MECAB_POS1;
+
+// Set view of the list above, built once at module load for lookups.
+const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
+  JLPT_IGNORED_MECAB_POS1_LIST,
+);
+
+// Returns the ignored-pos1 entries (pos1 plus reason) as a read-only view.
+// The shared module-level array is returned as-is, not copied.
+export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
+  return JLPT_IGNORED_MECAB_POS1_ENTRIES;
+}
+
+// True when `pos1` is one of the ignored MeCab pos1 names (exact match).
+export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
+  return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
+}
diff --git a/src/core/services/tokenizer-service.ts b/src/core/services/tokenizer-service.ts
index 0cac83e..a276a68 100644
--- a/src/core/services/tokenizer-service.ts
+++ b/src/core/services/tokenizer-service.ts
@@ -8,8 +8,10 @@ import {
   SubtitleData,
   Token,
 } from "../../types";
-import { shouldIgnoreJlptForMecabPos1 } from "./jlpt-token-filter-config";
-import { shouldIgnoreJlptByTerm } from "./jlpt-excluded-terms";
+import {
+  shouldIgnoreJlptForMecabPos1,
+  shouldIgnoreJlptByTerm,
+} from "./jlpt-token-filter";
 
 interface YomitanParseHeadword {
   term?: unknown;