refactor: consolidate JLPT token filter utilities

This commit is contained in:
2026-02-15 21:00:00 -08:00
parent e14dad410e
commit 2211c086c0
5 changed files with 61 additions and 56 deletions

View File

@@ -38,6 +38,16 @@ export {
export { openYomitanSettingsWindow } from "./yomitan-settings-service";
export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
export { createJlptVocabularyLookupService } from "./jlpt-vocab-service";
export {
getIgnoredPos1Entries,
JlptIgnoredPos1Entry,
JLPT_EXCLUDED_TERMS,
JLPT_IGNORED_MECAB_POS1,
JLPT_IGNORED_MECAB_POS1_ENTRIES,
JLPT_IGNORED_MECAB_POS1_LIST,
shouldIgnoreJlptByTerm,
shouldIgnoreJlptForMecabPos1,
} from "./jlpt-token-filter";
export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
export {
getJimakuLanguagePreferenceService,

View File

@@ -1,29 +0,0 @@
// Lexical surface forms that must never receive JLPT highlighting.
// Not driven by POS tags; this is a safety net for tokens that fall
// outside the dictionary (demonstratives, interjection-like sounds).
const EXCLUDED_TERM_LIST: readonly string[] = [
  "この", "その", "あの", "どの",
  "これ", "それ", "あれ", "どれ",
  "ここ", "そこ", "あそこ", "どこ",
  "こと",
  "ああ", "ええ", "うう", "おお",
  "はは", "へえ", "ふう", "ほう",
];

export const JLPT_EXCLUDED_TERMS: Set<string> = new Set(EXCLUDED_TERM_LIST);

// Returns true when `term` is on the exclusion list above.
export function shouldIgnoreJlptByTerm(term: string): boolean {
  return JLPT_EXCLUDED_TERMS.has(term);
}

View File

@@ -1,23 +0,0 @@
// Facade module: re-exports the MeCab POS1 exclusion data and exposes
// set-backed helpers used by JLPT token filtering.
import {
JlptIgnoredPos1Entry,
JLPT_IGNORED_MECAB_POS1,
JLPT_IGNORED_MECAB_POS1_ENTRIES,
} from "./jlpt-ignored-mecab-pos1";
// Re-export the raw entries and their type so callers only need this module.
export { JLPT_IGNORED_MECAB_POS1_ENTRIES, JlptIgnoredPos1Entry };
// Data-driven MeCab POS names (pos1) used for JLPT filtering.
export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
JLPT_IGNORED_MECAB_POS1;
// Set view of the ignored pos1 names, built once at module load for
// O(1) membership checks in shouldIgnoreJlptForMecabPos1.
const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
JLPT_IGNORED_MECAB_POS1_LIST,
);
// Returns the full entry objects (pos1 plus its exclusion reason).
// NOTE(review): callers receive the underlying array; the readonly return
// type prevents compile-time mutation but not runtime mutation — confirm
// no caller mutates it.
export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
return JLPT_IGNORED_MECAB_POS1_ENTRIES;
}
// True when the given MeCab pos1 category is excluded from JLPT tagging.
export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
}

View File

@@ -1,10 +1,40 @@
// One MeCab POS1 category excluded from JLPT-level token tagging, paired
// with the rationale for excluding it (typically functional/non-lexical
// words).
export type JlptIgnoredPos1Entry = {
  pos1: string;
  reason: string;
};

// Token-level surface forms excluded from JLPT highlighting. These are
// not tied to POS and act as a safety layer for non-dictionary cases.
const excludedTermList: readonly string[] = [
  "この", "その", "あの", "どの",
  "これ", "それ", "あれ", "どれ",
  "ここ", "そこ", "あそこ", "どこ",
  "こと",
  "ああ", "ええ", "うう", "おお",
  "はは", "へえ", "ふう", "ほう",
];

export const JLPT_EXCLUDED_TERMS: Set<string> = new Set(excludedTermList);

// Returns whether `term` should be skipped for JLPT highlighting.
export function shouldIgnoreJlptByTerm(term: string): boolean {
  return JLPT_EXCLUDED_TERMS.has(term);
}
// MeCab POS1 categories that should be excluded from JLPT-level token tagging.
// These are filtered out because they are typically functional or non-lexical words.
export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
{
pos1: "助詞",
@@ -43,3 +73,18 @@ export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
export const JLPT_IGNORED_MECAB_POS1 = JLPT_IGNORED_MECAB_POS1_ENTRIES.map(
(entry) => entry.pos1,
);
export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
JLPT_IGNORED_MECAB_POS1;
const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
JLPT_IGNORED_MECAB_POS1_LIST,
);
export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
return JLPT_IGNORED_MECAB_POS1_ENTRIES;
}
export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
}

View File

@@ -8,8 +8,10 @@ import {
SubtitleData,
Token,
} from "../../types";
import { shouldIgnoreJlptForMecabPos1 } from "./jlpt-token-filter-config";
import { shouldIgnoreJlptByTerm } from "./jlpt-excluded-terms";
import {
shouldIgnoreJlptForMecabPos1,
shouldIgnoreJlptByTerm,
} from "./jlpt-token-filter";
interface YomitanParseHeadword {
term?: unknown;