mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
Add opt-in JLPT tagging flow
This commit is contained in:
@@ -174,6 +174,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
||||
ffmpeg_path: "",
|
||||
},
|
||||
subtitleStyle: {
|
||||
enableJlpt: false,
|
||||
fontFamily:
|
||||
"Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
||||
fontSize: 35,
|
||||
@@ -183,6 +184,13 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
||||
backgroundColor: "rgba(54, 58, 79, 0.5)",
|
||||
nPlusOneColor: "#c6a0f6",
|
||||
knownWordColor: "#a6da95",
|
||||
jlptColors: {
|
||||
N1: "#ed8796",
|
||||
N2: "#f5a97f",
|
||||
N3: "#f9e2af",
|
||||
N4: "#a6e3a1",
|
||||
N5: "#8aadf4",
|
||||
},
|
||||
secondary: {
|
||||
fontSize: 24,
|
||||
fontColor: "#ffffff",
|
||||
@@ -280,6 +288,13 @@ export const CONFIG_OPTION_REGISTRY: ConfigOptionRegistryEntry[] = [
|
||||
defaultValue: DEFAULT_CONFIG.websocket.port,
|
||||
description: "Built-in subtitle websocket server port.",
|
||||
},
|
||||
{
|
||||
path: "subtitleStyle.enableJlpt",
|
||||
kind: "boolean",
|
||||
defaultValue: DEFAULT_CONFIG.subtitleStyle.enableJlpt,
|
||||
description: "Enable JLPT vocabulary level underlines. "
|
||||
+ "When disabled, JLPT tagging lookup and underlines are skipped.",
|
||||
},
|
||||
{
|
||||
path: "ankiConnect.enabled",
|
||||
kind: "boolean",
|
||||
|
||||
@@ -442,6 +442,18 @@ export class ConfigService {
|
||||
: {}),
|
||||
},
|
||||
};
|
||||
|
||||
const enableJlpt = asBoolean((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt);
|
||||
if (enableJlpt !== undefined) {
|
||||
resolved.subtitleStyle.enableJlpt = enableJlpt;
|
||||
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
|
||||
warn(
|
||||
"subtitleStyle.enableJlpt",
|
||||
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
|
||||
resolved.subtitleStyle.enableJlpt,
|
||||
"Expected boolean.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (isObject(src.ankiConnect)) {
|
||||
|
||||
@@ -37,6 +37,7 @@ export {
|
||||
} from "./runtime-config-service";
|
||||
export { openYomitanSettingsWindow } from "./yomitan-settings-service";
|
||||
export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
|
||||
export { createJlptVocabularyLookupService } from "./jlpt-vocab-service";
|
||||
export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
|
||||
export {
|
||||
getJimakuLanguagePreferenceService,
|
||||
|
||||
29
src/core/services/jlpt-excluded-terms.ts
Normal file
29
src/core/services/jlpt-excluded-terms.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
// Token-level lexical terms excluded from JLPT highlighting.
|
||||
// These are not tied to POS and act as a safety layer for non-dictionary cases.
|
||||
export const JLPT_EXCLUDED_TERMS = new Set([
|
||||
"この",
|
||||
"その",
|
||||
"あの",
|
||||
"どの",
|
||||
"これ",
|
||||
"それ",
|
||||
"あれ",
|
||||
"どれ",
|
||||
"ここ",
|
||||
"そこ",
|
||||
"あそこ",
|
||||
"どこ",
|
||||
"こと",
|
||||
"ああ",
|
||||
"ええ",
|
||||
"うう",
|
||||
"おお",
|
||||
"はは",
|
||||
"へえ",
|
||||
"ふう",
|
||||
"ほう",
|
||||
]);
|
||||
|
||||
export function shouldIgnoreJlptByTerm(term: string): boolean {
|
||||
return JLPT_EXCLUDED_TERMS.has(term);
|
||||
}
|
||||
45
src/core/services/jlpt-ignored-mecab-pos1.ts
Normal file
45
src/core/services/jlpt-ignored-mecab-pos1.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
// MeCab POS1 categories that should be excluded from JLPT-level token tagging.
|
||||
// These are filtered out because they are typically functional or non-lexical words.
|
||||
export type JlptIgnoredPos1Entry = {
|
||||
pos1: string;
|
||||
reason: string;
|
||||
};
|
||||
|
||||
export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
|
||||
{
|
||||
pos1: "助詞",
|
||||
reason: "Particles (ko/kara/nagara etc.): mostly grammatical glue, not independent vocabulary.",
|
||||
},
|
||||
{
|
||||
pos1: "助動詞",
|
||||
reason: "Auxiliary verbs (past tense, politeness, modality): grammar helpers.",
|
||||
},
|
||||
{
|
||||
pos1: "記号",
|
||||
reason: "Symbols/punctuation and symbols-like tokens.",
|
||||
},
|
||||
{
|
||||
pos1: "補助記号",
|
||||
reason: "Auxiliary symbols (e.g. bracket-like or markup tokens).",
|
||||
},
|
||||
{
|
||||
pos1: "連体詞",
|
||||
reason: "Adnominal forms (e.g. demonstratives like \"この\").",
|
||||
},
|
||||
{
|
||||
pos1: "感動詞",
|
||||
reason: "Interjections/onomatopoeia-style exclamations.",
|
||||
},
|
||||
{
|
||||
pos1: "接続詞",
|
||||
reason: "Conjunctions that connect clauses, usually not target vocab items.",
|
||||
},
|
||||
{
|
||||
pos1: "接頭詞",
|
||||
reason: "Prefixes/prefix-like grammatical elements.",
|
||||
},
|
||||
] as const satisfies readonly JlptIgnoredPos1Entry[];
|
||||
|
||||
export const JLPT_IGNORED_MECAB_POS1 = JLPT_IGNORED_MECAB_POS1_ENTRIES.map(
|
||||
(entry) => entry.pos1,
|
||||
);
|
||||
23
src/core/services/jlpt-token-filter-config.ts
Normal file
23
src/core/services/jlpt-token-filter-config.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import {
|
||||
JlptIgnoredPos1Entry,
|
||||
JLPT_IGNORED_MECAB_POS1,
|
||||
JLPT_IGNORED_MECAB_POS1_ENTRIES,
|
||||
} from "./jlpt-ignored-mecab-pos1";
|
||||
|
||||
export { JLPT_IGNORED_MECAB_POS1_ENTRIES, JlptIgnoredPos1Entry };
|
||||
|
||||
// Data-driven MeCab POS names (pos1) used for JLPT filtering.
|
||||
export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] =
|
||||
JLPT_IGNORED_MECAB_POS1;
|
||||
|
||||
const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(
|
||||
JLPT_IGNORED_MECAB_POS1_LIST,
|
||||
);
|
||||
|
||||
export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
|
||||
return JLPT_IGNORED_MECAB_POS1_ENTRIES;
|
||||
}
|
||||
|
||||
export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
|
||||
return JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
|
||||
}
|
||||
194
src/core/services/jlpt-vocab-service.ts
Normal file
194
src/core/services/jlpt-vocab-service.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
|
||||
import type { JlptLevel } from "../../types";
|
||||
|
||||
export interface JlptVocabLookupOptions {
|
||||
searchPaths: string[];
|
||||
log: (message: string) => void;
|
||||
}
|
||||
|
||||
const JLPT_BANK_FILES: { level: JlptLevel; filename: string }[] = [
|
||||
{ level: "N1", filename: "term_meta_bank_1.json" },
|
||||
{ level: "N2", filename: "term_meta_bank_2.json" },
|
||||
{ level: "N3", filename: "term_meta_bank_3.json" },
|
||||
{ level: "N4", filename: "term_meta_bank_4.json" },
|
||||
{ level: "N5", filename: "term_meta_bank_5.json" },
|
||||
];
|
||||
|
||||
const NOOP_LOOKUP = (): null => null;
|
||||
|
||||
function normalizeJlptTerm(value: string): string {
|
||||
return value.trim();
|
||||
}
|
||||
|
||||
function hasFrequencyDisplayValue(meta: unknown): boolean {
|
||||
if (!meta || typeof meta !== "object") return false;
|
||||
const frequency = (meta as { frequency?: unknown }).frequency;
|
||||
if (!frequency || typeof frequency !== "object") return false;
|
||||
return Object.prototype.hasOwnProperty.call(
|
||||
frequency as Record<string, unknown>,
|
||||
"displayValue",
|
||||
);
|
||||
}
|
||||
|
||||
function addEntriesToMap(
|
||||
rawEntries: unknown,
|
||||
level: JlptLevel,
|
||||
terms: Map<string, JlptLevel>,
|
||||
log: (message: string) => void,
|
||||
): void {
|
||||
if (!Array.isArray(rawEntries)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const rawEntry of rawEntries) {
|
||||
if (!Array.isArray(rawEntry)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const [term, _entryId, meta] = rawEntry as [unknown, unknown, unknown];
|
||||
if (typeof term !== "string") {
|
||||
continue;
|
||||
}
|
||||
|
||||
const normalizedTerm = normalizeJlptTerm(term);
|
||||
if (!normalizedTerm) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!hasFrequencyDisplayValue(meta)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!terms.has(normalizedTerm)) {
|
||||
terms.set(normalizedTerm, level);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (terms.get(normalizedTerm) !== "N1" && level === "N1") {
|
||||
terms.set(normalizedTerm, level);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (terms.get(normalizedTerm) !== "N1" && terms.get(normalizedTerm) !== "N2" && level === "N2") {
|
||||
terms.set(normalizedTerm, level);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
terms.get(normalizedTerm) !== "N1" &&
|
||||
terms.get(normalizedTerm) !== "N2" &&
|
||||
terms.get(normalizedTerm) !== "N3" &&
|
||||
level === "N3"
|
||||
) {
|
||||
terms.set(normalizedTerm, level);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
terms.get(normalizedTerm) !== "N1" &&
|
||||
terms.get(normalizedTerm) !== "N2" &&
|
||||
terms.get(normalizedTerm) !== "N3" &&
|
||||
terms.get(normalizedTerm) !== "N4" &&
|
||||
level === "N4"
|
||||
) {
|
||||
terms.set(normalizedTerm, level);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (
|
||||
terms.get(normalizedTerm) !== "N1" &&
|
||||
terms.get(normalizedTerm) !== "N2" &&
|
||||
terms.get(normalizedTerm) !== "N3" &&
|
||||
terms.get(normalizedTerm) !== "N4" &&
|
||||
terms.get(normalizedTerm) !== "N5" &&
|
||||
level === "N5"
|
||||
) {
|
||||
terms.set(normalizedTerm, level);
|
||||
}
|
||||
|
||||
log(
|
||||
`JLPT dictionary already has ${normalizedTerm} as ${terms.get(normalizedTerm)}; keeping that level instead of ${level}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function collectDictionaryFromPath(
|
||||
dictionaryPath: string,
|
||||
log: (message: string) => void,
|
||||
): Map<string, JlptLevel> {
|
||||
const terms = new Map<string, JlptLevel>();
|
||||
|
||||
for (const bank of JLPT_BANK_FILES) {
|
||||
const bankPath = path.join(dictionaryPath, bank.filename);
|
||||
if (!fs.existsSync(bankPath)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let rawText: string;
|
||||
try {
|
||||
rawText = fs.readFileSync(bankPath, "utf-8");
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
let rawEntries: unknown;
|
||||
try {
|
||||
rawEntries = JSON.parse(rawText) as unknown;
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
addEntriesToMap(rawEntries, bank.level, terms, log);
|
||||
}
|
||||
|
||||
return terms;
|
||||
}
|
||||
|
||||
export async function createJlptVocabularyLookupService(
|
||||
options: JlptVocabLookupOptions,
|
||||
): Promise<(term: string) => JlptLevel | null> {
|
||||
const attemptedPaths: string[] = [];
|
||||
let foundDirectoryCount = 0;
|
||||
let foundBankCount = 0;
|
||||
for (const dictionaryPath of options.searchPaths) {
|
||||
attemptedPaths.push(dictionaryPath);
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!fs.statSync(dictionaryPath).isDirectory()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
foundDirectoryCount += 1;
|
||||
|
||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
if (terms.size > 0) {
|
||||
foundBankCount += 1;
|
||||
options.log(
|
||||
`JLPT dictionary loaded from ${dictionaryPath} (${terms.size} entries)`,
|
||||
);
|
||||
return (term: string): JlptLevel | null => {
|
||||
if (!term) return null;
|
||||
const normalized = normalizeJlptTerm(term);
|
||||
return normalized ? terms.get(normalized) ?? null : null;
|
||||
};
|
||||
}
|
||||
|
||||
options.log(
|
||||
`JLPT dictionary directory exists but contains no readable term_meta_bank_*.json files: ${dictionaryPath}`,
|
||||
);
|
||||
}
|
||||
|
||||
options.log(
|
||||
`JLPT dictionary not found. Searched ${attemptedPaths.length} candidate path(s): ${attemptedPaths.join(", ")}`,
|
||||
);
|
||||
if (foundDirectoryCount > 0 && foundBankCount === 0) {
|
||||
options.log(
|
||||
"JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.",
|
||||
);
|
||||
}
|
||||
return NOOP_LOOKUP;
|
||||
}
|
||||
@@ -92,6 +92,7 @@ export async function runAppReadyRuntimeService(
|
||||
): Promise<void> {
|
||||
deps.loadSubtitlePosition();
|
||||
deps.resolveKeybindings();
|
||||
await deps.createMecabTokenizerAndCheck();
|
||||
deps.createMpvClient();
|
||||
|
||||
deps.reloadConfig();
|
||||
@@ -117,7 +118,6 @@ export async function runAppReadyRuntimeService(
|
||||
deps.log("mpv_websocket detected, skipping built-in WebSocket server");
|
||||
}
|
||||
|
||||
await deps.createMecabTokenizerAndCheck();
|
||||
deps.createSubtitleTimingTracker();
|
||||
await deps.loadYomitanExtension();
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ function makeDeps(
|
||||
setYomitanParserInitPromise: () => {},
|
||||
isKnownWord: () => false,
|
||||
getKnownWordMatchMode: () => "headword",
|
||||
getJlptLevel: () => null,
|
||||
tokenizeWithMecab: async () => null,
|
||||
...overrides,
|
||||
};
|
||||
@@ -43,10 +44,171 @@ function makeDepsFromMecabTokenizer(
|
||||
getMecabTokenizer: () => ({
|
||||
tokenize,
|
||||
}),
|
||||
getJlptLevel: () => null,
|
||||
...overrides,
|
||||
});
|
||||
}
|
||||
|
||||
// Yomitan parsing path: the stubbed parser window returns two segments
// (猫 + です); the lookup knows only 猫 as N5, and the resulting merged
// token must carry that level.
test("tokenizeSubtitleService assigns JLPT level to parsed Yomitan tokens", async () => {
  const result = await tokenizeSubtitleService(
    "猫です",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      // Fake BrowserWindow whose executeJavaScript yields a canned
      // scanning-parser result instead of driving a real extension.
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "猫",
                    reading: "ねこ",
                    headwords: [[{ term: "猫" }]],
                  },
                  {
                    text: "です",
                    reading: "です",
                    headwords: [[{ term: "です" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "猫" ? "N5" : null),
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N5");
});
|
||||
|
||||
// Exclusion-list path: even though getJlptLevel reports N5 for the
// demonstrative この, the term exclusion list must suppress the level.
test("tokenizeSubtitleService skips JLPT level for excluded demonstratives", async () => {
  const result = await tokenizeSubtitleService(
    "この",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      // Fake parser window returning a single この segment.
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "この",
                    reading: "この",
                    headwords: [[{ term: "この" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "この" ? "N5" : null),
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||
|
||||
// Repeated-kana SFX path: ああ would get N5 from the lookup, but the
// repeated-kana heuristic must keep it unleveled.
test("tokenizeSubtitleService skips JLPT level for repeated kana SFX", async () => {
  const result = await tokenizeSubtitleService(
    "ああ",
    makeDeps({
      getYomitanExt: () => ({ id: "dummy-ext" } as any),
      // Fake parser window returning a single ああ segment.
      getYomitanParserWindow: () => ({
        isDestroyed: () => false,
        webContents: {
          executeJavaScript: async () => [
            {
              source: "scanning-parser",
              index: 0,
              content: [
                [
                  {
                    text: "ああ",
                    reading: "ああ",
                    headwords: [[{ term: "ああ" }]],
                  },
                ],
              ],
            },
          ],
        },
      } as unknown as Electron.BrowserWindow),
      tokenizeWithMecab: async () => null,
      getJlptLevel: (text) => (text === "ああ" ? "N5" : null),
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||
|
||||
// MeCab fallback path: a single noun token (猫) whose lookup returns N4
// must carry that level on the merged token.
test("tokenizeSubtitleService assigns JLPT level to mecab tokens", async () => {
  const result = await tokenizeSubtitleService(
    "猫です",
    makeDepsFromMecabTokenizer(async () => [
      {
        word: "猫",
        partOfSpeech: PartOfSpeech.noun,
        pos1: "",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "猫",
        katakanaReading: "ネコ",
        pronunciation: "ネコ",
      },
    ], {
      getJlptLevel: (text) => (text === "猫" ? "N4" : null),
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N4");
});
|
||||
|
||||
// POS-filter path: a particle token (pos1 助詞) is on the ignored POS1
// list, so its jlptLevel must stay undefined even though the lookup knows it.
test("tokenizeSubtitleService skips JLPT level for mecab tokens marked as ineligible", async () => {
  const result = await tokenizeSubtitleService(
    "は",
    makeDepsFromMecabTokenizer(async () => [
      {
        word: "は",
        partOfSpeech: PartOfSpeech.particle,
        pos1: "助詞",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "は",
        katakanaReading: "ハ",
        pronunciation: "ハ",
      },
    ], {
      getJlptLevel: (text) => (text === "は" ? "N5" : null),
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.pos1, "助詞");
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||
|
||||
test("tokenizeSubtitleService returns null tokens for empty normalized text", async () => {
|
||||
const result = await tokenizeSubtitleService(" \\n ", makeDeps());
|
||||
assert.deepEqual(result, { text: " \\n ", tokens: null });
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { BrowserWindow, Extension, session } from "electron";
|
||||
import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
|
||||
import {
|
||||
JlptLevel,
|
||||
MergedToken,
|
||||
NPlusOneMatchMode,
|
||||
PartOfSpeech,
|
||||
SubtitleData,
|
||||
Token,
|
||||
} from "../../types";
|
||||
import { shouldIgnoreJlptForMecabPos1 } from "./jlpt-token-filter-config";
|
||||
import { shouldIgnoreJlptByTerm } from "./jlpt-excluded-terms";
|
||||
|
||||
interface YomitanParseHeadword {
|
||||
term?: unknown;
|
||||
@@ -34,6 +37,8 @@ export interface TokenizerServiceDeps {
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
isKnownWord: (text: string) => boolean;
|
||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getJlptEnabled?: () => boolean;
|
||||
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
|
||||
}
|
||||
|
||||
@@ -51,6 +56,8 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
isKnownWord: (text: string) => boolean;
|
||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getMecabTokenizer: () => MecabTokenizerLike | null;
|
||||
}
|
||||
|
||||
@@ -67,6 +74,8 @@ export function createTokenizerDepsRuntimeService(
|
||||
setYomitanParserInitPromise: options.setYomitanParserInitPromise,
|
||||
isKnownWord: options.isKnownWord,
|
||||
getKnownWordMatchMode: options.getKnownWordMatchMode,
|
||||
getJlptLevel: options.getJlptLevel,
|
||||
getJlptEnabled: options.getJlptEnabled,
|
||||
tokenizeWithMecab: async (text) => {
|
||||
const mecabTokenizer = options.getMecabTokenizer();
|
||||
if (!mecabTokenizer) {
|
||||
@@ -112,6 +121,142 @@ function applyKnownWordMarking(
|
||||
});
|
||||
}
|
||||
|
||||
function resolveJlptLookupText(token: MergedToken): string {
|
||||
if (token.headword && token.headword.length > 0) {
|
||||
return token.headword;
|
||||
}
|
||||
if (token.reading && token.reading.length > 0) {
|
||||
return token.reading;
|
||||
}
|
||||
return token.surface;
|
||||
}
|
||||
|
||||
function normalizeJlptTextForExclusion(text: string): string {
|
||||
const raw = text.trim();
|
||||
if (!raw) {
|
||||
return "";
|
||||
}
|
||||
|
||||
let normalized = "";
|
||||
for (const char of raw) {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (code >= 0x30a1 && code <= 0x30f6) {
|
||||
normalized += String.fromCodePoint(code - 0x60);
|
||||
continue;
|
||||
}
|
||||
|
||||
normalized += char;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function isKanaChar(char: string): boolean {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
}
|
||||
|
||||
function isRepeatedKanaSfx(text: string): boolean {
|
||||
const normalized = text.trim();
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const chars = [...normalized];
|
||||
if (!chars.every(isKanaChar)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const counts = new Map<string, number>();
|
||||
let hasAdjacentRepeat = false;
|
||||
|
||||
for (let i = 0; i < chars.length; i += 1) {
|
||||
const char = chars[i];
|
||||
counts.set(char, (counts.get(char) ?? 0) + 1);
|
||||
if (i > 0 && chars[i] === chars[i - 1]) {
|
||||
hasAdjacentRepeat = true;
|
||||
}
|
||||
}
|
||||
|
||||
const topCount = Math.max(...counts.values());
|
||||
if (chars.length <= 2) {
|
||||
return hasAdjacentRepeat || topCount >= 2;
|
||||
}
|
||||
|
||||
if (hasAdjacentRepeat) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return topCount >= Math.ceil(chars.length / 2);
|
||||
}
|
||||
|
||||
function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) return false;
|
||||
|
||||
const candidates = [
|
||||
resolveJlptLookupText(token),
|
||||
token.surface,
|
||||
token.reading,
|
||||
token.headword,
|
||||
].filter((candidate): candidate is string => typeof candidate === "string" && candidate.length > 0);
|
||||
|
||||
for (const candidate of candidates) {
|
||||
const normalizedCandidate = normalizeJlptTextForExclusion(candidate);
|
||||
if (!normalizedCandidate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const trimmedCandidate = candidate.trim();
|
||||
if (
|
||||
shouldIgnoreJlptByTerm(trimmedCandidate) ||
|
||||
shouldIgnoreJlptByTerm(normalizedCandidate)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (
|
||||
isRepeatedKanaSfx(candidate) ||
|
||||
isRepeatedKanaSfx(normalizedCandidate)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function applyJlptMarking(
|
||||
tokens: MergedToken[],
|
||||
getJlptLevel: (text: string) => JlptLevel | null,
|
||||
): MergedToken[] {
|
||||
return tokens.map((token) => {
|
||||
if (!isJlptEligibleToken(token)) {
|
||||
return { ...token, jlptLevel: undefined };
|
||||
}
|
||||
|
||||
const primaryLevel = getJlptLevel(resolveJlptLookupText(token));
|
||||
const fallbackLevel = getJlptLevel(token.surface);
|
||||
|
||||
return {
|
||||
...token,
|
||||
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function extractYomitanHeadword(segment: YomitanParseSegment): string {
|
||||
const headwords = segment.headwords;
|
||||
if (!Array.isArray(headwords) || headwords.length === 0) {
|
||||
@@ -131,6 +276,7 @@ function mapYomitanParseResultsToMergedTokens(
|
||||
parseResults: unknown,
|
||||
isKnownWord: (text: string) => boolean,
|
||||
knownWordMatchMode: NPlusOneMatchMode,
|
||||
getJlptLevel: (text: string) => JlptLevel | null,
|
||||
): MergedToken[] | null {
|
||||
if (!Array.isArray(parseResults) || parseResults.length === 0) {
|
||||
return null;
|
||||
@@ -205,6 +351,7 @@ function mapYomitanParseResultsToMergedTokens(
|
||||
startPos: start,
|
||||
endPos: end,
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: "",
|
||||
isMerged: true,
|
||||
isNPlusOneTarget: false,
|
||||
isKnown: (() => {
|
||||
@@ -221,6 +368,94 @@ function mapYomitanParseResultsToMergedTokens(
|
||||
return tokens.length > 0 ? tokens : null;
|
||||
}
|
||||
|
||||
function pickClosestMecabPos1(
|
||||
token: MergedToken,
|
||||
mecabTokens: MergedToken[],
|
||||
): string | undefined {
|
||||
if (mecabTokens.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const tokenStart = token.startPos ?? 0;
|
||||
const tokenEnd = token.endPos ?? tokenStart + token.surface.length;
|
||||
|
||||
let bestPos1: string | undefined;
|
||||
let bestOverlap = 0;
|
||||
let bestSpan = 0;
|
||||
let bestStart = Number.MAX_SAFE_INTEGER;
|
||||
|
||||
for (const mecabToken of mecabTokens) {
|
||||
if (!mecabToken.pos1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const mecabStart = mecabToken.startPos ?? 0;
|
||||
const mecabEnd = mecabToken.endPos ?? mecabStart + mecabToken.surface.length;
|
||||
const overlapStart = Math.max(tokenStart, mecabStart);
|
||||
const overlapEnd = Math.min(tokenEnd, mecabEnd);
|
||||
const overlap = Math.max(0, overlapEnd - overlapStart);
|
||||
if (overlap === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const span = mecabEnd - mecabStart;
|
||||
if (
|
||||
overlap > bestOverlap ||
|
||||
(overlap === bestOverlap &&
|
||||
(span > bestSpan ||
|
||||
(span === bestSpan && mecabStart < bestStart)))
|
||||
) {
|
||||
bestOverlap = overlap;
|
||||
bestSpan = span;
|
||||
bestStart = mecabStart;
|
||||
bestPos1 = mecabToken.pos1;
|
||||
}
|
||||
}
|
||||
|
||||
return bestOverlap > 0 ? bestPos1 : undefined;
|
||||
}
|
||||
|
||||
async function enrichYomitanPos1(
|
||||
tokens: MergedToken[],
|
||||
deps: TokenizerServiceDeps,
|
||||
text: string,
|
||||
): Promise<MergedToken[]> {
|
||||
if (!tokens || tokens.length === 0) {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
let mecabTokens: MergedToken[] | null = null;
|
||||
try {
|
||||
mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
"Failed to enrich Yomitan tokens with MeCab POS:",
|
||||
(err as Error).message,
|
||||
);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
if (!mecabTokens || mecabTokens.length === 0) {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
return tokens.map((token) => {
|
||||
if (token.pos1) {
|
||||
return token;
|
||||
}
|
||||
|
||||
const pos1 = pickClosestMecabPos1(token, mecabTokens);
|
||||
if (!pos1) {
|
||||
return token;
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
pos1,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureYomitanParserWindow(
|
||||
deps: TokenizerServiceDeps,
|
||||
): Promise<boolean> {
|
||||
@@ -356,11 +591,17 @@ async function parseWithYomitanInternalParser(
|
||||
script,
|
||||
true,
|
||||
);
|
||||
return mapYomitanParseResultsToMergedTokens(
|
||||
const yomitanTokens = mapYomitanParseResultsToMergedTokens(
|
||||
parseResults,
|
||||
deps.isKnownWord,
|
||||
deps.getKnownWordMatchMode(),
|
||||
deps.getJlptLevel,
|
||||
);
|
||||
if (!yomitanTokens || yomitanTokens.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return enrichYomitanPos1(yomitanTokens, deps, text);
|
||||
} catch (err) {
|
||||
console.error("Yomitan parser request failed:", (err as Error).message);
|
||||
return null;
|
||||
@@ -385,6 +626,7 @@ export async function tokenizeSubtitleService(
|
||||
.replace(/\n/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
const jlptEnabled = deps.getJlptEnabled?.() !== false;
|
||||
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
|
||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||
@@ -393,7 +635,10 @@ export async function tokenizeSubtitleService(
|
||||
deps.isKnownWord,
|
||||
deps.getKnownWordMatchMode(),
|
||||
);
|
||||
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
|
||||
const jlptMarkedTokens = jlptEnabled
|
||||
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
|
||||
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
|
||||
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -404,7 +649,10 @@ export async function tokenizeSubtitleService(
|
||||
deps.isKnownWord,
|
||||
deps.getKnownWordMatchMode(),
|
||||
);
|
||||
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
|
||||
const jlptMarkedTokens = jlptEnabled
|
||||
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
|
||||
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
|
||||
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Tokenization error:", (err as Error).message);
|
||||
|
||||
@@ -59,6 +59,7 @@ export async function loadYomitanExtensionService(
|
||||
deps: YomitanExtensionLoaderDeps,
|
||||
): Promise<Extension | null> {
|
||||
const searchPaths = [
|
||||
path.join(__dirname, "..", "..", "vendor", "yomitan"),
|
||||
path.join(__dirname, "..", "..", "..", "vendor", "yomitan"),
|
||||
path.join(process.resourcesPath, "yomitan"),
|
||||
"/usr/share/SubMiner/yomitan",
|
||||
|
||||
141
src/main.ts
141
src/main.ts
@@ -95,6 +95,7 @@ import {
|
||||
createOverlayContentMeasurementStoreService,
|
||||
createOverlayWindowService,
|
||||
createTokenizerDepsRuntimeService,
|
||||
createJlptVocabularyLookupService,
|
||||
cycleSecondarySubModeService,
|
||||
enforceOverlayLayerOrderService,
|
||||
ensureOverlayWindowLevelService,
|
||||
@@ -227,6 +228,8 @@ const isDev =
|
||||
process.argv.includes("--dev") || process.argv.includes("--debug");
|
||||
const texthookerService = new TexthookerService();
|
||||
const subtitleWsService = new SubtitleWebSocketService();
|
||||
let jlptDictionaryLookupInitialized = false;
|
||||
let jlptDictionaryLookupInitialization: Promise<void> | null = null;
|
||||
const appLogger = {
|
||||
logInfo: (message: string) => {
|
||||
console.log(message);
|
||||
@@ -464,6 +467,139 @@ function loadSubtitlePosition(): SubtitlePosition | null {
|
||||
return appState.subtitlePosition;
|
||||
}
|
||||
|
||||
function getJlptDictionarySearchPaths(): string[] {
|
||||
const homeDir = os.homedir();
|
||||
const userDataPath = app.getPath("userData");
|
||||
return [
|
||||
path.join(__dirname, "..", "..", "vendor", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
__dirname,
|
||||
"..",
|
||||
"..",
|
||||
"vendor",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(__dirname, "..", "..", "..", "vendor", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
__dirname,
|
||||
"..",
|
||||
"..",
|
||||
"..",
|
||||
"vendor",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(process.resourcesPath, "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
process.resourcesPath,
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(app.getAppPath(), "vendor", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
app.getAppPath(),
|
||||
"vendor",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(process.resourcesPath, "app.asar", "vendor", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
process.resourcesPath,
|
||||
"app.asar",
|
||||
"vendor",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(USER_DATA_PATH, "yomitan-jlpt-vocab"),
|
||||
path.join(USER_DATA_PATH, "yomitan-jlpt-vocab", "yomitan-jlpt-vocab"),
|
||||
path.join(userDataPath, "yomitan-jlpt-vocab"),
|
||||
path.join(userDataPath, "yomitan-jlpt-vocab", "yomitan-jlpt-vocab"),
|
||||
path.join(homeDir, ".config", "SubMiner", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
homeDir,
|
||||
".config",
|
||||
"SubMiner",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(homeDir, ".config", "subminer", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
homeDir,
|
||||
".config",
|
||||
"subminer",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(
|
||||
homeDir,
|
||||
"Library",
|
||||
"Application Support",
|
||||
"SubMiner",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(
|
||||
homeDir,
|
||||
"Library",
|
||||
"Application Support",
|
||||
"SubMiner",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(
|
||||
homeDir,
|
||||
"Library",
|
||||
"Application Support",
|
||||
"subminer",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(
|
||||
homeDir,
|
||||
"Library",
|
||||
"Application Support",
|
||||
"subminer",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
path.join(process.cwd(), "vendor", "yomitan-jlpt-vocab"),
|
||||
path.join(
|
||||
process.cwd(),
|
||||
"vendor",
|
||||
"yomitan-jlpt-vocab",
|
||||
"yomitan-jlpt-vocab",
|
||||
),
|
||||
];
|
||||
}
|
||||
|
||||
async function initializeJlptDictionaryLookup(): Promise<void> {
|
||||
appState.jlptLevelLookup = await createJlptVocabularyLookupService({
|
||||
searchPaths: getJlptDictionarySearchPaths(),
|
||||
log: (message) => {
|
||||
console.log(`[JLPT] ${message}`);
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureJlptDictionaryLookup(): Promise<void> {
|
||||
if (!getResolvedConfig().subtitleStyle.enableJlpt) {
|
||||
return;
|
||||
}
|
||||
if (jlptDictionaryLookupInitialized) {
|
||||
return;
|
||||
}
|
||||
if (!jlptDictionaryLookupInitialization) {
|
||||
jlptDictionaryLookupInitialization = initializeJlptDictionaryLookup()
|
||||
.then(() => {
|
||||
jlptDictionaryLookupInitialized = true;
|
||||
})
|
||||
.catch((error) => {
|
||||
jlptDictionaryLookupInitialization = null;
|
||||
throw error;
|
||||
});
|
||||
}
|
||||
await jlptDictionaryLookupInitialization;
|
||||
}
|
||||
|
||||
function saveSubtitlePosition(position: SubtitlePosition): void {
|
||||
appState.subtitlePosition = position;
|
||||
saveSubtitlePositionService({
|
||||
@@ -804,6 +940,7 @@ function updateMpvSubtitleRenderMetrics(
|
||||
}
|
||||
|
||||
async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
|
||||
await ensureJlptDictionaryLookup();
|
||||
return tokenizeSubtitleService(
|
||||
text,
|
||||
createTokenizerDepsRuntimeService({
|
||||
@@ -825,6 +962,9 @@ async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
|
||||
getKnownWordMatchMode: () =>
|
||||
appState.ankiIntegration?.getKnownWordMatchMode() ??
|
||||
getResolvedConfig().ankiConnect.nPlusOne.matchMode,
|
||||
getJlptLevel: (text) => appState.jlptLevelLookup(text),
|
||||
getJlptEnabled: () =>
|
||||
getResolvedConfig().subtitleStyle.enableJlpt,
|
||||
getMecabTokenizer: () => appState.mecabTokenizer,
|
||||
}),
|
||||
);
|
||||
@@ -1345,6 +1485,7 @@ registerIpcRuntimeServices({
|
||||
...resolvedConfig.subtitleStyle,
|
||||
nPlusOneColor: resolvedConfig.ankiConnect.nPlusOne.nPlusOne,
|
||||
knownWordColor: resolvedConfig.ankiConnect.nPlusOne.knownWord,
|
||||
enableJlpt: resolvedConfig.subtitleStyle.enableJlpt,
|
||||
};
|
||||
},
|
||||
saveSubtitlePosition: (position: unknown) =>
|
||||
|
||||
@@ -6,6 +6,7 @@ import type {
|
||||
SecondarySubMode,
|
||||
SubtitlePosition,
|
||||
KikuFieldGroupingChoice,
|
||||
JlptLevel,
|
||||
} from "../types";
|
||||
import type { CliArgs } from "../cli/args";
|
||||
import type { SubtitleTimingTracker } from "../subtitle-timing-tracker";
|
||||
@@ -53,6 +54,7 @@ export interface AppState {
|
||||
backendOverride: string | null;
|
||||
autoStartOverlay: boolean;
|
||||
texthookerOnlyMode: boolean;
|
||||
jlptLevelLookup: (term: string) => JlptLevel | null;
|
||||
}
|
||||
|
||||
export interface AppStateInitialValues {
|
||||
@@ -112,6 +114,7 @@ export function createAppState(values: AppStateInitialValues): AppState {
|
||||
backendOverride: values.backendOverride ?? null,
|
||||
autoStartOverlay: values.autoStartOverlay ?? false,
|
||||
texthookerOnlyMode: values.texthookerOnlyMode ?? false,
|
||||
jlptLevelLookup: () => null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -71,6 +71,11 @@ export type RendererState = {
|
||||
|
||||
knownWordColor: string;
|
||||
nPlusOneColor: string;
|
||||
jlptN1Color: string;
|
||||
jlptN2Color: string;
|
||||
jlptN3Color: string;
|
||||
jlptN4Color: string;
|
||||
jlptN5Color: string;
|
||||
|
||||
keybindingsMap: Map<string, (string | number)[]>;
|
||||
chordPending: boolean;
|
||||
@@ -130,6 +135,11 @@ export function createRendererState(): RendererState {
|
||||
|
||||
knownWordColor: "#a6da95",
|
||||
nPlusOneColor: "#c6a0f6",
|
||||
jlptN1Color: "#ed8796",
|
||||
jlptN2Color: "#f5a97f",
|
||||
jlptN3Color: "#f9e2af",
|
||||
jlptN4Color: "#a6e3a1",
|
||||
jlptN5Color: "#8aadf4",
|
||||
|
||||
keybindingsMap: new Map(),
|
||||
chordPending: false,
|
||||
|
||||
@@ -250,6 +250,11 @@ body {
|
||||
color: #cad3f5;
|
||||
--subtitle-known-word-color: #a6da95;
|
||||
--subtitle-n-plus-one-color: #c6a0f6;
|
||||
--subtitle-jlpt-n1-color: #ed8796;
|
||||
--subtitle-jlpt-n2-color: #f5a97f;
|
||||
--subtitle-jlpt-n3-color: #f9e2af;
|
||||
--subtitle-jlpt-n4-color: #a6e3a1;
|
||||
--subtitle-jlpt-n5-color: #8aadf4;
|
||||
text-shadow:
|
||||
2px 2px 4px rgba(0, 0, 0, 0.8),
|
||||
-1px -1px 2px rgba(0, 0, 0, 0.5);
|
||||
@@ -296,6 +301,51 @@ body.settings-modal-open #subtitleContainer {
|
||||
text-shadow: 0 0 6px rgba(198, 160, 246, 0.35);
|
||||
}
|
||||
|
||||
/* JLPT level underlines: keep the word's own text color and signal the
   level purely through a colored 2px solid underline, offset 4px below
   the glyphs. Each level reads its color from a CSS custom property with
   a Catppuccin-palette fallback. */
#subtitleRoot .word.word-jlpt-n1 {
  color: inherit;
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n1-color, #ed8796);
  text-underline-offset: 4px;
}

#subtitleRoot .word.word-jlpt-n2 {
  color: inherit;
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n2-color, #f5a97f);
  text-underline-offset: 4px;
}

#subtitleRoot .word.word-jlpt-n3 {
  color: inherit;
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n3-color, #f9e2af);
  text-underline-offset: 4px;
}

#subtitleRoot .word.word-jlpt-n4 {
  color: inherit;
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n4-color, #a6e3a1);
  text-underline-offset: 4px;
}

#subtitleRoot .word.word-jlpt-n5 {
  color: inherit;
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n5-color, #8aadf4);
  text-underline-offset: 4px;
}
|
||||
|
||||
#subtitleRoot .word:hover {
|
||||
background: rgba(255, 255, 255, 0.2);
|
||||
border-radius: 3px;
|
||||
|
||||
71
src/renderer/subtitle-render.test.ts
Normal file
71
src/renderer/subtitle-render.test.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
import type { MergedToken } from "../types";
|
||||
import { PartOfSpeech } from "../types.js";
|
||||
import { computeWordClass } from "./subtitle-render.js";
|
||||
|
||||
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
||||
return {
|
||||
surface: "",
|
||||
reading: "",
|
||||
headword: "",
|
||||
startPos: 0,
|
||||
endPos: 0,
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
isMerged: true,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function extractClassBlock(cssText: string, level: number): string {
|
||||
const selector = `#subtitleRoot .word.word-jlpt-n${level}`;
|
||||
const start = cssText.indexOf(selector);
|
||||
if (start < 0) return "";
|
||||
|
||||
const openBrace = cssText.indexOf("{", start);
|
||||
if (openBrace < 0) return "";
|
||||
const closeBrace = cssText.indexOf("}", openBrace);
|
||||
if (closeBrace < 0) return "";
|
||||
|
||||
return cssText.slice(openBrace + 1, closeBrace);
|
||||
}
|
||||
|
||||
test("computeWordClass preserves known and n+1 classes while adding JLPT classes", () => {
|
||||
const knownJlpt = createToken({
|
||||
isKnown: true,
|
||||
jlptLevel: "N1",
|
||||
surface: "猫",
|
||||
});
|
||||
const nPlusOneJlpt = createToken({
|
||||
isNPlusOneTarget: true,
|
||||
jlptLevel: "N2",
|
||||
surface: "犬",
|
||||
});
|
||||
|
||||
assert.equal(computeWordClass(knownJlpt), "word word-known word-jlpt-n1");
|
||||
assert.equal(
|
||||
computeWordClass(nPlusOneJlpt),
|
||||
"word word-n-plus-one word-jlpt-n2",
|
||||
);
|
||||
});
|
||||
|
||||
test("JLPT CSS rules use underline-only styling in renderer stylesheet", () => {
|
||||
const cssText = fs.readFileSync(
|
||||
path.join(process.cwd(), "dist", "renderer", "style.css"),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
for (let level = 1; level <= 5; level += 1) {
|
||||
const block = extractClassBlock(cssText, level);
|
||||
assert.ok(block.length > 0, `word-jlpt-n${level} class should exist`);
|
||||
assert.match(block, /text-decoration-line:\s*underline;/);
|
||||
assert.match(block, /text-decoration-thickness:\s*2px;/);
|
||||
assert.match(block, /text-underline-offset:\s*2px;/);
|
||||
assert.match(block, /color:\s*inherit;/);
|
||||
}
|
||||
});
|
||||
@@ -15,6 +15,15 @@ function normalizeSubtitle(text: string, trim = true): string {
|
||||
return trim ? normalized.trim() : normalized;
|
||||
}
|
||||
|
||||
const HEX_COLOR_PATTERN =
|
||||
/^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})$/;
|
||||
|
||||
function sanitizeHexColor(value: unknown, fallback: string): string {
|
||||
return typeof value === "string" && HEX_COLOR_PATTERN.test(value.trim())
|
||||
? value.trim()
|
||||
: fallback;
|
||||
}
|
||||
|
||||
function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
|
||||
const fragment = document.createDocumentFragment();
|
||||
|
||||
@@ -50,16 +59,20 @@ function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
|
||||
root.appendChild(fragment);
|
||||
}
|
||||
|
||||
function computeWordClass(token: MergedToken): string {
|
||||
export function computeWordClass(token: MergedToken): string {
|
||||
const classes = ["word"];
|
||||
|
||||
if (token.isNPlusOneTarget) {
|
||||
return "word word-n-plus-one";
|
||||
classes.push("word-n-plus-one");
|
||||
} else if (token.isKnown) {
|
||||
classes.push("word-known");
|
||||
}
|
||||
|
||||
if (token.isKnown) {
|
||||
return "word word-known";
|
||||
if (token.jlptLevel) {
|
||||
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||
}
|
||||
|
||||
return "word";
|
||||
return classes.join(" ");
|
||||
}
|
||||
|
||||
function renderCharacterLevel(root: HTMLElement, text: string): void {
|
||||
@@ -189,6 +202,22 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
||||
style.knownWordColor ?? ctx.state.knownWordColor ?? "#a6da95";
|
||||
const nPlusOneColor =
|
||||
style.nPlusOneColor ?? ctx.state.nPlusOneColor ?? "#c6a0f6";
|
||||
const jlptColors = {
|
||||
N1: ctx.state.jlptN1Color ?? "#ed8796",
|
||||
N2: ctx.state.jlptN2Color ?? "#f5a97f",
|
||||
N3: ctx.state.jlptN3Color ?? "#f9e2af",
|
||||
N4: ctx.state.jlptN4Color ?? "#a6e3a1",
|
||||
N5: ctx.state.jlptN5Color ?? "#8aadf4",
|
||||
...(style.jlptColors
|
||||
? {
|
||||
N1: sanitizeHexColor(style.jlptColors?.N1, ctx.state.jlptN1Color),
|
||||
N2: sanitizeHexColor(style.jlptColors?.N2, ctx.state.jlptN2Color),
|
||||
N3: sanitizeHexColor(style.jlptColors?.N3, ctx.state.jlptN3Color),
|
||||
N4: sanitizeHexColor(style.jlptColors?.N4, ctx.state.jlptN4Color),
|
||||
N5: sanitizeHexColor(style.jlptColors?.N5, ctx.state.jlptN5Color),
|
||||
}
|
||||
: {}),
|
||||
};
|
||||
|
||||
ctx.state.knownWordColor = knownWordColor;
|
||||
ctx.state.nPlusOneColor = nPlusOneColor;
|
||||
@@ -197,6 +226,16 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
||||
knownWordColor,
|
||||
);
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-n-plus-one-color", nPlusOneColor);
|
||||
ctx.state.jlptN1Color = jlptColors.N1;
|
||||
ctx.state.jlptN2Color = jlptColors.N2;
|
||||
ctx.state.jlptN3Color = jlptColors.N3;
|
||||
ctx.state.jlptN4Color = jlptColors.N4;
|
||||
ctx.state.jlptN5Color = jlptColors.N5;
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n1-color", jlptColors.N1);
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n2-color", jlptColors.N2);
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n3-color", jlptColors.N3);
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n4-color", jlptColors.N4);
|
||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n5-color", jlptColors.N5);
|
||||
|
||||
const secondaryStyle = style.secondary;
|
||||
if (!secondaryStyle) return;
|
||||
|
||||
@@ -223,6 +223,7 @@ export function mergeTokens(
|
||||
startPos: prev.startPos,
|
||||
endPos: end,
|
||||
partOfSpeech: prev.partOfSpeech,
|
||||
pos1: prev.pos1 ?? token.pos1,
|
||||
pos2: prev.pos2 ?? token.pos2,
|
||||
pos3: prev.pos3 ?? token.pos3,
|
||||
isMerged: true,
|
||||
@@ -245,6 +246,7 @@ export function mergeTokens(
|
||||
startPos: start,
|
||||
endPos: end,
|
||||
partOfSpeech: token.partOfSpeech,
|
||||
pos1: token.pos1,
|
||||
pos2: token.pos2,
|
||||
pos3: token.pos3,
|
||||
isMerged: false,
|
||||
|
||||
12
src/types.ts
12
src/types.ts
@@ -48,13 +48,17 @@ export interface MergedToken {
|
||||
startPos: number;
|
||||
endPos: number;
|
||||
partOfSpeech: PartOfSpeech;
|
||||
pos1?: string;
|
||||
pos2?: string;
|
||||
pos3?: string;
|
||||
isMerged: boolean;
|
||||
isKnown: boolean;
|
||||
isNPlusOneTarget: boolean;
|
||||
jlptLevel?: JlptLevel;
|
||||
}
|
||||
|
||||
export type JlptLevel = "N1" | "N2" | "N3" | "N4" | "N5";
|
||||
|
||||
export interface WindowGeometry {
|
||||
x: number;
|
||||
y: number;
|
||||
@@ -262,6 +266,7 @@ export interface AnkiConnectConfig {
|
||||
}
|
||||
|
||||
export interface SubtitleStyleConfig {
|
||||
enableJlpt?: boolean;
|
||||
fontFamily?: string;
|
||||
fontSize?: number;
|
||||
fontColor?: string;
|
||||
@@ -270,6 +275,13 @@ export interface SubtitleStyleConfig {
|
||||
backgroundColor?: string;
|
||||
nPlusOneColor?: string;
|
||||
knownWordColor?: string;
|
||||
jlptColors?: {
|
||||
N1: string;
|
||||
N2: string;
|
||||
N3: string;
|
||||
N4: string;
|
||||
N5: string;
|
||||
};
|
||||
secondary?: {
|
||||
fontFamily?: string;
|
||||
fontSize?: number;
|
||||
|
||||
Reference in New Issue
Block a user