From f21fc95d179ca466968c30607759404613f4c8f8 Mon Sep 17 00:00:00 2001 From: sudacode Date: Sun, 15 Feb 2026 23:36:06 -0800 Subject: [PATCH] Address Claude review feedback --- .../frequency-dictionary-service.test.ts | 49 +++++++++++++ .../services/frequency-dictionary-service.ts | 21 ++++-- src/core/services/tokenizer-service.test.ts | 69 +++++++++++++++++++ src/core/services/tokenizer-service.ts | 5 ++ src/main/frequency-dictionary-runtime.ts | 10 ++- src/renderer/subtitle-render.test.ts | 65 +++++++++++++++++ src/renderer/subtitle-render.ts | 19 ++--- 7 files changed, 224 insertions(+), 14 deletions(-) create mode 100644 src/core/services/frequency-dictionary-service.test.ts diff --git a/src/core/services/frequency-dictionary-service.test.ts b/src/core/services/frequency-dictionary-service.test.ts new file mode 100644 index 0000000..997c457 --- /dev/null +++ b/src/core/services/frequency-dictionary-service.test.ts @@ -0,0 +1,49 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { createFrequencyDictionaryLookupService } from "./frequency-dictionary-service"; + +test("createFrequencyDictionaryLookupService logs parse errors and returns no-op for invalid dictionaries", async () => { + const logs: string[] = []; + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-frequency-dict-")); + const bankPath = path.join(tempDir, "term_meta_bank_1.json"); + fs.writeFileSync(bankPath, "{ invalid json"); + + const lookup = await createFrequencyDictionaryLookupService({ + searchPaths: [tempDir], + log: (message) => { + logs.push(message); + }, + }); + + const rank = lookup("猫"); + + assert.equal(rank, null); + assert.equal( + logs.some((entry) => + entry.includes("Failed to parse frequency dictionary file as JSON") && + entry.includes("term_meta_bank_1.json") + ), + true, + ); +}); + +test("createFrequencyDictionaryLookupService continues with no-op lookup when search path is missing", async () => { + const logs: string[] = []; + const missingPath = path.join(os.tmpdir(), "subminer-frequency-dict-missing-dir"); + const lookup = await createFrequencyDictionaryLookupService({ + searchPaths: [missingPath], + log: (message) => { + logs.push(message); + }, + }); + + assert.equal(lookup("猫"), null); + assert.equal( + logs.some((entry) => entry.includes(`Frequency dictionary not found.`)), + true, + ); +}); diff --git a/src/core/services/frequency-dictionary-service.ts b/src/core/services/frequency-dictionary-service.ts index 399ffe7..6523749 100644 --- a/src/core/services/frequency-dictionary-service.ts +++ b/src/core/services/frequency-dictionary-service.ts @@ -100,7 +100,10 @@ function collectDictionaryFromPath( let fileNames: string[]; try { fileNames = fs.readdirSync(dictionaryPath); - } catch { + } catch (error) { + log( + `Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`, + ); return terms; } @@ -150,10 +153,21 @@ export async function createFrequencyDictionaryLookupService( for (const dictionaryPath of options.searchPaths) { attemptedPaths.push(dictionaryPath); - if (!fs.existsSync(dictionaryPath)) { + let isDirectory = false; + + try { + if (!fs.existsSync(dictionaryPath)) { + continue; + } + isDirectory = fs.statSync(dictionaryPath).isDirectory(); + } catch (error) { + options.log( + `Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`, + ); continue; } - if (!fs.statSync(dictionaryPath).isDirectory()) { + + if (!isDirectory) { continue; } @@ -186,4 +200,3 @@ export async function createFrequencyDictionaryLookupService( return NOOP_LOOKUP; } - diff --git a/src/core/services/tokenizer-service.test.ts b/src/core/services/tokenizer-service.test.ts index fa02c84..c2747d2 100644 --- a/src/core/services/tokenizer-service.test.ts +++ b/src/core/services/tokenizer-service.test.ts @@ -228,6 +228,75 @@ test("tokenizeSubtitleService applies frequency dictionary ranks", async () => { assert.equal(result.tokens?.[1]?.frequencyRank, 1200); }); +test("tokenizeSubtitleService ignores frequency lookup failures", async () => { + const result = await tokenizeSubtitleService( + "猫", + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + tokenizeWithMecab: async () => [ + { + headword: "猫", + surface: "猫", + reading: "ネコ", + startPos: 0, + endPos: 1, + partOfSpeech: PartOfSpeech.noun, + isMerged: false, + isKnown: false, + isNPlusOneTarget: false, + }, + ], + getFrequencyRank: () => { + throw new Error("frequency lookup unavailable"); + }, + }), + ); + + assert.equal(result.tokens?.[0]?.frequencyRank, undefined); +}); + +test("tokenizeSubtitleService ignores invalid frequency ranks", async () => { + const result = await tokenizeSubtitleService( + "猫", + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + tokenizeWithMecab: async () => [ + { + headword: "猫", + surface: "猫", + reading: "ネコ", + startPos: 0, + endPos: 1, + partOfSpeech: PartOfSpeech.noun, + isMerged: false, + isKnown: false, + isNPlusOneTarget: false, + }, + { + headword: "です", + surface: "です", + reading: "デス", + startPos: 1, + endPos: 2, + partOfSpeech: PartOfSpeech.bound_auxiliary, + isMerged: false, + isKnown: false, + isNPlusOneTarget: false, + }, + ], + getFrequencyRank: (text) => { + if (text === "猫") return Number.NaN; + if (text === "です") return -1; + return 100; + }, + }), + ); + + assert.equal(result.tokens?.length, 2); + assert.equal(result.tokens?.[0]?.frequencyRank, undefined); + assert.equal(result.tokens?.[1]?.frequencyRank, undefined); +}); + test("tokenizeSubtitleService skips frequency lookups when disabled", async () => { let frequencyCalls = 0; const result = await tokenizeSubtitleService( diff --git a/src/core/services/tokenizer-service.ts b/src/core/services/tokenizer-service.ts index 0ec192a..c4d9724 100644 --- a/src/core/services/tokenizer-service.ts +++ b/src/core/services/tokenizer-service.ts @@ -161,6 +161,11 @@ function getCachedFrequencyRank( } catch { rank = null; } + if (rank !== null) { + if (!Number.isFinite(rank) || rank <= 0) { + rank = null; + } + } cache.set(normalizedText, rank); while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) { diff --git a/src/main/frequency-dictionary-runtime.ts b/src/main/frequency-dictionary-runtime.ts index 785b78e..00f1a19 100644 --- a/src/main/frequency-dictionary-runtime.ts +++ b/src/main/frequency-dictionary-runtime.ts @@ -17,6 +17,9 @@ export interface FrequencyDictionaryRuntimeDeps { let frequencyDictionaryLookupInitialized = false; let frequencyDictionaryLookupInitialization: Promise | null = null; +// Frequency dictionary services are initialized lazily as a process-wide singleton. +// Initialization is idempotent and intentionally shared across callers. + export function getFrequencyDictionarySearchPaths( deps: FrequencyDictionarySearchPathDeps, ): string[] { @@ -24,6 +27,8 @@ export function getFrequencyDictionarySearchPaths( const sourcePath = deps.getSourcePath?.(); const rawSearchPaths: string[] = []; + // User-provided path takes precedence over bundled/default roots. + // Root list should include `vendor/jiten_freq_global` in callers. if (sourcePath && sourcePath.trim()) { rawSearchPaths.push(sourcePath.trim()); rawSearchPaths.push(path.join(sourcePath.trim(), "frequency-dictionary")); @@ -64,8 +69,9 @@ export async function ensureFrequencyDictionaryLookup( frequencyDictionaryLookupInitialized = true; }) .catch((error) => { - frequencyDictionaryLookupInitialization = null; - throw error; + frequencyDictionaryLookupInitialized = true; + deps.log(`Failed to initialize frequency dictionary: ${String(error)}`); + deps.setFrequencyRankLookup(() => null); }); } await frequencyDictionaryLookupInitialization; diff --git a/src/renderer/subtitle-render.test.ts b/src/renderer/subtitle-render.test.ts index d4a116d..596890a 100644 --- a/src/renderer/subtitle-render.test.ts +++ b/src/renderer/subtitle-render.test.ts @@ -53,6 +53,54 @@ test("computeWordClass preserves known and n+1 classes while adding JLPT classes ); }); +test("computeWordClass does not add frequency class to known or N+1 terms", () => { + const known = createToken({ + isKnown: true, + frequencyRank: 10, + surface: "既知", + }); + const nPlusOne = createToken({ + isNPlusOneTarget: true, + frequencyRank: 10, + surface: "目標", + }); + const frequency = createToken({ + frequencyRank: 10, + surface: "頻度", + }); + + assert.equal( + computeWordClass(known, { + enabled: true, + topX: 100, + mode: "single", + singleColor: "#000000", + bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const, + }), + "word word-known", + ); + assert.equal( + computeWordClass(nPlusOne, { + enabled: true, + topX: 100, + mode: "single", + singleColor: "#000000", + bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const, + }), + "word word-n-plus-one", + ); + assert.equal( + computeWordClass(frequency, { + enabled: true, + topX: 100, + mode: "single", + singleColor: "#000000", + bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const, + }), + "word word-frequency-single", + ); +}); + test("computeWordClass adds frequency class for single mode when rank is within topX", () => { const token = createToken({ surface: "猫", @@ -114,6 +162,23 @@ test("computeWordClass adds frequency class for banded mode", () => { assert.equal(actual, "word word-frequency-band-2"); }); +test("computeWordClass uses configured band count for banded mode", () => { + const token = createToken({ + surface: "犬", + frequencyRank: 2, + }); + + const actual = computeWordClass(token, { + enabled: true, + topX: 4, + mode: "banded", + singleColor: "#000000", + bandedColors: ["#111111", "#222222", "#333333"] as any, + } as any); + + assert.equal(actual, "word word-frequency-band-1"); +}); + test("computeWordClass skips frequency class when rank is out of topX", () => { const token = createToken({ surface: "犬", diff --git a/src/renderer/subtitle-render.ts b/src/renderer/subtitle-render.ts index 955b460..c701b7a 100644 --- a/src/renderer/subtitle-render.ts +++ b/src/renderer/subtitle-render.ts @@ -83,8 +83,9 @@ function getFrequencyDictionaryClass( } if (settings.mode === "banded") { - const normalizedBand = Math.ceil((rank / topX) * 5); - const band = Math.min(5, Math.max(1, normalizedBand)); + const bandCount = settings.bandedColors.length; + const normalizedBand = Math.ceil((rank / topX) * bandCount); + const band = Math.min(bandCount, Math.max(1, normalizedBand)); return `word-frequency-band-${band}`; } @@ -183,12 +184,14 @@ export function computeWordClass( classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`); } - const frequencyClass = getFrequencyDictionaryClass( - token, - resolvedFrequencySettings, - ); - if (frequencyClass) { - classes.push(frequencyClass); + if (!token.isKnown && !token.isNPlusOneTarget) { + const frequencyClass = getFrequencyDictionaryClass( + token, + resolvedFrequencySettings, + ); + if (frequencyClass) { + classes.push(frequencyClass); + } } return classes.join(" ");