Address Claude review feedback

This commit is contained in:
2026-02-15 23:36:06 -08:00
parent 8e9d392b21
commit f21fc95d17
7 changed files with 224 additions and 14 deletions

View File

@@ -0,0 +1,49 @@
import test from "node:test";
import assert from "node:assert/strict";
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { createFrequencyDictionaryLookupService } from "./frequency-dictionary-service";
test("createFrequencyDictionaryLookupService logs parse errors and returns no-op for invalid dictionaries", async () => {
const logs: string[] = [];
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-frequency-dict-"));
const bankPath = path.join(tempDir, "term_meta_bank_1.json");
fs.writeFileSync(bankPath, "{ invalid json");
const lookup = await createFrequencyDictionaryLookupService({
searchPaths: [tempDir],
log: (message) => {
logs.push(message);
},
});
const rank = lookup("猫");
assert.equal(rank, null);
assert.equal(
logs.some((entry) =>
entry.includes("Failed to parse frequency dictionary file as JSON") &&
entry.includes("term_meta_bank_1.json")
),
true,
);
});
test("createFrequencyDictionaryLookupService continues with no-op lookup when search path is missing", async () => {
const logs: string[] = [];
const missingPath = path.join(os.tmpdir(), "subminer-frequency-dict-missing-dir");
const lookup = await createFrequencyDictionaryLookupService({
searchPaths: [missingPath],
log: (message) => {
logs.push(message);
},
});
assert.equal(lookup("猫"), null);
assert.equal(
logs.some((entry) => entry.includes(`Frequency dictionary not found.`)),
true,
);
});

View File

@@ -100,7 +100,10 @@ function collectDictionaryFromPath(
let fileNames: string[]; let fileNames: string[];
try { try {
fileNames = fs.readdirSync(dictionaryPath); fileNames = fs.readdirSync(dictionaryPath);
} catch { } catch (error) {
log(
`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`,
);
return terms; return terms;
} }
@@ -150,10 +153,21 @@ export async function createFrequencyDictionaryLookupService(
for (const dictionaryPath of options.searchPaths) { for (const dictionaryPath of options.searchPaths) {
attemptedPaths.push(dictionaryPath); attemptedPaths.push(dictionaryPath);
if (!fs.existsSync(dictionaryPath)) { let isDirectory = false;
try {
if (!fs.existsSync(dictionaryPath)) {
continue;
}
isDirectory = fs.statSync(dictionaryPath).isDirectory();
} catch (error) {
options.log(
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
);
continue; continue;
} }
if (!fs.statSync(dictionaryPath).isDirectory()) {
if (!isDirectory) {
continue; continue;
} }
@@ -186,4 +200,3 @@ export async function createFrequencyDictionaryLookupService(
return NOOP_LOOKUP; return NOOP_LOOKUP;
} }

View File

@@ -228,6 +228,75 @@ test("tokenizeSubtitleService applies frequency dictionary ranks", async () => {
assert.equal(result.tokens?.[1]?.frequencyRank, 1200); assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
}); });
test("tokenizeSubtitleService ignores frequency lookup failures", async () => {
const result = await tokenizeSubtitleService(
"猫",
makeDeps({
getFrequencyDictionaryEnabled: () => true,
tokenizeWithMecab: async () => [
{
headword: "猫",
surface: "猫",
reading: "ネコ",
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.noun,
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
],
getFrequencyRank: () => {
throw new Error("frequency lookup unavailable");
},
}),
);
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
});
test("tokenizeSubtitleService ignores invalid frequency ranks", async () => {
const result = await tokenizeSubtitleService(
"猫",
makeDeps({
getFrequencyDictionaryEnabled: () => true,
tokenizeWithMecab: async () => [
{
headword: "猫",
surface: "猫",
reading: "ネコ",
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.noun,
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: "です",
surface: "です",
reading: "デス",
startPos: 1,
endPos: 2,
partOfSpeech: PartOfSpeech.bound_auxiliary,
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
],
getFrequencyRank: (text) => {
if (text === "猫") return Number.NaN;
if (text === "です") return -1;
return 100;
},
}),
);
assert.equal(result.tokens?.length, 2);
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
});
test("tokenizeSubtitleService skips frequency lookups when disabled", async () => { test("tokenizeSubtitleService skips frequency lookups when disabled", async () => {
let frequencyCalls = 0; let frequencyCalls = 0;
const result = await tokenizeSubtitleService( const result = await tokenizeSubtitleService(

View File

@@ -161,6 +161,11 @@ function getCachedFrequencyRank(
} catch { } catch {
rank = null; rank = null;
} }
if (rank !== null) {
if (!Number.isFinite(rank) || rank <= 0) {
rank = null;
}
}
cache.set(normalizedText, rank); cache.set(normalizedText, rank);
while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) { while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {

View File

@@ -17,6 +17,9 @@ export interface FrequencyDictionaryRuntimeDeps {
let frequencyDictionaryLookupInitialized = false; let frequencyDictionaryLookupInitialized = false;
let frequencyDictionaryLookupInitialization: Promise<void> | null = null; let frequencyDictionaryLookupInitialization: Promise<void> | null = null;
// Frequency dictionary services are initialized lazily as a process-wide singleton.
// Initialization is idempotent and intentionally shared across callers.
export function getFrequencyDictionarySearchPaths( export function getFrequencyDictionarySearchPaths(
deps: FrequencyDictionarySearchPathDeps, deps: FrequencyDictionarySearchPathDeps,
): string[] { ): string[] {
@@ -24,6 +27,8 @@ export function getFrequencyDictionarySearchPaths(
const sourcePath = deps.getSourcePath?.(); const sourcePath = deps.getSourcePath?.();
const rawSearchPaths: string[] = []; const rawSearchPaths: string[] = [];
// User-provided path takes precedence over bundled/default roots.
// Root list should include `vendor/jiten_freq_global` in callers.
if (sourcePath && sourcePath.trim()) { if (sourcePath && sourcePath.trim()) {
rawSearchPaths.push(sourcePath.trim()); rawSearchPaths.push(sourcePath.trim());
rawSearchPaths.push(path.join(sourcePath.trim(), "frequency-dictionary")); rawSearchPaths.push(path.join(sourcePath.trim(), "frequency-dictionary"));
@@ -64,8 +69,9 @@ export async function ensureFrequencyDictionaryLookup(
frequencyDictionaryLookupInitialized = true; frequencyDictionaryLookupInitialized = true;
}) })
.catch((error) => { .catch((error) => {
frequencyDictionaryLookupInitialization = null; frequencyDictionaryLookupInitialized = true;
throw error; deps.log(`Failed to initialize frequency dictionary: ${String(error)}`);
deps.setFrequencyRankLookup(() => null);
}); });
} }
await frequencyDictionaryLookupInitialization; await frequencyDictionaryLookupInitialization;

View File

@@ -53,6 +53,54 @@ test("computeWordClass preserves known and n+1 classes while adding JLPT classes
); );
}); });
test("computeWordClass does not add frequency class to known or N+1 terms", () => {
const known = createToken({
isKnown: true,
frequencyRank: 10,
surface: "既知",
});
const nPlusOne = createToken({
isNPlusOneTarget: true,
frequencyRank: 10,
surface: "目標",
});
const frequency = createToken({
frequencyRank: 10,
surface: "頻度",
});
assert.equal(
computeWordClass(known, {
enabled: true,
topX: 100,
mode: "single",
singleColor: "#000000",
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
}),
"word word-known",
);
assert.equal(
computeWordClass(nPlusOne, {
enabled: true,
topX: 100,
mode: "single",
singleColor: "#000000",
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
}),
"word word-n-plus-one",
);
assert.equal(
computeWordClass(frequency, {
enabled: true,
topX: 100,
mode: "single",
singleColor: "#000000",
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
}),
"word word-frequency-single",
);
});
test("computeWordClass adds frequency class for single mode when rank is within topX", () => { test("computeWordClass adds frequency class for single mode when rank is within topX", () => {
const token = createToken({ const token = createToken({
surface: "猫", surface: "猫",
@@ -114,6 +162,23 @@ test("computeWordClass adds frequency class for banded mode", () => {
assert.equal(actual, "word word-frequency-band-2"); assert.equal(actual, "word word-frequency-band-2");
}); });
test("computeWordClass uses configured band count for banded mode", () => {
const token = createToken({
surface: "犬",
frequencyRank: 2,
});
const actual = computeWordClass(token, {
enabled: true,
topX: 4,
mode: "banded",
singleColor: "#000000",
bandedColors: ["#111111", "#222222", "#333333"] as any,
} as any);
assert.equal(actual, "word word-frequency-band-1");
});
test("computeWordClass skips frequency class when rank is out of topX", () => { test("computeWordClass skips frequency class when rank is out of topX", () => {
const token = createToken({ const token = createToken({
surface: "犬", surface: "犬",

View File

@@ -83,8 +83,9 @@ function getFrequencyDictionaryClass(
} }
if (settings.mode === "banded") { if (settings.mode === "banded") {
const normalizedBand = Math.ceil((rank / topX) * 5); const bandCount = settings.bandedColors.length;
const band = Math.min(5, Math.max(1, normalizedBand)); const normalizedBand = Math.ceil((rank / topX) * bandCount);
const band = Math.min(bandCount, Math.max(1, normalizedBand));
return `word-frequency-band-${band}`; return `word-frequency-band-${band}`;
} }
@@ -183,12 +184,14 @@ export function computeWordClass(
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`); classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
} }
const frequencyClass = getFrequencyDictionaryClass( if (!token.isKnown && !token.isNPlusOneTarget) {
token, const frequencyClass = getFrequencyDictionaryClass(
resolvedFrequencySettings, token,
); resolvedFrequencySettings,
if (frequencyClass) { );
classes.push(frequencyClass); if (frequencyClass) {
classes.push(frequencyClass);
}
} }
return classes.join(" "); return classes.join(" ");