mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
Address Claude review feedback
This commit is contained in:
49
src/core/services/frequency-dictionary-service.test.ts
Normal file
49
src/core/services/frequency-dictionary-service.test.ts
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import test from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import fs from "node:fs";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { createFrequencyDictionaryLookupService } from "./frequency-dictionary-service";
|
||||||
|
|
||||||
|
test("createFrequencyDictionaryLookupService logs parse errors and returns no-op for invalid dictionaries", async () => {
|
||||||
|
const logs: string[] = [];
|
||||||
|
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-frequency-dict-"));
|
||||||
|
const bankPath = path.join(tempDir, "term_meta_bank_1.json");
|
||||||
|
fs.writeFileSync(bankPath, "{ invalid json");
|
||||||
|
|
||||||
|
const lookup = await createFrequencyDictionaryLookupService({
|
||||||
|
searchPaths: [tempDir],
|
||||||
|
log: (message) => {
|
||||||
|
logs.push(message);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const rank = lookup("猫");
|
||||||
|
|
||||||
|
assert.equal(rank, null);
|
||||||
|
assert.equal(
|
||||||
|
logs.some((entry) =>
|
||||||
|
entry.includes("Failed to parse frequency dictionary file as JSON") &&
|
||||||
|
entry.includes("term_meta_bank_1.json")
|
||||||
|
),
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("createFrequencyDictionaryLookupService continues with no-op lookup when search path is missing", async () => {
|
||||||
|
const logs: string[] = [];
|
||||||
|
const missingPath = path.join(os.tmpdir(), "subminer-frequency-dict-missing-dir");
|
||||||
|
const lookup = await createFrequencyDictionaryLookupService({
|
||||||
|
searchPaths: [missingPath],
|
||||||
|
log: (message) => {
|
||||||
|
logs.push(message);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(lookup("猫"), null);
|
||||||
|
assert.equal(
|
||||||
|
logs.some((entry) => entry.includes(`Frequency dictionary not found.`)),
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
});
|
||||||
@@ -100,7 +100,10 @@ function collectDictionaryFromPath(
|
|||||||
let fileNames: string[];
|
let fileNames: string[];
|
||||||
try {
|
try {
|
||||||
fileNames = fs.readdirSync(dictionaryPath);
|
fileNames = fs.readdirSync(dictionaryPath);
|
||||||
} catch {
|
} catch (error) {
|
||||||
|
log(
|
||||||
|
`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`,
|
||||||
|
);
|
||||||
return terms;
|
return terms;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -150,10 +153,21 @@ export async function createFrequencyDictionaryLookupService(
|
|||||||
|
|
||||||
for (const dictionaryPath of options.searchPaths) {
|
for (const dictionaryPath of options.searchPaths) {
|
||||||
attemptedPaths.push(dictionaryPath);
|
attemptedPaths.push(dictionaryPath);
|
||||||
if (!fs.existsSync(dictionaryPath)) {
|
let isDirectory = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!fs.existsSync(dictionaryPath)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
isDirectory = fs.statSync(dictionaryPath).isDirectory();
|
||||||
|
} catch (error) {
|
||||||
|
options.log(
|
||||||
|
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!fs.statSync(dictionaryPath).isDirectory()) {
|
|
||||||
|
if (!isDirectory) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -186,4 +200,3 @@ export async function createFrequencyDictionaryLookupService(
|
|||||||
|
|
||||||
return NOOP_LOOKUP;
|
return NOOP_LOOKUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -228,6 +228,75 @@ test("tokenizeSubtitleService applies frequency dictionary ranks", async () => {
|
|||||||
assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
|
assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService ignores frequency lookup failures", async () => {
|
||||||
|
const result = await tokenizeSubtitleService(
|
||||||
|
"猫",
|
||||||
|
makeDeps({
|
||||||
|
getFrequencyDictionaryEnabled: () => true,
|
||||||
|
tokenizeWithMecab: async () => [
|
||||||
|
{
|
||||||
|
headword: "猫",
|
||||||
|
surface: "猫",
|
||||||
|
reading: "ネコ",
|
||||||
|
startPos: 0,
|
||||||
|
endPos: 1,
|
||||||
|
partOfSpeech: PartOfSpeech.noun,
|
||||||
|
isMerged: false,
|
||||||
|
isKnown: false,
|
||||||
|
isNPlusOneTarget: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
getFrequencyRank: () => {
|
||||||
|
throw new Error("frequency lookup unavailable");
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService ignores invalid frequency ranks", async () => {
|
||||||
|
const result = await tokenizeSubtitleService(
|
||||||
|
"猫",
|
||||||
|
makeDeps({
|
||||||
|
getFrequencyDictionaryEnabled: () => true,
|
||||||
|
tokenizeWithMecab: async () => [
|
||||||
|
{
|
||||||
|
headword: "猫",
|
||||||
|
surface: "猫",
|
||||||
|
reading: "ネコ",
|
||||||
|
startPos: 0,
|
||||||
|
endPos: 1,
|
||||||
|
partOfSpeech: PartOfSpeech.noun,
|
||||||
|
isMerged: false,
|
||||||
|
isKnown: false,
|
||||||
|
isNPlusOneTarget: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headword: "です",
|
||||||
|
surface: "です",
|
||||||
|
reading: "デス",
|
||||||
|
startPos: 1,
|
||||||
|
endPos: 2,
|
||||||
|
partOfSpeech: PartOfSpeech.bound_auxiliary,
|
||||||
|
isMerged: false,
|
||||||
|
isKnown: false,
|
||||||
|
isNPlusOneTarget: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
getFrequencyRank: (text) => {
|
||||||
|
if (text === "猫") return Number.NaN;
|
||||||
|
if (text === "です") return -1;
|
||||||
|
return 100;
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.equal(result.tokens?.length, 2);
|
||||||
|
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
|
||||||
|
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
|
||||||
|
});
|
||||||
|
|
||||||
test("tokenizeSubtitleService skips frequency lookups when disabled", async () => {
|
test("tokenizeSubtitleService skips frequency lookups when disabled", async () => {
|
||||||
let frequencyCalls = 0;
|
let frequencyCalls = 0;
|
||||||
const result = await tokenizeSubtitleService(
|
const result = await tokenizeSubtitleService(
|
||||||
|
|||||||
@@ -161,6 +161,11 @@ function getCachedFrequencyRank(
|
|||||||
} catch {
|
} catch {
|
||||||
rank = null;
|
rank = null;
|
||||||
}
|
}
|
||||||
|
if (rank !== null) {
|
||||||
|
if (!Number.isFinite(rank) || rank <= 0) {
|
||||||
|
rank = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cache.set(normalizedText, rank);
|
cache.set(normalizedText, rank);
|
||||||
while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {
|
while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ export interface FrequencyDictionaryRuntimeDeps {
|
|||||||
let frequencyDictionaryLookupInitialized = false;
|
let frequencyDictionaryLookupInitialized = false;
|
||||||
let frequencyDictionaryLookupInitialization: Promise<void> | null = null;
|
let frequencyDictionaryLookupInitialization: Promise<void> | null = null;
|
||||||
|
|
||||||
|
// Frequency dictionary services are initialized lazily as a process-wide singleton.
|
||||||
|
// Initialization is idempotent and intentionally shared across callers.
|
||||||
|
|
||||||
export function getFrequencyDictionarySearchPaths(
|
export function getFrequencyDictionarySearchPaths(
|
||||||
deps: FrequencyDictionarySearchPathDeps,
|
deps: FrequencyDictionarySearchPathDeps,
|
||||||
): string[] {
|
): string[] {
|
||||||
@@ -24,6 +27,8 @@ export function getFrequencyDictionarySearchPaths(
|
|||||||
const sourcePath = deps.getSourcePath?.();
|
const sourcePath = deps.getSourcePath?.();
|
||||||
|
|
||||||
const rawSearchPaths: string[] = [];
|
const rawSearchPaths: string[] = [];
|
||||||
|
// User-provided path takes precedence over bundled/default roots.
|
||||||
|
// Root list should include `vendor/jiten_freq_global` in callers.
|
||||||
if (sourcePath && sourcePath.trim()) {
|
if (sourcePath && sourcePath.trim()) {
|
||||||
rawSearchPaths.push(sourcePath.trim());
|
rawSearchPaths.push(sourcePath.trim());
|
||||||
rawSearchPaths.push(path.join(sourcePath.trim(), "frequency-dictionary"));
|
rawSearchPaths.push(path.join(sourcePath.trim(), "frequency-dictionary"));
|
||||||
@@ -64,8 +69,9 @@ export async function ensureFrequencyDictionaryLookup(
|
|||||||
frequencyDictionaryLookupInitialized = true;
|
frequencyDictionaryLookupInitialized = true;
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
frequencyDictionaryLookupInitialization = null;
|
frequencyDictionaryLookupInitialized = true;
|
||||||
throw error;
|
deps.log(`Failed to initialize frequency dictionary: ${String(error)}`);
|
||||||
|
deps.setFrequencyRankLookup(() => null);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
await frequencyDictionaryLookupInitialization;
|
await frequencyDictionaryLookupInitialization;
|
||||||
|
|||||||
@@ -53,6 +53,54 @@ test("computeWordClass preserves known and n+1 classes while adding JLPT classes
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("computeWordClass does not add frequency class to known or N+1 terms", () => {
|
||||||
|
const known = createToken({
|
||||||
|
isKnown: true,
|
||||||
|
frequencyRank: 10,
|
||||||
|
surface: "既知",
|
||||||
|
});
|
||||||
|
const nPlusOne = createToken({
|
||||||
|
isNPlusOneTarget: true,
|
||||||
|
frequencyRank: 10,
|
||||||
|
surface: "目標",
|
||||||
|
});
|
||||||
|
const frequency = createToken({
|
||||||
|
frequencyRank: 10,
|
||||||
|
surface: "頻度",
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(
|
||||||
|
computeWordClass(known, {
|
||||||
|
enabled: true,
|
||||||
|
topX: 100,
|
||||||
|
mode: "single",
|
||||||
|
singleColor: "#000000",
|
||||||
|
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
|
||||||
|
}),
|
||||||
|
"word word-known",
|
||||||
|
);
|
||||||
|
assert.equal(
|
||||||
|
computeWordClass(nPlusOne, {
|
||||||
|
enabled: true,
|
||||||
|
topX: 100,
|
||||||
|
mode: "single",
|
||||||
|
singleColor: "#000000",
|
||||||
|
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
|
||||||
|
}),
|
||||||
|
"word word-n-plus-one",
|
||||||
|
);
|
||||||
|
assert.equal(
|
||||||
|
computeWordClass(frequency, {
|
||||||
|
enabled: true,
|
||||||
|
topX: 100,
|
||||||
|
mode: "single",
|
||||||
|
singleColor: "#000000",
|
||||||
|
bandedColors: ["#000000", "#000000", "#000000", "#000000", "#000000"] as const,
|
||||||
|
}),
|
||||||
|
"word word-frequency-single",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test("computeWordClass adds frequency class for single mode when rank is within topX", () => {
|
test("computeWordClass adds frequency class for single mode when rank is within topX", () => {
|
||||||
const token = createToken({
|
const token = createToken({
|
||||||
surface: "猫",
|
surface: "猫",
|
||||||
@@ -114,6 +162,23 @@ test("computeWordClass adds frequency class for banded mode", () => {
|
|||||||
assert.equal(actual, "word word-frequency-band-2");
|
assert.equal(actual, "word word-frequency-band-2");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("computeWordClass uses configured band count for banded mode", () => {
|
||||||
|
const token = createToken({
|
||||||
|
surface: "犬",
|
||||||
|
frequencyRank: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
const actual = computeWordClass(token, {
|
||||||
|
enabled: true,
|
||||||
|
topX: 4,
|
||||||
|
mode: "banded",
|
||||||
|
singleColor: "#000000",
|
||||||
|
bandedColors: ["#111111", "#222222", "#333333"] as any,
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
assert.equal(actual, "word word-frequency-band-1");
|
||||||
|
});
|
||||||
|
|
||||||
test("computeWordClass skips frequency class when rank is out of topX", () => {
|
test("computeWordClass skips frequency class when rank is out of topX", () => {
|
||||||
const token = createToken({
|
const token = createToken({
|
||||||
surface: "犬",
|
surface: "犬",
|
||||||
|
|||||||
@@ -83,8 +83,9 @@ function getFrequencyDictionaryClass(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (settings.mode === "banded") {
|
if (settings.mode === "banded") {
|
||||||
const normalizedBand = Math.ceil((rank / topX) * 5);
|
const bandCount = settings.bandedColors.length;
|
||||||
const band = Math.min(5, Math.max(1, normalizedBand));
|
const normalizedBand = Math.ceil((rank / topX) * bandCount);
|
||||||
|
const band = Math.min(bandCount, Math.max(1, normalizedBand));
|
||||||
return `word-frequency-band-${band}`;
|
return `word-frequency-band-${band}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -183,12 +184,14 @@ export function computeWordClass(
|
|||||||
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const frequencyClass = getFrequencyDictionaryClass(
|
if (!token.isKnown && !token.isNPlusOneTarget) {
|
||||||
token,
|
const frequencyClass = getFrequencyDictionaryClass(
|
||||||
resolvedFrequencySettings,
|
token,
|
||||||
);
|
resolvedFrequencySettings,
|
||||||
if (frequencyClass) {
|
);
|
||||||
classes.push(frequencyClass);
|
if (frequencyClass) {
|
||||||
|
classes.push(frequencyClass);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return classes.join(" ");
|
return classes.join(" ");
|
||||||
|
|||||||
Reference in New Issue
Block a user