mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 06:22:45 -08:00
Address Claude review feedback
This commit is contained in:
49
src/core/services/frequency-dictionary-service.test.ts
Normal file
49
src/core/services/frequency-dictionary-service.test.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
import { createFrequencyDictionaryLookupService } from "./frequency-dictionary-service";
|
||||
|
||||
// Verifies that a malformed term_meta_bank file is reported through the
// injected logger and that the resulting lookup yields no rank for a term.
test("createFrequencyDictionaryLookupService logs parse errors and returns no-op for invalid dictionaries", async () => {
  const logs: string[] = [];
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-frequency-dict-"));

  try {
    const bankPath = path.join(tempDir, "term_meta_bank_1.json");
    // Deliberately truncated JSON so that parsing the bank file fails.
    fs.writeFileSync(bankPath, "{ invalid json");

    const lookup = await createFrequencyDictionaryLookupService({
      searchPaths: [tempDir],
      log: (message) => {
        logs.push(message);
      },
    });

    const rank = lookup("猫");

    assert.equal(rank, null);
    // The log entry must name both the failure kind and the offending file.
    assert.equal(
      logs.some(
        (entry) =>
          entry.includes("Failed to parse frequency dictionary file as JSON") &&
          entry.includes("term_meta_bank_1.json"),
      ),
      true,
    );
  } finally {
    // Remove the scratch directory even when an assertion above throws, so
    // repeated runs do not accumulate directories under the OS temp dir.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Verifies that a search path which does not exist produces a lookup that
// returns null and that a "not found" message reaches the injected logger.
test("createFrequencyDictionaryLookupService continues with no-op lookup when search path is missing", async () => {
  const logs: string[] = [];
  // Use a child of a freshly created private directory: a fixed name directly
  // under the shared OS temp dir could already exist and invalidate the test.
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-frequency-dict-"));

  try {
    // Never created, so it is guaranteed to be absent.
    const missingPath = path.join(tempDir, "subminer-frequency-dict-missing-dir");
    const lookup = await createFrequencyDictionaryLookupService({
      searchPaths: [missingPath],
      log: (message) => {
        logs.push(message);
      },
    });

    assert.equal(lookup("猫"), null);
    assert.equal(
      logs.some((entry) => entry.includes("Frequency dictionary not found.")),
      true,
    );
  } finally {
    // Clean up the parent scratch directory regardless of the test outcome.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
@@ -100,7 +100,10 @@ function collectDictionaryFromPath(
|
||||
let fileNames: string[];
|
||||
try {
|
||||
fileNames = fs.readdirSync(dictionaryPath);
|
||||
} catch {
|
||||
} catch (error) {
|
||||
log(
|
||||
`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`,
|
||||
);
|
||||
return terms;
|
||||
}
|
||||
|
||||
@@ -150,10 +153,21 @@ export async function createFrequencyDictionaryLookupService(
|
||||
|
||||
for (const dictionaryPath of options.searchPaths) {
|
||||
attemptedPaths.push(dictionaryPath);
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
let isDirectory = false;
|
||||
|
||||
try {
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
continue;
|
||||
}
|
||||
isDirectory = fs.statSync(dictionaryPath).isDirectory();
|
||||
} catch (error) {
|
||||
options.log(
|
||||
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
if (!fs.statSync(dictionaryPath).isDirectory()) {
|
||||
|
||||
if (!isDirectory) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -186,4 +200,3 @@ export async function createFrequencyDictionaryLookupService(
|
||||
|
||||
return NOOP_LOOKUP;
|
||||
}
|
||||
|
||||
|
||||
@@ -228,6 +228,75 @@ test("tokenizeSubtitleService applies frequency dictionary ranks", async () => {
|
||||
assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
|
||||
});
|
||||
|
||||
// A frequency lookup that throws must not break tokenization: the single
// token is expected to come back without a frequencyRank.
test("tokenizeSubtitleService ignores frequency lookup failures", async () => {
  // Stand-in for a frequency dictionary whose lookup always fails.
  const failingLookup = (): never => {
    throw new Error("frequency lookup unavailable");
  };

  const deps = makeDeps({
    getFrequencyDictionaryEnabled: () => true,
    tokenizeWithMecab: async () => [
      {
        headword: "猫",
        surface: "猫",
        reading: "ネコ",
        startPos: 0,
        endPos: 1,
        partOfSpeech: PartOfSpeech.noun,
        isMerged: false,
        isKnown: false,
        isNPlusOneTarget: false,
      },
    ],
    getFrequencyRank: failingLookup,
  });

  const result = await tokenizeSubtitleService("猫", deps);

  const firstToken = result.tokens?.[0];
  assert.equal(firstToken?.frequencyRank, undefined);
});
|
||||
|
||||
// The stubbed lookup yields NaN for "猫" and -1 for "です"; both tokens are
// expected to come back without a frequencyRank.
test("tokenizeSubtitleService ignores invalid frequency ranks", async () => {
  const deps = makeDeps({
    getFrequencyDictionaryEnabled: () => true,
    tokenizeWithMecab: async () => [
      {
        headword: "猫",
        surface: "猫",
        reading: "ネコ",
        startPos: 0,
        endPos: 1,
        partOfSpeech: PartOfSpeech.noun,
        isMerged: false,
        isKnown: false,
        isNPlusOneTarget: false,
      },
      {
        headword: "です",
        surface: "です",
        reading: "デス",
        startPos: 1,
        endPos: 2,
        partOfSpeech: PartOfSpeech.bound_auxiliary,
        isMerged: false,
        isKnown: false,
        isNPlusOneTarget: false,
      },
    ],
    getFrequencyRank: (text) => {
      switch (text) {
        case "猫":
          return Number.NaN;
        case "です":
          return -1;
        default:
          return 100;
      }
    },
  });

  const result = await tokenizeSubtitleService("猫", deps);
  const tokens = result.tokens;

  assert.equal(tokens?.length, 2);
  assert.equal(tokens?.[0]?.frequencyRank, undefined);
  assert.equal(tokens?.[1]?.frequencyRank, undefined);
});
|
||||
|
||||
test("tokenizeSubtitleService skips frequency lookups when disabled", async () => {
|
||||
let frequencyCalls = 0;
|
||||
const result = await tokenizeSubtitleService(
|
||||
|
||||
@@ -161,6 +161,11 @@ function getCachedFrequencyRank(
|
||||
} catch {
|
||||
rank = null;
|
||||
}
|
||||
if (rank !== null) {
|
||||
if (!Number.isFinite(rank) || rank <= 0) {
|
||||
rank = null;
|
||||
}
|
||||
}
|
||||
|
||||
cache.set(normalizedText, rank);
|
||||
while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) {
|
||||
|
||||
Reference in New Issue
Block a user