mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 06:22:45 -08:00
chore: commit unstaged workspace changes
This commit is contained in:
@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
|
||||
const logs: string[] = [];
|
||||
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
|
||||
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
|
||||
fs.writeFileSync(
|
||||
bankPath,
|
||||
JSON.stringify([
|
||||
['猫', 1, { frequency: { displayValue: 100 } }],
|
||||
['猫', 2, { frequency: { displayValue: 120 } }],
|
||||
['猫', 3, { frequency: { displayValue: 110 } }],
|
||||
]),
|
||||
);
|
||||
|
||||
const lookup = await createFrequencyDictionaryLookup({
|
||||
searchPaths: [tempDir],
|
||||
log: (message) => {
|
||||
logs.push(message);
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(lookup('猫'), 100);
|
||||
assert.equal(
|
||||
logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
|
||||
1,
|
||||
);
|
||||
assert.equal(
|
||||
logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
|
||||
function addEntriesToMap(
|
||||
rawEntries: unknown,
|
||||
terms: Map<string, number>,
|
||||
log: (message: string) => void,
|
||||
): void {
|
||||
): { duplicateCount: number } {
|
||||
if (!Array.isArray(rawEntries)) {
|
||||
return;
|
||||
return { duplicateCount: 0 };
|
||||
}
|
||||
|
||||
let duplicateCount = 0;
|
||||
for (const rawEntry of rawEntries) {
|
||||
const entry = asFrequencyDictionaryEntry(rawEntry);
|
||||
if (!entry) {
|
||||
@@ -79,10 +79,10 @@ function addEntriesToMap(
|
||||
continue;
|
||||
}
|
||||
|
||||
log(
|
||||
`Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
|
||||
);
|
||||
duplicateCount += 1;
|
||||
}
|
||||
|
||||
return { duplicateCount };
|
||||
}
|
||||
|
||||
function collectDictionaryFromPath(
|
||||
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
|
||||
}
|
||||
|
||||
const beforeSize = terms.size;
|
||||
addEntriesToMap(rawEntries, terms, log);
|
||||
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
|
||||
if (duplicateCount > 0) {
|
||||
log(
|
||||
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
|
||||
duplicateCount === 1 ? 'y' : 'ies'
|
||||
} in ${bankPath} (kept strongest rank per term).`,
|
||||
);
|
||||
}
|
||||
if (terms.size === beforeSize) {
|
||||
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user