chore: commit unstaged workspace changes

This commit is contained in:
2026-02-21 02:32:00 -08:00
parent 1c424b4a0b
commit ab1d5f19fd
16 changed files with 780 additions and 37 deletions

View File

@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
true,
);
});
test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
const logs: string[] = [];
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
fs.writeFileSync(
bankPath,
JSON.stringify([
['猫', 1, { frequency: { displayValue: 100 } }],
['猫', 2, { frequency: { displayValue: 120 } }],
['猫', 3, { frequency: { displayValue: 110 } }],
]),
);
const lookup = await createFrequencyDictionaryLookup({
searchPaths: [tempDir],
log: (message) => {
logs.push(message);
},
});
assert.equal(lookup('猫'), 100);
assert.equal(
logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
1,
);
assert.equal(
logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
false,
);
});

View File

@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
function addEntriesToMap(
rawEntries: unknown,
terms: Map<string, number>,
log: (message: string) => void,
): void {
): { duplicateCount: number } {
if (!Array.isArray(rawEntries)) {
return;
return { duplicateCount: 0 };
}
let duplicateCount = 0;
for (const rawEntry of rawEntries) {
const entry = asFrequencyDictionaryEntry(rawEntry);
if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
continue;
}
log(
`Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
);
duplicateCount += 1;
}
return { duplicateCount };
}
function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
}
const beforeSize = terms.size;
addEntriesToMap(rawEntries, terms, log);
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
if (duplicateCount > 0) {
log(
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
duplicateCount === 1 ? 'y' : 'ies'
} in ${bankPath} (kept strongest rank per term).`,
);
}
if (terms.size === beforeSize) {
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
}