chore: commit unstaged workspace changes

Committed on: 2026-02-21 02:32:00 -08:00
parent 1c424b4a0b
commit ab1d5f19fd
16 changed files with 780 additions and 37 deletions

View File

@@ -970,6 +970,12 @@ export class AnkiIntegration {
notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown,
getDeck: () => this.config.deck,
resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName),
logInfo: (message) => {
log.info(message);
},
logDebug: (message) => {
log.debug(message);
},
logWarn: (message, error) => {
log.warn(message, (error as Error).message);
},

View File

@@ -0,0 +1,265 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { findDuplicateNote, type NoteInfo } from './duplicate';
// Test double for DuplicateDetectionDeps.resolveFieldName: prefers an exact
// field-name match, then falls back to a case-insensitive one, else null.
function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null {
  const available = Object.keys(noteInfo.fields);
  if (available.includes(preferredName)) {
    return preferredName;
  }
  const target = preferredName.toLowerCase();
  for (const name of available) {
    if (name.toLowerCase() === target) {
      return name;
    }
  }
  return null;
}
test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => {
  // The source note stores its term under "Expression", while the existing
  // duplicate (note 200) stores the identical value under "Word".
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '食べる' } },
  };
  const matchedId = await findDuplicateNote('食べる', 100, sourceNote, {
    findNotes: async () => [100, 200],
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '食べる' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (info, preferred) => createFieldResolver(info, preferred),
    logWarn: () => {},
  });
  // A field-name mismatch must not prevent exact-value duplicate detection.
  assert.equal(matchedId, 200);
});
// Exercises the alias-field fallback: the query on the note's own "Expression"
// field misses, and the duplicate is only reachable through an alternate
// word/expression field spelling.
test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '食べる' },
},
};
// Capture every search string issued so the query sequence can be asserted.
const seenQueries: string[] = [];
const duplicateId = await findDuplicateNote('食べる', 100, currentNote, {
findNotes: async (query) => {
seenQueries.push(query);
// The query on the source field name misses on purpose...
if (query.includes('"Expression:')) {
return [];
}
// ...but any alias spelling of the field name finds the stored duplicate.
if (query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:')) {
return [200];
}
return [];
},
notesInfo: async () => [
{
noteId: 200,
fields: {
Word: { value: '食べる' },
},
},
],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
assert.equal(duplicateId, 200);
// Expects exactly two findNotes calls: the primary miss plus one alias query.
assert.equal(seenQueries.length, 2);
});
// When the note carries both a sentence-level "Expression" and a distilled
// "Word" value, the duplicate search should try both values as query terms.
test('findDuplicateNote checks both source expression/word values when both fields are present', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '昨日は雨だった。' },
Word: { value: '雨' },
},
};
// Capture issued queries so both source values can be verified afterwards.
const seenQueries: string[] = [];
const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, currentNote, {
findNotes: async (query) => {
seenQueries.push(query);
// The full-sentence value never matches anything...
if (query.includes('昨日は雨だった。')) {
return [];
}
// ...but the word value hits under any word/expression field spelling.
if (query.includes('"Word:雨"') || query.includes('"word:雨"') || query.includes('"Expression:雨"')) {
return [200];
}
return [];
},
notesInfo: async () => [
{
noteId: 200,
fields: {
Word: { value: '雨' },
},
},
],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
assert.equal(duplicateId, 200);
// Both source values appeared in at least one issued query.
assert.ok(seenQueries.some((query) => query.includes('昨日は雨だった。')));
assert.ok(seenQueries.some((query) => query.includes('雨')));
});
// Deck-scoped searches miss on purpose; the duplicate is only reachable through
// the collection-wide retry that drops the "deck:" prefix.
test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '貴様' },
},
};
const seenQueries: string[] = [];
const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
findNotes: async (query) => {
seenQueries.push(query);
// Any query restricted to the configured deck returns nothing.
if (query.includes('deck:Japanese')) {
return [];
}
// Un-scoped field queries find the stored duplicate.
if (query.includes('"Expression:貴様"') || query.includes('"Word:貴様"')) {
return [200];
}
return [];
},
notesInfo: async () => [
{
noteId: 200,
fields: {
Expression: { value: '貴様' },
},
},
],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
assert.equal(duplicateId, 200);
// Both a deck-scoped and a collection-wide query must have been issued.
assert.ok(seenQueries.some((query) => query.includes('deck:Japanese')));
assert.ok(seenQueries.some((query) => !query.includes('deck:Japanese')));
});
// All field-qualified queries miss; the bare quoted-text query is the final
// fallback and is what locates the duplicate here.
test('findDuplicateNote falls back to plain text query when field queries miss', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '貴様' },
},
};
const seenQueries: string[] = [];
const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
findNotes: async (query) => {
seenQueries.push(query);
// Every field-scoped query returns nothing.
if (query.includes('Expression:') || query.includes('Word:')) {
return [];
}
// The bare quoted expression matches the stored duplicate.
if (query.includes('"貴様"')) {
return [200];
}
return [];
},
notesInfo: async () => [
{
noteId: 200,
fields: {
Expression: { value: '貴様' },
},
},
],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
assert.equal(duplicateId, 200);
// A field query was attempted, and some query ended with the bare quoted text.
assert.ok(seenQueries.some((query) => query.includes('Expression:')));
assert.ok(seenQueries.some((query) => query.endsWith('"貴様"')));
});
test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => {
  // Candidate 200 stores the same term annotated with a bracketed furigana
  // reading; the exact-value comparison must still treat it as a duplicate.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const matchedId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async () => [200],
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様[きさま]' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (info, preferred) => createFieldResolver(info, preferred),
    logWarn: () => {},
  });
  assert.equal(matchedId, 200);
});
// The exact-value comparison must ignore HTML element wrappers around the
// candidate's stored field value when matching against the plain source text.
test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '貴様' },
},
};
const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
findNotes: async () => [200],
notesInfo: async () => [
{
noteId: 200,
// Same text, but wrapped in a span carrying an attribute.
fields: {
Expression: { value: '<span data-x="1">貴様</span>' },
},
},
],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
assert.equal(duplicateId, 200);
});
// Regression guard: duplicate searches must not disable retries by passing
// maxRetries: 0 to findNotes.
test('findDuplicateNote does not disable retries on findNotes calls', async () => {
const currentNote: NoteInfo = {
noteId: 100,
fields: {
Expression: { value: '貴様' },
},
};
// Record the options bag (second argument) from every findNotes invocation.
const seenOptions: Array<{ maxRetries?: number } | undefined> = [];
await findDuplicateNote('貴様', 100, currentNote, {
findNotes: async (_query, options) => {
seenOptions.push(options);
return [];
},
notesInfo: async () => [],
getDeck: () => 'Japanese::Mining',
resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
logWarn: () => {},
});
// At least one search ran, and none of the calls set maxRetries to 0.
assert.ok(seenOptions.length > 0);
assert.ok(seenOptions.every((options) => options?.maxRetries !== 0));
});

View File

@@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps {
notesInfo: (noteIds: number[]) => Promise<unknown>;
getDeck: () => string | null | undefined;
resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null;
logInfo?: (message: string) => void;
logDebug?: (message: string) => void;
logWarn: (message: string, error: unknown) => void;
}
@@ -21,25 +23,68 @@ export async function findDuplicateNote(
noteInfo: NoteInfo,
deps: DuplicateDetectionDeps,
): Promise<number | null> {
let fieldName = '';
for (const name of Object.keys(noteInfo.fields)) {
if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) {
fieldName = name;
break;
}
}
if (!fieldName) return null;
const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression);
if (sourceCandidates.length === 0) return null;
deps.logInfo?.(
`[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates
.map((entry) => `${entry.fieldName}:${entry.value}`)
.join('|')}`,
);
const escapedFieldName = escapeAnkiSearchValue(fieldName);
const escapedExpression = escapeAnkiSearchValue(expression);
const deckPrefix = deps.getDeck() ? `"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : '';
const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`;
const deckValue = deps.getDeck();
const queryPrefixes = deckValue
? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, '']
: [''];
try {
const noteIds = (await deps.findNotes(query, {
maxRetries: 0,
})) as number[];
return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps);
const noteIds = new Set<number>();
const executedQueries = new Set<string>();
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName);
for (const queryFieldName of queryFieldNames) {
const escapedFieldName = escapeAnkiSearchValue(queryFieldName);
const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
}
if (noteIds.size > 0) break;
}
if (noteIds.size === 0) {
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const query = `${queryPrefix}"${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
if (noteIds.size > 0) break;
}
}
return await findFirstExactDuplicateNoteId(
noteIds,
excludeNoteId,
sourceCandidates.map((candidate) => candidate.value),
deps,
);
} catch (error) {
deps.logWarn('Duplicate search failed:', error);
return null;
@@ -47,18 +92,25 @@ export async function findDuplicateNote(
}
function findFirstExactDuplicateNoteId(
candidateNoteIds: number[],
candidateNoteIds: Iterable<number>,
excludeNoteId: number,
fieldName: string,
expression: string,
sourceValues: string[],
deps: DuplicateDetectionDeps,
): Promise<number | null> {
const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId);
const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId);
deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`);
if (candidates.length === 0) {
deps.logInfo?.('[duplicate] no candidates after query + exclude');
return Promise.resolve(null);
}
const normalizedValues = new Set(
sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0),
);
if (normalizedValues.size === 0) {
return Promise.resolve(null);
}
const normalizedExpression = normalizeDuplicateValue(expression);
const chunkSize = 50;
return (async () => {
for (let i = 0; i < candidates.length; i += chunkSize) {
@@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId(
const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[];
const notesInfo = notesInfoResult as NoteInfo[];
for (const noteInfo of notesInfo) {
const resolvedField = deps.resolveFieldName(noteInfo, fieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizeDuplicateValue(candidateValue) === normalizedExpression) {
return noteInfo.noteId;
const candidateFieldNames = ['word', 'expression'];
for (const candidateFieldName of candidateFieldNames) {
const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) {
deps.logDebug?.(
`[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`,
);
deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`);
return noteInfo.noteId;
}
}
}
}
deps.logInfo?.('[duplicate] no exact match in candidate notes');
return null;
})();
}
// Expands a source field name into the list of field names to query, pairing
// "word" with "expression" (and vice versa) as aliases of each other.
function getDuplicateCandidateFieldNames(fieldName: string): string[] {
  switch (fieldName.toLowerCase()) {
    case 'word':
      return [fieldName, 'expression'];
    case 'expression':
      return [fieldName, 'word'];
    default:
      return [fieldName];
  }
}
// Collects the word/expression field values of the note as duplicate-search
// source terms, de-duplicated by (field kind, normalized value); the caller's
// expression is appended as an extra "expression" candidate when not already
// covered.
function getDuplicateSourceCandidates(
  noteInfo: NoteInfo,
  fallbackExpression: string,
): Array<{ fieldName: string; value: string }> {
  const seen = new Set<string>();
  const result: Array<{ fieldName: string; value: string }> = [];
  for (const fieldName of Object.keys(noteInfo.fields)) {
    const kind = fieldName.toLowerCase();
    if (kind !== 'word' && kind !== 'expression') continue;
    const value = noteInfo.fields[fieldName]?.value?.trim() ?? '';
    if (value === '') continue;
    const key = `${kind}:${normalizeDuplicateValue(value)}`;
    if (seen.has(key)) continue;
    seen.add(key);
    result.push({ fieldName, value });
  }
  const fallback = fallbackExpression.trim();
  if (fallback.length > 0 && !seen.has(`expression:${normalizeDuplicateValue(fallback)}`)) {
    result.push({ fieldName: 'expression', value: fallback });
  }
  return result;
}
// Normalizes a field value for exact duplicate comparison: strips HTML tags,
// removes bracketed furigana readings (e.g. "漢字[かんじ]" -> "漢字"), then
// collapses runs of whitespace and trims.
//
// NOTE: the previous body returned the whitespace-only normalization before the
// tag/furigana stripping chain, leaving that chain unreachable; the tests that
// cover HTML wrappers and furigana markup require the full normalization.
function normalizeDuplicateValue(value: string): string {
  return value
    .replace(/<[^>]*>/g, '')
    .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1')
    .replace(/\s+/g, ' ')
    .trim();
}
function escapeAnkiSearchValue(value: string): string {

View File

@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
true,
);
});
test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
  const captured: string[] = [];
  // Build a term bank with three entries for the same term; the first rank wins.
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
  const entries = [
    ['猫', 1, { frequency: { displayValue: 100 } }],
    ['猫', 2, { frequency: { displayValue: 120 } }],
    ['猫', 3, { frequency: { displayValue: 110 } }],
  ];
  fs.writeFileSync(path.join(workDir, 'term_meta_bank_1.json'), JSON.stringify(entries));
  const lookup = await createFrequencyDictionaryLookup({
    searchPaths: [workDir],
    log: (message) => {
      captured.push(message);
    },
  });
  assert.equal(lookup('猫'), 100);
  // The two ignored duplicates are reported once, as an aggregate summary...
  const summaries = captured.filter((entry) =>
    entry.includes('Frequency dictionary ignored 2 duplicate term entries'),
  );
  assert.equal(summaries.length, 1);
  // ...and no per-term duplicate log lines are emitted.
  assert.equal(
    captured.some((entry) => entry.includes('Frequency dictionary duplicate term')),
    false,
  );
});

View File

@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
function addEntriesToMap(
rawEntries: unknown,
terms: Map<string, number>,
log: (message: string) => void,
): void {
): { duplicateCount: number } {
if (!Array.isArray(rawEntries)) {
return;
return { duplicateCount: 0 };
}
let duplicateCount = 0;
for (const rawEntry of rawEntries) {
const entry = asFrequencyDictionaryEntry(rawEntry);
if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
continue;
}
log(
`Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
);
duplicateCount += 1;
}
return { duplicateCount };
}
function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
}
const beforeSize = terms.size;
addEntriesToMap(rawEntries, terms, log);
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
if (duplicateCount > 0) {
log(
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
duplicateCount === 1 ? 'y' : 'ies'
} in ${bankPath} (kept strongest rank per term).`,
);
}
if (terms.size === beforeSize) {
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
}

View File

@@ -1,3 +1,7 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
// Severity levels accepted by the logger.
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
// Where the active log level was specified: CLI flag or config file.
export type LogLevelSource = 'cli' | 'config';
@@ -107,6 +111,25 @@ function safeStringify(value: unknown): string {
}
}
// Resolves the log-file destination. An explicit SUBMINER_MPV_LOG env override
// wins (empty/whitespace-only values are ignored); otherwise a daily file under
// ~/.config/SubMiner/logs is used.
function resolveLogFilePath(): string {
  const override = process.env.SUBMINER_MPV_LOG?.trim();
  if (override) {
    return override;
  }
  const stamp = new Date().toISOString().slice(0, 10);
  return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${stamp}.log`);
}
// Best-effort file sink: creates the log directory on demand and appends one
// line. All filesystem errors are swallowed so logging can never crash the app.
function appendToLogFile(line: string): void {
  try {
    const target = resolveLogFilePath();
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.appendFileSync(target, `${line}\n`, { encoding: 'utf8' });
  } catch {
    // never break runtime due to logging sink failures
  }
}
function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void {
const minLevel = resolveMinLevel();
if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) {
@@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(prefix);
}
appendToLogFile(prefix);
return;
}
@@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(finalMessage);
}
appendToLogFile(finalMessage);
}
export function createLogger(scope: string): Logger {

View File

@@ -487,7 +487,13 @@ if (process.platform === 'linux') {
app.setName('SubMiner');
const DEFAULT_TEXTHOOKER_PORT = 5174;
// Daily log file under ~/.config/SubMiner/logs. The diff rendering left both the
// old single-line declaration and this one; a file may declare the const only
// once, so only the dated variant is kept.
const DEFAULT_MPV_LOG_FILE = path.join(
  os.homedir(),
  '.config',
  'SubMiner',
  'logs',
  `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
);
const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize';
const ANILIST_SETUP_RESPONSE_TYPE = 'token';
const ANILIST_DEFAULT_CLIENT_ID = '36084';