chore: commit unstaged workspace changes

This commit is contained in:
2026-02-21 02:32:00 -08:00
parent 1c424b4a0b
commit ab1d5f19fd
16 changed files with 780 additions and 37 deletions

View File

@@ -0,0 +1,52 @@
---
id: TASK-94
title: Fix Kiku duplicate detection for Yomitan-marked duplicates
status: Done
assignee:
- codex-duplicate-kiku-20260221T043006Z-5vkz
created_date: '2026-02-21 04:33'
updated_date: '2026-02-21 01:40'
labels:
- bug
- anki
- kiku
dependencies: []
priority: high
ordinal: 65000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Kiku field grouping no longer detects duplicate cards in scenarios where the mined card is clearly marked duplicate by Yomitan/N+1 workflow. Restore duplicate detection so duplicate note lookup succeeds for equivalent expression/word cards and Kiku grouping can run.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Repro case covered by automated regression test in duplicate-detection path.
- [x] #2 Kiku duplicate detection returns duplicate note id for the repro case.
- [x] #3 Targeted tests for duplicate detection pass.
<!-- AC:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
Added regression test `src/anki-integration/duplicate.test.ts` for a cross-field duplicate case where current note uses `Expression` and candidate uses `Word` with same value.
Updated duplicate matching in `src/anki-integration/duplicate.ts` to try alternate field-name aliases (`word` <-> `expression`) when resolving candidate note fields for exact-value verification.
Follow-up fix: duplicate search query now also probes alias fields (`word` <-> `expression`) and merges candidate note ids before exact verification, so duplicates are still found when only the alias field is indexed/populated on existing cards.
Second follow-up fix: duplicate detection now evaluates both source values when current note contains both `Expression` and `Word` (previously only one was used, depending on field-order). Query and exact verification now run against all source duplicate candidates.
Third follow-up fix: if deck-scoped duplicate queries return no results, detection now retries the same source/alias query set collection-wide (no deck filter) before exact verification. This aligns with cases where Yomitan shows duplicates outside the configured mining deck.
Fourth follow-up fix: if field-specific queries miss entirely, detection now falls back to phrase/plain-text queries (deck-scoped then collection-wide) and still requires exact `Expression/Word` value verification before selecting a duplicate note.
Fifth follow-up: added explicit duplicate-search debug logs (query strings, hit counts, candidate counts, exact-match note id) to improve runtime diagnosis in live launcher runs.
Verification:
- `bun run build`
- `node dist/anki-integration/duplicate.test.js`
- `node --test dist/anki-integration.test.js`
<!-- SECTION:NOTES:END -->

View File

@@ -0,0 +1,52 @@
---
id: TASK-96
title: Decouple secondary subtitle lifecycle from visible/invisible overlays
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
- subtitles
- overlay
- architecture
dependencies: []
priority: high
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Secondary subtitle behavior should not depend on visible/invisible overlay state transitions. Introduce an independent lifecycle so secondary subtitle rendering, visibility mode (`always`/`hover`/`never`), and positioning stay stable even when primary overlays are toggled or rebound.
<!-- SECTION:DESCRIPTION:END -->
## Suggestions
<!-- SECTION:SUGGESTIONS:BEGIN -->
- Isolate secondary subtitle state management from primary overlay window orchestration.
- Route secondary subtitle updates through a dedicated service/controller boundary.
- Keep MPV secondary subtitle property handling independent from overlay visibility toggles.
<!-- SECTION:SUGGESTIONS:END -->
## Action Steps
<!-- SECTION:PLAN:BEGIN -->
1. Inventory existing coupling points between secondary subtitle updates and overlay visibility/bounds services.
2. Introduce explicit secondary subtitle lifecycle state and transitions.
3. Refactor event wiring so visible/invisible overlay toggles do not mutate secondary subtitle state.
4. Validate display modes (`always`/`hover`/`never`) continue to work with independent lifecycle.
5. Add regression tests for overlay toggles, reconnect/restart, and mode-switch behavior.
<!-- SECTION:PLAN:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Toggling visible or invisible overlays does not alter secondary subtitle lifecycle state.
- [ ] #2 Secondary subtitle display mode behavior remains correct across overlay state transitions.
- [ ] #3 Secondary subtitle behavior survives MPV reconnect/restart without overlay-coupling regressions.
- [ ] #4 Automated tests cover decoupled lifecycle behavior and prevent re-coupling.
<!-- AC:END -->
## Definition of Done
<!-- DOD:BEGIN -->
- [ ] #1 Relevant unit/integration tests pass
- [ ] #2 Documentation/comments updated where lifecycle ownership changed
<!-- DOD:END -->

View File

@@ -0,0 +1,52 @@
---
id: TASK-97
title: Add intro skip playback control
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
- playback
- ux
dependencies: []
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Add an intro skip control so users can jump past opening sequences quickly during playback. Start with a reliable manual control (shortcut/action) and clear user feedback after seek.
<!-- SECTION:DESCRIPTION:END -->
## Suggestions
<!-- SECTION:SUGGESTIONS:BEGIN -->
- Add a configurable skip duration (for example 60/75/90 seconds).
- Expose skip intro via keybinding and optional UI action in overlay/help.
- Show transient confirmation (OSD/overlay message) after skip action.
<!-- SECTION:SUGGESTIONS:END -->
## Action Steps
<!-- SECTION:PLAN:BEGIN -->
1. Define config and keybinding surface for intro skip duration and trigger.
2. Implement intro skip command that performs bounded seek in active playback session.
3. Wire command to user trigger path (keyboard + optional on-screen action if present).
4. Emit user feedback after successful skip (current time + skipped duration).
5. Add tests for command dispatch, seek bounds, and config fallback behavior.
<!-- SECTION:PLAN:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 User can trigger intro skip during playback with configured shortcut/action.
- [ ] #2 Skip performs bounded seek and never seeks before start or beyond stream duration.
- [ ] #3 Skip duration is configurable with sane default.
- [ ] #4 User receives visible confirmation after skip.
- [ ] #5 Automated tests cover config + seek behavior.
<!-- AC:END -->
## Definition of Done
<!-- DOD:BEGIN -->
- [ ] #1 Playback control tests pass
- [ ] #2 User-facing config/docs updated for intro skip control
<!-- DOD:END -->

View File

@@ -27,6 +27,6 @@ Read first. Keep concise.
| `codex-review-refactor-cleanup-20260220T113818Z-i2ov` | `codex-review-refactor-cleanup` | `Review recent TASK-85 refactor effort and identify remaining cleanup work` | `handoff` | `docs/subagents/agents/codex-review-refactor-cleanup-20260220T113818Z-i2ov.md` | `2026-02-20T11:48:28Z` |
| `codex-commit-unstaged-20260220T115057Z-k7q2` | `codex-commit-unstaged` | `Commit all current unstaged repository changes with content-derived conventional message` | `in_progress` | `docs/subagents/agents/codex-commit-unstaged-20260220T115057Z-k7q2.md` | `2026-02-20T11:51:18Z` |
| `codex-overlay-whitespace-newline-20260221T040705Z-aw2j` | `codex-overlay-whitespace-newline` | `Fix visible overlay whitespace/newline token rendering bug with TDD regression coverage` | `completed` | `docs/subagents/agents/codex-overlay-whitespace-newline-20260221T040705Z-aw2j.md` | `2026-02-21T04:18:16Z` |
| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T04:38:25Z` |
| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T10:07:58Z` |
| `codex-mpv-connect-log-20260221T043748Z-q7m1` | `codex-mpv-connect-log` | `Suppress repetitive MPV IPC connect-request INFO logs during startup` | `completed` | `docs/subagents/agents/codex-mpv-connect-log-20260221T043748Z-q7m1.md` | `2026-02-21T04:41:15Z` |
| `codex-add-backlog-tasks-20260221T044104Z-m3n8` | `codex-add-backlog-tasks` | `Add two unrelated backlog tasks: secondary subtitle decoupling and intro skip` | `done` | `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` | `2026-02-21T04:44:12Z` |

View File

@@ -0,0 +1,29 @@
# Agent: `codex-add-backlog-tasks-20260221T044104Z-m3n8`
- alias: `codex-add-backlog-tasks`
- mission: `Add two unrelated backlog tasks requested by user`
- status: `done`
- branch: `main`
- started_at: `2026-02-21T04:41:04Z`
- heartbeat_minutes: `5`
## Current Work (newest first)
- [2026-02-21T04:44:12Z] handoff: added `TASK-96` + `TASK-97` in `backlog/tasks`; updated index row to `done`.
- [2026-02-21T04:43:00Z] progress: drafting `TASK-96` (secondary subtitle decoupling) and `TASK-97` (intro skip) under `backlog/tasks`.
- [2026-02-21T04:42:10Z] intent: add two unrelated backlog tasks only; no code behavior changes.
## Files Touched
- `docs/subagents/INDEX.md`
- `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md`
- `backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md`
- `backlog/tasks/task-97 - Add-intro-skip-playback-control.md`
## Assumptions
- User request means creating backlog tickets, not implementing either feature now.
- Existing backlog format in `backlog/tasks` remains canonical.
## Open Questions / Blockers
- None.
## Next Step
- Wait for user follow-up (prioritize one of the two new tasks for implementation).

View File

@@ -0,0 +1,74 @@
# codex-duplicate-kiku-20260221T043006Z-5vkz
- alias: `codex-duplicate-kiku`
- mission: `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards`
- status: `completed`
- start_utc: `2026-02-21T04:30:06Z`
- last_update_utc: `2026-02-21T10:07:58Z`
## Intent
- Reproduce bug where clear duplicate cards no longer trigger Kiku duplicate grouping.
- Add failing regression test first (TDD).
- Patch duplicate detection logic with minimal behavior change.
## Planned Files
- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts` (or nearest duplicate-detection tests)
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
## Assumptions
- Duplicate signal should still come from Anki duplicate search + Yomitan/N+1-derived fields used in note content.
- Regression likely from term/readings normalization/query escaping mismatch.
## Outcome
- Root cause: candidate-note exact-check only resolved the originating field name (`Expression` or `Word`), so duplicates failed when candidate note used the opposite alias.
- Added regression test first (RED): `Expression` current note vs `Word` candidate with same value returned `null`.
- Implemented minimal fix: candidate resolution now checks both aliases (`word` and `expression`) before exact-value compare.
- GREEN: targeted duplicate test passed; related `anki-integration` test passed.
- User follow-up repro showed remaining miss when duplicate appears only in alias field search results.
- Added second RED test for alias-query fallback.
- Implemented query-stage alias fallback: run `findNotes` for both alias fields, merge note ids, then exact-verify.
- GREEN after follow-up: duplicate tests + `anki-integration` test pass.
- User reported still failing after first follow-up.
- Added third RED regression: source note containing both `Expression` (sentence) and `Word` (term) only matched duplicates via `Word`; previous logic missed this by using only one source value.
- Implemented source-candidate expansion: gather both `Word` and `Expression` source values, query aliases for each, dedupe queries, then exact-match against normalized set.
- GREEN: duplicate tests (3/3) + `anki-integration` test pass.
- Image-backed repro indicated possible duplicate outside configured deck scope.
- Added fourth RED regression: deck-scoped query misses, collection-wide query should still detect duplicate.
- Implemented deck fallback query pass (same source/alias combinations without deck filter) when deck-scoped pass yields no candidates.
- GREEN: duplicate tests (4/4) + `anki-integration` test pass.
- User confirmed fresh build/install still failed with `貴様` repro.
- Added fifth RED regression: field-specific queries return no matches but plain text query returns candidate.
- Implemented plain-text query fallback pass (deck-scoped then global), still gated by exact `word`/`expression` value verify.
- GREEN: duplicate tests (5/5) + `anki-integration` test pass.
- Added runtime debug instrumentation for duplicate detection query/verification path:
- query string + hit count
- candidate count after exclude
- exact-match note id + field
- No behavior change from instrumentation; build + tests still green.
- User requested logging policy update: prefer console output unless explicitly captured, and persistent logs under `~/.config/SubMiner/logs/*.log`.
- Updated default launcher/app mpv log path to daily file naming: `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log`.
- Typecheck green.
- Found observability gap: app logger wrote only to stdout/stderr while launcher log file only captured wrapper messages.
- Added file sink to `src/logger.ts` so app logs also append to `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (or `SUBMINER_MPV_LOG` when set).
- Verified with typecheck + build.
## Files Touched
- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md`
## Handoff
- No blockers.
- Next step: run broader gate (`bun run test:fast`) when ready, then commit.

View File

@@ -0,0 +1,28 @@
# Agent Log: codex-frequency-dup-log-20260221T042815Z-r4k1
- alias: codex-frequency-dup-log
- mission: reduce frequency dictionary duplicate-term startup log spam; keep useful signal
- status: completed
- started_utc: 2026-02-21T04:28:15Z
- last_update_utc: 2026-02-21T04:32:40Z
- planned_files:
- src/core/services/frequency-dictionary.ts
- src/core/services/frequency-dictionary.test.ts
- docs/subagents/INDEX.md
- touched_files:
- src/core/services/frequency-dictionary.ts
- src/core/services/frequency-dictionary.test.ts
- docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
- docs/subagents/INDEX.md
- key_decisions:
- remove per-entry duplicate term logs
- keep one aggregate duplicate summary line per bank file at info level
- assumptions:
- duplicate entries are expected in source dictionary and should not produce per-entry info logs
- verification:
- `bun test src/core/services/frequency-dictionary.test.ts` (pass)
- full build currently blocked by unrelated Jellyfin WIP type errors on branch
- blockers:
- unrelated branch state prevents full `bun run build`
- next_step:
- optional follow-up: add true debug-level logging API if duplicate diagnostics are needed on demand

View File

@@ -25,5 +25,11 @@ Shared notes. Append-only.
- [2026-02-21T04:30:06Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] investigating Kiku duplicate grouping regression; expecting touches in `src/anki-integration/duplicate.ts` and duplicate-detection tests only.
- [2026-02-21T04:33:17Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] completed TASK-94: duplicate check now resolves `word`/`expression` alias fields when validating candidate notes; added regression test `src/anki-integration/duplicate.test.ts`; targeted build + duplicate/anki-integration tests passed.
- [2026-02-21T04:38:25Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] follow-up repro fixed: duplicate search now queries both alias fields (`word` + `expression`) and unions note ids before exact compare; added second regression test for alias-query fallback.
- [2026-02-21T04:48:50Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] second follow-up fix: when source note has both `Expression` and `Word`, duplicate detection now uses both source values (not just first field by order); added regression for mixed-field source candidate scenario.
- [2026-02-21T07:23:56Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] third follow-up fix: add collection-wide fallback query pass when deck-scoped duplicate search returns no candidates; added regression for deck-scope miss case.
- [2026-02-21T09:25:53Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] fourth follow-up fix: add plain-text query fallback when field-scoped queries miss; keep exact value verification on candidate notes to avoid false positives.
- [2026-02-21T09:40:33Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] instrumentation pass: add duplicate-detection debug logs (`[duplicate] query/hits/candidates/exact-match`) to isolate remaining live repro mismatches.
- [2026-02-21T09:54:29Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] logging-path update: default persistent logs now target `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (launcher + app mpv log default).
- [2026-02-21T10:07:58Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] observability fix: app logger now also appends to daily log file, so runtime duplicate traces are available even when overlay stdout is not surfaced in launcher terminal.
- [2026-02-21T04:37:48Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] overlap note: touching `src/core/services/mpv.ts` + mpv service tests for startup connection-request log level gating; coordinating with historical TASK-33 behavior (same symptom, new logger path).
- [2026-02-21T04:41:15Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] completed TASK-95: changed `MpvIpcClient.connect()` connect-request line to `logger.debug`, added regression tests for info/debug level log behavior in `src/core/services/mpv.test.ts`; verified via `bun run build && node dist/core/services/mpv.test.js` (pass).

View File

@@ -34,7 +34,13 @@ export const DEFAULT_YOUTUBE_SUBGEN_OUT_DIR = path.join(
'subminer',
'youtube-subs',
);
// Default persistent log file, rotated daily by name:
// ~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log
// NOTE(review): the date is evaluated once at module load, so a process that
// runs past midnight keeps appending to the file named for its start date —
// confirm that is acceptable for long-lived sessions.
export const DEFAULT_MPV_LOG_FILE = path.join(
  os.homedir(),
  '.config',
  'SubMiner',
  'logs',
  `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
);
export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best';
export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc';
export const DEFAULT_MPV_SUBMINER_ARGS = [

View File

@@ -970,6 +970,12 @@ export class AnkiIntegration {
notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown,
getDeck: () => this.config.deck,
resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName),
logInfo: (message) => {
log.info(message);
},
logDebug: (message) => {
log.debug(message);
},
logWarn: (message, error) => {
log.warn(message, (error as Error).message);
},

View File

@@ -0,0 +1,265 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { findDuplicateNote, type NoteInfo } from './duplicate';
// Resolve `preferredName` against the note's field names: an exact match wins,
// otherwise fall back to a case-insensitive match; null when absent.
function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null {
  const fieldNames = Object.keys(noteInfo.fields);
  if (fieldNames.includes(preferredName)) {
    return preferredName;
  }
  const wanted = preferredName.toLowerCase();
  for (const candidate of fieldNames) {
    if (candidate.toLowerCase() === wanted) {
      return candidate;
    }
  }
  return null;
}
test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => {
  // Source note stores the term under `Expression`; the existing candidate
  // stores the same value under `Word`. Detection must still match it.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '食べる' } },
  };
  const duplicateId = await findDuplicateNote('食べる', 100, sourceNote, {
    findNotes: async () => [100, 200],
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '食べる' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => {
  // The `Expression`-scoped query yields nothing; only the alias-field query
  // surfaces the candidate. Exactly two queries should be issued before the hit.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '食べる' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('食べる', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('"Expression:')) {
        return [];
      }
      const aliasHit =
        query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:');
      return aliasHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '食べる' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.equal(issuedQueries.length, 2);
});
test('findDuplicateNote checks both source expression/word values when both fields are present', async () => {
  // Source note carries a full sentence in `Expression` and the bare term in
  // `Word`; only the term-based queries can find the duplicate.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '昨日は雨だった。' },
      Word: { value: '雨' },
    },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('昨日は雨だった。')) {
        return [];
      }
      const termHit =
        query.includes('"Word:雨"') ||
        query.includes('"word:雨"') ||
        query.includes('"Expression:雨"');
      return termHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '雨' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('昨日は雨だった。')));
  assert.ok(issuedQueries.some((query) => query.includes('雨')));
});
test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => {
  // Every deck-scoped query misses; the same field query without the deck
  // filter must still locate the duplicate note.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('deck:Japanese')) {
        return [];
      }
      const fieldHit = query.includes('"Expression:貴様"') || query.includes('"Word:貴様"');
      return fieldHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('deck:Japanese')));
  assert.ok(issuedQueries.some((query) => !query.includes('deck:Japanese')));
});
test('findDuplicateNote falls back to plain text query when field queries miss', async () => {
  // All field-scoped queries return nothing; only the bare quoted-text query
  // hits, and exact-value verification still selects the candidate.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('Expression:') || query.includes('Word:')) {
        return [];
      }
      return query.includes('"貴様"') ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('Expression:')));
  assert.ok(issuedQueries.some((query) => query.endsWith('"貴様"')));
});
test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => {
  // Candidate stores `貴様[きさま]`; normalization must strip the bracketed
  // reading before the exact-value comparison.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async () => [200],
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様[きさま]' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => {
  // Candidate value is wrapped in HTML; normalization must strip the tags
  // before the exact-value comparison.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async () => [200],
    notesInfo: async () => [
      { noteId: 200, fields: { Expression: { value: '<span data-x="1">貴様</span>' } } },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote does not disable retries on findNotes calls', async () => {
  // Regression guard: the duplicate search must never pass `maxRetries: 0`
  // to findNotes, so transient AnkiConnect failures are retried.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const observedOptions: Array<{ maxRetries?: number } | undefined> = [];
  await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (_query, options) => {
      observedOptions.push(options);
      return [];
    },
    notesInfo: async () => [],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.ok(observedOptions.length > 0);
  assert.ok(observedOptions.every((options) => options?.maxRetries !== 0));
});

View File

@@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps {
notesInfo: (noteIds: number[]) => Promise<unknown>;
getDeck: () => string | null | undefined;
resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null;
logInfo?: (message: string) => void;
logDebug?: (message: string) => void;
logWarn: (message: string, error: unknown) => void;
}
@@ -21,25 +23,68 @@ export async function findDuplicateNote(
noteInfo: NoteInfo,
deps: DuplicateDetectionDeps,
): Promise<number | null> {
let fieldName = '';
for (const name of Object.keys(noteInfo.fields)) {
if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) {
fieldName = name;
break;
}
}
if (!fieldName) return null;
const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression);
if (sourceCandidates.length === 0) return null;
deps.logInfo?.(
`[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates
.map((entry) => `${entry.fieldName}:${entry.value}`)
.join('|')}`,
);
const escapedFieldName = escapeAnkiSearchValue(fieldName);
const escapedExpression = escapeAnkiSearchValue(expression);
const deckPrefix = deps.getDeck() ? `"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : '';
const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`;
const deckValue = deps.getDeck();
const queryPrefixes = deckValue
? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, '']
: [''];
try {
const noteIds = (await deps.findNotes(query, {
maxRetries: 0,
})) as number[];
return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps);
const noteIds = new Set<number>();
const executedQueries = new Set<string>();
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName);
for (const queryFieldName of queryFieldNames) {
const escapedFieldName = escapeAnkiSearchValue(queryFieldName);
const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
}
if (noteIds.size > 0) break;
}
if (noteIds.size === 0) {
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const query = `${queryPrefix}"${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
if (noteIds.size > 0) break;
}
}
return await findFirstExactDuplicateNoteId(
noteIds,
excludeNoteId,
sourceCandidates.map((candidate) => candidate.value),
deps,
);
} catch (error) {
deps.logWarn('Duplicate search failed:', error);
return null;
@@ -47,18 +92,25 @@ export async function findDuplicateNote(
}
function findFirstExactDuplicateNoteId(
candidateNoteIds: number[],
candidateNoteIds: Iterable<number>,
excludeNoteId: number,
fieldName: string,
expression: string,
sourceValues: string[],
deps: DuplicateDetectionDeps,
): Promise<number | null> {
const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId);
const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId);
deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`);
if (candidates.length === 0) {
deps.logInfo?.('[duplicate] no candidates after query + exclude');
return Promise.resolve(null);
}
const normalizedValues = new Set(
sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0),
);
if (normalizedValues.size === 0) {
return Promise.resolve(null);
}
const normalizedExpression = normalizeDuplicateValue(expression);
const chunkSize = 50;
return (async () => {
for (let i = 0; i < candidates.length; i += chunkSize) {
@@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId(
const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[];
const notesInfo = notesInfoResult as NoteInfo[];
for (const noteInfo of notesInfo) {
const resolvedField = deps.resolveFieldName(noteInfo, fieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizeDuplicateValue(candidateValue) === normalizedExpression) {
return noteInfo.noteId;
const candidateFieldNames = ['word', 'expression'];
for (const candidateFieldName of candidateFieldNames) {
const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) {
deps.logDebug?.(
`[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`,
);
deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`);
return noteInfo.noteId;
}
}
}
}
deps.logInfo?.('[duplicate] no exact match in candidate notes');
return null;
})();
}
// Pair a word/expression field with its alias so candidate lookups probe both
// spellings; any other field name passes through unchanged as a single entry.
function getDuplicateCandidateFieldNames(fieldName: string): string[] {
  switch (fieldName.toLowerCase()) {
    case 'word':
      return [fieldName, 'expression'];
    case 'expression':
      return [fieldName, 'word'];
    default:
      return [fieldName];
  }
}
function getDuplicateSourceCandidates(
noteInfo: NoteInfo,
fallbackExpression: string,
): Array<{ fieldName: string; value: string }> {
const candidates: Array<{ fieldName: string; value: string }> = [];
const dedupeKey = new Set<string>();
for (const fieldName of Object.keys(noteInfo.fields)) {
const lower = fieldName.toLowerCase();
if (lower !== 'word' && lower !== 'expression') continue;
const value = noteInfo.fields[fieldName]?.value?.trim() ?? '';
if (!value) continue;
const key = `${lower}:${normalizeDuplicateValue(value)}`;
if (dedupeKey.has(key)) continue;
dedupeKey.add(key);
candidates.push({ fieldName, value });
}
const trimmedFallback = fallbackExpression.trim();
if (trimmedFallback.length > 0) {
const fallbackKey = `expression:${normalizeDuplicateValue(trimmedFallback)}`;
if (!dedupeKey.has(fallbackKey)) {
candidates.push({ fieldName: 'expression', value: trimmedFallback });
}
}
return candidates;
}
/**
 * Normalizes a field value for exact-duplicate comparison.
 *
 * Strips HTML tags, removes furigana reading annotations of the form
 * `漢字[かんじ]` (keeping the base text), collapses runs of whitespace to a
 * single space, and trims the result.
 *
 * Bug fix: a stale pre-refactor `return value.replace(/\s+/g, ' ').trim();`
 * line (leftover from a merge) preceded the new body, making the HTML-tag
 * and furigana stripping unreachable — e.g. `<b>猫</b>` never matched `猫`.
 * The stale statement is removed so the full normalization runs.
 *
 * @param value Raw field value as stored on the Anki note.
 * @returns Canonical form used for duplicate matching.
 */
function normalizeDuplicateValue(value: string): string {
  return value
    .replace(/<[^>]*>/g, '')
    .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1')
    .replace(/\s+/g, ' ')
    .trim();
}
function escapeAnkiSearchValue(value: string): string {

View File

@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
true,
);
});
// Regression test: repeated terms within one frequency bank must produce a
// single aggregated summary log line instead of one log line per duplicate
// entry, and lookups must still resolve to the surviving rank.
test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
  const logs: string[] = [];
  // Build a throwaway dictionary directory holding one term bank whose
  // entries repeat the same term with three different ranks.
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
  const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
  fs.writeFileSync(
    bankPath,
    JSON.stringify([
      ['猫', 1, { frequency: { displayValue: 100 } }],
      ['猫', 2, { frequency: { displayValue: 120 } }],
      ['猫', 3, { frequency: { displayValue: 110 } }],
    ]),
  );
  // Capture every log line the lookup factory emits while loading the bank.
  const lookup = await createFrequencyDictionaryLookup({
    searchPaths: [tempDir],
    log: (message) => {
      logs.push(message);
    },
  });
  // The surviving entry resolves to 100 after deduplication.
  assert.equal(lookup('猫'), 100);
  // Exactly one aggregated summary line mentioning both skipped duplicates…
  assert.equal(
    logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
    1,
  );
  // …and no per-duplicate log lines from the old noisy code path.
  assert.equal(
    logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
    false,
  );
});

View File

@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
function addEntriesToMap(
rawEntries: unknown,
terms: Map<string, number>,
log: (message: string) => void,
): void {
): { duplicateCount: number } {
if (!Array.isArray(rawEntries)) {
return;
return { duplicateCount: 0 };
}
let duplicateCount = 0;
for (const rawEntry of rawEntries) {
const entry = asFrequencyDictionaryEntry(rawEntry);
if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
continue;
}
log(
`Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
);
duplicateCount += 1;
}
return { duplicateCount };
}
function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
}
const beforeSize = terms.size;
addEntriesToMap(rawEntries, terms, log);
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
if (duplicateCount > 0) {
log(
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
duplicateCount === 1 ? 'y' : 'ies'
} in ${bankPath} (kept strongest rank per term).`,
);
}
if (terms.size === beforeSize) {
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
}

View File

@@ -1,3 +1,7 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
export type LogLevelSource = 'cli' | 'config';
@@ -107,6 +111,25 @@ function safeStringify(value: unknown): string {
}
}
/**
 * Determines where file-sink log lines are written.
 *
 * A non-blank SUBMINER_MPV_LOG environment variable takes precedence;
 * otherwise a per-day file under ~/.config/SubMiner/logs is used, named
 * after the current UTC calendar date.
 *
 * @returns Absolute (or env-provided) path of the log file.
 */
function resolveLogFilePath(): string {
  const override = (process.env.SUBMINER_MPV_LOG ?? '').trim();
  if (override.length > 0) {
    return override;
  }
  const day = new Date().toISOString().slice(0, 10);
  return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${day}.log`);
}
/**
 * Best-effort append of one line to the file log sink.
 *
 * Creates the log file's parent directory on demand, then appends the line
 * with a trailing newline (UTF-8). Every failure is swallowed deliberately:
 * the logging sink must never take down the runtime.
 *
 * @param line Pre-formatted log line (without trailing newline).
 */
function appendToLogFile(line: string): void {
  try {
    const target = resolveLogFilePath();
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.appendFileSync(target, line + '\n', { encoding: 'utf8' });
  } catch {
    // Logging must stay non-fatal; ignore sink failures.
  }
}
function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void {
const minLevel = resolveMinLevel();
if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) {
@@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(prefix);
}
appendToLogFile(prefix);
return;
}
@@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(finalMessage);
}
appendToLogFile(finalMessage);
}
export function createLogger(scope: string): Logger {

View File

@@ -487,7 +487,13 @@ if (process.platform === 'linux') {
app.setName('SubMiner');
const DEFAULT_TEXTHOOKER_PORT = 5174;
const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
const DEFAULT_MPV_LOG_FILE = path.join(
os.homedir(),
'.config',
'SubMiner',
'logs',
`SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
);
const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize';
const ANILIST_SETUP_RESPONSE_TYPE = 'token';
const ANILIST_DEFAULT_CLIENT_ID = '36084';