From ab1d5f19fd3ab978fd74b7a3a1259e18e300e961 Mon Sep 17 00:00:00 2001 From: sudacode Date: Sat, 21 Feb 2026 02:32:00 -0800 Subject: [PATCH] chore: commit unstaged workspace changes --- ...detection-for-Yomitan-marked-duplicates.md | 52 ++++ ...fecycle-from-visible-invisible-overlays.md | 52 ++++ ...sk-97 - Add-intro-skip-playback-control.md | 52 ++++ docs/subagents/INDEX.md | 2 +- ...add-backlog-tasks-20260221T044104Z-m3n8.md | 29 ++ ...ex-duplicate-kiku-20260221T043006Z-5vkz.md | 74 +++++ ...frequency-dup-log-20260221T042815Z-r4k1.md | 28 ++ docs/subagents/collaboration.md | 6 + launcher/types.ts | 8 +- src/anki-integration.ts | 6 + src/anki-integration/duplicate.test.ts | 265 ++++++++++++++++++ src/anki-integration/duplicate.ts | 158 +++++++++-- .../services/frequency-dictionary.test.ts | 31 ++ src/core/services/frequency-dictionary.ts | 21 +- src/logger.ts | 25 ++ src/main.ts | 8 +- 16 files changed, 780 insertions(+), 37 deletions(-) create mode 100644 backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md create mode 100644 backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md create mode 100644 backlog/tasks/task-97 - Add-intro-skip-playback-control.md create mode 100644 docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md create mode 100644 docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md create mode 100644 docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md create mode 100644 src/anki-integration/duplicate.test.ts diff --git a/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md b/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md new file mode 100644 index 0000000..8b1e09c --- /dev/null +++ b/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md @@ -0,0 +1,52 @@ +--- +id: TASK-94 +title: Fix Kiku duplicate detection for 
Yomitan-marked duplicates +status: Done +assignee: + - codex-duplicate-kiku-20260221T043006Z-5vkz +created_date: '2026-02-21 04:33' +updated_date: '2026-02-21 01:40' +labels: + - bug + - anki + - kiku +dependencies: [] +priority: high +ordinal: 65000 +--- + +## Description + + +Kiku field grouping no longer detects duplicate cards in scenarios where the mined card is clearly marked duplicate by Yomitan/N+1 workflow. Restore duplicate detection so duplicate note lookup succeeds for equivalent expression/word cards and Kiku grouping can run. + + +## Acceptance Criteria + +- [x] #1 Repro case covered by automated regression test in duplicate-detection path. +- [x] #2 Kiku duplicate detection returns duplicate note id for the repro case. +- [x] #3 Targeted tests for duplicate detection pass. + + +## Implementation Notes + + +Added regression test `src/anki-integration/duplicate.test.ts` for a cross-field duplicate case where current note uses `Expression` and candidate uses `Word` with same value. + +Updated duplicate matching in `src/anki-integration/duplicate.ts` to try alternate field-name aliases (`word` <-> `expression`) when resolving candidate note fields for exact-value verification. + +Follow-up fix: duplicate search query now also probes alias fields (`word` <-> `expression`) and merges candidate note ids before exact verification, so duplicates are still found when only the alias field is indexed/populated on existing cards. + +Second follow-up fix: duplicate detection now evaluates both source values when current note contains both `Expression` and `Word` (previously only one was used, depending on field-order). Query and exact verification now run against all source duplicate candidates. + +Third follow-up fix: if deck-scoped duplicate queries return no results, detection now retries the same source/alias query set collection-wide (no deck filter) before exact verification. 
This aligns with cases where Yomitan shows duplicates outside the configured mining deck. + +Fourth follow-up fix: if field-specific queries miss entirely, detection now falls back to phrase/plain-text queries (deck-scoped then collection-wide) and still requires exact `Expression/Word` value verification before selecting a duplicate note. + +Fifth follow-up: added explicit duplicate-search debug logs (query strings, hit counts, candidate counts, exact-match note id) to improve runtime diagnosis in live launcher runs. + +Verification: +- `bun run build` +- `node dist/anki-integration/duplicate.test.js` +- `node --test dist/anki-integration.test.js` + diff --git a/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md b/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md new file mode 100644 index 0000000..0469ee0 --- /dev/null +++ b/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md @@ -0,0 +1,52 @@ +--- +id: TASK-96 +title: Decouple secondary subtitle lifecycle from visible/invisible overlays +status: To Do +assignee: [] +created_date: '2026-02-21 04:41' +updated_date: '2026-02-21 04:41' +labels: + - subtitles + - overlay + - architecture +dependencies: [] +priority: high +--- + +## Description + + +Secondary subtitle behavior should not depend on visible/invisible overlay state transitions. Introduce an independent lifecycle so secondary subtitle rendering, visibility mode (`always`/`hover`/`never`), and positioning stay stable even when primary overlays are toggled or rebound. + + +## Suggestions + + +- Isolate secondary subtitle state management from primary overlay window orchestration. +- Route secondary subtitle updates through a dedicated service/controller boundary. +- Keep MPV secondary subtitle property handling independent from overlay visibility toggles. + + +## Action Steps + + +1. 
Inventory existing coupling points between secondary subtitle updates and overlay visibility/bounds services. +2. Introduce explicit secondary subtitle lifecycle state and transitions. +3. Refactor event wiring so visible/invisible overlay toggles do not mutate secondary subtitle state. +4. Validate display modes (`always`/`hover`/`never`) continue to work with independent lifecycle. +5. Add regression tests for overlay toggles, reconnect/restart, and mode-switch behavior. + + +## Acceptance Criteria + +- [ ] #1 Toggling visible or invisible overlays does not alter secondary subtitle lifecycle state. +- [ ] #2 Secondary subtitle display mode behavior remains correct across overlay state transitions. +- [ ] #3 Secondary subtitle behavior survives MPV reconnect/restart without overlay-coupling regressions. +- [ ] #4 Automated tests cover decoupled lifecycle behavior and prevent re-coupling. + + +## Definition of Done + +- [ ] #1 Relevant unit/integration tests pass +- [ ] #2 Documentation/comments updated where lifecycle ownership changed + diff --git a/backlog/tasks/task-97 - Add-intro-skip-playback-control.md b/backlog/tasks/task-97 - Add-intro-skip-playback-control.md new file mode 100644 index 0000000..bbfcb7b --- /dev/null +++ b/backlog/tasks/task-97 - Add-intro-skip-playback-control.md @@ -0,0 +1,52 @@ +--- +id: TASK-97 +title: Add intro skip playback control +status: To Do +assignee: [] +created_date: '2026-02-21 04:41' +updated_date: '2026-02-21 04:41' +labels: + - playback + - ux +dependencies: [] +priority: medium +--- + +## Description + + +Add an intro skip control so users can jump past opening sequences quickly during playback. Start with a reliable manual control (shortcut/action) and clear user feedback after seek. + + +## Suggestions + + +- Add a configurable skip duration (for example 60/75/90 seconds). +- Expose skip intro via keybinding and optional UI action in overlay/help. +- Show transient confirmation (OSD/overlay message) after skip action. 
+ + +## Action Steps + + +1. Define config and keybinding surface for intro skip duration and trigger. +2. Implement intro skip command that performs bounded seek in active playback session. +3. Wire command to user trigger path (keyboard + optional on-screen action if present). +4. Emit user feedback after successful skip (current time + skipped duration). +5. Add tests for command dispatch, seek bounds, and config fallback behavior. + + +## Acceptance Criteria + +- [ ] #1 User can trigger intro skip during playback with configured shortcut/action. +- [ ] #2 Skip performs bounded seek and never seeks before start or beyond stream duration. +- [ ] #3 Skip duration is configurable with sane default. +- [ ] #4 User receives visible confirmation after skip. +- [ ] #5 Automated tests cover config + seek behavior. + + +## Definition of Done + +- [ ] #1 Playback control tests pass +- [ ] #2 User-facing config/docs updated for intro skip control + diff --git a/docs/subagents/INDEX.md b/docs/subagents/INDEX.md index 9cb6bd7..d6f8e65 100644 --- a/docs/subagents/INDEX.md +++ b/docs/subagents/INDEX.md @@ -27,6 +27,6 @@ Read first. Keep concise. 
| `codex-review-refactor-cleanup-20260220T113818Z-i2ov` | `codex-review-refactor-cleanup` | `Review recent TASK-85 refactor effort and identify remaining cleanup work` | `handoff` | `docs/subagents/agents/codex-review-refactor-cleanup-20260220T113818Z-i2ov.md` | `2026-02-20T11:48:28Z` | | `codex-commit-unstaged-20260220T115057Z-k7q2` | `codex-commit-unstaged` | `Commit all current unstaged repository changes with content-derived conventional message` | `in_progress` | `docs/subagents/agents/codex-commit-unstaged-20260220T115057Z-k7q2.md` | `2026-02-20T11:51:18Z` | | `codex-overlay-whitespace-newline-20260221T040705Z-aw2j` | `codex-overlay-whitespace-newline` | `Fix visible overlay whitespace/newline token rendering bug with TDD regression coverage` | `completed` | `docs/subagents/agents/codex-overlay-whitespace-newline-20260221T040705Z-aw2j.md` | `2026-02-21T04:18:16Z` | -| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T04:38:25Z` | +| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T10:07:58Z` | | `codex-mpv-connect-log-20260221T043748Z-q7m1` | `codex-mpv-connect-log` | `Suppress repetitive MPV IPC connect-request INFO logs during startup` | `completed` | `docs/subagents/agents/codex-mpv-connect-log-20260221T043748Z-q7m1.md` | `2026-02-21T04:41:15Z` | | `codex-add-backlog-tasks-20260221T044104Z-m3n8` | `codex-add-backlog-tasks` | `Add two unrelated backlog tasks: secondary subtitle decoupling and intro skip` | `done` | `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` | 
`2026-02-21T04:44:12Z` | diff --git a/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md b/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md new file mode 100644 index 0000000..b01aa68 --- /dev/null +++ b/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md @@ -0,0 +1,29 @@ +# Agent: `codex-add-backlog-tasks-20260221T044104Z-m3n8` + +- alias: `codex-add-backlog-tasks` +- mission: `Add two unrelated backlog tasks requested by user` +- status: `done` +- branch: `main` +- started_at: `2026-02-21T04:41:04Z` +- heartbeat_minutes: `5` + +## Current Work (newest first) +- [2026-02-21T04:44:12Z] handoff: added `TASK-96` + `TASK-97` in `backlog/tasks`; updated index row to `done`. +- [2026-02-21T04:43:00Z] progress: drafting `TASK-96` (secondary subtitle decoupling) and `TASK-97` (intro skip) under `backlog/tasks`. +- [2026-02-21T04:42:10Z] intent: add two unrelated backlog tasks only; no code behavior changes. + +## Files Touched +- `docs/subagents/INDEX.md` +- `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` +- `backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md` +- `backlog/tasks/task-97 - Add-intro-skip-playback-control.md` + +## Assumptions +- User request means creating backlog tickets, not implementing either feature now. +- Existing backlog format in `backlog/tasks` remains canonical. + +## Open Questions / Blockers +- None. + +## Next Step +- Wait for user follow-up (prioritize one of the two new tasks for implementation). 
diff --git a/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md b/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md new file mode 100644 index 0000000..e052610 --- /dev/null +++ b/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md @@ -0,0 +1,74 @@ +# codex-duplicate-kiku-20260221T043006Z-5vkz + +- alias: `codex-duplicate-kiku` +- mission: `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` +- status: `completed` +- start_utc: `2026-02-21T04:30:06Z` +- last_update_utc: `2026-02-21T10:07:58Z` + +## Intent + +- Reproduce bug where clear duplicate cards no longer trigger Kiku duplicate grouping. +- Add failing regression test first (TDD). +- Patch duplicate detection logic with minimal behavior change. + +## Planned Files + +- `src/anki-integration/duplicate.ts` +- `src/anki-integration/duplicate.test.ts` (or nearest duplicate-detection tests) +- `docs/subagents/INDEX.md` +- `docs/subagents/collaboration.md` +- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md` + +## Assumptions + +- Duplicate signal should still come from Anki duplicate search + Yomitan/N+1-derived fields used in note content. +- Regression likely from term/readings normalization/query escaping mismatch. + +## Outcome + +- Root cause: candidate-note exact-check only resolved the originating field name (`Expression` or `Word`), so duplicates failed when candidate note used the opposite alias. +- Added regression test first (RED): `Expression` current note vs `Word` candidate with same value returned `null`. +- Implemented minimal fix: candidate resolution now checks both aliases (`word` and `expression`) before exact-value compare. +- GREEN: targeted duplicate test passed; related `anki-integration` test passed. +- User follow-up repro showed remaining miss when duplicate appears only in alias field search results. 
+- Added second RED test for alias-query fallback. +- Implemented query-stage alias fallback: run `findNotes` for both alias fields, merge note ids, then exact-verify. +- GREEN after follow-up: duplicate tests + `anki-integration` test pass. +- User reported still failing after first follow-up. +- Added third RED regression: source note containing both `Expression` (sentence) and `Word` (term) only matched duplicates via `Word`; previous logic missed this by using only one source value. +- Implemented source-candidate expansion: gather both `Word` and `Expression` source values, query aliases for each, dedupe queries, then exact-match against normalized set. +- GREEN: duplicate tests (3/3) + `anki-integration` test pass. +- Image-backed repro indicated possible duplicate outside configured deck scope. +- Added fourth RED regression: deck-scoped query misses, collection-wide query should still detect duplicate. +- Implemented deck fallback query pass (same source/alias combinations without deck filter) when deck-scoped pass yields no candidates. +- GREEN: duplicate tests (4/4) + `anki-integration` test pass. +- User confirmed fresh build/install still failed with `貴様` repro. +- Added fifth RED regression: field-specific queries return no matches but plain text query returns candidate. +- Implemented plain-text query fallback pass (deck-scoped then global), still gated by exact `word`/`expression` value verify. +- GREEN: duplicate tests (5/5) + `anki-integration` test pass. +- Added runtime debug instrumentation for duplicate detection query/verification path: + - query string + hit count + - candidate count after exclude + - exact-match note id + field +- No behavior change from instrumentation; build + tests still green. +- User requested logging policy update: prefer console output unless explicitly captured, and persistent logs under `~/.config/SubMiner/logs/*.log`. 
+- Updated default launcher/app mpv log path to daily file naming: `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log`. +- Typecheck green. +- Found observability gap: app logger wrote only to stdout/stderr while launcher log file only captured wrapper messages. +- Added file sink to `src/logger.ts` so app logs also append to `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (or `SUBMINER_MPV_LOG` when set). +- Verified with typecheck + build. + +## Files Touched + +- `src/anki-integration/duplicate.ts` +- `src/anki-integration/duplicate.test.ts` +- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md` +- `docs/subagents/INDEX.md` +- `docs/subagents/collaboration.md` +- `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` + +## Handoff + +- No blockers. +- Next step: run broader gate (`bun run test:fast`) when ready, then commit. diff --git a/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md b/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md new file mode 100644 index 0000000..71595b8 --- /dev/null +++ b/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md @@ -0,0 +1,28 @@ +# Agent Log: codex-frequency-dup-log-20260221T042815Z-r4k1 + +- alias: codex-frequency-dup-log +- mission: reduce frequency dictionary duplicate-term startup log spam; keep useful signal +- status: completed +- started_utc: 2026-02-21T04:28:15Z +- last_update_utc: 2026-02-21T04:32:40Z +- planned_files: + - src/core/services/frequency-dictionary.ts + - src/core/services/frequency-dictionary.test.ts + - docs/subagents/INDEX.md +- touched_files: + - src/core/services/frequency-dictionary.ts + - src/core/services/frequency-dictionary.test.ts + - docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md + - docs/subagents/INDEX.md +- key_decisions: + - remove per-entry duplicate term logs + - keep one aggregate duplicate summary line per bank file at info level +- assumptions: + 
- duplicate entries are expected in source dictionary and should not produce per-entry info logs +- verification: + - `bun test src/core/services/frequency-dictionary.test.ts` (pass) + - full build currently blocked by unrelated Jellyfin WIP type errors on branch +- blockers: + - unrelated branch state prevents full `bun run build` +- next_step: + - optional follow-up: add true debug-level logging API if duplicate diagnostics are needed on demand diff --git a/docs/subagents/collaboration.md b/docs/subagents/collaboration.md index 37ad529..fc211de 100644 --- a/docs/subagents/collaboration.md +++ b/docs/subagents/collaboration.md @@ -25,5 +25,11 @@ Shared notes. Append-only. - [2026-02-21T04:30:06Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] investigating Kiku duplicate grouping regression; expecting touches in `src/anki-integration/duplicate.ts` and duplicate-detection tests only. - [2026-02-21T04:33:17Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] completed TASK-94: duplicate check now resolves `word`/`expression` alias fields when validating candidate notes; added regression test `src/anki-integration/duplicate.test.ts`; targeted build + duplicate/anki-integration tests passed. - [2026-02-21T04:38:25Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] follow-up repro fixed: duplicate search now queries both alias fields (`word` + `expression`) and unions note ids before exact compare; added second regression test for alias-query fallback. +- [2026-02-21T04:48:50Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] second follow-up fix: when source note has both `Expression` and `Word`, duplicate detection now uses both source values (not just first field by order); added regression for mixed-field source candidate scenario. 
+- [2026-02-21T07:23:56Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] third follow-up fix: add collection-wide fallback query pass when deck-scoped duplicate search returns no candidates; added regression for deck-scope miss case. +- [2026-02-21T09:25:53Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] fourth follow-up fix: add plain-text query fallback when field-scoped queries miss; keep exact value verification on candidate notes to avoid false positives. +- [2026-02-21T09:40:33Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] instrumentation pass: add duplicate-detection debug logs (`[duplicate] query/hits/candidates/exact-match`) to isolate remaining live repro mismatches. +- [2026-02-21T09:54:29Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] logging-path update: default persistent logs now target `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (launcher + app mpv log default). +- [2026-02-21T10:07:58Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] observability fix: app logger now also appends to daily log file, so runtime duplicate traces are available even when overlay stdout is not surfaced in launcher terminal. - [2026-02-21T04:37:48Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] overlap note: touching `src/core/services/mpv.ts` + mpv service tests for startup connection-request log level gating; coordinating with historical TASK-33 behavior (same symptom, new logger path). - [2026-02-21T04:41:15Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] completed TASK-95: changed `MpvIpcClient.connect()` connect-request line to `logger.debug`, added regression tests for info/debug level log behavior in `src/core/services/mpv.test.ts`; verified via `bun run build && node dist/core/services/mpv.test.js` (pass). 
diff --git a/launcher/types.ts b/launcher/types.ts index 76caa36..dee8c2d 100644 --- a/launcher/types.ts +++ b/launcher/types.ts @@ -34,7 +34,13 @@ export const DEFAULT_YOUTUBE_SUBGEN_OUT_DIR = path.join( 'subminer', 'youtube-subs', ); -export const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log'); +export const DEFAULT_MPV_LOG_FILE = path.join( + os.homedir(), + '.config', + 'SubMiner', + 'logs', + `SubMiner-${new Date().toISOString().slice(0, 10)}.log`, +); export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best'; export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc'; export const DEFAULT_MPV_SUBMINER_ARGS = [ diff --git a/src/anki-integration.ts b/src/anki-integration.ts index 5444a9f..ccaa8c4 100644 --- a/src/anki-integration.ts +++ b/src/anki-integration.ts @@ -970,6 +970,12 @@ export class AnkiIntegration { notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown, getDeck: () => this.config.deck, resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName), + logInfo: (message) => { + log.info(message); + }, + logDebug: (message) => { + log.debug(message); + }, logWarn: (message, error) => { log.warn(message, (error as Error).message); }, diff --git a/src/anki-integration/duplicate.test.ts b/src/anki-integration/duplicate.test.ts new file mode 100644 index 0000000..240c6b2 --- /dev/null +++ b/src/anki-integration/duplicate.test.ts @@ -0,0 +1,265 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { findDuplicateNote, type NoteInfo } from './duplicate'; + +function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null { + const names = Object.keys(noteInfo.fields); + const exact = names.find((name) => name === preferredName); + if (exact) return exact; + const lower = preferredName.toLowerCase(); + return names.find((name) => name.toLowerCase() === lower) ?? 
null; +} + +test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '食べる' }, + }, + }; + + const duplicateId = await findDuplicateNote('食べる', 100, currentNote, { + findNotes: async () => [100, 200], + notesInfo: async () => [ + { + noteId: 200, + fields: { + Word: { value: '食べる' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); +}); + +test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '食べる' }, + }, + }; + + const seenQueries: string[] = []; + const duplicateId = await findDuplicateNote('食べる', 100, currentNote, { + findNotes: async (query) => { + seenQueries.push(query); + if (query.includes('"Expression:')) { + return []; + } + if (query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:')) { + return [200]; + } + return []; + }, + notesInfo: async () => [ + { + noteId: 200, + fields: { + Word: { value: '食べる' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); + assert.equal(seenQueries.length, 2); +}); + +test('findDuplicateNote checks both source expression/word values when both fields are present', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '昨日は雨だった。' }, + Word: { value: '雨' }, + }, + }; + + const seenQueries: string[] = []; + const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, currentNote, { + findNotes: async (query) => { + seenQueries.push(query); + if 
(query.includes('昨日は雨だった。')) { + return []; + } + if (query.includes('"Word:雨"') || query.includes('"word:雨"') || query.includes('"Expression:雨"')) { + return [200]; + } + return []; + }, + notesInfo: async () => [ + { + noteId: 200, + fields: { + Word: { value: '雨' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); + assert.ok(seenQueries.some((query) => query.includes('昨日は雨だった。'))); + assert.ok(seenQueries.some((query) => query.includes('雨'))); +}); + +test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '貴様' }, + }, + }; + + const seenQueries: string[] = []; + const duplicateId = await findDuplicateNote('貴様', 100, currentNote, { + findNotes: async (query) => { + seenQueries.push(query); + if (query.includes('deck:Japanese')) { + return []; + } + if (query.includes('"Expression:貴様"') || query.includes('"Word:貴様"')) { + return [200]; + } + return []; + }, + notesInfo: async () => [ + { + noteId: 200, + fields: { + Expression: { value: '貴様' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); + assert.ok(seenQueries.some((query) => query.includes('deck:Japanese'))); + assert.ok(seenQueries.some((query) => !query.includes('deck:Japanese'))); +}); + +test('findDuplicateNote falls back to plain text query when field queries miss', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '貴様' }, + }, + }; + + const seenQueries: string[] = []; + const duplicateId = await findDuplicateNote('貴様', 100, currentNote, { + findNotes: async (query) => { + seenQueries.push(query); 
+ if (query.includes('Expression:') || query.includes('Word:')) { + return []; + } + if (query.includes('"貴様"')) { + return [200]; + } + return []; + }, + notesInfo: async () => [ + { + noteId: 200, + fields: { + Expression: { value: '貴様' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); + assert.ok(seenQueries.some((query) => query.includes('Expression:'))); + assert.ok(seenQueries.some((query) => query.endsWith('"貴様"'))); +}); + +test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '貴様' }, + }, + }; + + const duplicateId = await findDuplicateNote('貴様', 100, currentNote, { + findNotes: async () => [200], + notesInfo: async () => [ + { + noteId: 200, + fields: { + Expression: { value: '貴様[きさま]' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); +}); + +test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: { value: '貴様' }, + }, + }; + + const duplicateId = await findDuplicateNote('貴様', 100, currentNote, { + findNotes: async () => [200], + notesInfo: async () => [ + { + noteId: 200, + fields: { + Expression: { value: '<span>貴様</span>' }, + }, + }, + ], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.equal(duplicateId, 200); +}); + +test('findDuplicateNote does not disable retries on findNotes calls', async () => { + const currentNote: NoteInfo = { + noteId: 100, + fields: { + Expression: {
value: '貴様' }, + }, + }; + + const seenOptions: Array<{ maxRetries?: number } | undefined> = []; + await findDuplicateNote('貴様', 100, currentNote, { + findNotes: async (_query, options) => { + seenOptions.push(options); + return []; + }, + notesInfo: async () => [], + getDeck: () => 'Japanese::Mining', + resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName), + logWarn: () => {}, + }); + + assert.ok(seenOptions.length > 0); + assert.ok(seenOptions.every((options) => options?.maxRetries !== 0)); +}); diff --git a/src/anki-integration/duplicate.ts b/src/anki-integration/duplicate.ts index 23b33cb..52ed7ff 100644 --- a/src/anki-integration/duplicate.ts +++ b/src/anki-integration/duplicate.ts @@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps { notesInfo: (noteIds: number[]) => Promise<unknown>; getDeck: () => string | null | undefined; resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null; + logInfo?: (message: string) => void; + logDebug?: (message: string) => void; logWarn: (message: string, error: unknown) => void; } @@ -21,25 +23,68 @@ export async function findDuplicateNote( noteInfo: NoteInfo, deps: DuplicateDetectionDeps, ): Promise<number | null> { - let fieldName = ''; - for (const name of Object.keys(noteInfo.fields)) { - if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) { - fieldName = name; - break; - } - } - if (!fieldName) return null; + const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression); + if (sourceCandidates.length === 0) return null; + deps.logInfo?.( + `[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates + .map((entry) => `${entry.fieldName}:${entry.value}`) + .join('|')}`, + ); - const escapedFieldName = escapeAnkiSearchValue(fieldName); - const escapedExpression = escapeAnkiSearchValue(expression); - const deckPrefix = deps.getDeck() ?
`"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : ''; - const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`; + const deckValue = deps.getDeck(); + const queryPrefixes = deckValue + ? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, ''] + : ['']; try { - const noteIds = (await deps.findNotes(query, { - maxRetries: 0, - })) as number[]; - return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps); + const noteIds = new Set(); + const executedQueries = new Set(); + for (const queryPrefix of queryPrefixes) { + for (const sourceCandidate of sourceCandidates) { + const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value); + const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName); + for (const queryFieldName of queryFieldNames) { + const escapedFieldName = escapeAnkiSearchValue(queryFieldName); + const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`; + if (executedQueries.has(query)) continue; + executedQueries.add(query); + const results = (await deps.findNotes(query)) as number[]; + deps.logDebug?.( + `[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`, + ); + for (const noteId of results) { + noteIds.add(noteId); + } + } + } + if (noteIds.size > 0) break; + } + + if (noteIds.size === 0) { + for (const queryPrefix of queryPrefixes) { + for (const sourceCandidate of sourceCandidates) { + const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value); + const query = `${queryPrefix}"${escapedExpression}"`; + if (executedQueries.has(query)) continue; + executedQueries.add(query); + const results = (await deps.findNotes(query)) as number[]; + deps.logDebug?.( + `[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? 
results.length : 0}`, + ); + for (const noteId of results) { + noteIds.add(noteId); + } + } + if (noteIds.size > 0) break; + } + } + + return await findFirstExactDuplicateNoteId( + noteIds, + excludeNoteId, + sourceCandidates.map((candidate) => candidate.value), + deps, + ); } catch (error) { deps.logWarn('Duplicate search failed:', error); return null; @@ -47,18 +92,25 @@ export async function findDuplicateNote( } function findFirstExactDuplicateNoteId( - candidateNoteIds: number[], + candidateNoteIds: Iterable, excludeNoteId: number, - fieldName: string, - expression: string, + sourceValues: string[], deps: DuplicateDetectionDeps, ): Promise { - const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId); + const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId); + deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`); if (candidates.length === 0) { + deps.logInfo?.('[duplicate] no candidates after query + exclude'); + return Promise.resolve(null); + } + + const normalizedValues = new Set( + sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0), + ); + if (normalizedValues.size === 0) { return Promise.resolve(null); } - const normalizedExpression = normalizeDuplicateValue(expression); const chunkSize = 50; return (async () => { for (let i = 0; i < candidates.length; i += chunkSize) { @@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId( const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[]; const notesInfo = notesInfoResult as NoteInfo[]; for (const noteInfo of notesInfo) { - const resolvedField = deps.resolveFieldName(noteInfo, fieldName); - if (!resolvedField) continue; - const candidateValue = noteInfo.fields[resolvedField]?.value || ''; - if (normalizeDuplicateValue(candidateValue) === normalizedExpression) { - return noteInfo.noteId; + const candidateFieldNames = ['word', 'expression']; + for (const candidateFieldName of 
candidateFieldNames) { + const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName); + if (!resolvedField) continue; + const candidateValue = noteInfo.fields[resolvedField]?.value || ''; + if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) { + deps.logDebug?.( + `[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`, + ); + deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`); + return noteInfo.noteId; + } } } } + deps.logInfo?.('[duplicate] no exact match in candidate notes'); return null; })(); } +function getDuplicateCandidateFieldNames(fieldName: string): string[] { + const candidates = [fieldName]; + const lower = fieldName.toLowerCase(); + if (lower === 'word') { + candidates.push('expression'); + } else if (lower === 'expression') { + candidates.push('word'); + } + return candidates; +} + +function getDuplicateSourceCandidates( + noteInfo: NoteInfo, + fallbackExpression: string, +): Array<{ fieldName: string; value: string }> { + const candidates: Array<{ fieldName: string; value: string }> = []; + const dedupeKey = new Set(); + + for (const fieldName of Object.keys(noteInfo.fields)) { + const lower = fieldName.toLowerCase(); + if (lower !== 'word' && lower !== 'expression') continue; + const value = noteInfo.fields[fieldName]?.value?.trim() ?? 
''; + if (!value) continue; + const key = `${lower}:${normalizeDuplicateValue(value)}`; + if (dedupeKey.has(key)) continue; + dedupeKey.add(key); + candidates.push({ fieldName, value }); + } + + const trimmedFallback = fallbackExpression.trim(); + if (trimmedFallback.length > 0) { + const fallbackKey = `expression:${normalizeDuplicateValue(trimmedFallback)}`; + if (!dedupeKey.has(fallbackKey)) { + candidates.push({ fieldName: 'expression', value: trimmedFallback }); + } + } + + return candidates; +} + function normalizeDuplicateValue(value: string): string { - return value.replace(/\s+/g, ' ').trim(); + return value + .replace(/<[^>]*>/g, '') + .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1') + .replace(/\s+/g, ' ') + .trim(); } function escapeAnkiSearchValue(value: string): string { diff --git a/src/core/services/frequency-dictionary.test.ts b/src/core/services/frequency-dictionary.test.ts index ae8049d..baca354 100644 --- a/src/core/services/frequency-dictionary.test.ts +++ b/src/core/services/frequency-dictionary.test.ts @@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa true, ); }); + +test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => { + const logs: string[] = []; + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); + const bankPath = path.join(tempDir, 'term_meta_bank_1.json'); + fs.writeFileSync( + bankPath, + JSON.stringify([ + ['猫', 1, { frequency: { displayValue: 100 } }], + ['猫', 2, { frequency: { displayValue: 120 } }], + ['猫', 3, { frequency: { displayValue: 110 } }], + ]), + ); + + const lookup = await createFrequencyDictionaryLookup({ + searchPaths: [tempDir], + log: (message) => { + logs.push(message); + }, + }); + + assert.equal(lookup('猫'), 100); + assert.equal( + logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length, + 1, + ); + assert.equal( + logs.some((entry) => 
entry.includes('Frequency dictionary duplicate term')), + false, + ); +}); diff --git a/src/core/services/frequency-dictionary.ts b/src/core/services/frequency-dictionary.ts index ea5f9fd..b8c84af 100644 --- a/src/core/services/frequency-dictionary.ts +++ b/src/core/services/frequency-dictionary.ts @@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | function addEntriesToMap( rawEntries: unknown, terms: Map, - log: (message: string) => void, -): void { +): { duplicateCount: number } { if (!Array.isArray(rawEntries)) { - return; + return { duplicateCount: 0 }; } + let duplicateCount = 0; for (const rawEntry of rawEntries) { const entry = asFrequencyDictionaryEntry(rawEntry); if (!entry) { @@ -79,10 +79,10 @@ function addEntriesToMap( continue; } - log( - `Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`, - ); + duplicateCount += 1; } + + return { duplicateCount }; } function collectDictionaryFromPath( @@ -124,7 +124,14 @@ function collectDictionaryFromPath( } const beforeSize = terms.size; - addEntriesToMap(rawEntries, terms, log); + const { duplicateCount } = addEntriesToMap(rawEntries, terms); + if (duplicateCount > 0) { + log( + `Frequency dictionary ignored ${duplicateCount} duplicate term entr${ + duplicateCount === 1 ? 
'y' : 'ies' + } in ${bankPath} (kept strongest rank per term).`, + ); + } if (terms.size === beforeSize) { log(`Frequency dictionary file contained no extractable entries: ${bankPath}`); } diff --git a/src/logger.ts b/src/logger.ts index 0d01323..3092095 100644 --- a/src/logger.ts +++ b/src/logger.ts @@ -1,3 +1,7 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + export type LogLevel = 'debug' | 'info' | 'warn' | 'error'; export type LogLevelSource = 'cli' | 'config'; @@ -107,6 +111,25 @@ function safeStringify(value: unknown): string { } } +function resolveLogFilePath(): string { + const envPath = process.env.SUBMINER_MPV_LOG?.trim(); + if (envPath) { + return envPath; + } + const date = new Date().toISOString().slice(0, 10); + return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${date}.log`); +} + +function appendToLogFile(line: string): void { + try { + const logPath = resolveLogFilePath(); + fs.mkdirSync(path.dirname(logPath), { recursive: true }); + fs.appendFileSync(logPath, `${line}\n`, { encoding: 'utf8' }); + } catch { + // never break runtime due to logging sink failures + } +} + function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void { const minLevel = resolveMinLevel(); if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) { @@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): } else { console.info(prefix); } + appendToLogFile(prefix); return; } @@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): } else { console.info(finalMessage); } + appendToLogFile(finalMessage); } export function createLogger(scope: string): Logger { diff --git a/src/main.ts b/src/main.ts index dbd6d86..35d5e2e 100644 --- a/src/main.ts +++ b/src/main.ts @@ -487,7 +487,13 @@ if (process.platform === 'linux') { app.setName('SubMiner'); const DEFAULT_TEXTHOOKER_PORT = 5174; -const DEFAULT_MPV_LOG_FILE 
= path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log'); +const DEFAULT_MPV_LOG_FILE = path.join( + os.homedir(), + '.config', + 'SubMiner', + 'logs', + `SubMiner-${new Date().toISOString().slice(0, 10)}.log`, +); const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize'; const ANILIST_SETUP_RESPONSE_TYPE = 'token'; const ANILIST_DEFAULT_CLIENT_ID = '36084';