chore: commit unstaged workspace changes

This commit is contained in:
2026-02-21 02:32:00 -08:00
parent 1c424b4a0b
commit ab1d5f19fd
16 changed files with 780 additions and 37 deletions

View File

@@ -0,0 +1,52 @@
---
id: TASK-94
title: Fix Kiku duplicate detection for Yomitan-marked duplicates
status: Done
assignee:
- codex-duplicate-kiku-20260221T043006Z-5vkz
created_date: '2026-02-21 04:33'
updated_date: '2026-02-21 01:40'
labels:
- bug
- anki
- kiku
dependencies: []
priority: high
ordinal: 65000
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Kiku field grouping no longer detects duplicate cards in scenarios where the mined card is clearly marked duplicate by Yomitan/N+1 workflow. Restore duplicate detection so duplicate note lookup succeeds for equivalent expression/word cards and Kiku grouping can run.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Repro case covered by automated regression test in duplicate-detection path.
- [x] #2 Kiku duplicate detection returns duplicate note id for the repro case.
- [x] #3 Targeted tests for duplicate detection pass.
<!-- AC:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
Added regression test `src/anki-integration/duplicate.test.ts` for a cross-field duplicate case where current note uses `Expression` and candidate uses `Word` with same value.
Updated duplicate matching in `src/anki-integration/duplicate.ts` to try alternate field-name aliases (`word` <-> `expression`) when resolving candidate note fields for exact-value verification.
Follow-up fix: duplicate search query now also probes alias fields (`word` <-> `expression`) and merges candidate note ids before exact verification, so duplicates are still found when only the alias field is indexed/populated on existing cards.
Second follow-up fix: duplicate detection now evaluates both source values when current note contains both `Expression` and `Word` (previously only one was used, depending on field-order). Query and exact verification now run against all source duplicate candidates.
Third follow-up fix: if deck-scoped duplicate queries return no results, detection now retries the same source/alias query set collection-wide (no deck filter) before exact verification. This aligns with cases where Yomitan shows duplicates outside the configured mining deck.
Fourth follow-up fix: if field-specific queries miss entirely, detection now falls back to phrase/plain-text queries (deck-scoped then collection-wide) and still requires exact `Expression/Word` value verification before selecting a duplicate note.
Fifth follow-up: added explicit duplicate-search debug logs (query strings, hit counts, candidate counts, exact-match note id) to improve runtime diagnosis in live launcher runs.
Verification:
- `bun run build`
- `node dist/anki-integration/duplicate.test.js`
- `node --test dist/anki-integration.test.js`
<!-- SECTION:NOTES:END -->

View File

@@ -0,0 +1,52 @@
---
id: TASK-96
title: Decouple secondary subtitle lifecycle from visible/invisible overlays
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
- subtitles
- overlay
- architecture
dependencies: []
priority: high
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Secondary subtitle behavior should not depend on visible/invisible overlay state transitions. Introduce an independent lifecycle so secondary subtitle rendering, visibility mode (`always`/`hover`/`never`), and positioning stay stable even when primary overlays are toggled or rebound.
<!-- SECTION:DESCRIPTION:END -->
## Suggestions
<!-- SECTION:SUGGESTIONS:BEGIN -->
- Isolate secondary subtitle state management from primary overlay window orchestration.
- Route secondary subtitle updates through a dedicated service/controller boundary.
- Keep MPV secondary subtitle property handling independent from overlay visibility toggles.
<!-- SECTION:SUGGESTIONS:END -->
## Action Steps
<!-- SECTION:PLAN:BEGIN -->
1. Inventory existing coupling points between secondary subtitle updates and overlay visibility/bounds services.
2. Introduce explicit secondary subtitle lifecycle state and transitions.
3. Refactor event wiring so visible/invisible overlay toggles do not mutate secondary subtitle state.
4. Validate display modes (`always`/`hover`/`never`) continue to work with independent lifecycle.
5. Add regression tests for overlay toggles, reconnect/restart, and mode-switch behavior.
<!-- SECTION:PLAN:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 Toggling visible or invisible overlays does not alter secondary subtitle lifecycle state.
- [ ] #2 Secondary subtitle display mode behavior remains correct across overlay state transitions.
- [ ] #3 Secondary subtitle behavior survives MPV reconnect/restart without overlay-coupling regressions.
- [ ] #4 Automated tests cover decoupled lifecycle behavior and prevent re-coupling.
<!-- AC:END -->
## Definition of Done
<!-- DOD:BEGIN -->
- [ ] #1 Relevant unit/integration tests pass
- [ ] #2 Documentation/comments updated where lifecycle ownership changed
<!-- DOD:END -->

View File

@@ -0,0 +1,52 @@
---
id: TASK-97
title: Add intro skip playback control
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
- playback
- ux
dependencies: []
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Add an intro skip control so users can jump past opening sequences quickly during playback. Start with a reliable manual control (shortcut/action) and clear user feedback after seek.
<!-- SECTION:DESCRIPTION:END -->
## Suggestions
<!-- SECTION:SUGGESTIONS:BEGIN -->
- Add a configurable skip duration (for example 60/75/90 seconds).
- Expose skip intro via keybinding and optional UI action in overlay/help.
- Show transient confirmation (OSD/overlay message) after skip action.
<!-- SECTION:SUGGESTIONS:END -->
## Action Steps
<!-- SECTION:PLAN:BEGIN -->
1. Define config and keybinding surface for intro skip duration and trigger.
2. Implement intro skip command that performs bounded seek in active playback session.
3. Wire command to user trigger path (keyboard + optional on-screen action if present).
4. Emit user feedback after successful skip (current time + skipped duration).
5. Add tests for command dispatch, seek bounds, and config fallback behavior.
<!-- SECTION:PLAN:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [ ] #1 User can trigger intro skip during playback with configured shortcut/action.
- [ ] #2 Skip performs bounded seek and never seeks before start or beyond stream duration.
- [ ] #3 Skip duration is configurable with sane default.
- [ ] #4 User receives visible confirmation after skip.
- [ ] #5 Automated tests cover config + seek behavior.
<!-- AC:END -->
## Definition of Done
<!-- DOD:BEGIN -->
- [ ] #1 Playback control tests pass
- [ ] #2 User-facing config/docs updated for intro skip control
<!-- DOD:END -->

View File

@@ -27,6 +27,6 @@ Read first. Keep concise.
| `codex-review-refactor-cleanup-20260220T113818Z-i2ov` | `codex-review-refactor-cleanup` | `Review recent TASK-85 refactor effort and identify remaining cleanup work` | `handoff` | `docs/subagents/agents/codex-review-refactor-cleanup-20260220T113818Z-i2ov.md` | `2026-02-20T11:48:28Z` |
| `codex-commit-unstaged-20260220T115057Z-k7q2` | `codex-commit-unstaged` | `Commit all current unstaged repository changes with content-derived conventional message` | `in_progress` | `docs/subagents/agents/codex-commit-unstaged-20260220T115057Z-k7q2.md` | `2026-02-20T11:51:18Z` |
| `codex-overlay-whitespace-newline-20260221T040705Z-aw2j` | `codex-overlay-whitespace-newline` | `Fix visible overlay whitespace/newline token rendering bug with TDD regression coverage` | `completed` | `docs/subagents/agents/codex-overlay-whitespace-newline-20260221T040705Z-aw2j.md` | `2026-02-21T04:18:16Z` |
| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T04:38:25Z` |
| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T10:07:58Z` |
| `codex-mpv-connect-log-20260221T043748Z-q7m1` | `codex-mpv-connect-log` | `Suppress repetitive MPV IPC connect-request INFO logs during startup` | `completed` | `docs/subagents/agents/codex-mpv-connect-log-20260221T043748Z-q7m1.md` | `2026-02-21T04:41:15Z` |
| `codex-add-backlog-tasks-20260221T044104Z-m3n8` | `codex-add-backlog-tasks` | `Add two unrelated backlog tasks: secondary subtitle decoupling and intro skip` | `done` | `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` | `2026-02-21T04:44:12Z` |

View File

@@ -0,0 +1,29 @@
# Agent: `codex-add-backlog-tasks-20260221T044104Z-m3n8`
- alias: `codex-add-backlog-tasks`
- mission: `Add two unrelated backlog tasks requested by user`
- status: `done`
- branch: `main`
- started_at: `2026-02-21T04:41:04Z`
- heartbeat_minutes: `5`
## Current Work (newest first)
- [2026-02-21T04:44:12Z] handoff: added `TASK-96` + `TASK-97` in `backlog/tasks`; updated index row to `done`.
- [2026-02-21T04:43:00Z] progress: drafting `TASK-96` (secondary subtitle decoupling) and `TASK-97` (intro skip) under `backlog/tasks`.
- [2026-02-21T04:42:10Z] intent: add two unrelated backlog tasks only; no code behavior changes.
## Files Touched
- `docs/subagents/INDEX.md`
- `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md`
- `backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md`
- `backlog/tasks/task-97 - Add-intro-skip-playback-control.md`
## Assumptions
- User request means creating backlog tickets, not implementing either feature now.
- Existing backlog format in `backlog/tasks` remains canonical.
## Open Questions / Blockers
- None.
## Next Step
- Wait for user follow-up (prioritize one of the two new tasks for implementation).

View File

@@ -0,0 +1,74 @@
# codex-duplicate-kiku-20260221T043006Z-5vkz
- alias: `codex-duplicate-kiku`
- mission: `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards`
- status: `completed`
- start_utc: `2026-02-21T04:30:06Z`
- last_update_utc: `2026-02-21T10:07:58Z`
## Intent
- Reproduce bug where clear duplicate cards no longer trigger Kiku duplicate grouping.
- Add failing regression test first (TDD).
- Patch duplicate detection logic with minimal behavior change.
## Planned Files
- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts` (or nearest duplicate-detection tests)
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
## Assumptions
- Duplicate signal should still come from Anki duplicate search + Yomitan/N+1-derived fields used in note content.
- Regression likely from term/readings normalization/query escaping mismatch.
## Outcome
- Root cause: candidate-note exact-check only resolved the originating field name (`Expression` or `Word`), so duplicates failed when candidate note used the opposite alias.
- Added regression test first (RED): `Expression` current note vs `Word` candidate with same value returned `null`.
- Implemented minimal fix: candidate resolution now checks both aliases (`word` and `expression`) before exact-value compare.
- GREEN: targeted duplicate test passed; related `anki-integration` test passed.
- User follow-up repro showed remaining miss when duplicate appears only in alias field search results.
- Added second RED test for alias-query fallback.
- Implemented query-stage alias fallback: run `findNotes` for both alias fields, merge note ids, then exact-verify.
- GREEN after follow-up: duplicate tests + `anki-integration` test pass.
- User reported still failing after first follow-up.
- Added third RED regression: source note containing both `Expression` (sentence) and `Word` (term) only matched duplicates via `Word`; previous logic missed this by using only one source value.
- Implemented source-candidate expansion: gather both `Word` and `Expression` source values, query aliases for each, dedupe queries, then exact-match against normalized set.
- GREEN: duplicate tests (3/3) + `anki-integration` test pass.
- Image-backed repro indicated possible duplicate outside configured deck scope.
- Added fourth RED regression: deck-scoped query misses, collection-wide query should still detect duplicate.
- Implemented deck fallback query pass (same source/alias combinations without deck filter) when deck-scoped pass yields no candidates.
- GREEN: duplicate tests (4/4) + `anki-integration` test pass.
- User confirmed fresh build/install still failed with `貴様` repro.
- Added fifth RED regression: field-specific queries return no matches but plain text query returns candidate.
- Implemented plain-text query fallback pass (deck-scoped then global), still gated by exact `word`/`expression` value verify.
- GREEN: duplicate tests (5/5) + `anki-integration` test pass.
- Added runtime debug instrumentation for duplicate detection query/verification path:
- query string + hit count
- candidate count after exclude
- exact-match note id + field
- No behavior change from instrumentation; build + tests still green.
- User requested logging policy update: prefer console output unless explicitly captured, and persistent logs under `~/.config/SubMiner/logs/*.log`.
- Updated default launcher/app mpv log path to daily file naming: `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log`.
- Typecheck green.
- Found observability gap: app logger wrote only to stdout/stderr while launcher log file only captured wrapper messages.
- Added file sink to `src/logger.ts` so app logs also append to `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (or `SUBMINER_MPV_LOG` when set).
- Verified with typecheck + build.
## Files Touched
- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md`
## Handoff
- No blockers.
- Next step: run broader gate (`bun run test:fast`) when ready, then commit.

View File

@@ -0,0 +1,28 @@
# Agent Log: codex-frequency-dup-log-20260221T042815Z-r4k1
- alias: codex-frequency-dup-log
- mission: reduce frequency dictionary duplicate-term startup log spam; keep useful signal
- status: completed
- started_utc: 2026-02-21T04:28:15Z
- last_update_utc: 2026-02-21T04:32:40Z
- planned_files:
- src/core/services/frequency-dictionary.ts
- src/core/services/frequency-dictionary.test.ts
- docs/subagents/INDEX.md
- touched_files:
- src/core/services/frequency-dictionary.ts
- src/core/services/frequency-dictionary.test.ts
- docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
- docs/subagents/INDEX.md
- key_decisions:
- remove per-entry duplicate term logs
- keep one aggregate duplicate summary line per bank file at info level
- assumptions:
- duplicate entries are expected in source dictionary and should not produce per-entry info logs
- verification:
- `bun test src/core/services/frequency-dictionary.test.ts` (pass)
- full build currently blocked by unrelated Jellyfin WIP type errors on branch
- blockers:
- unrelated branch state prevents full `bun run build`
- next_step:
- optional follow-up: add true debug-level logging API if duplicate diagnostics are needed on demand

View File

@@ -25,5 +25,11 @@ Shared notes. Append-only.
- [2026-02-21T04:30:06Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] investigating Kiku duplicate grouping regression; expecting touches in `src/anki-integration/duplicate.ts` and duplicate-detection tests only.
- [2026-02-21T04:33:17Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] completed TASK-94: duplicate check now resolves `word`/`expression` alias fields when validating candidate notes; added regression test `src/anki-integration/duplicate.test.ts`; targeted build + duplicate/anki-integration tests passed.
- [2026-02-21T04:38:25Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] follow-up repro fixed: duplicate search now queries both alias fields (`word` + `expression`) and unions note ids before exact compare; added second regression test for alias-query fallback.
- [2026-02-21T04:48:50Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] second follow-up fix: when source note has both `Expression` and `Word`, duplicate detection now uses both source values (not just first field by order); added regression for mixed-field source candidate scenario.
- [2026-02-21T07:23:56Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] third follow-up fix: add collection-wide fallback query pass when deck-scoped duplicate search returns no candidates; added regression for deck-scope miss case.
- [2026-02-21T09:25:53Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] fourth follow-up fix: add plain-text query fallback when field-scoped queries miss; keep exact value verification on candidate notes to avoid false positives.
- [2026-02-21T09:40:33Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] instrumentation pass: add duplicate-detection debug logs (`[duplicate] query/hits/candidates/exact-match`) to isolate remaining live repro mismatches.
- [2026-02-21T09:54:29Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] logging-path update: default persistent logs now target `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (launcher + app mpv log default).
- [2026-02-21T10:07:58Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] observability fix: app logger now also appends to daily log file, so runtime duplicate traces are available even when overlay stdout is not surfaced in launcher terminal.
- [2026-02-21T04:37:48Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] overlap note: touching `src/core/services/mpv.ts` + mpv service tests for startup connection-request log level gating; coordinating with historical TASK-33 behavior (same symptom, new logger path).
- [2026-02-21T04:41:15Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] completed TASK-95: changed `MpvIpcClient.connect()` connect-request line to `logger.debug`, added regression tests for info/debug level log behavior in `src/core/services/mpv.test.ts`; verified via `bun run build && node dist/core/services/mpv.test.js` (pass).

View File

@@ -34,7 +34,13 @@ export const DEFAULT_YOUTUBE_SUBGEN_OUT_DIR = path.join(
'subminer',
'youtube-subs',
);
// Default persistent log file, rotated daily by name:
// ~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log
// NOTE(review): the date is evaluated once at module load, so a process that
// runs past midnight keeps appending to the file named for its start date —
// confirm that is acceptable for long-lived sessions.
export const DEFAULT_MPV_LOG_FILE = path.join(
  os.homedir(),
  '.config',
  'SubMiner',
  'logs',
  `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
);
export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best';
export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc';
export const DEFAULT_MPV_SUBMINER_ARGS = [

View File

@@ -970,6 +970,12 @@ export class AnkiIntegration {
notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown,
getDeck: () => this.config.deck,
resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName),
logInfo: (message) => {
log.info(message);
},
logDebug: (message) => {
log.debug(message);
},
logWarn: (message, error) => {
log.warn(message, (error as Error).message);
},

View File

@@ -0,0 +1,265 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { findDuplicateNote, type NoteInfo } from './duplicate';
// Resolve `preferredName` against the note's field names: an exact match wins,
// otherwise fall back to a case-insensitive match; null when absent.
function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null {
  const fieldNames = Object.keys(noteInfo.fields);
  if (fieldNames.includes(preferredName)) {
    return preferredName;
  }
  const wanted = preferredName.toLowerCase();
  for (const candidate of fieldNames) {
    if (candidate.toLowerCase() === wanted) {
      return candidate;
    }
  }
  return null;
}
test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => {
  // Source note stores the term under `Expression`; the existing candidate
  // stores the same value under `Word`. Detection must still match it.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '食べる' } },
  };
  const duplicateId = await findDuplicateNote('食べる', 100, sourceNote, {
    findNotes: async () => [100, 200],
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '食べる' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => {
  // The `Expression`-scoped query yields nothing; only the alias-field query
  // surfaces the candidate. Exactly two queries should be issued before the hit.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '食べる' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('食べる', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('"Expression:')) {
        return [];
      }
      const aliasHit =
        query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:');
      return aliasHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '食べる' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.equal(issuedQueries.length, 2);
});
test('findDuplicateNote checks both source expression/word values when both fields are present', async () => {
  // Source note carries a full sentence in `Expression` and the bare term in
  // `Word`; only the term-based queries can find the duplicate.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '昨日は雨だった。' },
      Word: { value: '雨' },
    },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('昨日は雨だった。')) {
        return [];
      }
      const termHit =
        query.includes('"Word:雨"') ||
        query.includes('"word:雨"') ||
        query.includes('"Expression:雨"');
      return termHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Word: { value: '雨' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('昨日は雨だった。')));
  assert.ok(issuedQueries.some((query) => query.includes('雨')));
});
test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => {
  // Every deck-scoped query misses; the same field query without the deck
  // filter must still locate the duplicate note.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('deck:Japanese')) {
        return [];
      }
      const fieldHit = query.includes('"Expression:貴様"') || query.includes('"Word:貴様"');
      return fieldHit ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('deck:Japanese')));
  assert.ok(issuedQueries.some((query) => !query.includes('deck:Japanese')));
});
test('findDuplicateNote falls back to plain text query when field queries miss', async () => {
  // All field-scoped queries return nothing; only the bare quoted-text query
  // hits, and exact-value verification still selects the candidate.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const issuedQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (query) => {
      issuedQueries.push(query);
      if (query.includes('Expression:') || query.includes('Word:')) {
        return [];
      }
      return query.includes('"貴様"') ? [200] : [];
    },
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
  assert.ok(issuedQueries.some((query) => query.includes('Expression:')));
  assert.ok(issuedQueries.some((query) => query.endsWith('"貴様"')));
});
test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => {
  // Candidate stores `貴様[きさま]`; normalization must strip the bracketed
  // reading before the exact-value comparison.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async () => [200],
    notesInfo: async () => [{ noteId: 200, fields: { Expression: { value: '貴様[きさま]' } } }],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => {
  // Candidate value is wrapped in HTML; normalization must strip the tags
  // before the exact-value comparison.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const duplicateId = await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async () => [200],
    notesInfo: async () => [
      { noteId: 200, fields: { Expression: { value: '<span data-x="1">貴様</span>' } } },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.equal(duplicateId, 200);
});
test('findDuplicateNote does not disable retries on findNotes calls', async () => {
  // Regression guard: the duplicate search must never pass `maxRetries: 0`
  // to findNotes, so transient AnkiConnect failures are retried.
  const sourceNote: NoteInfo = {
    noteId: 100,
    fields: { Expression: { value: '貴様' } },
  };
  const observedOptions: Array<{ maxRetries?: number } | undefined> = [];
  await findDuplicateNote('貴様', 100, sourceNote, {
    findNotes: async (_query, options) => {
      observedOptions.push(options);
      return [];
    },
    notesInfo: async () => [],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: createFieldResolver,
    logWarn: () => {},
  });
  assert.ok(observedOptions.length > 0);
  assert.ok(observedOptions.every((options) => options?.maxRetries !== 0));
});

View File

@@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps {
notesInfo: (noteIds: number[]) => Promise<unknown>;
getDeck: () => string | null | undefined;
resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null;
logInfo?: (message: string) => void;
logDebug?: (message: string) => void;
logWarn: (message: string, error: unknown) => void;
}
@@ -21,25 +23,68 @@ export async function findDuplicateNote(
noteInfo: NoteInfo,
deps: DuplicateDetectionDeps,
): Promise<number | null> {
let fieldName = '';
for (const name of Object.keys(noteInfo.fields)) {
if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) {
fieldName = name;
break;
}
}
if (!fieldName) return null;
const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression);
if (sourceCandidates.length === 0) return null;
deps.logInfo?.(
`[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates
.map((entry) => `${entry.fieldName}:${entry.value}`)
.join('|')}`,
);
const escapedFieldName = escapeAnkiSearchValue(fieldName);
const escapedExpression = escapeAnkiSearchValue(expression);
const deckPrefix = deps.getDeck() ? `"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : '';
const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`;
const deckValue = deps.getDeck();
const queryPrefixes = deckValue
? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, '']
: [''];
try {
const noteIds = (await deps.findNotes(query, {
maxRetries: 0,
})) as number[];
return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps);
const noteIds = new Set<number>();
const executedQueries = new Set<string>();
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName);
for (const queryFieldName of queryFieldNames) {
const escapedFieldName = escapeAnkiSearchValue(queryFieldName);
const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
}
if (noteIds.size > 0) break;
}
if (noteIds.size === 0) {
for (const queryPrefix of queryPrefixes) {
for (const sourceCandidate of sourceCandidates) {
const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
const query = `${queryPrefix}"${escapedExpression}"`;
if (executedQueries.has(query)) continue;
executedQueries.add(query);
const results = (await deps.findNotes(query)) as number[];
deps.logDebug?.(
`[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
);
for (const noteId of results) {
noteIds.add(noteId);
}
}
if (noteIds.size > 0) break;
}
}
return await findFirstExactDuplicateNoteId(
noteIds,
excludeNoteId,
sourceCandidates.map((candidate) => candidate.value),
deps,
);
} catch (error) {
deps.logWarn('Duplicate search failed:', error);
return null;
@@ -47,18 +92,25 @@ export async function findDuplicateNote(
}
function findFirstExactDuplicateNoteId(
candidateNoteIds: number[],
candidateNoteIds: Iterable<number>,
excludeNoteId: number,
fieldName: string,
expression: string,
sourceValues: string[],
deps: DuplicateDetectionDeps,
): Promise<number | null> {
const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId);
const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId);
deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`);
if (candidates.length === 0) {
deps.logInfo?.('[duplicate] no candidates after query + exclude');
return Promise.resolve(null);
}
const normalizedValues = new Set(
sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0),
);
if (normalizedValues.size === 0) {
return Promise.resolve(null);
}
const normalizedExpression = normalizeDuplicateValue(expression);
const chunkSize = 50;
return (async () => {
for (let i = 0; i < candidates.length; i += chunkSize) {
@@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId(
const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[];
const notesInfo = notesInfoResult as NoteInfo[];
for (const noteInfo of notesInfo) {
const resolvedField = deps.resolveFieldName(noteInfo, fieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizeDuplicateValue(candidateValue) === normalizedExpression) {
return noteInfo.noteId;
const candidateFieldNames = ['word', 'expression'];
for (const candidateFieldName of candidateFieldNames) {
const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName);
if (!resolvedField) continue;
const candidateValue = noteInfo.fields[resolvedField]?.value || '';
if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) {
deps.logDebug?.(
`[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`,
);
deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`);
return noteInfo.noteId;
}
}
}
}
deps.logInfo?.('[duplicate] no exact match in candidate notes');
return null;
})();
}
// Pair a word/expression field with its alias so candidate lookups probe both
// spellings; any other field name passes through unchanged as a single entry.
function getDuplicateCandidateFieldNames(fieldName: string): string[] {
  switch (fieldName.toLowerCase()) {
    case 'word':
      return [fieldName, 'expression'];
    case 'expression':
      return [fieldName, 'word'];
    default:
      return [fieldName];
  }
}
function getDuplicateSourceCandidates(
noteInfo: NoteInfo,
fallbackExpression: string,
): Array<{ fieldName: string; value: string }> {
const candidates: Array<{ fieldName: string; value: string }> = [];
const dedupeKey = new Set<string>();
for (const fieldName of Object.keys(noteInfo.fields)) {
const lower = fieldName.toLowerCase();
if (lower !== 'word' && lower !== 'expression') continue;
const value = noteInfo.fields[fieldName]?.value?.trim() ?? '';
if (!value) continue;
const key = `${lower}:${normalizeDuplicateValue(value)}`;
if (dedupeKey.has(key)) continue;
dedupeKey.add(key);
candidates.push({ fieldName, value });
}
const trimmedFallback = fallbackExpression.trim();
if (trimmedFallback.length > 0) {
const fallbackKey = `expression:${normalizeDuplicateValue(trimmedFallback)}`;
if (!dedupeKey.has(fallbackKey)) {
candidates.push({ fieldName: 'expression', value: trimmedFallback });
}
}
return candidates;
}
/**
 * Normalizes a field value for exact-duplicate comparison.
 *
 * Strips HTML tags, removes furigana reading annotations of the form
 * `漢字[かんじ]` (keeping the base text), collapses runs of whitespace to a
 * single space, and trims the result.
 *
 * Bug fix: a stale pre-refactor `return value.replace(/\s+/g, ' ').trim();`
 * line (leftover from a merge) preceded the new body, making the HTML-tag
 * and furigana stripping unreachable — e.g. `<b>猫</b>` never matched `猫`.
 * The stale statement is removed so the full normalization runs.
 *
 * @param value Raw field value as stored on the Anki note.
 * @returns Canonical form used for duplicate matching.
 */
function normalizeDuplicateValue(value: string): string {
  return value
    .replace(/<[^>]*>/g, '')
    .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1')
    .replace(/\s+/g, ' ')
    .trim();
}
function escapeAnkiSearchValue(value: string): string {

View File

@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
true,
);
});
// Regression test: repeated terms within one frequency bank must produce a
// single aggregated summary log line instead of one log line per duplicate
// entry, and lookups must still resolve to the surviving rank.
test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
  const logs: string[] = [];
  // Build a throwaway dictionary directory holding one term bank whose
  // entries repeat the same term with three different ranks.
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
  const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
  fs.writeFileSync(
    bankPath,
    JSON.stringify([
      ['猫', 1, { frequency: { displayValue: 100 } }],
      ['猫', 2, { frequency: { displayValue: 120 } }],
      ['猫', 3, { frequency: { displayValue: 110 } }],
    ]),
  );
  // Capture every log line the lookup factory emits while loading the bank.
  const lookup = await createFrequencyDictionaryLookup({
    searchPaths: [tempDir],
    log: (message) => {
      logs.push(message);
    },
  });
  // The surviving entry resolves to 100 after deduplication.
  assert.equal(lookup('猫'), 100);
  // Exactly one aggregated summary line mentioning both skipped duplicates…
  assert.equal(
    logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
    1,
  );
  // …and no per-duplicate log lines from the old noisy code path.
  assert.equal(
    logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
    false,
  );
});

View File

@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
function addEntriesToMap(
rawEntries: unknown,
terms: Map<string, number>,
log: (message: string) => void,
): void {
): { duplicateCount: number } {
if (!Array.isArray(rawEntries)) {
return;
return { duplicateCount: 0 };
}
let duplicateCount = 0;
for (const rawEntry of rawEntries) {
const entry = asFrequencyDictionaryEntry(rawEntry);
if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
continue;
}
log(
`Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
);
duplicateCount += 1;
}
return { duplicateCount };
}
function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
}
const beforeSize = terms.size;
addEntriesToMap(rawEntries, terms, log);
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
if (duplicateCount > 0) {
log(
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
duplicateCount === 1 ? 'y' : 'ies'
} in ${bankPath} (kept strongest rank per term).`,
);
}
if (terms.size === beforeSize) {
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
}

View File

@@ -1,3 +1,7 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
export type LogLevelSource = 'cli' | 'config';
@@ -107,6 +111,25 @@ function safeStringify(value: unknown): string {
}
}
/**
 * Determines where file-sink log lines are written.
 *
 * A non-blank SUBMINER_MPV_LOG environment variable takes precedence;
 * otherwise a per-day file under ~/.config/SubMiner/logs is used, named
 * after the current UTC calendar date.
 *
 * @returns Absolute (or env-provided) path of the log file.
 */
function resolveLogFilePath(): string {
  const override = (process.env.SUBMINER_MPV_LOG ?? '').trim();
  if (override.length > 0) {
    return override;
  }
  const day = new Date().toISOString().slice(0, 10);
  return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${day}.log`);
}
/**
 * Best-effort append of one line to the file log sink.
 *
 * Creates the log file's parent directory on demand, then appends the line
 * with a trailing newline (UTF-8). Every failure is swallowed deliberately:
 * the logging sink must never take down the runtime.
 *
 * @param line Pre-formatted log line (without trailing newline).
 */
function appendToLogFile(line: string): void {
  try {
    const target = resolveLogFilePath();
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.appendFileSync(target, line + '\n', { encoding: 'utf8' });
  } catch {
    // Logging must stay non-fatal; ignore sink failures.
  }
}
function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void {
const minLevel = resolveMinLevel();
if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) {
@@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(prefix);
}
appendToLogFile(prefix);
return;
}
@@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(finalMessage);
}
appendToLogFile(finalMessage);
}
export function createLogger(scope: string): Logger {

View File

@@ -487,7 +487,13 @@ if (process.platform === 'linux') {
app.setName('SubMiner');
const DEFAULT_TEXTHOOKER_PORT = 5174;
const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
const DEFAULT_MPV_LOG_FILE = path.join(
os.homedir(),
'.config',
'SubMiner',
'logs',
`SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
);
const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize';
const ANILIST_SETUP_RESPONSE_TYPE = 'token';
const ANILIST_DEFAULT_CLIENT_ID = '36084';