diff --git a/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md b/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md
new file mode 100644
index 0000000..8b1e09c
--- /dev/null
+++ b/backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md
@@ -0,0 +1,52 @@
+---
+id: TASK-94
+title: Fix Kiku duplicate detection for Yomitan-marked duplicates
+status: Done
+assignee:
+ - codex-duplicate-kiku-20260221T043006Z-5vkz
+created_date: '2026-02-21 04:33'
+updated_date: '2026-02-21 10:07'
+labels:
+ - bug
+ - anki
+ - kiku
+dependencies: []
+priority: high
+ordinal: 65000
+---
+
+## Description
+
+
+Kiku field grouping no longer detects duplicate cards in scenarios where the mined card is clearly marked duplicate by Yomitan/N+1 workflow. Restore duplicate detection so duplicate note lookup succeeds for equivalent expression/word cards and Kiku grouping can run.
+
+
+## Acceptance Criteria
+
+- [x] #1 Repro case covered by automated regression test in duplicate-detection path.
+- [x] #2 Kiku duplicate detection returns duplicate note id for the repro case.
+- [x] #3 Targeted tests for duplicate detection pass.
+
+
+## Implementation Notes
+
+
+Added regression test `src/anki-integration/duplicate.test.ts` for a cross-field duplicate case where current note uses `Expression` and candidate uses `Word` with same value.
+
+Updated duplicate matching in `src/anki-integration/duplicate.ts` to try alternate field-name aliases (`word` <-> `expression`) when resolving candidate note fields for exact-value verification.
+
+Follow-up fix: duplicate search query now also probes alias fields (`word` <-> `expression`) and merges candidate note ids before exact verification, so duplicates are still found when only the alias field is indexed/populated on existing cards.
+
+Second follow-up fix: duplicate detection now evaluates both source values when current note contains both `Expression` and `Word` (previously only one was used, depending on field-order). Query and exact verification now run against all source duplicate candidates.
+
+Third follow-up fix: if deck-scoped duplicate queries return no results, detection now retries the same source/alias query set collection-wide (no deck filter) before exact verification. This aligns with cases where Yomitan shows duplicates outside the configured mining deck.
+
+Fourth follow-up fix: if field-specific queries miss entirely, detection now falls back to phrase/plain-text queries (deck-scoped then collection-wide) and still requires exact `Expression/Word` value verification before selecting a duplicate note.
+
+Fifth follow-up: added explicit duplicate-search debug logs (query strings, hit counts, candidate counts, exact-match note id) to improve runtime diagnosis in live launcher runs.
+
+Verification:
+- `bun run build`
+- `node dist/anki-integration/duplicate.test.js`
+- `node --test dist/anki-integration.test.js`
+
diff --git a/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md b/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md
new file mode 100644
index 0000000..0469ee0
--- /dev/null
+++ b/backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md
@@ -0,0 +1,52 @@
+---
+id: TASK-96
+title: Decouple secondary subtitle lifecycle from visible/invisible overlays
+status: To Do
+assignee: []
+created_date: '2026-02-21 04:41'
+updated_date: '2026-02-21 04:41'
+labels:
+ - subtitles
+ - overlay
+ - architecture
+dependencies: []
+priority: high
+---
+
+## Description
+
+
+Secondary subtitle behavior should not depend on visible/invisible overlay state transitions. Introduce an independent lifecycle so secondary subtitle rendering, visibility mode (`always`/`hover`/`never`), and positioning stay stable even when primary overlays are toggled or rebound.
+
+
+## Suggestions
+
+
+- Isolate secondary subtitle state management from primary overlay window orchestration.
+- Route secondary subtitle updates through a dedicated service/controller boundary.
+- Keep MPV secondary subtitle property handling independent from overlay visibility toggles.
+
+
+## Action Steps
+
+
+1. Inventory existing coupling points between secondary subtitle updates and overlay visibility/bounds services.
+2. Introduce explicit secondary subtitle lifecycle state and transitions.
+3. Refactor event wiring so visible/invisible overlay toggles do not mutate secondary subtitle state.
+4. Validate display modes (`always`/`hover`/`never`) continue to work with independent lifecycle.
+5. Add regression tests for overlay toggles, reconnect/restart, and mode-switch behavior.
+
+
+## Acceptance Criteria
+
+- [ ] #1 Toggling visible or invisible overlays does not alter secondary subtitle lifecycle state.
+- [ ] #2 Secondary subtitle display mode behavior remains correct across overlay state transitions.
+- [ ] #3 Secondary subtitle behavior survives MPV reconnect/restart without overlay-coupling regressions.
+- [ ] #4 Automated tests cover decoupled lifecycle behavior and prevent re-coupling.
+
+
+## Definition of Done
+
+- [ ] #1 Relevant unit/integration tests pass
+- [ ] #2 Documentation/comments updated where lifecycle ownership changed
+
diff --git a/backlog/tasks/task-97 - Add-intro-skip-playback-control.md b/backlog/tasks/task-97 - Add-intro-skip-playback-control.md
new file mode 100644
index 0000000..bbfcb7b
--- /dev/null
+++ b/backlog/tasks/task-97 - Add-intro-skip-playback-control.md
@@ -0,0 +1,52 @@
+---
+id: TASK-97
+title: Add intro skip playback control
+status: To Do
+assignee: []
+created_date: '2026-02-21 04:41'
+updated_date: '2026-02-21 04:41'
+labels:
+ - playback
+ - ux
+dependencies: []
+priority: medium
+---
+
+## Description
+
+
+Add an intro skip control so users can jump past opening sequences quickly during playback. Start with a reliable manual control (shortcut/action) and clear user feedback after seek.
+
+
+## Suggestions
+
+
+- Add a configurable skip duration (for example 60/75/90 seconds).
+- Expose skip intro via keybinding and optional UI action in overlay/help.
+- Show transient confirmation (OSD/overlay message) after skip action.
+
+
+## Action Steps
+
+
+1. Define config and keybinding surface for intro skip duration and trigger.
+2. Implement intro skip command that performs bounded seek in active playback session.
+3. Wire command to user trigger path (keyboard + optional on-screen action if present).
+4. Emit user feedback after successful skip (current time + skipped duration).
+5. Add tests for command dispatch, seek bounds, and config fallback behavior.
+
+
+## Acceptance Criteria
+
+- [ ] #1 User can trigger intro skip during playback with configured shortcut/action.
+- [ ] #2 Skip performs bounded seek and never seeks before start or beyond stream duration.
+- [ ] #3 Skip duration is configurable with sane default.
+- [ ] #4 User receives visible confirmation after skip.
+- [ ] #5 Automated tests cover config + seek behavior.
+
+
+## Definition of Done
+
+- [ ] #1 Playback control tests pass
+- [ ] #2 User-facing config/docs updated for intro skip control
+
diff --git a/docs/subagents/INDEX.md b/docs/subagents/INDEX.md
index 9cb6bd7..d6f8e65 100644
--- a/docs/subagents/INDEX.md
+++ b/docs/subagents/INDEX.md
@@ -27,6 +27,6 @@ Read first. Keep concise.
| `codex-review-refactor-cleanup-20260220T113818Z-i2ov` | `codex-review-refactor-cleanup` | `Review recent TASK-85 refactor effort and identify remaining cleanup work` | `handoff` | `docs/subagents/agents/codex-review-refactor-cleanup-20260220T113818Z-i2ov.md` | `2026-02-20T11:48:28Z` |
| `codex-commit-unstaged-20260220T115057Z-k7q2` | `codex-commit-unstaged` | `Commit all current unstaged repository changes with content-derived conventional message` | `in_progress` | `docs/subagents/agents/codex-commit-unstaged-20260220T115057Z-k7q2.md` | `2026-02-20T11:51:18Z` |
| `codex-overlay-whitespace-newline-20260221T040705Z-aw2j` | `codex-overlay-whitespace-newline` | `Fix visible overlay whitespace/newline token rendering bug with TDD regression coverage` | `completed` | `docs/subagents/agents/codex-overlay-whitespace-newline-20260221T040705Z-aw2j.md` | `2026-02-21T04:18:16Z` |
-| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T04:38:25Z` |
+| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T10:07:58Z` |
| `codex-mpv-connect-log-20260221T043748Z-q7m1` | `codex-mpv-connect-log` | `Suppress repetitive MPV IPC connect-request INFO logs during startup` | `completed` | `docs/subagents/agents/codex-mpv-connect-log-20260221T043748Z-q7m1.md` | `2026-02-21T04:41:15Z` |
| `codex-add-backlog-tasks-20260221T044104Z-m3n8` | `codex-add-backlog-tasks` | `Add two unrelated backlog tasks: secondary subtitle decoupling and intro skip` | `done` | `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` | `2026-02-21T04:44:12Z` |
diff --git a/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md b/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md
new file mode 100644
index 0000000..b01aa68
--- /dev/null
+++ b/docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md
@@ -0,0 +1,29 @@
+# Agent: `codex-add-backlog-tasks-20260221T044104Z-m3n8`
+
+- alias: `codex-add-backlog-tasks`
+- mission: `Add two unrelated backlog tasks requested by user`
+- status: `done`
+- branch: `main`
+- started_at: `2026-02-21T04:41:04Z`
+- heartbeat_minutes: `5`
+
+## Current Work (newest first)
+- [2026-02-21T04:44:12Z] handoff: added `TASK-96` + `TASK-97` in `backlog/tasks`; updated index row to `done`.
+- [2026-02-21T04:43:00Z] progress: drafting `TASK-96` (secondary subtitle decoupling) and `TASK-97` (intro skip) under `backlog/tasks`.
+- [2026-02-21T04:42:10Z] intent: add two unrelated backlog tasks only; no code behavior changes.
+
+## Files Touched
+- `docs/subagents/INDEX.md`
+- `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md`
+- `backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md`
+- `backlog/tasks/task-97 - Add-intro-skip-playback-control.md`
+
+## Assumptions
+- User request means creating backlog tickets, not implementing either feature now.
+- Existing backlog format in `backlog/tasks` remains canonical.
+
+## Open Questions / Blockers
+- None.
+
+## Next Step
+- Wait for user follow-up (prioritize one of the two new tasks for implementation).
diff --git a/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md b/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md
new file mode 100644
index 0000000..e052610
--- /dev/null
+++ b/docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md
@@ -0,0 +1,74 @@
+# codex-duplicate-kiku-20260221T043006Z-5vkz
+
+- alias: `codex-duplicate-kiku`
+- mission: `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards`
+- status: `completed`
+- start_utc: `2026-02-21T04:30:06Z`
+- last_update_utc: `2026-02-21T10:07:58Z`
+
+## Intent
+
+- Reproduce bug where clear duplicate cards no longer trigger Kiku duplicate grouping.
+- Add failing regression test first (TDD).
+- Patch duplicate detection logic with minimal behavior change.
+
+## Planned Files
+
+- `src/anki-integration/duplicate.ts`
+- `src/anki-integration/duplicate.test.ts` (or nearest duplicate-detection tests)
+- `docs/subagents/INDEX.md`
+- `docs/subagents/collaboration.md`
+- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
+
+## Assumptions
+
+- Duplicate signal should still come from Anki duplicate search + Yomitan/N+1-derived fields used in note content.
+- Regression likely from term/readings normalization/query escaping mismatch.
+
+## Outcome
+
+- Root cause: candidate-note exact-check only resolved the originating field name (`Expression` or `Word`), so duplicates failed when candidate note used the opposite alias.
+- Added regression test first (RED): `Expression` current note vs `Word` candidate with same value returned `null`.
+- Implemented minimal fix: candidate resolution now checks both aliases (`word` and `expression`) before exact-value compare.
+- GREEN: targeted duplicate test passed; related `anki-integration` test passed.
+- User follow-up repro showed remaining miss when duplicate appears only in alias field search results.
+- Added second RED test for alias-query fallback.
+- Implemented query-stage alias fallback: run `findNotes` for both alias fields, merge note ids, then exact-verify.
+- GREEN after follow-up: duplicate tests + `anki-integration` test pass.
+- User reported still failing after first follow-up.
+- Added third RED regression: source note containing both `Expression` (sentence) and `Word` (term) only matched duplicates via `Word`; previous logic missed this by using only one source value.
+- Implemented source-candidate expansion: gather both `Word` and `Expression` source values, query aliases for each, dedupe queries, then exact-match against normalized set.
+- GREEN: duplicate tests (3/3) + `anki-integration` test pass.
+- Image-backed repro indicated possible duplicate outside configured deck scope.
+- Added fourth RED regression: deck-scoped query misses, collection-wide query should still detect duplicate.
+- Implemented deck fallback query pass (same source/alias combinations without deck filter) when deck-scoped pass yields no candidates.
+- GREEN: duplicate tests (4/4) + `anki-integration` test pass.
+- User confirmed fresh build/install still failed with `貴様` repro.
+- Added fifth RED regression: field-specific queries return no matches but plain text query returns candidate.
+- Implemented plain-text query fallback pass (deck-scoped then global), still gated by exact `word`/`expression` value verify.
+- GREEN: duplicate tests (5/5) + `anki-integration` test pass.
+- Added runtime debug instrumentation for duplicate detection query/verification path:
+ - query string + hit count
+ - candidate count after exclude
+ - exact-match note id + field
+- No behavior change from instrumentation; build + tests still green.
+- User requested logging policy update: prefer console output unless explicitly captured, and persistent logs under `~/.config/SubMiner/logs/*.log`.
+- Updated default launcher/app mpv log path to daily file naming: `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log`.
+- Typecheck green.
+- Found observability gap: app logger wrote only to stdout/stderr while launcher log file only captured wrapper messages.
+- Added file sink to `src/logger.ts` so app logs also append to `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (or `SUBMINER_MPV_LOG` when set).
+- Verified with typecheck + build.
+
+## Files Touched
+
+- `src/anki-integration/duplicate.ts`
+- `src/anki-integration/duplicate.test.ts`
+- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
+- `docs/subagents/INDEX.md`
+- `docs/subagents/collaboration.md`
+- `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md`
+
+## Handoff
+
+- No blockers.
+- Next step: run broader gate (`bun run test:fast`) when ready, then commit.
diff --git a/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md b/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
new file mode 100644
index 0000000..71595b8
--- /dev/null
+++ b/docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
@@ -0,0 +1,28 @@
+# Agent Log: codex-frequency-dup-log-20260221T042815Z-r4k1
+
+- alias: codex-frequency-dup-log
+- mission: reduce frequency dictionary duplicate-term startup log spam; keep useful signal
+- status: completed
+- started_utc: 2026-02-21T04:28:15Z
+- last_update_utc: 2026-02-21T04:32:40Z
+- planned_files:
+ - src/core/services/frequency-dictionary.ts
+ - src/core/services/frequency-dictionary.test.ts
+ - docs/subagents/INDEX.md
+- touched_files:
+ - src/core/services/frequency-dictionary.ts
+ - src/core/services/frequency-dictionary.test.ts
+ - docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
+ - docs/subagents/INDEX.md
+- key_decisions:
+ - remove per-entry duplicate term logs
+ - keep one aggregate duplicate summary line per bank file at info level
+- assumptions:
+ - duplicate entries are expected in source dictionary and should not produce per-entry info logs
+- verification:
+ - `bun test src/core/services/frequency-dictionary.test.ts` (pass)
+ - full build currently blocked by unrelated Jellyfin WIP type errors on branch
+- blockers:
+ - unrelated branch state prevents full `bun run build`
+- next_step:
+ - optional follow-up: add true debug-level logging API if duplicate diagnostics are needed on demand
diff --git a/docs/subagents/collaboration.md b/docs/subagents/collaboration.md
index 37ad529..fc211de 100644
--- a/docs/subagents/collaboration.md
+++ b/docs/subagents/collaboration.md
@@ -25,5 +25,11 @@ Shared notes. Append-only.
- [2026-02-21T04:30:06Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] investigating Kiku duplicate grouping regression; expecting touches in `src/anki-integration/duplicate.ts` and duplicate-detection tests only.
- [2026-02-21T04:33:17Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] completed TASK-94: duplicate check now resolves `word`/`expression` alias fields when validating candidate notes; added regression test `src/anki-integration/duplicate.test.ts`; targeted build + duplicate/anki-integration tests passed.
- [2026-02-21T04:38:25Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] follow-up repro fixed: duplicate search now queries both alias fields (`word` + `expression`) and unions note ids before exact compare; added second regression test for alias-query fallback.
+- [2026-02-21T04:48:50Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] second follow-up fix: when source note has both `Expression` and `Word`, duplicate detection now uses both source values (not just first field by order); added regression for mixed-field source candidate scenario.
+- [2026-02-21T07:23:56Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] third follow-up fix: add collection-wide fallback query pass when deck-scoped duplicate search returns no candidates; added regression for deck-scope miss case.
+- [2026-02-21T09:25:53Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] fourth follow-up fix: add plain-text query fallback when field-scoped queries miss; keep exact value verification on candidate notes to avoid false positives.
+- [2026-02-21T09:40:33Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] instrumentation pass: add duplicate-detection debug logs (`[duplicate] query/hits/candidates/exact-match`) to isolate remaining live repro mismatches.
+- [2026-02-21T09:54:29Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] logging-path update: default persistent logs now target `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (launcher + app mpv log default).
+- [2026-02-21T10:07:58Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] observability fix: app logger now also appends to daily log file, so runtime duplicate traces are available even when overlay stdout is not surfaced in launcher terminal.
- [2026-02-21T04:37:48Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] overlap note: touching `src/core/services/mpv.ts` + mpv service tests for startup connection-request log level gating; coordinating with historical TASK-33 behavior (same symptom, new logger path).
- [2026-02-21T04:41:15Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] completed TASK-95: changed `MpvIpcClient.connect()` connect-request line to `logger.debug`, added regression tests for info/debug level log behavior in `src/core/services/mpv.test.ts`; verified via `bun run build && node dist/core/services/mpv.test.js` (pass).
diff --git a/launcher/types.ts b/launcher/types.ts
index 76caa36..dee8c2d 100644
--- a/launcher/types.ts
+++ b/launcher/types.ts
@@ -34,7 +34,13 @@ export const DEFAULT_YOUTUBE_SUBGEN_OUT_DIR = path.join(
'subminer',
'youtube-subs',
);
-export const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
+export const DEFAULT_MPV_LOG_FILE = path.join(
+ os.homedir(),
+ '.config',
+ 'SubMiner',
+ 'logs',
+ `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
+);
export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best';
export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc';
export const DEFAULT_MPV_SUBMINER_ARGS = [
diff --git a/src/anki-integration.ts b/src/anki-integration.ts
index 5444a9f..ccaa8c4 100644
--- a/src/anki-integration.ts
+++ b/src/anki-integration.ts
@@ -970,6 +970,12 @@ export class AnkiIntegration {
notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown,
getDeck: () => this.config.deck,
resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName),
+ logInfo: (message) => {
+ log.info(message);
+ },
+ logDebug: (message) => {
+ log.debug(message);
+ },
logWarn: (message, error) => {
log.warn(message, (error as Error).message);
},
diff --git a/src/anki-integration/duplicate.test.ts b/src/anki-integration/duplicate.test.ts
new file mode 100644
index 0000000..240c6b2
--- /dev/null
+++ b/src/anki-integration/duplicate.test.ts
@@ -0,0 +1,265 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import { findDuplicateNote, type NoteInfo } from './duplicate';
+
+function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null {
+ const names = Object.keys(noteInfo.fields);
+ const exact = names.find((name) => name === preferredName);
+ if (exact) return exact;
+ const lower = preferredName.toLowerCase();
+ return names.find((name) => name.toLowerCase() === lower) ?? null;
+}
+
+test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '食べる' },
+ },
+ };
+
+ const duplicateId = await findDuplicateNote('食べる', 100, currentNote, {
+ findNotes: async () => [100, 200],
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Word: { value: '食べる' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+});
+
+test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '食べる' },
+ },
+ };
+
+ const seenQueries: string[] = [];
+ const duplicateId = await findDuplicateNote('食べる', 100, currentNote, {
+ findNotes: async (query) => {
+ seenQueries.push(query);
+ if (query.includes('"Expression:')) {
+ return [];
+ }
+ if (query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:')) {
+ return [200];
+ }
+ return [];
+ },
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Word: { value: '食べる' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+ assert.equal(seenQueries.length, 2);
+});
+
+test('findDuplicateNote checks both source expression/word values when both fields are present', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '昨日は雨だった。' },
+ Word: { value: '雨' },
+ },
+ };
+
+ const seenQueries: string[] = [];
+ const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, currentNote, {
+ findNotes: async (query) => {
+ seenQueries.push(query);
+ if (query.includes('昨日は雨だった。')) {
+ return [];
+ }
+ if (query.includes('"Word:雨"') || query.includes('"word:雨"') || query.includes('"Expression:雨"')) {
+ return [200];
+ }
+ return [];
+ },
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Word: { value: '雨' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+ assert.ok(seenQueries.some((query) => query.includes('昨日は雨だった。')));
+ assert.ok(seenQueries.some((query) => query.includes('雨')));
+});
+
+test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ };
+
+ const seenQueries: string[] = [];
+ const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
+ findNotes: async (query) => {
+ seenQueries.push(query);
+ if (query.includes('deck:Japanese')) {
+ return [];
+ }
+ if (query.includes('"Expression:貴様"') || query.includes('"Word:貴様"')) {
+ return [200];
+ }
+ return [];
+ },
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+ assert.ok(seenQueries.some((query) => query.includes('deck:Japanese')));
+ assert.ok(seenQueries.some((query) => !query.includes('deck:Japanese')));
+});
+
+test('findDuplicateNote falls back to plain text query when field queries miss', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ };
+
+ const seenQueries: string[] = [];
+ const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
+ findNotes: async (query) => {
+ seenQueries.push(query);
+ if (query.includes('Expression:') || query.includes('Word:')) {
+ return [];
+ }
+ if (query.includes('"貴様"')) {
+ return [200];
+ }
+ return [];
+ },
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+ assert.ok(seenQueries.some((query) => query.includes('Expression:')));
+ assert.ok(seenQueries.some((query) => query.endsWith('"貴様"')));
+});
+
+test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ };
+
+ const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
+ findNotes: async () => [200],
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Expression: { value: '貴様[きさま]' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+});
+
+test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ };
+
+ const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
+ findNotes: async () => [200],
+ notesInfo: async () => [
+ {
+ noteId: 200,
+ fields: {
+ Expression: { value: '<b>貴様</b>' },
+ },
+ },
+ ],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.equal(duplicateId, 200);
+});
+
+test('findDuplicateNote does not disable retries on findNotes calls', async () => {
+ const currentNote: NoteInfo = {
+ noteId: 100,
+ fields: {
+ Expression: { value: '貴様' },
+ },
+ };
+
+ const seenOptions: Array<{ maxRetries?: number } | undefined> = [];
+ await findDuplicateNote('貴様', 100, currentNote, {
+ findNotes: async (_query, options) => {
+ seenOptions.push(options);
+ return [];
+ },
+ notesInfo: async () => [],
+ getDeck: () => 'Japanese::Mining',
+ resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
+ logWarn: () => {},
+ });
+
+ assert.ok(seenOptions.length > 0);
+ assert.ok(seenOptions.every((options) => options?.maxRetries !== 0));
+});
diff --git a/src/anki-integration/duplicate.ts b/src/anki-integration/duplicate.ts
index 23b33cb..52ed7ff 100644
--- a/src/anki-integration/duplicate.ts
+++ b/src/anki-integration/duplicate.ts
@@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps {
notesInfo: (noteIds: number[]) => Promise<unknown>;
getDeck: () => string | null | undefined;
resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null;
+ logInfo?: (message: string) => void;
+ logDebug?: (message: string) => void;
logWarn: (message: string, error: unknown) => void;
}
@@ -21,25 +23,68 @@ export async function findDuplicateNote(
noteInfo: NoteInfo,
deps: DuplicateDetectionDeps,
): Promise<number | null> {
- let fieldName = '';
- for (const name of Object.keys(noteInfo.fields)) {
- if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) {
- fieldName = name;
- break;
- }
- }
- if (!fieldName) return null;
+ const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression);
+ if (sourceCandidates.length === 0) return null;
+ deps.logInfo?.(
+ `[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates
+ .map((entry) => `${entry.fieldName}:${entry.value}`)
+ .join('|')}`,
+ );
- const escapedFieldName = escapeAnkiSearchValue(fieldName);
- const escapedExpression = escapeAnkiSearchValue(expression);
- const deckPrefix = deps.getDeck() ? `"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : '';
- const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`;
+ const deckValue = deps.getDeck();
+ const queryPrefixes = deckValue
+ ? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, '']
+ : [''];
try {
- const noteIds = (await deps.findNotes(query, {
- maxRetries: 0,
- })) as number[];
- return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps);
+ const noteIds = new Set<number>();
+ const executedQueries = new Set<string>();
+ for (const queryPrefix of queryPrefixes) {
+ for (const sourceCandidate of sourceCandidates) {
+ const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
+ const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName);
+ for (const queryFieldName of queryFieldNames) {
+ const escapedFieldName = escapeAnkiSearchValue(queryFieldName);
+ const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`;
+ if (executedQueries.has(query)) continue;
+ executedQueries.add(query);
+ const results = (await deps.findNotes(query)) as number[];
+ deps.logDebug?.(
+ `[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
+ );
+ for (const noteId of results) {
+ noteIds.add(noteId);
+ }
+ }
+ }
+ if (noteIds.size > 0) break;
+ }
+
+ if (noteIds.size === 0) {
+ for (const queryPrefix of queryPrefixes) {
+ for (const sourceCandidate of sourceCandidates) {
+ const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
+ const query = `${queryPrefix}"${escapedExpression}"`;
+ if (executedQueries.has(query)) continue;
+ executedQueries.add(query);
+ const results = (await deps.findNotes(query)) as number[];
+ deps.logDebug?.(
+ `[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
+ );
+ for (const noteId of results) {
+ noteIds.add(noteId);
+ }
+ }
+ if (noteIds.size > 0) break;
+ }
+ }
+
+ return await findFirstExactDuplicateNoteId(
+ noteIds,
+ excludeNoteId,
+ sourceCandidates.map((candidate) => candidate.value),
+ deps,
+ );
} catch (error) {
deps.logWarn('Duplicate search failed:', error);
return null;
@@ -47,18 +92,25 @@ export async function findDuplicateNote(
}
function findFirstExactDuplicateNoteId(
- candidateNoteIds: number[],
+ candidateNoteIds: Iterable<number>,
excludeNoteId: number,
- fieldName: string,
- expression: string,
+ sourceValues: string[],
deps: DuplicateDetectionDeps,
): Promise<number | null> {
- const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId);
+ const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId);
+ deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`);
if (candidates.length === 0) {
+ deps.logInfo?.('[duplicate] no candidates after query + exclude');
+ return Promise.resolve(null);
+ }
+
+ const normalizedValues = new Set(
+ sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0),
+ );
+ if (normalizedValues.size === 0) {
return Promise.resolve(null);
}
- const normalizedExpression = normalizeDuplicateValue(expression);
const chunkSize = 50;
return (async () => {
for (let i = 0; i < candidates.length; i += chunkSize) {
@@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId(
const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[];
const notesInfo = notesInfoResult as NoteInfo[];
for (const noteInfo of notesInfo) {
- const resolvedField = deps.resolveFieldName(noteInfo, fieldName);
- if (!resolvedField) continue;
- const candidateValue = noteInfo.fields[resolvedField]?.value || '';
- if (normalizeDuplicateValue(candidateValue) === normalizedExpression) {
- return noteInfo.noteId;
+ const candidateFieldNames = ['word', 'expression'];
+ for (const candidateFieldName of candidateFieldNames) {
+ const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName);
+ if (!resolvedField) continue;
+ const candidateValue = noteInfo.fields[resolvedField]?.value || '';
+ if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) {
+ deps.logDebug?.(
+ `[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`,
+ );
+ deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`);
+ return noteInfo.noteId;
+ }
}
}
}
+ deps.logInfo?.('[duplicate] no exact match in candidate notes');
return null;
})();
}
+function getDuplicateCandidateFieldNames(fieldName: string): string[] {
+ const candidates = [fieldName];
+ const lower = fieldName.toLowerCase();
+ if (lower === 'word') {
+ candidates.push('expression');
+ } else if (lower === 'expression') {
+ candidates.push('word');
+ }
+ return candidates;
+}
+
+function getDuplicateSourceCandidates(
+ noteInfo: NoteInfo,
+ fallbackExpression: string,
+): Array<{ fieldName: string; value: string }> {
+ const candidates: Array<{ fieldName: string; value: string }> = [];
+ const dedupeKey = new Set<string>();
+
+ for (const fieldName of Object.keys(noteInfo.fields)) {
+ const lower = fieldName.toLowerCase();
+ if (lower !== 'word' && lower !== 'expression') continue;
+ const value = noteInfo.fields[fieldName]?.value?.trim() ?? '';
+ if (!value) continue;
+ const key = `${lower}:${normalizeDuplicateValue(value)}`;
+ if (dedupeKey.has(key)) continue;
+ dedupeKey.add(key);
+ candidates.push({ fieldName, value });
+ }
+
+ const trimmedFallback = fallbackExpression.trim();
+ if (trimmedFallback.length > 0) {
+ const fallbackKey = `expression:${normalizeDuplicateValue(trimmedFallback)}`;
+ if (!dedupeKey.has(fallbackKey)) {
+ candidates.push({ fieldName: 'expression', value: trimmedFallback });
+ }
+ }
+
+ return candidates;
+}
+
function normalizeDuplicateValue(value: string): string {
- return value.replace(/\s+/g, ' ').trim();
+ return value
+ .replace(/<[^>]*>/g, '')
+ .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1')
+ .replace(/\s+/g, ' ')
+ .trim();
}
function escapeAnkiSearchValue(value: string): string {
diff --git a/src/core/services/frequency-dictionary.test.ts b/src/core/services/frequency-dictionary.test.ts
index ae8049d..baca354 100644
--- a/src/core/services/frequency-dictionary.test.ts
+++ b/src/core/services/frequency-dictionary.test.ts
@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
true,
);
});
+
+test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
+ const logs: string[] = [];
+ const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
+ const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
+ fs.writeFileSync(
+ bankPath,
+ JSON.stringify([
+ ['猫', 1, { frequency: { displayValue: 100 } }],
+ ['猫', 2, { frequency: { displayValue: 120 } }],
+ ['猫', 3, { frequency: { displayValue: 110 } }],
+ ]),
+ );
+
+ const lookup = await createFrequencyDictionaryLookup({
+ searchPaths: [tempDir],
+ log: (message) => {
+ logs.push(message);
+ },
+ });
+
+ assert.equal(lookup('猫'), 100);
+ assert.equal(
+ logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
+ 1,
+ );
+ assert.equal(
+ logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
+ false,
+ );
+});
diff --git a/src/core/services/frequency-dictionary.ts b/src/core/services/frequency-dictionary.ts
index ea5f9fd..b8c84af 100644
--- a/src/core/services/frequency-dictionary.ts
+++ b/src/core/services/frequency-dictionary.ts
@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
function addEntriesToMap(
rawEntries: unknown,
- terms: Map<string, number>,
- log: (message: string) => void,
-): void {
+): { duplicateCount: number } {
if (!Array.isArray(rawEntries)) {
- return;
+ return { duplicateCount: 0 };
}
+ let duplicateCount = 0;
for (const rawEntry of rawEntries) {
const entry = asFrequencyDictionaryEntry(rawEntry);
if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
continue;
}
- log(
- `Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
- );
+ duplicateCount += 1;
}
+
+ return { duplicateCount };
}
function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
}
const beforeSize = terms.size;
- addEntriesToMap(rawEntries, terms, log);
+ const { duplicateCount } = addEntriesToMap(rawEntries, terms);
+ if (duplicateCount > 0) {
+ log(
+ `Frequency dictionary ignored ${duplicateCount} duplicate term entr${
+ duplicateCount === 1 ? 'y' : 'ies'
+ } in ${bankPath} (kept strongest rank per term).`,
+ );
+ }
if (terms.size === beforeSize) {
log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
}
diff --git a/src/logger.ts b/src/logger.ts
index 0d01323..3092095 100644
--- a/src/logger.ts
+++ b/src/logger.ts
@@ -1,3 +1,7 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
export type LogLevelSource = 'cli' | 'config';
@@ -107,6 +111,25 @@ function safeStringify(value: unknown): string {
}
}
+function resolveLogFilePath(): string {
+ const envPath = process.env.SUBMINER_MPV_LOG?.trim();
+ if (envPath) {
+ return envPath;
+ }
+ const date = new Date().toISOString().slice(0, 10);
+ return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${date}.log`);
+}
+
+function appendToLogFile(line: string): void {
+ try {
+ const logPath = resolveLogFilePath();
+ fs.mkdirSync(path.dirname(logPath), { recursive: true });
+ fs.appendFileSync(logPath, `${line}\n`, { encoding: 'utf8' });
+ } catch {
+ // never break runtime due to logging sink failures
+ }
+}
+
function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void {
const minLevel = resolveMinLevel();
if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) {
@@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(prefix);
}
+ appendToLogFile(prefix);
return;
}
@@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
} else {
console.info(finalMessage);
}
+ appendToLogFile(finalMessage);
}
export function createLogger(scope: string): Logger {
diff --git a/src/main.ts b/src/main.ts
index dbd6d86..35d5e2e 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -487,7 +487,13 @@ if (process.platform === 'linux') {
app.setName('SubMiner');
const DEFAULT_TEXTHOOKER_PORT = 5174;
-const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
+const DEFAULT_MPV_LOG_FILE = path.join(
+ os.homedir(),
+ '.config',
+ 'SubMiner',
+ 'logs',
+ `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
+);
const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize';
const ANILIST_SETUP_RESPONSE_TYPE = 'token';
const ANILIST_DEFAULT_CLIENT_ID = '36084';