mirror of https://github.com/ksyasuda/SubMiner.git (synced 2026-02-27 18:22:41 -08:00)
chore: commit unstaged workspace changes
@@ -0,0 +1,52 @@
---
id: TASK-94
title: Fix Kiku duplicate detection for Yomitan-marked duplicates
status: Done
assignee:
  - codex-duplicate-kiku-20260221T043006Z-5vkz
created_date: '2026-02-21 04:33'
updated_date: '2026-02-21 01:40'
labels:
  - bug
  - anki
  - kiku
dependencies: []
priority: high
ordinal: 65000
---

## Description

<!-- SECTION:DESCRIPTION:BEGIN -->
Kiku field grouping no longer detects duplicate cards in scenarios where the mined card is clearly marked as a duplicate by the Yomitan/N+1 workflow. Restore duplicate detection so the duplicate note lookup succeeds for equivalent expression/word cards and Kiku grouping can run.
<!-- SECTION:DESCRIPTION:END -->

## Acceptance Criteria

<!-- AC:BEGIN -->
- [x] #1 Repro case covered by an automated regression test in the duplicate-detection path.
- [x] #2 Kiku duplicate detection returns the duplicate note id for the repro case.
- [x] #3 Targeted tests for duplicate detection pass.
<!-- AC:END -->

## Implementation Notes

<!-- SECTION:NOTES:BEGIN -->
Added regression test `src/anki-integration/duplicate.test.ts` for a cross-field duplicate case where the current note uses `Expression` and the candidate uses `Word` with the same value.

Updated duplicate matching in `src/anki-integration/duplicate.ts` to try alternate field-name aliases (`word` <-> `expression`) when resolving candidate note fields for exact-value verification.

Follow-up fix: the duplicate search query now also probes alias fields (`word` <-> `expression`) and merges candidate note ids before exact verification, so duplicates are still found when only the alias field is indexed/populated on existing cards.

Second follow-up fix: duplicate detection now evaluates both source values when the current note contains both `Expression` and `Word` (previously only one was used, depending on field order). Query and exact verification now run against all source duplicate candidates.

Third follow-up fix: if deck-scoped duplicate queries return no results, detection now retries the same source/alias query set collection-wide (no deck filter) before exact verification. This covers cases where Yomitan shows duplicates outside the configured mining deck.

Fourth follow-up fix: if field-specific queries miss entirely, detection now falls back to phrase/plain-text queries (deck-scoped, then collection-wide) and still requires exact `Expression`/`Word` value verification before selecting a duplicate note.

Fifth follow-up: added explicit duplicate-search debug logs (query strings, hit counts, candidate counts, exact-match note id) to improve runtime diagnosis in live launcher runs. The combined fallback ladder is sketched below.
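
The net effect of the follow-ups is a layered query ladder. A condensed sketch of its shape (the helper name and loop structure here are illustrative; the authoritative version is the `findDuplicateNote` change to `src/anki-integration/duplicate.ts` in this commit, and query escaping is omitted for brevity):

```typescript
// Illustrative sketch only: field-scoped queries over both alias fields run
// first, then plain-text queries; each pass goes deck-scoped before
// collection-wide, and every hit still requires exact-value verification.
type FindNotes = (query: string) => Promise<number[]>;

async function collectDuplicateCandidates(
  sourceValues: string[], // Expression/Word values from the current note
  deck: string | null,
  findNotes: FindNotes,
): Promise<Set<number>> {
  const prefixes = deck ? [`"deck:${deck}" `, ''] : [''];
  const hits = new Set<number>();
  // Pass 1: field-scoped queries over both alias fields.
  for (const prefix of prefixes) {
    for (const value of sourceValues) {
      for (const field of ['Expression', 'Word']) {
        for (const id of await findNotes(`${prefix}"${field}:${value}"`)) hits.add(id);
      }
    }
    if (hits.size > 0) return hits; // stop at the first scope that matches
  }
  // Pass 2: plain-text fallback when every field-scoped query missed.
  for (const prefix of prefixes) {
    for (const value of sourceValues) {
      for (const id of await findNotes(`${prefix}"${value}"`)) hits.add(id);
    }
    if (hits.size > 0) break;
  }
  return hits;
}
```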

Verification:

- `bun run build`
- `node dist/anki-integration/duplicate.test.js`
- `node --test dist/anki-integration.test.js`
<!-- SECTION:NOTES:END -->
@@ -0,0 +1,52 @@
---
id: TASK-96
title: Decouple secondary subtitle lifecycle from visible/invisible overlays
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
  - subtitles
  - overlay
  - architecture
dependencies: []
priority: high
---

## Description

<!-- SECTION:DESCRIPTION:BEGIN -->
Secondary subtitle behavior should not depend on visible/invisible overlay state transitions. Introduce an independent lifecycle so secondary subtitle rendering, visibility mode (`always`/`hover`/`never`), and positioning stay stable even when primary overlays are toggled or rebound.
<!-- SECTION:DESCRIPTION:END -->

## Suggestions

<!-- SECTION:SUGGESTIONS:BEGIN -->
- Isolate secondary subtitle state management from primary overlay window orchestration.
- Route secondary subtitle updates through a dedicated service/controller boundary.
- Keep MPV secondary subtitle property handling independent from overlay visibility toggles.
<!-- SECTION:SUGGESTIONS:END -->

## Action Steps

<!-- SECTION:PLAN:BEGIN -->
1. Inventory existing coupling points between secondary subtitle updates and overlay visibility/bounds services.
2. Introduce explicit secondary subtitle lifecycle state and transitions (see the sketch after this list).
3. Refactor event wiring so visible/invisible overlay toggles do not mutate secondary subtitle state.
4. Validate that the display modes (`always`/`hover`/`never`) continue to work with the independent lifecycle.
5. Add regression tests for overlay toggles, reconnect/restart, and mode-switch behavior.
<!-- SECTION:PLAN:END -->
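
One possible shape for step 2, as a minimal sketch (type and method names are assumptions, not existing SubMiner APIs):

```typescript
// Hypothetical lifecycle owner for the secondary subtitle track. It reacts
// only to playback/track events; overlay visibility toggles have no entry
// point here, which is exactly the decoupling this task asks for.
type SecondaryMode = 'always' | 'hover' | 'never';
type SecondaryState = 'idle' | 'active' | 'suspended';

class SecondarySubtitleLifecycle {
  private state: SecondaryState = 'idle';

  constructor(private mode: SecondaryMode) {}

  onTrackLoaded(): void {
    // 'never' keeps the track dormant; other modes activate rendering.
    this.state = this.mode === 'never' ? 'idle' : 'active';
  }

  onMpvDisconnected(): void {
    this.state = 'suspended'; // remember the mode, resume on reconnect
  }

  onMpvReconnected(): void {
    if (this.state === 'suspended') this.onTrackLoaded();
  }

  current(): SecondaryState {
    return this.state;
  }
}
```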

## Acceptance Criteria

<!-- AC:BEGIN -->
- [ ] #1 Toggling visible or invisible overlays does not alter secondary subtitle lifecycle state.
- [ ] #2 Secondary subtitle display mode behavior remains correct across overlay state transitions.
- [ ] #3 Secondary subtitle behavior survives MPV reconnect/restart without overlay-coupling regressions.
- [ ] #4 Automated tests cover the decoupled lifecycle behavior and prevent re-coupling.
<!-- AC:END -->

## Definition of Done

<!-- DOD:BEGIN -->
- [ ] #1 Relevant unit/integration tests pass
- [ ] #2 Documentation/comments updated where lifecycle ownership changed
<!-- DOD:END -->
backlog/tasks/task-97 - Add-intro-skip-playback-control.md (new file, 52 lines)
@@ -0,0 +1,52 @@
---
id: TASK-97
title: Add intro skip playback control
status: To Do
assignee: []
created_date: '2026-02-21 04:41'
updated_date: '2026-02-21 04:41'
labels:
  - playback
  - ux
dependencies: []
priority: medium
---

## Description

<!-- SECTION:DESCRIPTION:BEGIN -->
Add an intro skip control so users can jump past opening sequences quickly during playback. Start with a reliable manual control (shortcut/action) and clear user feedback after the seek.
<!-- SECTION:DESCRIPTION:END -->

## Suggestions

<!-- SECTION:SUGGESTIONS:BEGIN -->
- Add a configurable skip duration (for example 60/75/90 seconds).
- Expose skip intro via a keybinding and an optional UI action in overlay/help.
- Show a transient confirmation (OSD/overlay message) after the skip action.
<!-- SECTION:SUGGESTIONS:END -->

## Action Steps

<!-- SECTION:PLAN:BEGIN -->
1. Define the config and keybinding surface for the intro skip duration and trigger.
2. Implement an intro skip command that performs a bounded seek in the active playback session (see the sketch after this list).
3. Wire the command to the user trigger path (keyboard + optional on-screen action if present).
4. Emit user feedback after a successful skip (current time + skipped duration).
5. Add tests for command dispatch, seek bounds, and config fallback behavior.
<!-- SECTION:PLAN:END -->
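
A minimal sketch of the bounded seek in step 2 (function and parameter names are illustrative, not existing SubMiner APIs):

```typescript
// Clamp the seek target so the skip never lands before the start of the
// stream or beyond its duration (acceptance criterion #2).
function clampSkipTarget(
  currentTimeSec: number,
  skipSec: number, // configurable skip duration; the default is still to be decided
  durationSec: number,
): number {
  const target = currentTimeSec + skipSec;
  return Math.min(Math.max(target, 0), Math.max(durationSec, 0));
}

// Example: 30s into a 20-minute episode with a 90s skip lands at 120s;
// at 19:00 the same skip clamps to the 20:00 end instead of overshooting.
```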

## Acceptance Criteria

<!-- AC:BEGIN -->
- [ ] #1 User can trigger intro skip during playback with the configured shortcut/action.
- [ ] #2 Skip performs a bounded seek and never seeks before the start or beyond the stream duration.
- [ ] #3 Skip duration is configurable with a sane default.
- [ ] #4 User receives visible confirmation after the skip.
- [ ] #5 Automated tests cover config + seek behavior.
<!-- AC:END -->

## Definition of Done

<!-- DOD:BEGIN -->
- [ ] #1 Playback control tests pass
- [ ] #2 User-facing config/docs updated for the intro skip control
<!-- DOD:END -->
@@ -27,6 +27,6 @@ Read first. Keep concise.
 | `codex-review-refactor-cleanup-20260220T113818Z-i2ov` | `codex-review-refactor-cleanup` | `Review recent TASK-85 refactor effort and identify remaining cleanup work` | `handoff` | `docs/subagents/agents/codex-review-refactor-cleanup-20260220T113818Z-i2ov.md` | `2026-02-20T11:48:28Z` |
 | `codex-commit-unstaged-20260220T115057Z-k7q2` | `codex-commit-unstaged` | `Commit all current unstaged repository changes with content-derived conventional message` | `in_progress` | `docs/subagents/agents/codex-commit-unstaged-20260220T115057Z-k7q2.md` | `2026-02-20T11:51:18Z` |
 | `codex-overlay-whitespace-newline-20260221T040705Z-aw2j` | `codex-overlay-whitespace-newline` | `Fix visible overlay whitespace/newline token rendering bug with TDD regression coverage` | `completed` | `docs/subagents/agents/codex-overlay-whitespace-newline-20260221T040705Z-aw2j.md` | `2026-02-21T04:18:16Z` |
-| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T04:38:25Z` |
+| `codex-duplicate-kiku-20260221T043006Z-5vkz` | `codex-duplicate-kiku` | `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards` | `completed` | `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md` | `2026-02-21T10:07:58Z` |
 | `codex-mpv-connect-log-20260221T043748Z-q7m1` | `codex-mpv-connect-log` | `Suppress repetitive MPV IPC connect-request INFO logs during startup` | `completed` | `docs/subagents/agents/codex-mpv-connect-log-20260221T043748Z-q7m1.md` | `2026-02-21T04:41:15Z` |
 | `codex-add-backlog-tasks-20260221T044104Z-m3n8` | `codex-add-backlog-tasks` | `Add two unrelated backlog tasks: secondary subtitle decoupling and intro skip` | `done` | `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md` | `2026-02-21T04:44:12Z` |
@@ -0,0 +1,29 @@
# Agent: `codex-add-backlog-tasks-20260221T044104Z-m3n8`

- alias: `codex-add-backlog-tasks`
- mission: `Add two unrelated backlog tasks requested by user`
- status: `done`
- branch: `main`
- started_at: `2026-02-21T04:41:04Z`
- heartbeat_minutes: `5`

## Current Work (newest first)

- [2026-02-21T04:44:12Z] handoff: added `TASK-96` + `TASK-97` in `backlog/tasks`; updated index row to `done`.
- [2026-02-21T04:43:00Z] progress: drafting `TASK-96` (secondary subtitle decoupling) and `TASK-97` (intro skip) under `backlog/tasks`.
- [2026-02-21T04:42:10Z] intent: add two unrelated backlog tasks only; no code behavior changes.

## Files Touched

- `docs/subagents/INDEX.md`
- `docs/subagents/agents/codex-add-backlog-tasks-20260221T044104Z-m3n8.md`
- `backlog/tasks/task-96 - Decouple-secondary-subtitle-lifecycle-from-visible-invisible-overlays.md`
- `backlog/tasks/task-97 - Add-intro-skip-playback-control.md`

## Assumptions

- User request means creating backlog tickets, not implementing either feature now.
- Existing backlog format in `backlog/tasks` remains canonical.

## Open Questions / Blockers

- None.

## Next Step

- Wait for user follow-up (prioritize one of the two new tasks for implementation).
@@ -0,0 +1,74 @@
# codex-duplicate-kiku-20260221T043006Z-5vkz

- alias: `codex-duplicate-kiku`
- mission: `Fix Kiku duplicate-card detection/grouping regression for Yomitan duplicate-marked + N+1-highlighted cards`
- status: `completed`
- start_utc: `2026-02-21T04:30:06Z`
- last_update_utc: `2026-02-21T10:07:58Z`

## Intent

- Reproduce the bug where clear duplicate cards no longer trigger Kiku duplicate grouping.
- Add a failing regression test first (TDD).
- Patch the duplicate detection logic with minimal behavior change.

## Planned Files

- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts` (or nearest duplicate-detection tests)
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`

## Assumptions

- The duplicate signal should still come from Anki duplicate search + Yomitan/N+1-derived fields used in note content.
- The regression likely stems from a term/readings normalization or query-escaping mismatch.

## Outcome

- Root cause: the candidate-note exact check only resolved the originating field name (`Expression` or `Word`), so duplicates failed when the candidate note used the opposite alias.
- Added regression test first (RED): an `Expression` current note vs a `Word` candidate with the same value returned `null`.
- Implemented minimal fix: candidate resolution now checks both aliases (`word` and `expression`) before the exact-value compare.
- GREEN: targeted duplicate test passed; related `anki-integration` test passed.
- User follow-up repro showed a remaining miss when the duplicate appears only in alias-field search results.
- Added second RED test for the alias-query fallback.
- Implemented query-stage alias fallback: run `findNotes` for both alias fields, merge note ids, then exact-verify.
- GREEN after follow-up: duplicate tests + `anki-integration` test pass.
- User reported still failing after the first follow-up.
- Added third RED regression: a source note containing both `Expression` (sentence) and `Word` (term) only matched duplicates via `Word`; the previous logic missed this by using only one source value.
- Implemented source-candidate expansion: gather both `Word` and `Expression` source values, query aliases for each, dedupe queries, then exact-match against the normalized set.
- GREEN: duplicate tests (3/3) + `anki-integration` test pass.
- Image-backed repro indicated a possible duplicate outside the configured deck scope.
- Added fourth RED regression: the deck-scoped query misses, but a collection-wide query should still detect the duplicate.
- Implemented a deck fallback query pass (same source/alias combinations without the deck filter) when the deck-scoped pass yields no candidates.
- GREEN: duplicate tests (4/4) + `anki-integration` test pass.
- User confirmed a fresh build/install still failed with the `貴様` repro.
- Added fifth RED regression: field-specific queries return no matches but a plain-text query returns the candidate.
- Implemented a plain-text query fallback pass (deck-scoped then global), still gated by exact `word`/`expression` value verification.
- GREEN: duplicate tests (5/5) + `anki-integration` test pass.
- Added runtime debug instrumentation for the duplicate detection query/verification path:
  - query string + hit count
  - candidate count after exclude
  - exact-match note id + field
- No behavior change from instrumentation; build + tests still green.
- User requested a logging policy update: prefer console output unless explicitly captured, and persistent logs under `~/.config/SubMiner/logs/*.log`.
- Updated the default launcher/app mpv log path to daily file naming: `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log`.
- Typecheck green.
- Found an observability gap: the app logger wrote only to stdout/stderr while the launcher log file only captured wrapper messages.
- Added a file sink to `src/logger.ts` so app logs also append to `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (or `SUBMINER_MPV_LOG` when set).
- Verified with typecheck + build.

## Files Touched

- `src/anki-integration/duplicate.ts`
- `src/anki-integration/duplicate.test.ts`
- `backlog/tasks/task-94 - Fix-Kiku-duplicate-detection-for-Yomitan-marked-duplicates.md`
- `docs/subagents/INDEX.md`
- `docs/subagents/collaboration.md`
- `docs/subagents/agents/codex-duplicate-kiku-20260221T043006Z-5vkz.md`

## Handoff

- No blockers.
- Next step: run the broader gate (`bun run test:fast`) when ready, then commit.
@@ -0,0 +1,28 @@
# Agent Log: codex-frequency-dup-log-20260221T042815Z-r4k1

- alias: codex-frequency-dup-log
- mission: reduce frequency dictionary duplicate-term startup log spam; keep useful signal
- status: completed
- started_utc: 2026-02-21T04:28:15Z
- last_update_utc: 2026-02-21T04:32:40Z
- planned_files:
  - src/core/services/frequency-dictionary.ts
  - src/core/services/frequency-dictionary.test.ts
  - docs/subagents/INDEX.md
- touched_files:
  - src/core/services/frequency-dictionary.ts
  - src/core/services/frequency-dictionary.test.ts
  - docs/subagents/agents/codex-frequency-dup-log-20260221T042815Z-r4k1.md
  - docs/subagents/INDEX.md
- key_decisions:
  - remove per-entry duplicate term logs
  - keep one aggregate duplicate summary line per bank file at info level
- assumptions:
  - duplicate entries are expected in the source dictionary and should not produce per-entry info logs
- verification:
  - `bun test src/core/services/frequency-dictionary.test.ts` (pass)
  - full build currently blocked by unrelated Jellyfin WIP type errors on the branch
- blockers:
  - unrelated branch state prevents a full `bun run build`
- next_step:
  - optional follow-up: add a true debug-level logging API if duplicate diagnostics are needed on demand
@@ -25,5 +25,11 @@ Shared notes. Append-only.
 - [2026-02-21T04:30:06Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] investigating Kiku duplicate grouping regression; expecting touches in `src/anki-integration/duplicate.ts` and duplicate-detection tests only.
 - [2026-02-21T04:33:17Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] completed TASK-94: duplicate check now resolves `word`/`expression` alias fields when validating candidate notes; added regression test `src/anki-integration/duplicate.test.ts`; targeted build + duplicate/anki-integration tests passed.
 - [2026-02-21T04:38:25Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] follow-up repro fixed: duplicate search now queries both alias fields (`word` + `expression`) and unions note ids before exact compare; added second regression test for alias-query fallback.
+- [2026-02-21T04:48:50Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] second follow-up fix: when source note has both `Expression` and `Word`, duplicate detection now uses both source values (not just first field by order); added regression for mixed-field source candidate scenario.
+- [2026-02-21T07:23:56Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] third follow-up fix: add collection-wide fallback query pass when deck-scoped duplicate search returns no candidates; added regression for deck-scope miss case.
+- [2026-02-21T09:25:53Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] fourth follow-up fix: add plain-text query fallback when field-scoped queries miss; keep exact value verification on candidate notes to avoid false positives.
+- [2026-02-21T09:40:33Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] instrumentation pass: add duplicate-detection debug logs (`[duplicate] query/hits/candidates/exact-match`) to isolate remaining live repro mismatches.
+- [2026-02-21T09:54:29Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] logging-path update: default persistent logs now target `~/.config/SubMiner/logs/SubMiner-YYYY-MM-DD.log` (launcher + app mpv log default).
+- [2026-02-21T10:07:58Z] [codex-duplicate-kiku-20260221T043006Z-5vkz|codex-duplicate-kiku] observability fix: app logger now also appends to daily log file, so runtime duplicate traces are available even when overlay stdout is not surfaced in launcher terminal.
 - [2026-02-21T04:37:48Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] overlap note: touching `src/core/services/mpv.ts` + mpv service tests for startup connection-request log level gating; coordinating with historical TASK-33 behavior (same symptom, new logger path).
 - [2026-02-21T04:41:15Z] [codex-mpv-connect-log-20260221T043748Z-q7m1|codex-mpv-connect-log] completed TASK-95: changed `MpvIpcClient.connect()` connect-request line to `logger.debug`, added regression tests for info/debug level log behavior in `src/core/services/mpv.test.ts`; verified via `bun run build && node dist/core/services/mpv.test.js` (pass).
@@ -34,7 +34,13 @@ export const DEFAULT_YOUTUBE_SUBGEN_OUT_DIR = path.join(
|
|||||||
'subminer',
|
'subminer',
|
||||||
'youtube-subs',
|
'youtube-subs',
|
||||||
);
|
);
|
||||||
export const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
|
export const DEFAULT_MPV_LOG_FILE = path.join(
|
||||||
|
os.homedir(),
|
||||||
|
'.config',
|
||||||
|
'SubMiner',
|
||||||
|
'logs',
|
||||||
|
`SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
|
||||||
|
);
|
||||||
export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best';
|
export const DEFAULT_YOUTUBE_YTDL_FORMAT = 'bestvideo*+bestaudio/best';
|
||||||
export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc';
|
export const DEFAULT_JIMAKU_API_BASE_URL = 'https://jimaku.cc';
|
||||||
export const DEFAULT_MPV_SUBMINER_ARGS = [
|
export const DEFAULT_MPV_SUBMINER_ARGS = [
|
||||||
|
|||||||
@@ -970,6 +970,12 @@ export class AnkiIntegration {
       notesInfo: async (noteIds) => (await this.client.notesInfo(noteIds)) as unknown,
       getDeck: () => this.config.deck,
       resolveFieldName: (info, preferredName) => this.resolveNoteFieldName(info, preferredName),
+      logInfo: (message) => {
+        log.info(message);
+      },
+      logDebug: (message) => {
+        log.debug(message);
+      },
       logWarn: (message, error) => {
         log.warn(message, (error as Error).message);
       },
src/anki-integration/duplicate.test.ts (new file, 265 lines)
@@ -0,0 +1,265 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { findDuplicateNote, type NoteInfo } from './duplicate';

function createFieldResolver(noteInfo: NoteInfo, preferredName: string): string | null {
  const names = Object.keys(noteInfo.fields);
  const exact = names.find((name) => name === preferredName);
  if (exact) return exact;
  const lower = preferredName.toLowerCase();
  return names.find((name) => name.toLowerCase() === lower) ?? null;
}

test('findDuplicateNote matches duplicate when candidate uses alternate word/expression field name', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '食べる' },
    },
  };

  const duplicateId = await findDuplicateNote('食べる', 100, currentNote, {
    findNotes: async () => [100, 200],
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Word: { value: '食べる' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
});

test('findDuplicateNote falls back to alias field query when primary field query returns no candidates', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '食べる' },
    },
  };

  const seenQueries: string[] = [];
  const duplicateId = await findDuplicateNote('食べる', 100, currentNote, {
    findNotes: async (query) => {
      seenQueries.push(query);
      if (query.includes('"Expression:')) {
        return [];
      }
      if (query.includes('"word:') || query.includes('"Word:') || query.includes('"expression:')) {
        return [200];
      }
      return [];
    },
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Word: { value: '食べる' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
  assert.equal(seenQueries.length, 2);
});

test('findDuplicateNote checks both source expression/word values when both fields are present', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '昨日は雨だった。' },
      Word: { value: '雨' },
    },
  };

  const seenQueries: string[] = [];
  const duplicateId = await findDuplicateNote('昨日は雨だった。', 100, currentNote, {
    findNotes: async (query) => {
      seenQueries.push(query);
      if (query.includes('昨日は雨だった。')) {
        return [];
      }
      if (query.includes('"Word:雨"') || query.includes('"word:雨"') || query.includes('"Expression:雨"')) {
        return [200];
      }
      return [];
    },
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Word: { value: '雨' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
  assert.ok(seenQueries.some((query) => query.includes('昨日は雨だった。')));
  assert.ok(seenQueries.some((query) => query.includes('雨')));
});

test('findDuplicateNote falls back to collection-wide query when deck-scoped query has no matches', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '貴様' },
    },
  };

  const seenQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
    findNotes: async (query) => {
      seenQueries.push(query);
      if (query.includes('deck:Japanese')) {
        return [];
      }
      if (query.includes('"Expression:貴様"') || query.includes('"Word:貴様"')) {
        return [200];
      }
      return [];
    },
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Expression: { value: '貴様' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
  assert.ok(seenQueries.some((query) => query.includes('deck:Japanese')));
  assert.ok(seenQueries.some((query) => !query.includes('deck:Japanese')));
});

test('findDuplicateNote falls back to plain text query when field queries miss', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '貴様' },
    },
  };

  const seenQueries: string[] = [];
  const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
    findNotes: async (query) => {
      seenQueries.push(query);
      if (query.includes('Expression:') || query.includes('Word:')) {
        return [];
      }
      if (query.includes('"貴様"')) {
        return [200];
      }
      return [];
    },
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Expression: { value: '貴様' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
  assert.ok(seenQueries.some((query) => query.includes('Expression:')));
  assert.ok(seenQueries.some((query) => query.endsWith('"貴様"')));
});

test('findDuplicateNote exact compare tolerates furigana bracket markup in candidate field', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '貴様' },
    },
  };

  const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
    findNotes: async () => [200],
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Expression: { value: '貴様[きさま]' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
});

test('findDuplicateNote exact compare tolerates html wrappers in candidate field', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '貴様' },
    },
  };

  const duplicateId = await findDuplicateNote('貴様', 100, currentNote, {
    findNotes: async () => [200],
    notesInfo: async () => [
      {
        noteId: 200,
        fields: {
          Expression: { value: '<span data-x="1">貴様</span>' },
        },
      },
    ],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.equal(duplicateId, 200);
});

test('findDuplicateNote does not disable retries on findNotes calls', async () => {
  const currentNote: NoteInfo = {
    noteId: 100,
    fields: {
      Expression: { value: '貴様' },
    },
  };

  const seenOptions: Array<{ maxRetries?: number } | undefined> = [];
  await findDuplicateNote('貴様', 100, currentNote, {
    findNotes: async (_query, options) => {
      seenOptions.push(options);
      return [];
    },
    notesInfo: async () => [],
    getDeck: () => 'Japanese::Mining',
    resolveFieldName: (noteInfo, preferredName) => createFieldResolver(noteInfo, preferredName),
    logWarn: () => {},
  });

  assert.ok(seenOptions.length > 0);
  assert.ok(seenOptions.every((options) => options?.maxRetries !== 0));
});
@@ -12,6 +12,8 @@ export interface DuplicateDetectionDeps {
   notesInfo: (noteIds: number[]) => Promise<unknown>;
   getDeck: () => string | null | undefined;
   resolveFieldName: (noteInfo: NoteInfo, preferredName: string) => string | null;
+  logInfo?: (message: string) => void;
+  logDebug?: (message: string) => void;
   logWarn: (message: string, error: unknown) => void;
 }
@@ -21,25 +23,68 @@ export async function findDuplicateNote(
   noteInfo: NoteInfo,
   deps: DuplicateDetectionDeps,
 ): Promise<number | null> {
-  let fieldName = '';
-  for (const name of Object.keys(noteInfo.fields)) {
-    if (['word', 'expression'].includes(name.toLowerCase()) && noteInfo.fields[name]?.value) {
-      fieldName = name;
-      break;
-    }
-  }
-  if (!fieldName) return null;
-
-  const escapedFieldName = escapeAnkiSearchValue(fieldName);
-  const escapedExpression = escapeAnkiSearchValue(expression);
-  const deckPrefix = deps.getDeck() ? `"deck:${escapeAnkiSearchValue(deps.getDeck()!)}" ` : '';
-  const query = `${deckPrefix}"${escapedFieldName}:${escapedExpression}"`;
+  const sourceCandidates = getDuplicateSourceCandidates(noteInfo, expression);
+  if (sourceCandidates.length === 0) return null;
+  deps.logInfo?.(
+    `[duplicate] start expr="${expression}" sourceCandidates=${sourceCandidates
+      .map((entry) => `${entry.fieldName}:${entry.value}`)
+      .join('|')}`,
+  );
+
+  const deckValue = deps.getDeck();
+  const queryPrefixes = deckValue
+    ? [`"deck:${escapeAnkiSearchValue(deckValue)}" `, '']
+    : [''];

   try {
-    const noteIds = (await deps.findNotes(query, {
-      maxRetries: 0,
-    })) as number[];
-    return await findFirstExactDuplicateNoteId(noteIds, excludeNoteId, fieldName, expression, deps);
+    const noteIds = new Set<number>();
+    const executedQueries = new Set<string>();
+    for (const queryPrefix of queryPrefixes) {
+      for (const sourceCandidate of sourceCandidates) {
+        const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
+        const queryFieldNames = getDuplicateCandidateFieldNames(sourceCandidate.fieldName);
+        for (const queryFieldName of queryFieldNames) {
+          const escapedFieldName = escapeAnkiSearchValue(queryFieldName);
+          const query = `${queryPrefix}"${escapedFieldName}:${escapedExpression}"`;
+          if (executedQueries.has(query)) continue;
+          executedQueries.add(query);
+          const results = (await deps.findNotes(query)) as number[];
+          deps.logDebug?.(
+            `[duplicate] query(field)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
+          );
+          for (const noteId of results) {
+            noteIds.add(noteId);
+          }
+        }
+      }
+      if (noteIds.size > 0) break;
+    }
+
+    if (noteIds.size === 0) {
+      for (const queryPrefix of queryPrefixes) {
+        for (const sourceCandidate of sourceCandidates) {
+          const escapedExpression = escapeAnkiSearchValue(sourceCandidate.value);
+          const query = `${queryPrefix}"${escapedExpression}"`;
+          if (executedQueries.has(query)) continue;
+          executedQueries.add(query);
+          const results = (await deps.findNotes(query)) as number[];
+          deps.logDebug?.(
+            `[duplicate] query(text)="${query}" hits=${Array.isArray(results) ? results.length : 0}`,
+          );
+          for (const noteId of results) {
+            noteIds.add(noteId);
+          }
+        }
+        if (noteIds.size > 0) break;
+      }
+    }
+
+    return await findFirstExactDuplicateNoteId(
+      noteIds,
+      excludeNoteId,
+      sourceCandidates.map((candidate) => candidate.value),
+      deps,
+    );
   } catch (error) {
     deps.logWarn('Duplicate search failed:', error);
     return null;
@@ -47,18 +92,25 @@ export async function findDuplicateNote(
 }

 function findFirstExactDuplicateNoteId(
-  candidateNoteIds: number[],
+  candidateNoteIds: Iterable<number>,
   excludeNoteId: number,
-  fieldName: string,
-  expression: string,
+  sourceValues: string[],
   deps: DuplicateDetectionDeps,
 ): Promise<number | null> {
-  const candidates = candidateNoteIds.filter((id) => id !== excludeNoteId);
+  const candidates = Array.from(candidateNoteIds).filter((id) => id !== excludeNoteId);
+  deps.logDebug?.(`[duplicate] candidateIds=${candidates.length} exclude=${excludeNoteId}`);
   if (candidates.length === 0) {
+    deps.logInfo?.('[duplicate] no candidates after query + exclude');
+    return Promise.resolve(null);
+  }
+
+  const normalizedValues = new Set(
+    sourceValues.map((value) => normalizeDuplicateValue(value)).filter((value) => value.length > 0),
+  );
+  if (normalizedValues.size === 0) {
     return Promise.resolve(null);
   }

-  const normalizedExpression = normalizeDuplicateValue(expression);
   const chunkSize = 50;
   return (async () => {
     for (let i = 0; i < candidates.length; i += chunkSize) {
@@ -66,20 +118,72 @@ function findFirstExactDuplicateNoteId(
       const notesInfoResult = (await deps.notesInfo(chunk)) as unknown[];
       const notesInfo = notesInfoResult as NoteInfo[];
       for (const noteInfo of notesInfo) {
-        const resolvedField = deps.resolveFieldName(noteInfo, fieldName);
-        if (!resolvedField) continue;
-        const candidateValue = noteInfo.fields[resolvedField]?.value || '';
-        if (normalizeDuplicateValue(candidateValue) === normalizedExpression) {
-          return noteInfo.noteId;
+        const candidateFieldNames = ['word', 'expression'];
+        for (const candidateFieldName of candidateFieldNames) {
+          const resolvedField = deps.resolveFieldName(noteInfo, candidateFieldName);
+          if (!resolvedField) continue;
+          const candidateValue = noteInfo.fields[resolvedField]?.value || '';
+          if (normalizedValues.has(normalizeDuplicateValue(candidateValue))) {
+            deps.logDebug?.(
+              `[duplicate] exact-match noteId=${noteInfo.noteId} field=${resolvedField}`,
+            );
+            deps.logInfo?.(`[duplicate] matched noteId=${noteInfo.noteId} field=${resolvedField}`);
+            return noteInfo.noteId;
+          }
         }
       }
     }
+    deps.logInfo?.('[duplicate] no exact match in candidate notes');
     return null;
   })();
 }
+
+function getDuplicateCandidateFieldNames(fieldName: string): string[] {
+  const candidates = [fieldName];
+  const lower = fieldName.toLowerCase();
+  if (lower === 'word') {
+    candidates.push('expression');
+  } else if (lower === 'expression') {
+    candidates.push('word');
+  }
+  return candidates;
+}
+
+function getDuplicateSourceCandidates(
+  noteInfo: NoteInfo,
+  fallbackExpression: string,
+): Array<{ fieldName: string; value: string }> {
+  const candidates: Array<{ fieldName: string; value: string }> = [];
+  const dedupeKey = new Set<string>();
+
+  for (const fieldName of Object.keys(noteInfo.fields)) {
+    const lower = fieldName.toLowerCase();
+    if (lower !== 'word' && lower !== 'expression') continue;
+    const value = noteInfo.fields[fieldName]?.value?.trim() ?? '';
+    if (!value) continue;
+    const key = `${lower}:${normalizeDuplicateValue(value)}`;
+    if (dedupeKey.has(key)) continue;
+    dedupeKey.add(key);
+    candidates.push({ fieldName, value });
+  }
+
+  const trimmedFallback = fallbackExpression.trim();
+  if (trimmedFallback.length > 0) {
+    const fallbackKey = `expression:${normalizeDuplicateValue(trimmedFallback)}`;
+    if (!dedupeKey.has(fallbackKey)) {
+      candidates.push({ fieldName: 'expression', value: trimmedFallback });
+    }
+  }
+
+  return candidates;
+}

 function normalizeDuplicateValue(value: string): string {
-  return value.replace(/\s+/g, ' ').trim();
+  return value
+    .replace(/<[^>]*>/g, '')
+    .replace(/([^\s\[\]]+)\[[^\]]*\]/g, '$1')
+    .replace(/\s+/g, ' ')
+    .trim();
 }

 function escapeAnkiSearchValue(value: string): string {
@@ -48,3 +48,34 @@ test('createFrequencyDictionaryLookup continues with no-op lookup when search pa
     true,
   );
 });
+
+test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a single summary', async () => {
+  const logs: string[] = [];
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
+  const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
+  fs.writeFileSync(
+    bankPath,
+    JSON.stringify([
+      ['猫', 1, { frequency: { displayValue: 100 } }],
+      ['猫', 2, { frequency: { displayValue: 120 } }],
+      ['猫', 3, { frequency: { displayValue: 110 } }],
+    ]),
+  );
+
+  const lookup = await createFrequencyDictionaryLookup({
+    searchPaths: [tempDir],
+    log: (message) => {
+      logs.push(message);
+    },
+  });
+
+  assert.equal(lookup('猫'), 100);
+  assert.equal(
+    logs.filter((entry) => entry.includes('Frequency dictionary ignored 2 duplicate term entries')).length,
+    1,
+  );
+  assert.equal(
+    logs.some((entry) => entry.includes('Frequency dictionary duplicate term')),
+    false,
+  );
+});
@@ -62,12 +62,12 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
 function addEntriesToMap(
   rawEntries: unknown,
   terms: Map<string, number>,
-  log: (message: string) => void,
-): void {
+): { duplicateCount: number } {
   if (!Array.isArray(rawEntries)) {
-    return;
+    return { duplicateCount: 0 };
   }

+  let duplicateCount = 0;
   for (const rawEntry of rawEntries) {
     const entry = asFrequencyDictionaryEntry(rawEntry);
     if (!entry) {
@@ -79,10 +79,10 @@ function addEntriesToMap(
       continue;
     }

-    log(
-      `Frequency dictionary duplicate term ${entry.term} with weaker rank ${entry.rank}; keeping ${currentRank}.`,
-    );
+    duplicateCount += 1;
   }
+
+  return { duplicateCount };
 }

 function collectDictionaryFromPath(
@@ -124,7 +124,14 @@ function collectDictionaryFromPath(
   }

   const beforeSize = terms.size;
-  addEntriesToMap(rawEntries, terms, log);
+  const { duplicateCount } = addEntriesToMap(rawEntries, terms);
+  if (duplicateCount > 0) {
+    log(
+      `Frequency dictionary ignored ${duplicateCount} duplicate term entr${
+        duplicateCount === 1 ? 'y' : 'ies'
+      } in ${bankPath} (kept strongest rank per term).`,
+    );
+  }
   if (terms.size === beforeSize) {
     log(`Frequency dictionary file contained no extractable entries: ${bankPath}`);
   }
@@ -1,3 +1,7 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+
 export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
 export type LogLevelSource = 'cli' | 'config';
@@ -107,6 +111,25 @@ function safeStringify(value: unknown): string {
   }
 }

+function resolveLogFilePath(): string {
+  const envPath = process.env.SUBMINER_MPV_LOG?.trim();
+  if (envPath) {
+    return envPath;
+  }
+  const date = new Date().toISOString().slice(0, 10);
+  return path.join(os.homedir(), '.config', 'SubMiner', 'logs', `SubMiner-${date}.log`);
+}
+
+function appendToLogFile(line: string): void {
+  try {
+    const logPath = resolveLogFilePath();
+    fs.mkdirSync(path.dirname(logPath), { recursive: true });
+    fs.appendFileSync(logPath, `${line}\n`, { encoding: 'utf8' });
+  } catch {
+    // never break runtime due to logging sink failures
+  }
+}
+
 function emit(level: LogLevel, scope: string, message: string, meta: unknown[]): void {
   const minLevel = resolveMinLevel();
   if (LEVEL_PRIORITY[level] < LEVEL_PRIORITY[minLevel]) {
@@ -127,6 +150,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
     } else {
       console.info(prefix);
     }
+    appendToLogFile(prefix);
     return;
   }
@@ -142,6 +166,7 @@ function emit(level: LogLevel, scope: string, message: string, meta: unknown[]):
   } else {
     console.info(finalMessage);
   }
+  appendToLogFile(finalMessage);
 }

 export function createLogger(scope: string): Logger {
@@ -487,7 +487,13 @@ if (process.platform === 'linux') {
 app.setName('SubMiner');

 const DEFAULT_TEXTHOOKER_PORT = 5174;
-const DEFAULT_MPV_LOG_FILE = path.join(os.homedir(), '.cache', 'SubMiner', 'mp.log');
+const DEFAULT_MPV_LOG_FILE = path.join(
+  os.homedir(),
+  '.config',
+  'SubMiner',
+  'logs',
+  `SubMiner-${new Date().toISOString().slice(0, 10)}.log`,
+);
 const ANILIST_SETUP_CLIENT_ID_URL = 'https://anilist.co/api/v2/oauth/authorize';
 const ANILIST_SETUP_RESPONSE_TYPE = 'token';
 const ANILIST_DEFAULT_CLIENT_ID = '36084';