fix(subtitle): restore known and JLPT token annotations

This commit is contained in:
2026-03-19 18:03:20 -07:00
parent 1b5f0c6999
commit 43a0d11446
16 changed files with 258 additions and 43 deletions

View File

@@ -0,0 +1,70 @@
---
id: TASK-202
title: Use ended session media position for anime episode progress
status: Done
assignee:
- Codex
created_date: '2026-03-19 14:55'
updated_date: '2026-03-19 17:36'
labels:
- stats
- ui
- bug
milestone: m-1
dependencies: []
references:
- stats/src/components/anime/EpisodeList.tsx
- stats/src/types/stats.ts
- src/core/services/immersion-tracker/session.ts
- src/core/services/immersion-tracker/query.ts
- src/core/services/immersion-tracker/storage.ts
priority: medium
ordinal: 105720
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
The anime episode list currently computes the `Progress` column from cumulative `totalActiveMs / durationMs`. Because active watch time accumulates across every session, this overstates how far into the episode the viewer actually got once an episode is rewatched or watched over repeated sessions. Persist the playback position at the time a session ends and drive episode progress from that stored stop position instead.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Session finalization persists the playback position reached when the session ended.
- [x] #2 Anime episode queries expose the most recent ended-session media position for each episode.
- [x] #3 Episode-list progress renders from ended media position instead of cumulative active watch time.
- [x] #4 Regression coverage locks storage/query/UI behavior for the new progress source.
<!-- AC:END -->
## Implementation Plan
<!-- SECTION:PLAN:BEGIN -->
1. Add failing regression coverage for persisted ended media position and episode progress rendering.
2. Add `ended_media_ms` to the immersion-session schema and persist `lastMediaMs` when ending a session.
3. Thread the new field through episode queries/types and render episode progress from `endedMediaMs / durationMs`.
4. Run targeted verification plus typecheck, then record the outcome.
<!-- SECTION:PLAN:END -->
## Outcome
<!-- SECTION:OUTCOME:BEGIN -->
Added nullable `ended_media_ms` storage to immersion sessions, persisted `lastMediaMs` when sessions finalize, and exposed the most recent ended-session media position through anime episode queries/types. The anime episode list now renders `Progress` from `endedMediaMs / durationMs` instead of cumulative active watch time, so rewatches no longer inflate the displayed percentage.
Verification:
- `bun test src/core/services/immersion-tracker/storage-session.test.ts`
- `bun test src/core/services/immersion-tracker/__tests__/query.test.ts`
- `bun test stats/src/lib/yomitan-lookup.test.tsx stats/src/lib/stats-ui-navigation.test.tsx`
- `bun run typecheck`
- `bun run changelog:lint`
- `bun x prettier --check 'src/core/services/immersion-tracker/types.ts' 'src/core/services/immersion-tracker/storage.ts' 'src/core/services/immersion-tracker/session.ts' 'src/core/services/immersion-tracker/query.ts' 'src/core/services/immersion-tracker/storage-session.test.ts' 'src/core/services/immersion-tracker/__tests__/query.test.ts' 'stats/src/types/stats.ts' 'stats/src/components/anime/EpisodeList.tsx' 'stats/src/lib/yomitan-lookup.test.tsx' 'stats/src/lib/stats-ui-navigation.test.tsx' 'backlog/tasks/task-202 - Use-ended-session-media-position-for-anime-episode-progress.md' 'changes/2026-03-19-stats-ended-media-progress.md'`
- `bash .agents/skills/subminer-change-verification/scripts/verify_subminer_change.sh --lane core 'src/core/services/immersion-tracker/types.ts' 'src/core/services/immersion-tracker/storage.ts' 'src/core/services/immersion-tracker/session.ts' 'src/core/services/immersion-tracker/query.ts' 'src/core/services/immersion-tracker/storage-session.test.ts' 'src/core/services/immersion-tracker/__tests__/query.test.ts' 'stats/src/types/stats.ts' 'stats/src/components/anime/EpisodeList.tsx' 'stats/src/lib/yomitan-lookup.test.tsx' 'stats/src/lib/stats-ui-navigation.test.tsx' 'backlog/tasks/task-202 - Use-ended-session-media-position-for-anime-episode-progress.md' 'changes/2026-03-19-stats-ended-media-progress.md'`
- Verifier artifacts: `.tmp/skill-verification/subminer-verify-20260319-173511-AV7kUg/`
<!-- SECTION:OUTCOME:END -->

View File

@@ -0,0 +1,47 @@
---
id: TASK-203
title: Restore known and JLPT annotation for reading-mismatch subtitle tokens
status: Done
assignee:
- Codex
created_date: '2026-03-19 18:25'
updated_date: '2026-03-19 18:25'
labels:
- subtitle
- bug
dependencies: []
references:
- src/core/services/tokenizer/annotation-stage.ts
- src/core/services/tokenizer/annotation-stage.test.ts
priority: medium
ordinal: 105721
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Some subtitle tokens lose both known-word coloring and JLPT underline even though the popup resolves a valid dictionary term. Repro example: `大体` in `大体 僕だって困ってたんですよ!` can be known via kana-only Anki data (`だいたい`) while JLPT lookup should still resolve from the kanji surface/headword.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Subtitle annotation can mark a token known via its reading when the configured headword/surface lookup misses.
- [x] #2 JLPT eligibility no longer drops valid kanji terms just because their reading contains repeated kana patterns.
- [x] #3 Regression coverage locks the combined known + JLPT case for `大体`.
<!-- AC:END -->
## Outcome
<!-- SECTION:OUTCOME:BEGIN -->
Known-word annotation now falls back to the token reading after the configured headword/surface lookup misses, so kana-only known-card entries still light up matching subtitle tokens. JLPT eligibility no longer includes the token reading among the candidates it screens (only the JLPT lookup text, surface, and headword are checked), so repeated-kana patterns in a reading can no longer disqualify a token, which preserves JLPT tagging for words like `大体`.
Verification:
- `bun test src/core/services/tokenizer/annotation-stage.test.ts`
<!-- SECTION:OUTCOME:END -->

View File

@@ -0,0 +1,4 @@
type: fixed
area: subtitle
- Restored known-word coloring and JLPT underlines for subtitle tokens like `大体` when the subtitle token is kanji but the known-word cache only matches the kana reading.

View File

@@ -0,0 +1,4 @@
type: fixed
area: stats
- Episode progress in the anime page now uses the last ended playback position instead of cumulative active watch time, avoiding distorted percentages after rewatches or repeated sessions.

View File

@@ -360,10 +360,7 @@ test('getTrendsDashboard returns chart-ready aggregated series', () => {
assert.equal(dashboard.activity.watchTime[0]?.value, 30);
assert.equal(dashboard.progress.watchTime[1]?.value, 75);
assert.equal(dashboard.progress.lookups[1]?.value, 18);
assert.equal(
dashboard.ratios.lookupsPerHundred[0]?.value,
+((8 / 120) * 100).toFixed(1),
);
assert.equal(dashboard.ratios.lookupsPerHundred[0]?.value, +((8 / 120) * 100).toFixed(1));
assert.equal(dashboard.animePerDay.watchTime[0]?.animeTitle, 'Trend Dashboard Anime');
assert.equal(dashboard.animeCumulative.watchTime[1]?.value, 75);
assert.equal(
@@ -2025,6 +2022,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
SET
ended_at_ms = ?,
status = 2,
ended_media_ms = ?,
active_watched_ms = ?,
cards_mined = ?,
tokens_seen = ?,
@@ -2034,9 +2032,9 @@ test('anime/media detail and episode queries use ended-session metrics when tele
WHERE session_id = ?
`,
);
updateSession.run(1_001_000, 3_000, 1, 10, 4, 3, now, s1);
updateSession.run(1_011_000, 4_000, 2, 20, 5, 4, now, s2);
updateSession.run(1_021_000, 5_000, 3, 30, 6, 5, now, s3);
updateSession.run(1_001_000, 2_500, 3_000, 1, 10, 4, 3, now, s1);
updateSession.run(1_011_000, 6_000, 4_000, 2, 20, 5, 4, now, s2);
updateSession.run(1_021_000, 8_000, 5_000, 3, 30, 6, 5, now, s3);
const animeDetail = getAnimeDetail(db, animeId);
assert.ok(animeDetail);
@@ -2047,6 +2045,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
assert.deepEqual(
episodes.map((row) => ({
videoId: row.videoId,
endedMediaMs: row.endedMediaMs,
totalSessions: row.totalSessions,
totalActiveMs: row.totalActiveMs,
totalCards: row.totalCards,
@@ -2055,6 +2054,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
[
{
videoId: episodeOne,
endedMediaMs: 6_000,
totalSessions: 2,
totalActiveMs: 7_000,
totalCards: 3,
@@ -2062,6 +2062,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
},
{
videoId: episodeTwo,
endedMediaMs: 8_000,
totalSessions: 1,
totalActiveMs: 5_000,
totalCards: 3,

View File

@@ -818,7 +818,10 @@ function accumulatePoints(points: TrendChartPoint[]): TrendChartPoint[] {
}
function buildAggregatedTrendRows(rollups: ImmersionSessionRollupRow[]) {
const byKey = new Map<number, { activeMin: number; cards: number; words: number; sessions: number }>();
const byKey = new Map<
number,
{ activeMin: number; cards: number; words: number; sessions: number }
>();
for (const rollup of rollups) {
const existing = byKey.get(rollup.rollupDayOrMonth) ?? {
@@ -894,14 +897,8 @@ function buildLookupsPerHundredWords(sessions: TrendSessionMetricRow[]): TrendCh
for (const session of sessions) {
const epochDay = Math.floor(session.startedAtMs / 86_400_000);
lookupsByDay.set(
epochDay,
(lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount,
);
wordsByDay.set(
epochDay,
(wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session),
);
lookupsByDay.set(epochDay, (lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount);
wordsByDay.set(epochDay, (wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session));
}
return Array.from(lookupsByDay.entries())
@@ -1005,8 +1002,13 @@ function buildCumulativePerAnime(points: TrendPerAnimePoint[]): TrendPerAnimePoi
return result;
}
function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>): Map<number, string> {
const uniqueIds = [...new Set(videoIds.filter((value): value is number => typeof value === 'number'))];
function getVideoAnimeTitleMap(
db: DatabaseSync,
videoIds: Array<number | null>,
): Map<number, string> {
const uniqueIds = [
...new Set(videoIds.filter((value): value is number => typeof value === 'number')),
];
if (uniqueIds.length === 0) {
return new Map();
}
@@ -1027,7 +1029,10 @@ function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>)
return new Map(rows.map((row) => [row.videoId, row.animeTitle]));
}
function resolveVideoAnimeTitle(videoId: number | null, titlesByVideoId: Map<number, string>): string {
function resolveVideoAnimeTitle(
videoId: number | null,
titlesByVideoId: Map<number, string>,
): string {
if (videoId === null) {
return 'Unknown';
}
@@ -1087,7 +1092,9 @@ function buildEpisodesPerAnimeFromDailyRollups(
return result;
}
function buildEpisodesPerDayFromDailyRollups(rollups: ImmersionSessionRollupRow[]): TrendChartPoint[] {
function buildEpisodesPerDayFromDailyRollups(
rollups: ImmersionSessionRollupRow[],
): TrendChartPoint[] {
const byDay = new Map<number, Set<number>>();
for (const rollup of rollups) {
@@ -1147,7 +1154,9 @@ function buildNewWordsPerDay(db: DatabaseSync, cutoffMs: number | null): TrendCh
ORDER BY epochDay ASC
`);
const rows = (cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))) as Array<{
const rows = (
cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))
) as Array<{
epochDay: number;
wordCount: number;
}>;
@@ -1186,10 +1195,8 @@ export function getTrendsDashboard(
const animePerDay = {
episodes: buildEpisodesPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId),
watchTime: buildPerAnimeFromDailyRollups(
dailyRollups,
titlesByVideoId,
(rollup) => Math.round(rollup.totalActiveMin),
watchTime: buildPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId, (rollup) =>
Math.round(rollup.totalActiveMin),
),
cards: buildPerAnimeFromDailyRollups(
dailyRollups,
@@ -1201,10 +1208,7 @@ export function getTrendsDashboard(
titlesByVideoId,
(rollup) => rollup.totalTokensSeen,
),
lookups: buildPerAnimeFromSessions(
sessions,
(session) => session.yomitanLookupCount,
),
lookups: buildPerAnimeFromSessions(sessions, (session) => session.yomitanLookupCount),
lookupsPerHundred: buildLookupsPerHundredPerAnime(sessions),
};
@@ -1740,6 +1744,14 @@ export function getAnimeEpisodes(db: DatabaseSync, animeId: number): AnimeEpisod
v.parsed_season AS season,
v.parsed_episode AS episode,
v.duration_ms AS durationMs,
(
SELECT s_recent.ended_media_ms
FROM imm_sessions s_recent
WHERE s_recent.video_id = v.video_id
AND s_recent.ended_at_ms IS NOT NULL
ORDER BY s_recent.ended_at_ms DESC, s_recent.session_id DESC
LIMIT 1
) AS endedMediaMs,
v.watched AS watched,
COUNT(DISTINCT s.session_id) AS totalSessions,
COALESCE(SUM(COALESCE(asm.activeWatchedMs, s.active_watched_ms, 0)), 0) AS totalActiveMs,

View File

@@ -39,6 +39,7 @@ export function finalizeSessionRecord(
SET
ended_at_ms = ?,
status = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
@@ -58,6 +59,7 @@ export function finalizeSessionRecord(
).run(
endedAtMs,
SESSION_STATUS_ENDED,
sessionState.lastMediaMs,
sessionState.totalWatchedMs,
sessionState.activeWatchedMs,
sessionState.linesSeen,

View File

@@ -740,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
}
});
// Regression test: finalizing a session must store the session state's
// `lastMediaMs` in the `imm_sessions.ended_media_ms` column.
test('finalize session persists ended media position', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
canonicalTitle: 'Slice A Ended Media',
sourcePath: '/tmp/slice-a-ended-media.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 1_234_567_000;
const endedAtMs = startedAtMs + 8_500;
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
// Playback position at the moment the session ends. It deliberately differs
// from the 8_500 ms wall-clock span above, so the assertion below cannot be
// satisfied by a value derived from the start/end timestamps.
state.lastMediaMs = 91_000;
finalizeSessionRecord(db, state, endedAtMs);
// Read the column back directly instead of going through a query helper.
const row = db
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
.get(sessionId) as {
ended_media_ms: number | null;
} | null;
assert.ok(row);
assert.equal(row?.ended_media_ms, 91_000);
} finally {
// Always close the handle and clean up the database path, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});
test('executeQueuedWrite inserts event and telemetry rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);

View File

@@ -569,6 +569,7 @@ export function ensureSchema(db: DatabaseSync): void {
status INTEGER NOT NULL,
locale_id INTEGER, target_lang_id INTEGER,
difficulty_tier INTEGER, subtitle_mode INTEGER,
ended_media_ms INTEGER,
total_watched_ms INTEGER NOT NULL DEFAULT 0,
active_watched_ms INTEGER NOT NULL DEFAULT 0,
lines_seen INTEGER NOT NULL DEFAULT 0,
@@ -1026,6 +1027,10 @@ export function ensureSchema(db: DatabaseSync): void {
`);
}
if (currentVersion?.schema_version && currentVersion.schema_version < 15) {
addColumnIfMissing(db, 'imm_sessions', 'ended_media_ms', 'INTEGER');
}
ensureLifetimeSummaryTables(db);
db.exec(`

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 14;
export const SCHEMA_VERSION = 15;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -482,6 +482,7 @@ export interface AnimeEpisodeRow {
season: number | null;
episode: number | null;
durationMs: number;
endedMediaMs: number | null;
watched: number;
totalSessions: number;
totalActiveMs: number;

View File

@@ -55,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
assert.equal(surfaceResult[0]?.isKnown, false);
});
// Regression test for the combined known-word + JLPT case: the token's
// surface and headword are kanji (`大体`) while its reading is kana (`だいたい`).
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
const tokens = [
makeToken({
surface: '大体',
headword: '大体',
reading: 'だいたい',
frequencyRank: 1895,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
// The known-word stub recognizes only the kana reading, so a known match
// can only come from the reading, not from the surface/headword.
isKnownWord: (text) => text === 'だいたい',
// The JLPT stub recognizes only the kanji form, so the level must be
// resolved from the surface/headword.
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
}),
);
assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.jlptLevel, 'N4');
// The frequency rank supplied on the input token is expected to be kept.
assert.equal(result[0]?.frequencyRank, 1895);
});
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
const tokens = [
makeToken({

View File

@@ -560,12 +560,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return false;
}
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.reading,
token.headword,
].filter(
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -659,7 +654,16 @@ function computeTokenKnownStatus(
knownWordMatchMode: NPlusOneMatchMode,
): boolean {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
return true;
}
const normalizedReading = token.reading.trim();
if (!normalizedReading) {
return false;
}
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
}
function filterTokenFrequencyRank(

View File

@@ -82,6 +82,10 @@ export function EpisodeList({
ep.totalYomitanLookupCount,
ep.totalTokensSeen,
);
const progressPct =
ep.durationMs > 0 && ep.endedMediaMs != null
? Math.min(100, Math.round((ep.endedMediaMs / ep.durationMs) * 100))
: null;
return (
<Fragment key={ep.videoId}>
@@ -99,17 +103,17 @@ export function EpisodeList({
{ep.canonicalTitle}
</td>
<td className="py-2 pr-3 text-right">
{ep.durationMs > 0 ? (
{progressPct != null ? (
<span
className={
ep.totalActiveMs >= ep.durationMs * 0.85
progressPct >= 85
? 'text-ctp-green'
: ep.totalActiveMs >= ep.durationMs * 0.5
: progressPct >= 50
? 'text-ctp-peach'
: 'text-ctp-overlay2'
}
>
{Math.min(100, Math.round((ep.totalActiveMs / ep.durationMs) * 100))}%
{progressPct}%
</span>
) : (
<span className="text-ctp-overlay2">{'\u2014'}</span>

View File

@@ -21,6 +21,7 @@ test('EpisodeList renders explicit episode detail button alongside quick peek ro
episode: 9,
season: 1,
durationMs: 1,
endedMediaMs: null,
watched: 0,
canonicalTitle: 'Episode 9',
totalSessions: 1,

View File

@@ -87,11 +87,12 @@ test('EpisodeList renders per-episode Yomitan lookup rate', () => {
videoId: 9,
episode: 9,
season: 1,
durationMs: 1,
durationMs: 100,
endedMediaMs: 6,
watched: 0,
canonicalTitle: 'Episode 9',
totalSessions: 1,
totalActiveMs: 1,
totalActiveMs: 90,
totalCards: 1,
totalTokensSeen: 350,
totalYomitanLookupCount: 7,
@@ -103,6 +104,8 @@ test('EpisodeList renders per-episode Yomitan lookup rate', () => {
assert.match(markup, /Lookup Rate/);
assert.match(markup, /2\.0 \/ 100 tokens/);
assert.match(markup, /6%/);
assert.doesNotMatch(markup, /90%/);
});
test('AnimeOverviewStats renders aggregate Yomitan lookup metrics', () => {

View File

@@ -212,6 +212,7 @@ export interface AnimeEpisode {
episode: number | null;
season: number | null;
durationMs: number;
endedMediaMs: number | null;
watched: number;
canonicalTitle: string;
totalSessions: number;