fix(subtitle): restore known and JLPT token annotations

This commit is contained in:
2026-03-19 18:03:20 -07:00
parent 1b5f0c6999
commit 43a0d11446
16 changed files with 258 additions and 43 deletions

View File

@@ -360,10 +360,7 @@ test('getTrendsDashboard returns chart-ready aggregated series', () => {
assert.equal(dashboard.activity.watchTime[0]?.value, 30);
assert.equal(dashboard.progress.watchTime[1]?.value, 75);
assert.equal(dashboard.progress.lookups[1]?.value, 18);
assert.equal(
dashboard.ratios.lookupsPerHundred[0]?.value,
+((8 / 120) * 100).toFixed(1),
);
assert.equal(dashboard.ratios.lookupsPerHundred[0]?.value, +((8 / 120) * 100).toFixed(1));
assert.equal(dashboard.animePerDay.watchTime[0]?.animeTitle, 'Trend Dashboard Anime');
assert.equal(dashboard.animeCumulative.watchTime[1]?.value, 75);
assert.equal(
@@ -2025,6 +2022,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
SET
ended_at_ms = ?,
status = 2,
ended_media_ms = ?,
active_watched_ms = ?,
cards_mined = ?,
tokens_seen = ?,
@@ -2034,9 +2032,9 @@ test('anime/media detail and episode queries use ended-session metrics when tele
WHERE session_id = ?
`,
);
updateSession.run(1_001_000, 3_000, 1, 10, 4, 3, now, s1);
updateSession.run(1_011_000, 4_000, 2, 20, 5, 4, now, s2);
updateSession.run(1_021_000, 5_000, 3, 30, 6, 5, now, s3);
updateSession.run(1_001_000, 2_500, 3_000, 1, 10, 4, 3, now, s1);
updateSession.run(1_011_000, 6_000, 4_000, 2, 20, 5, 4, now, s2);
updateSession.run(1_021_000, 8_000, 5_000, 3, 30, 6, 5, now, s3);
const animeDetail = getAnimeDetail(db, animeId);
assert.ok(animeDetail);
@@ -2047,6 +2045,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
assert.deepEqual(
episodes.map((row) => ({
videoId: row.videoId,
endedMediaMs: row.endedMediaMs,
totalSessions: row.totalSessions,
totalActiveMs: row.totalActiveMs,
totalCards: row.totalCards,
@@ -2055,6 +2054,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
[
{
videoId: episodeOne,
endedMediaMs: 6_000,
totalSessions: 2,
totalActiveMs: 7_000,
totalCards: 3,
@@ -2062,6 +2062,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
},
{
videoId: episodeTwo,
endedMediaMs: 8_000,
totalSessions: 1,
totalActiveMs: 5_000,
totalCards: 3,

View File

@@ -818,7 +818,10 @@ function accumulatePoints(points: TrendChartPoint[]): TrendChartPoint[] {
}
function buildAggregatedTrendRows(rollups: ImmersionSessionRollupRow[]) {
const byKey = new Map<number, { activeMin: number; cards: number; words: number; sessions: number }>();
const byKey = new Map<
number,
{ activeMin: number; cards: number; words: number; sessions: number }
>();
for (const rollup of rollups) {
const existing = byKey.get(rollup.rollupDayOrMonth) ?? {
@@ -894,14 +897,8 @@ function buildLookupsPerHundredWords(sessions: TrendSessionMetricRow[]): TrendCh
for (const session of sessions) {
const epochDay = Math.floor(session.startedAtMs / 86_400_000);
lookupsByDay.set(
epochDay,
(lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount,
);
wordsByDay.set(
epochDay,
(wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session),
);
lookupsByDay.set(epochDay, (lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount);
wordsByDay.set(epochDay, (wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session));
}
return Array.from(lookupsByDay.entries())
@@ -1005,8 +1002,13 @@ function buildCumulativePerAnime(points: TrendPerAnimePoint[]): TrendPerAnimePoi
return result;
}
function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>): Map<number, string> {
const uniqueIds = [...new Set(videoIds.filter((value): value is number => typeof value === 'number'))];
function getVideoAnimeTitleMap(
db: DatabaseSync,
videoIds: Array<number | null>,
): Map<number, string> {
const uniqueIds = [
...new Set(videoIds.filter((value): value is number => typeof value === 'number')),
];
if (uniqueIds.length === 0) {
return new Map();
}
@@ -1027,7 +1029,10 @@ function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>)
return new Map(rows.map((row) => [row.videoId, row.animeTitle]));
}
function resolveVideoAnimeTitle(videoId: number | null, titlesByVideoId: Map<number, string>): string {
function resolveVideoAnimeTitle(
videoId: number | null,
titlesByVideoId: Map<number, string>,
): string {
if (videoId === null) {
return 'Unknown';
}
@@ -1087,7 +1092,9 @@ function buildEpisodesPerAnimeFromDailyRollups(
return result;
}
function buildEpisodesPerDayFromDailyRollups(rollups: ImmersionSessionRollupRow[]): TrendChartPoint[] {
function buildEpisodesPerDayFromDailyRollups(
rollups: ImmersionSessionRollupRow[],
): TrendChartPoint[] {
const byDay = new Map<number, Set<number>>();
for (const rollup of rollups) {
@@ -1147,7 +1154,9 @@ function buildNewWordsPerDay(db: DatabaseSync, cutoffMs: number | null): TrendCh
ORDER BY epochDay ASC
`);
const rows = (cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))) as Array<{
const rows = (
cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))
) as Array<{
epochDay: number;
wordCount: number;
}>;
@@ -1186,10 +1195,8 @@ export function getTrendsDashboard(
const animePerDay = {
episodes: buildEpisodesPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId),
watchTime: buildPerAnimeFromDailyRollups(
dailyRollups,
titlesByVideoId,
(rollup) => Math.round(rollup.totalActiveMin),
watchTime: buildPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId, (rollup) =>
Math.round(rollup.totalActiveMin),
),
cards: buildPerAnimeFromDailyRollups(
dailyRollups,
@@ -1201,10 +1208,7 @@ export function getTrendsDashboard(
titlesByVideoId,
(rollup) => rollup.totalTokensSeen,
),
lookups: buildPerAnimeFromSessions(
sessions,
(session) => session.yomitanLookupCount,
),
lookups: buildPerAnimeFromSessions(sessions, (session) => session.yomitanLookupCount),
lookupsPerHundred: buildLookupsPerHundredPerAnime(sessions),
};
@@ -1740,6 +1744,14 @@ export function getAnimeEpisodes(db: DatabaseSync, animeId: number): AnimeEpisod
v.parsed_season AS season,
v.parsed_episode AS episode,
v.duration_ms AS durationMs,
(
SELECT s_recent.ended_media_ms
FROM imm_sessions s_recent
WHERE s_recent.video_id = v.video_id
AND s_recent.ended_at_ms IS NOT NULL
ORDER BY s_recent.ended_at_ms DESC, s_recent.session_id DESC
LIMIT 1
) AS endedMediaMs,
v.watched AS watched,
COUNT(DISTINCT s.session_id) AS totalSessions,
COALESCE(SUM(COALESCE(asm.activeWatchedMs, s.active_watched_ms, 0)), 0) AS totalActiveMs,

View File

@@ -39,6 +39,7 @@ export function finalizeSessionRecord(
SET
ended_at_ms = ?,
status = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
@@ -58,6 +59,7 @@ export function finalizeSessionRecord(
).run(
endedAtMs,
SESSION_STATUS_ENDED,
sessionState.lastMediaMs,
sessionState.totalWatchedMs,
sessionState.activeWatchedMs,
sessionState.linesSeen,

View File

@@ -740,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
}
});
// Checks that finalizing a session stores the state's last media position:
// the state's lastMediaMs is set to a known value, the session is finalized,
// and the ended_media_ms column of the session row must hold that same value.
test('finalize session persists ended media position', () => {
  // Fixed timestamps and playback position used throughout the scenario.
  const sessionStartMs = 1_234_567_000;
  const sessionEndMs = sessionStartMs + 8_500;
  const expectedMediaMs = 91_000;

  const databaseFile = makeDbPath();
  const connection = new Database(databaseFile);

  try {
    ensureSchema(connection);

    const videoId = getOrCreateVideoRecord(connection, 'local:/tmp/slice-a-ended-media.mkv', {
      canonicalTitle: 'Slice A Ended Media',
      sourcePath: '/tmp/slice-a-ended-media.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    // Start a session, record where playback stopped, then finalize it.
    const started = startSessionRecord(connection, videoId, sessionStartMs);
    started.state.lastMediaMs = expectedMediaMs;
    finalizeSessionRecord(connection, started.state, sessionEndMs);

    // Read back only the column under test for the session just finalized.
    const lookup = connection.prepare(
      'SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?',
    );
    const persisted = lookup.get(started.sessionId) as {
      ended_media_ms: number | null;
    } | null;

    assert.ok(persisted);
    assert.equal(persisted?.ended_media_ms, expectedMediaMs);
  } finally {
    // Always release the handle and remove the temporary database file.
    connection.close();
    cleanupDbPath(databaseFile);
  }
});
test('executeQueuedWrite inserts event and telemetry rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);

View File

@@ -569,6 +569,7 @@ export function ensureSchema(db: DatabaseSync): void {
status INTEGER NOT NULL,
locale_id INTEGER, target_lang_id INTEGER,
difficulty_tier INTEGER, subtitle_mode INTEGER,
ended_media_ms INTEGER,
total_watched_ms INTEGER NOT NULL DEFAULT 0,
active_watched_ms INTEGER NOT NULL DEFAULT 0,
lines_seen INTEGER NOT NULL DEFAULT 0,
@@ -1026,6 +1027,10 @@ export function ensureSchema(db: DatabaseSync): void {
`);
}
if (currentVersion?.schema_version && currentVersion.schema_version < 15) {
addColumnIfMissing(db, 'imm_sessions', 'ended_media_ms', 'INTEGER');
}
ensureLifetimeSummaryTables(db);
db.exec(`

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 14;
export const SCHEMA_VERSION = 15;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -482,6 +482,7 @@ export interface AnimeEpisodeRow {
season: number | null;
episode: number | null;
durationMs: number;
endedMediaMs: number | null;
watched: number;
totalSessions: number;
totalActiveMs: number;

View File

@@ -55,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
assert.equal(surfaceResult[0]?.isKnown, false);
});
// Scenario: the known-word lookup only recognises the kana reading, while the
// JLPT lookup only recognises the kanji form. The annotated token is expected
// to be marked known, carry the N4 level, and keep its frequency rank.
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
  const kanjiToken = makeToken({
    surface: '大体',
    headword: '大体',
    reading: 'だいたい',
    frequencyRank: 1895,
  });

  const deps = makeDeps({
    // Matches the reading only, never the surface/headword text.
    isKnownWord: (text) => text === 'だいたい',
    // Matches the kanji form only.
    getJlptLevel: (text) => {
      if (text === '大体') {
        return 'N4';
      }
      return null;
    },
  });

  const [annotated] = annotateTokens([kanjiToken], deps);

  assert.equal(annotated?.isKnown, true);
  assert.equal(annotated?.jlptLevel, 'N4');
  assert.equal(annotated?.frequencyRank, 1895);
});
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
const tokens = [
makeToken({

View File

@@ -560,12 +560,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return false;
}
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.reading,
token.headword,
].filter(
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -659,7 +654,16 @@ function computeTokenKnownStatus(
knownWordMatchMode: NPlusOneMatchMode,
): boolean {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
return true;
}
const normalizedReading = token.reading.trim();
if (!normalizedReading) {
return false;
}
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
}
function filterTokenFrequencyRank(