mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 12:11:28 -07:00
fix(subtitle): restore known and JLPT token annotations
This commit is contained in:
@@ -360,10 +360,7 @@ test('getTrendsDashboard returns chart-ready aggregated series', () => {
|
||||
assert.equal(dashboard.activity.watchTime[0]?.value, 30);
|
||||
assert.equal(dashboard.progress.watchTime[1]?.value, 75);
|
||||
assert.equal(dashboard.progress.lookups[1]?.value, 18);
|
||||
assert.equal(
|
||||
dashboard.ratios.lookupsPerHundred[0]?.value,
|
||||
+((8 / 120) * 100).toFixed(1),
|
||||
);
|
||||
assert.equal(dashboard.ratios.lookupsPerHundred[0]?.value, +((8 / 120) * 100).toFixed(1));
|
||||
assert.equal(dashboard.animePerDay.watchTime[0]?.animeTitle, 'Trend Dashboard Anime');
|
||||
assert.equal(dashboard.animeCumulative.watchTime[1]?.value, 75);
|
||||
assert.equal(
|
||||
@@ -2025,6 +2022,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
|
||||
SET
|
||||
ended_at_ms = ?,
|
||||
status = 2,
|
||||
ended_media_ms = ?,
|
||||
active_watched_ms = ?,
|
||||
cards_mined = ?,
|
||||
tokens_seen = ?,
|
||||
@@ -2034,9 +2032,9 @@ test('anime/media detail and episode queries use ended-session metrics when tele
|
||||
WHERE session_id = ?
|
||||
`,
|
||||
);
|
||||
updateSession.run(1_001_000, 3_000, 1, 10, 4, 3, now, s1);
|
||||
updateSession.run(1_011_000, 4_000, 2, 20, 5, 4, now, s2);
|
||||
updateSession.run(1_021_000, 5_000, 3, 30, 6, 5, now, s3);
|
||||
updateSession.run(1_001_000, 2_500, 3_000, 1, 10, 4, 3, now, s1);
|
||||
updateSession.run(1_011_000, 6_000, 4_000, 2, 20, 5, 4, now, s2);
|
||||
updateSession.run(1_021_000, 8_000, 5_000, 3, 30, 6, 5, now, s3);
|
||||
|
||||
const animeDetail = getAnimeDetail(db, animeId);
|
||||
assert.ok(animeDetail);
|
||||
@@ -2047,6 +2045,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
|
||||
assert.deepEqual(
|
||||
episodes.map((row) => ({
|
||||
videoId: row.videoId,
|
||||
endedMediaMs: row.endedMediaMs,
|
||||
totalSessions: row.totalSessions,
|
||||
totalActiveMs: row.totalActiveMs,
|
||||
totalCards: row.totalCards,
|
||||
@@ -2055,6 +2054,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
|
||||
[
|
||||
{
|
||||
videoId: episodeOne,
|
||||
endedMediaMs: 6_000,
|
||||
totalSessions: 2,
|
||||
totalActiveMs: 7_000,
|
||||
totalCards: 3,
|
||||
@@ -2062,6 +2062,7 @@ test('anime/media detail and episode queries use ended-session metrics when tele
|
||||
},
|
||||
{
|
||||
videoId: episodeTwo,
|
||||
endedMediaMs: 8_000,
|
||||
totalSessions: 1,
|
||||
totalActiveMs: 5_000,
|
||||
totalCards: 3,
|
||||
|
||||
@@ -818,7 +818,10 @@ function accumulatePoints(points: TrendChartPoint[]): TrendChartPoint[] {
|
||||
}
|
||||
|
||||
function buildAggregatedTrendRows(rollups: ImmersionSessionRollupRow[]) {
|
||||
const byKey = new Map<number, { activeMin: number; cards: number; words: number; sessions: number }>();
|
||||
const byKey = new Map<
|
||||
number,
|
||||
{ activeMin: number; cards: number; words: number; sessions: number }
|
||||
>();
|
||||
|
||||
for (const rollup of rollups) {
|
||||
const existing = byKey.get(rollup.rollupDayOrMonth) ?? {
|
||||
@@ -894,14 +897,8 @@ function buildLookupsPerHundredWords(sessions: TrendSessionMetricRow[]): TrendCh
|
||||
|
||||
for (const session of sessions) {
|
||||
const epochDay = Math.floor(session.startedAtMs / 86_400_000);
|
||||
lookupsByDay.set(
|
||||
epochDay,
|
||||
(lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount,
|
||||
);
|
||||
wordsByDay.set(
|
||||
epochDay,
|
||||
(wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session),
|
||||
);
|
||||
lookupsByDay.set(epochDay, (lookupsByDay.get(epochDay) ?? 0) + session.yomitanLookupCount);
|
||||
wordsByDay.set(epochDay, (wordsByDay.get(epochDay) ?? 0) + getTrendSessionWordCount(session));
|
||||
}
|
||||
|
||||
return Array.from(lookupsByDay.entries())
|
||||
@@ -1005,8 +1002,13 @@ function buildCumulativePerAnime(points: TrendPerAnimePoint[]): TrendPerAnimePoi
|
||||
return result;
|
||||
}
|
||||
|
||||
function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>): Map<number, string> {
|
||||
const uniqueIds = [...new Set(videoIds.filter((value): value is number => typeof value === 'number'))];
|
||||
function getVideoAnimeTitleMap(
|
||||
db: DatabaseSync,
|
||||
videoIds: Array<number | null>,
|
||||
): Map<number, string> {
|
||||
const uniqueIds = [
|
||||
...new Set(videoIds.filter((value): value is number => typeof value === 'number')),
|
||||
];
|
||||
if (uniqueIds.length === 0) {
|
||||
return new Map();
|
||||
}
|
||||
@@ -1027,7 +1029,10 @@ function getVideoAnimeTitleMap(db: DatabaseSync, videoIds: Array<number | null>)
|
||||
return new Map(rows.map((row) => [row.videoId, row.animeTitle]));
|
||||
}
|
||||
|
||||
function resolveVideoAnimeTitle(videoId: number | null, titlesByVideoId: Map<number, string>): string {
|
||||
function resolveVideoAnimeTitle(
|
||||
videoId: number | null,
|
||||
titlesByVideoId: Map<number, string>,
|
||||
): string {
|
||||
if (videoId === null) {
|
||||
return 'Unknown';
|
||||
}
|
||||
@@ -1087,7 +1092,9 @@ function buildEpisodesPerAnimeFromDailyRollups(
|
||||
return result;
|
||||
}
|
||||
|
||||
function buildEpisodesPerDayFromDailyRollups(rollups: ImmersionSessionRollupRow[]): TrendChartPoint[] {
|
||||
function buildEpisodesPerDayFromDailyRollups(
|
||||
rollups: ImmersionSessionRollupRow[],
|
||||
): TrendChartPoint[] {
|
||||
const byDay = new Map<number, Set<number>>();
|
||||
|
||||
for (const rollup of rollups) {
|
||||
@@ -1147,7 +1154,9 @@ function buildNewWordsPerDay(db: DatabaseSync, cutoffMs: number | null): TrendCh
|
||||
ORDER BY epochDay ASC
|
||||
`);
|
||||
|
||||
const rows = (cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))) as Array<{
|
||||
const rows = (
|
||||
cutoffMs === null ? prepared.all() : prepared.all(Math.floor(cutoffMs / 1000))
|
||||
) as Array<{
|
||||
epochDay: number;
|
||||
wordCount: number;
|
||||
}>;
|
||||
@@ -1186,10 +1195,8 @@ export function getTrendsDashboard(
|
||||
|
||||
const animePerDay = {
|
||||
episodes: buildEpisodesPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId),
|
||||
watchTime: buildPerAnimeFromDailyRollups(
|
||||
dailyRollups,
|
||||
titlesByVideoId,
|
||||
(rollup) => Math.round(rollup.totalActiveMin),
|
||||
watchTime: buildPerAnimeFromDailyRollups(dailyRollups, titlesByVideoId, (rollup) =>
|
||||
Math.round(rollup.totalActiveMin),
|
||||
),
|
||||
cards: buildPerAnimeFromDailyRollups(
|
||||
dailyRollups,
|
||||
@@ -1201,10 +1208,7 @@ export function getTrendsDashboard(
|
||||
titlesByVideoId,
|
||||
(rollup) => rollup.totalTokensSeen,
|
||||
),
|
||||
lookups: buildPerAnimeFromSessions(
|
||||
sessions,
|
||||
(session) => session.yomitanLookupCount,
|
||||
),
|
||||
lookups: buildPerAnimeFromSessions(sessions, (session) => session.yomitanLookupCount),
|
||||
lookupsPerHundred: buildLookupsPerHundredPerAnime(sessions),
|
||||
};
|
||||
|
||||
@@ -1740,6 +1744,14 @@ export function getAnimeEpisodes(db: DatabaseSync, animeId: number): AnimeEpisod
|
||||
v.parsed_season AS season,
|
||||
v.parsed_episode AS episode,
|
||||
v.duration_ms AS durationMs,
|
||||
(
|
||||
SELECT s_recent.ended_media_ms
|
||||
FROM imm_sessions s_recent
|
||||
WHERE s_recent.video_id = v.video_id
|
||||
AND s_recent.ended_at_ms IS NOT NULL
|
||||
ORDER BY s_recent.ended_at_ms DESC, s_recent.session_id DESC
|
||||
LIMIT 1
|
||||
) AS endedMediaMs,
|
||||
v.watched AS watched,
|
||||
COUNT(DISTINCT s.session_id) AS totalSessions,
|
||||
COALESCE(SUM(COALESCE(asm.activeWatchedMs, s.active_watched_ms, 0)), 0) AS totalActiveMs,
|
||||
|
||||
@@ -39,6 +39,7 @@ export function finalizeSessionRecord(
|
||||
SET
|
||||
ended_at_ms = ?,
|
||||
status = ?,
|
||||
ended_media_ms = ?,
|
||||
total_watched_ms = ?,
|
||||
active_watched_ms = ?,
|
||||
lines_seen = ?,
|
||||
@@ -58,6 +59,7 @@ export function finalizeSessionRecord(
|
||||
).run(
|
||||
endedAtMs,
|
||||
SESSION_STATUS_ENDED,
|
||||
sessionState.lastMediaMs,
|
||||
sessionState.totalWatchedMs,
|
||||
sessionState.activeWatchedMs,
|
||||
sessionState.linesSeen,
|
||||
|
||||
@@ -740,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('finalize session persists ended media position', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
|
||||
canonicalTitle: 'Slice A Ended Media',
|
||||
sourcePath: '/tmp/slice-a-ended-media.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
const startedAtMs = 1_234_567_000;
|
||||
const endedAtMs = startedAtMs + 8_500;
|
||||
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
|
||||
state.lastMediaMs = 91_000;
|
||||
|
||||
finalizeSessionRecord(db, state, endedAtMs);
|
||||
|
||||
const row = db
|
||||
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
|
||||
.get(sessionId) as {
|
||||
ended_media_ms: number | null;
|
||||
} | null;
|
||||
|
||||
assert.ok(row);
|
||||
assert.equal(row?.ended_media_ms, 91_000);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('executeQueuedWrite inserts event and telemetry rows', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
@@ -569,6 +569,7 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
status INTEGER NOT NULL,
|
||||
locale_id INTEGER, target_lang_id INTEGER,
|
||||
difficulty_tier INTEGER, subtitle_mode INTEGER,
|
||||
ended_media_ms INTEGER,
|
||||
total_watched_ms INTEGER NOT NULL DEFAULT 0,
|
||||
active_watched_ms INTEGER NOT NULL DEFAULT 0,
|
||||
lines_seen INTEGER NOT NULL DEFAULT 0,
|
||||
@@ -1026,6 +1027,10 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
`);
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < 15) {
|
||||
addColumnIfMissing(db, 'imm_sessions', 'ended_media_ms', 'INTEGER');
|
||||
}
|
||||
|
||||
ensureLifetimeSummaryTables(db);
|
||||
|
||||
db.exec(`
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export const SCHEMA_VERSION = 14;
|
||||
export const SCHEMA_VERSION = 15;
|
||||
export const DEFAULT_QUEUE_CAP = 1_000;
|
||||
export const DEFAULT_BATCH_SIZE = 25;
|
||||
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
|
||||
@@ -482,6 +482,7 @@ export interface AnimeEpisodeRow {
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
durationMs: number;
|
||||
endedMediaMs: number | null;
|
||||
watched: number;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
|
||||
@@ -55,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
|
||||
assert.equal(surfaceResult[0]?.isKnown, false);
|
||||
});
|
||||
|
||||
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '大体',
|
||||
headword: '大体',
|
||||
reading: 'だいたい',
|
||||
frequencyRank: 1895,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'だいたい',
|
||||
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.jlptLevel, 'N4');
|
||||
assert.equal(result[0]?.frequencyRank, 1895);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
|
||||
@@ -560,12 +560,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
const candidates = [
|
||||
resolveJlptLookupText(token),
|
||||
token.surface,
|
||||
token.reading,
|
||||
token.headword,
|
||||
].filter(
|
||||
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
|
||||
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
|
||||
);
|
||||
|
||||
@@ -659,7 +654,16 @@ function computeTokenKnownStatus(
|
||||
knownWordMatchMode: NPlusOneMatchMode,
|
||||
): boolean {
|
||||
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
|
||||
return token.isKnown || (matchText ? isKnownWord(matchText) : false);
|
||||
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const normalizedReading = token.reading.trim();
|
||||
if (!normalizedReading) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
|
||||
}
|
||||
|
||||
function filterTokenFrequencyRank(
|
||||
|
||||
Reference in New Issue
Block a user