Persist stats exclusions in DB and fix word metrics filtering

- Stats vocabulary exclusions stored in `imm_stats_excluded_words` (schema v18); seeded from localStorage on first load
- Session, overview, trends, and library word metrics use filtered persisted occurrences with raw fallback
- Session known-word % chart derives both the known-word count and the total-word count from the filtered persisted occurrences, using the filtered total as the denominator
- JLPT subtitle styling changed to underline-only; no longer overrides text color
This commit is contained in:
2026-05-03 19:40:54 -07:00
parent db30c61327
commit 25d0aa47db
32 changed files with 1541 additions and 211 deletions
+79 -16
View File
@@ -20,6 +20,12 @@ type StatsServerNoteInfo = {
fields: Record<string, { value: string }>;
};
// One entry of the `words` array in an excluded-words request body.
// `parseExcludedWordsBody` only accepts entries where all three fields are strings.
type StatsExcludedWordPayload = {
headword: string;
word: string;
reading: string;
};
function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number {
if (raw === undefined) return fallback;
const n = Number(raw);
@@ -49,6 +55,23 @@ function parseEventTypesQuery(raw: string | undefined): number[] | undefined {
return parsed.length > 0 ? parsed : undefined;
}
/**
 * Validates a request body of the shape
 * `{ words: [{ headword, word, reading }, ...] }`.
 *
 * @param body Untrusted, already-decoded request payload.
 * @returns A new array holding only the three recognised fields of each entry,
 *   or `null` when `body` is not an object, `words` is not an array, or any
 *   entry is not an object whose three fields are all strings.
 */
function parseExcludedWordsBody(body: unknown): StatsExcludedWordPayload[] | null {
  if (typeof body !== 'object' || body === null) return null;

  const candidates = (body as { words?: unknown }).words;
  if (!Array.isArray(candidates)) return null;

  const parsed: StatsExcludedWordPayload[] = [];
  for (const entry of candidates as unknown[]) {
    if (typeof entry !== 'object' || entry === null) return null;

    // Read all three fields up front, then reject on the first non-string one.
    const fields = entry as Record<string, unknown>;
    const headword = fields.headword;
    const word = fields.word;
    const reading = fields.reading;
    if (typeof headword !== 'string') return null;
    if (typeof word !== 'string') return null;
    if (typeof reading !== 'string') return null;

    // Copy only the known fields so extra properties on the entry are dropped.
    parsed.push({ headword, word, reading });
  }
  return parsed;
}
function resolveStatsNoteFieldName(
noteInfo: StatsServerNoteInfo,
...preferredNames: (string | undefined)[]
@@ -161,6 +184,21 @@ function toKnownWordRate(knownWordsSeen: number, tokensSeen: number): number {
return Number(((knownWordsSeen / tokensSeen) * 100).toFixed(1));
}
/**
 * Adds up occurrence counts across per-line word rows.
 *
 * @param wordsByLine Rows giving a headword and its occurrence count on a line.
 * @param knownWordsSet Headwords that count as known.
 * @returns `totalWordsSeen` is the sum of every row's `occurrenceCount`;
 *   `knownWordsSeen` is that sum restricted to rows whose headword is in
 *   `knownWordsSet`. Both are 0 for an empty input.
 */
function summarizeFilteredWordOccurrences(
  wordsByLine: Array<{ lineIndex: number; headword: string; occurrenceCount: number }>,
  knownWordsSet: Set<string>,
): { knownWordsSeen: number; totalWordsSeen: number } {
  const summary = { knownWordsSeen: 0, totalWordsSeen: 0 };
  for (const { headword, occurrenceCount } of wordsByLine) {
    // Every row counts toward the total; only known headwords count twice.
    summary.totalWordsSeen += occurrenceCount;
    if (!knownWordsSet.has(headword)) continue;
    summary.knownWordsSeen += occurrenceCount;
  }
  return summary;
}
async function enrichSessionsWithKnownWordMetrics(
tracker: ImmersionTrackerService,
sessions: Array<{
@@ -188,21 +226,21 @@ async function enrichSessionsWithKnownWordMetrics(
const enriched = await Promise.all(
sessions.map(async (session) => {
let knownWordsSeen = 0;
let totalWordsSeen = 0;
try {
const wordsByLine = await tracker.getSessionWordsByLine(session.sessionId);
for (const row of wordsByLine) {
if (knownWordsSet.has(row.headword)) {
knownWordsSeen += row.occurrenceCount;
}
}
const summary = summarizeFilteredWordOccurrences(wordsByLine, knownWordsSet);
knownWordsSeen = summary.knownWordsSeen;
totalWordsSeen = summary.totalWordsSeen;
} catch {
knownWordsSeen = 0;
totalWordsSeen = 0;
}
return {
...session,
knownWordsSeen,
knownWordRate: toKnownWordRate(knownWordsSeen, session.tokensSeen),
knownWordRate: toKnownWordRate(knownWordsSeen, totalWordsSeen),
};
}),
);
@@ -391,32 +429,45 @@ export function createStatsApp(
const id = parseIntQuery(c.req.param('id'), 0);
if (id <= 0) return c.json([], 400);
const knownWordsSet = loadKnownWordsSet(options?.knownWordCachePath);
if (!knownWordsSet) return c.json([]);
const knownWordsSet = loadKnownWordsSet(options?.knownWordCachePath) ?? new Set<string>();
// Get per-line word occurrences for the session.
const wordsByLine = await tracker.getSessionWordsByLine(id);
// Build cumulative known-word occurrence count per recorded line index.
// Build cumulative filtered occurrence counts per recorded line index.
// The stats UI uses line-count progress to align this series with the session
// timeline, so preserve the stored line position rather than compressing gaps.
const lineGroups = new Map<number, number>();
const totalLineGroups = new Map<number, number>();
const knownLineGroups = new Map<number, number>();
for (const row of wordsByLine) {
if (!knownWordsSet.has(row.headword)) {
continue;
totalLineGroups.set(
row.lineIndex,
(totalLineGroups.get(row.lineIndex) ?? 0) + row.occurrenceCount,
);
if (knownWordsSet.has(row.headword)) {
knownLineGroups.set(
row.lineIndex,
(knownLineGroups.get(row.lineIndex) ?? 0) + row.occurrenceCount,
);
}
lineGroups.set(row.lineIndex, (lineGroups.get(row.lineIndex) ?? 0) + row.occurrenceCount);
}
const sortedLineIndices = [...lineGroups.keys()].sort((a, b) => a - b);
const sortedLineIndices = [...totalLineGroups.keys()].sort((a, b) => a - b);
let knownWordsSeen = 0;
const knownByLinesSeen: Array<{ linesSeen: number; knownWordsSeen: number }> = [];
let totalWordsSeen = 0;
const knownByLinesSeen: Array<{
linesSeen: number;
knownWordsSeen: number;
totalWordsSeen: number;
}> = [];
for (const lineIdx of sortedLineIndices) {
knownWordsSeen += lineGroups.get(lineIdx)!;
knownWordsSeen += knownLineGroups.get(lineIdx) ?? 0;
totalWordsSeen += totalLineGroups.get(lineIdx)!;
knownByLinesSeen.push({
linesSeen: lineIdx,
knownWordsSeen,
totalWordsSeen,
});
}
@@ -430,6 +481,18 @@ export function createStatsApp(
return c.json(vocab);
});
// GET: respond with whatever the tracker reports as the current stats
// excluded words, serialized as JSON.
app.get('/api/stats/excluded-words', async (c) => {
  const excludedWords = await tracker.getStatsExcludedWords();
  return c.json(excludedWords);
});
// PUT: validate the JSON body and pass the parsed entries to the tracker.
// A body that is not valid JSON is treated as `null`, so it fails validation
// and gets the same empty 400 response as a malformed payload.
app.put('/api/stats/excluded-words', async (c) => {
  const payload: unknown = await c.req.json().catch(() => null);
  const parsedWords = parseExcludedWordsBody(payload);
  if (parsedWords === null) {
    return c.body(null, 400);
  }
  await tracker.replaceStatsExcludedWords(parsedWords);
  return c.json({ ok: true });
});
app.get('/api/stats/vocabulary/occurrences', async (c) => {
const headword = (c.req.query('headword') ?? '').trim();
const word = (c.req.query('word') ?? '').trim();