feat(stats): speed up session maintenance and improve stats UI (#111)

This commit is contained in:
2026-06-08 02:20:52 -07:00
committed by GitHub
parent e6a16a069b
commit 311f1e8ee5
108 changed files with 7441 additions and 729 deletions
File diff suppressed because it is too large Load Diff
@@ -34,13 +34,13 @@ test('guessAnilistMediaInfo fills missing guessit episode from filename parser',
});
});
test('guessAnilistMediaInfo ignores low-confidence parser details when guessit omits them', async () => {
test('guessAnilistMediaInfo keeps season directory scope when guessit omits details', async () => {
const result = await guessAnilistMediaInfo('/tmp/Season 2/Guessit Title.mkv', null, {
runGuessit: async () => JSON.stringify({ title: 'Guessit Title' }),
});
assert.deepEqual(result, {
title: 'Guessit Title',
season: null,
season: 2,
episode: null,
source: 'guessit',
});
+1 -1
View File
@@ -292,7 +292,7 @@ export async function guessAnilistMediaInfo(
title: buildGuessitTitle(title, alternativeTitle),
...(alternativeTitle ? { alternativeTitle } : {}),
...(year ? { year } : {}),
season: season ?? (canUseFallbackDetails ? fallback.season : null),
season: season ?? fallback.season,
episode: episode ?? (canUseFallbackDetails ? fallback.episode : null),
source: 'guessit',
};
@@ -6,6 +6,7 @@ import path from 'node:path';
import { toMonthKey } from './immersion-tracker/maintenance';
import { enqueueWrite } from './immersion-tracker/queue';
import { toDbTimestamp } from './immersion-tracker/query-shared';
import { repairJellyfinStreamVideoLinks } from './immersion-tracker/jellyfin-link-repair';
import { Database, type DatabaseSync } from './immersion-tracker/sqlite';
import { nowMs as trackerNowMs } from './immersion-tracker/time';
import {
@@ -1164,6 +1165,54 @@ test('recordSubtitleLine leaves session token counts at zero when tokenization i
}
});
/**
 * A cue whose end time is earlier than its start time must not be stored,
 * while a later cue with the same text and valid timing is still persisted.
 */
test('recordSubtitleLine skips invalid cue timing and still stores the later valid cue', async () => {
  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;

  try {
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });

    tracker.handleMediaChange('/tmp/timing.mkv', 'Timing');
    // First cue ends 100 ms before it starts; it is expected to be ignored.
    tracker.recordSubtitleLine('same subtitle', 953.991, 953.891);
    // Same text and start time, this time with a valid end time.
    tracker.recordSubtitleLine('same subtitle', 953.991, 956.56);

    const privateApi = tracker as unknown as {
      flushTelemetry: (force?: boolean) => void;
      flushNow: () => void;
    };
    privateApi.flushTelemetry(true);
    privateApi.flushNow();

    // Read the rows back through a separate connection. The connection is
    // closed in `finally` so that a throwing query or a failed assertion cannot
    // leave the handle open while cleanupDbPath() runs in the outer `finally`.
    const db = new Database(dbPath);
    try {
      const rows = db
        .prepare(
          `SELECT line_index, segment_start_ms, segment_end_ms, text
FROM imm_subtitle_lines
ORDER BY line_id ASC`,
        )
        .all() as Array<{
        line_index: number;
        segment_start_ms: number | null;
        segment_end_ms: number | null;
        text: string;
      }>;

      // Only the valid cue is stored, with its timing converted to milliseconds.
      assert.deepEqual(rows, [
        {
          line_index: 1,
          segment_start_ms: 953991,
          segment_end_ms: 956560,
          text: 'same subtitle',
        },
      ]);
    } finally {
      db.close();
    }
  } finally {
    tracker?.destroy();
    cleanupDbPath(dbPath);
  }
});
test('subtitle-line event payload omits duplicated subtitle text', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
@@ -1470,7 +1519,7 @@ test('handleMediaChange links parsed anime metadata on the active video row', as
assert.equal(row?.parsed_season, 2);
assert.equal(row?.parsed_episode, 5);
assert.ok(row?.parser_source === 'guessit' || row?.parser_source === 'fallback');
assert.equal(row?.anime_title, 'Little Witch Academia');
assert.equal(row?.anime_title, 'Little Witch Academia Season 2');
assert.equal(row?.anilist_id, null);
} finally {
tracker?.destroy();
@@ -1535,13 +1584,13 @@ test('handleMediaChange reuses the same provisional anime row across matching fi
{
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
parsedEpisode: 5,
animeTitle: 'Little Witch Academia',
animeTitle: 'Little Witch Academia Season 2',
anilistId: null,
},
{
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
parsedEpisode: 6,
animeTitle: 'Little Witch Academia',
animeTitle: 'Little Witch Academia Season 2',
anilistId: null,
},
],
@@ -1552,6 +1601,81 @@ test('handleMediaChange reuses the same provisional anime row across matching fi
}
});
/**
 * Two files that share a parsed title but sit in different season directories
 * are expected to link to two separate anime rows, each carrying its own
 * season suffix in both the canonical title and the normalized title key.
 */
test('handleMediaChange splits matching parsed titles across distinct seasons', async () => {
  type SeasonVideoRow = {
    source_path: string | null;
    anime_id: number | null;
    parsed_season: number | null;
    anime_title: string | null;
    normalized_title_key: string | null;
  };

  const seasonOneFile = '/tmp/KonoSuba/Season 1/KonoSuba S01E05.mkv';
  const seasonTwoFile = '/tmp/KonoSuba/Season 2/KonoSuba S02E05.mkv';
  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;

  try {
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });

    // Play the same episode number from each season, letting the metadata
    // lookup settle before switching files.
    tracker.handleMediaChange(seasonOneFile, 'Episode 5');
    await waitForPendingAnimeMetadata(tracker);
    tracker.handleMediaChange(seasonTwoFile, 'Episode 5');
    await waitForPendingAnimeMetadata(tracker);

    const { db } = tracker as unknown as {
      db: DatabaseSync;
    };
    const lookup = db.prepare(
      `
SELECT
v.source_path,
v.anime_id,
v.parsed_season,
a.canonical_title AS anime_title,
a.normalized_title_key
FROM imm_videos v
LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.source_path IN (?, ?)
ORDER BY v.source_path
`,
    );
    const rows = lookup.all(seasonOneFile, seasonTwoFile) as SeasonVideoRow[];

    // Both videos are linked, and to different anime rows.
    assert.equal(rows.length, 2);
    const [firstRow, secondRow] = rows;
    assert.ok(firstRow?.anime_id);
    assert.ok(secondRow?.anime_id);
    assert.notEqual(firstRow?.anime_id, secondRow?.anime_id);

    const summary = rows.map(
      ({ source_path, parsed_season, anime_title, normalized_title_key }) => ({
        sourcePath: source_path,
        parsedSeason: parsed_season,
        animeTitle: anime_title,
        normalizedTitleKey: normalized_title_key,
      }),
    );
    assert.deepEqual(summary, [
      {
        sourcePath: seasonOneFile,
        parsedSeason: 1,
        animeTitle: 'KonoSuba Season 1',
        normalizedTitleKey: 'konosuba season 1',
      },
      {
        sourcePath: seasonTwoFile,
        parsedSeason: 2,
        animeTitle: 'KonoSuba Season 2',
        normalizedTitleKey: 'konosuba season 2',
      },
    ]);
  } finally {
    tracker?.destroy();
    cleanupDbPath(dbPath);
  }
});
test('Jellyfin playback metadata links stream videos to existing series title', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
@@ -1595,8 +1719,41 @@ test('Jellyfin playback metadata links stream videos to existing series title',
'http://jellyfin.local/Videos/item-2/stream?static=true&api_key=token&MediaSourceId=ms-1&StartTimeTicks=12000000',
'The Beginning After the End S02E02 The Princess Begins Adventuring',
);
tracker.handleMediaChange(null, null);
tracker.recordJellyfinPlaybackMetadata({
mediaPath:
'http://jellyfin.local/Videos/item-3/stream?static=true&api_key=token&MediaSourceId=ms-2',
displayTitle: 'The Beginning After the End S02E03 Dragon Has Left the Building',
itemTitle: 'Dragon Has Left the Building',
seriesTitle: 'The Beginning After the End',
seasonNumber: 2,
episodeNumber: 3,
itemId: 'item-3',
});
tracker.handleMediaChange(
'http://jellyfin.local/Videos/item-3/stream?static=true&api_key=token&MediaSourceId=ms-2&AudioStreamIndex=3&SubtitleStreamIndex=4',
'The Beginning After the End S02E03 Dragon Has Left the Building',
);
await waitForPendingAnimeMetadata(tracker);
const privateApi = tracker as unknown as { db: DatabaseSync };
const videoRows = privateApi.db
.prepare(
`
SELECT source_url, canonical_title AS video_title
FROM imm_videos
ORDER BY video_id
`,
)
.all() as Array<{ source_url: string | null; video_title: string }>;
assert.equal(videoRows.length, 3);
assert.equal(
videoRows.some(
(row) => row.source_url?.includes('api_key=') || row.video_title.includes('api_key='),
),
false,
);
const rows = privateApi.db
.prepare(
`
@@ -1623,7 +1780,7 @@ test('Jellyfin playback metadata links stream videos to existing series title',
anime_title: string;
}>;
assert.equal(rows.length, 2);
assert.equal(rows.length, 3);
assert.equal(new Set(rows.map((row) => row.anime_title)).size, 1);
const jellyfinRow = rows.find(
(row) => row.source_url === 'jellyfin://jellyfin.local/item/item-2',
@@ -1637,7 +1794,250 @@ test('Jellyfin playback metadata links stream videos to existing series title',
assert.equal(jellyfinRow.parsed_season, 2);
assert.equal(jellyfinRow.parsed_episode, 2);
assert.equal(jellyfinRow.parser_source, 'jellyfin');
assert.equal(jellyfinRow.anime_title, 'The Beginning After the End');
assert.equal(jellyfinRow.anime_title, 'The Beginning After the End Season 2');
const streamVariantRow = rows.find(
(row) => row.source_url === 'jellyfin://jellyfin.local/item/item-3',
);
assert.ok(streamVariantRow);
assert.equal(
streamVariantRow.video_title,
'The Beginning After the End S02E03 Dragon Has Left the Building',
);
assert.equal(streamVariantRow.source_url?.includes('api_key='), false);
assert.equal(streamVariantRow.video_title.includes('api_key='), false);
assert.equal(streamVariantRow.video_title.includes('stream?'), false);
assert.equal(streamVariantRow.parsed_title, 'The Beginning After the End');
assert.equal(streamVariantRow.parsed_season, 2);
assert.equal(streamVariantRow.parsed_episode, 3);
assert.equal(streamVariantRow.parser_source, 'jellyfin');
assert.equal(streamVariantRow.anime_title, 'The Beginning After the End Season 2');
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
// Scenario: two Jellyfin stream URLs (both carrying an api_key) are played, metadata
// is recorded for one of them, and the tracker is then re-created on the same
// database. After the restart every stored video row is expected to use the
// jellyfin:// item URL and no column may still contain the API key.
test('startup repairs existing Jellyfin stream video links to metadata rows', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
// Playback 1 (item-9): the media title is just the raw stream query string,
// including the secret token.
const streamUrl =
'http://jellyfin.local/Videos/item-9/stream?static=true&api_key=secret-token&MediaSourceId=ms-1&AudioStreamIndex=3&SubtitleStreamIndex=4';
tracker.handleMediaChange(
streamUrl,
'stream?static=true&api_key=secret-token&MediaSourceId=ms-1&AudioStreamIndex=3&SubtitleStreamIndex=4',
);
tracker.handleMediaChange(null, null);
// Playback 2 (item-10): a stream URL that already comes with a readable title.
const titledStreamUrl =
'http://jellyfin.local/Videos/item-10/stream?static=true&api_key=secret-token&MediaSourceId=ms-2';
tracker.handleMediaChange(titledStreamUrl, 'KonoSuba S01E06 Decision! Class Rep');
tracker.handleMediaChange(null, null);
// Metadata for item-9 arrives only after its stream row was already written.
tracker.recordJellyfinPlaybackMetadata({
mediaPath: 'http://jellyfin.local/Videos/item-9/stream?static=true&api_key=secret-token',
displayTitle: 'Frieren S01E09 Aura the Guillotine',
itemTitle: 'Aura the Guillotine',
seriesTitle: 'Frieren',
seasonNumber: 1,
episodeNumber: 9,
itemId: 'item-9',
});
// Simulate an application restart on the same database file.
tracker.destroy();
tracker = null;
tracker = new Ctor({ dbPath });
const privateApi = tracker as unknown as { db: DatabaseSync };
const videoRows = privateApi.db
.prepare(
`
SELECT
video_id,
video_key,
source_url,
canonical_title,
parser_source,
parsed_basename,
parsed_title,
parse_metadata_json
FROM imm_videos
ORDER BY video_id
`,
)
.all() as Array<{
video_id: number;
video_key: string;
source_url: string | null;
canonical_title: string;
parser_source: string | null;
parsed_basename: string | null;
parsed_title: string | null;
parse_metadata_json: string | null;
}>;
assert.equal(videoRows.length, 3);
// Two rows are expected to point at item-9 after the restart, both carrying the
// Jellyfin title and parser source.
const frierenRows = videoRows.filter(
(row) => row.source_url === 'jellyfin://jellyfin.local/item/item-9',
);
assert.equal(frierenRows.length, 2);
for (const row of frierenRows) {
assert.equal(row.source_url, 'jellyfin://jellyfin.local/item/item-9');
assert.equal(row.canonical_title, 'Frieren S01E09 Aura the Guillotine');
assert.equal(row.parser_source, 'jellyfin');
assert.equal(row.video_key.includes('api_key='), false);
assert.equal(row.source_url?.includes('api_key='), false);
assert.equal(row.canonical_title.includes('api_key='), false);
}
// The item-10 row keeps its readable title but must also lose the API key.
const titledRow = videoRows.find(
(row) => row.source_url === 'jellyfin://jellyfin.local/item/item-10',
);
assert.ok(titledRow);
assert.equal(titledRow.canonical_title, 'KonoSuba S01E06 Decision! Class Rep');
assert.equal(titledRow.video_key.includes('api_key='), false);
assert.equal(titledRow.source_url?.includes('api_key='), false);
// Blanket check over every selected video column, including the parse metadata JSON.
assert.equal(JSON.stringify(videoRows).includes('api_key='), false);
assert.equal(JSON.stringify(videoRows).includes('secret-token'), false);
const animeRows = privateApi.db
.prepare(
`
SELECT canonical_title, normalized_title_key
FROM imm_anime
ORDER BY anime_id
`,
)
.all() as Array<{ canonical_title: string; normalized_title_key: string }>;
// 'api key' (with a space) covers the form the key name takes inside a normalized title key.
assert.equal(JSON.stringify(animeRows).includes('api_key='), false);
assert.equal(JSON.stringify(animeRows).includes('api key'), false);
assert.equal(JSON.stringify(animeRows).includes('secret-token'), false);
// Sessions, in creation order, must resolve to videos with the cleaned titles and URLs.
const sessionRows = privateApi.db
.prepare(
`
SELECT v.source_url, v.canonical_title
FROM imm_sessions s
JOIN imm_videos v ON v.video_id = s.video_id
ORDER BY s.session_id
`,
)
.all() as Array<{ source_url: string | null; canonical_title: string }>;
assert.deepEqual(
sessionRows.map((row) => row.canonical_title),
['Frieren S01E09 Aura the Guillotine', 'KonoSuba S01E06 Decision! Class Rep'],
);
assert.equal(
sessionRows.some((row) => row.source_url?.includes('api_key=')),
false,
);
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
test('Jellyfin link repair removes merged leaked anime rows and sanitizes orphan video titles', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
const privateApi = tracker as unknown as { db: DatabaseSync };
const db = privateApi.db;
const timestamp = toDbTimestamp(trackerNowMs());
const leakedTitle =
'http://jellyfin.local/Videos/item-20/stream?static=true&api_key=secret-token&MediaSourceId=ms-1';
const orphanLeakedTitle =
'http://jellyfin.local/Videos/item-21/stream?static=true&api_key=secret-token&MediaSourceId=ms-2&AudioStreamIndex=3';
const existingAnime = db
.prepare(
`
INSERT INTO imm_anime (
normalized_title_key,
canonical_title,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES ('frieren', 'Frieren', ?, ?)
RETURNING anime_id
`,
)
.get(timestamp, timestamp) as { anime_id: number };
const leakedAnime = db
.prepare(
`
INSERT INTO imm_anime (
normalized_title_key,
canonical_title,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES ('http jellyfin local videos item 20 stream static true api key secret token mediasourceid ms 1', ?, ?, ?)
RETURNING anime_id
`,
)
.get(leakedTitle, timestamp, timestamp) as { anime_id: number };
db.prepare(
`
INSERT INTO imm_videos (
video_key,
anime_id,
canonical_title,
source_type,
source_url,
duration_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, ?, 'Frieren', 2, ?, 0, ?, ?)
`,
).run(`remote:${leakedTitle}`, leakedAnime.anime_id, leakedTitle, timestamp, timestamp);
db.prepare(
`
INSERT INTO imm_videos (
video_key,
anime_id,
canonical_title,
source_type,
source_url,
duration_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, NULL, ?, 2, ?, 0, ?, ?)
`,
).run(
`remote:${orphanLeakedTitle}`,
orphanLeakedTitle,
orphanLeakedTitle,
timestamp,
timestamp,
);
const summary = repairJellyfinStreamVideoLinks(db);
assert.equal(summary.repaired, 3);
const leakedAnimeRow = db
.prepare('SELECT anime_id FROM imm_anime WHERE anime_id = ?')
.get(leakedAnime.anime_id);
assert.equal(leakedAnimeRow, undefined);
const reparentedCount = db
.prepare('SELECT COUNT(*) AS count FROM imm_videos WHERE anime_id = ?')
.get(existingAnime.anime_id) as { count: number };
assert.equal(reparentedCount.count, 1);
const orphanVideo = db
.prepare(
`
SELECT canonical_title
FROM imm_videos
WHERE source_url = 'jellyfin://jellyfin.local/item/item-21'
`,
)
.get() as { canonical_title: string };
assert.equal(orphanVideo.canonical_title, 'Jellyfin Video');
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
+62 -7
View File
@@ -55,6 +55,7 @@ import {
getStatsExcludedWords,
getVocabularyStats,
replaceStatsExcludedWords,
searchSubtitleSentences,
getWordAnimeAppearances,
getWordDetail,
getWordOccurrences,
@@ -89,6 +90,7 @@ import {
markVideoWatched,
upsertCoverArt,
} from './immersion-tracker/query-maintenance';
import { repairJellyfinStreamVideoLinks } from './immersion-tracker/jellyfin-link-repair';
import {
buildVideoKey,
deriveCanonicalTitle,
@@ -148,6 +150,8 @@ import {
type MediaLibraryRow,
type NewAnimePerDayRow,
type QueuedWrite,
type SentenceSearchOptions,
type SentenceSearchResultRow,
type SessionEventRow,
type SessionState,
type SessionSummaryQueryRow,
@@ -328,6 +332,34 @@ function buildJellyfinStatsMediaPath(mediaPath: string, itemId: string): string
}
}
// Query parameters that buildJellyfinMediaPathAliasCandidates strips from a media
// URL when deriving its alias form. The names are matched case-insensitively.
const JELLYFIN_MEDIA_ALIAS_QUERY_KEYS = [
'api_key',
'StartTimeTicks',
'AudioStreamIndex',
'SubtitleStreamIndex',
];
/**
 * Removes every query parameter whose name matches one of `names`, ignoring
 * letter case. The given `searchParams` object is mutated in place.
 *
 * @param searchParams Parameters to prune.
 * @param names Parameter names to remove; compared case-insensitively.
 */
function deleteSearchParamsCaseInsensitive(searchParams: URLSearchParams, names: string[]): void {
  const unwanted = new Set<string>();
  for (const name of names) {
    unwanted.add(name.toLowerCase());
  }

  // Take a snapshot of the keys before deleting so the removals do not
  // interfere with the iteration.
  const presentKeys = Array.from(searchParams.keys());
  presentKeys
    .filter((key) => unwanted.has(key.toLowerCase()))
    .forEach((key) => {
      searchParams.delete(key);
    });
}
/**
 * Lists the keys under which a media path may be stored in, or looked up from,
 * an alias map: the path exactly as given, followed (when it parses as a URL and
 * the result differs) by the same URL with the parameters named in
 * JELLYFIN_MEDIA_ALIAS_QUERY_KEYS removed.
 *
 * @param mediaPath Raw media path or URL.
 * @returns Distinct candidates; the raw path always comes first.
 */
function buildJellyfinMediaPathAliasCandidates(mediaPath: string): string[] {
  const aliases: string[] = [mediaPath];

  let strippedUrl: string | null = null;
  try {
    const url = new URL(mediaPath);
    deleteSearchParamsCaseInsensitive(url.searchParams, JELLYFIN_MEDIA_ALIAS_QUERY_KEYS);
    strippedUrl = url.toString();
  } catch {
    // Not a parseable URL: the raw path is the only candidate.
  }

  if (strippedUrl !== null && strippedUrl !== mediaPath) {
    aliases.push(strippedUrl);
  }
  return aliases;
}
export class ImmersionTrackerService {
private readonly logger = createLogger('main:immersion-tracker');
private readonly db: DatabaseSync;
@@ -437,6 +469,12 @@ export class ImmersionTrackerService {
`Recovered stale active sessions on startup: reconciledSessions=${reconciledSessions}`,
);
}
const jellyfinRepair = repairJellyfinStreamVideoLinks(this.db);
if (jellyfinRepair.repaired > 0) {
this.logger.info(
`Repaired Jellyfin stats links on startup: scanned=${jellyfinRepair.scanned} repaired=${jellyfinRepair.repaired}`,
);
}
if (shouldBackfillLifetimeSummaries(this.db)) {
const result = rebuildLifetimeSummaryTables(this.db);
if (result.appliedSessions > 0) {
@@ -568,6 +606,14 @@ export class ImmersionTrackerService {
return getKanjiOccurrences(this.db, kanji, limit, offset);
}
/**
 * Searches subtitle sentences by delegating to the module-level
 * `searchSubtitleSentences` query helper with this tracker's database handle.
 *
 * @param query Search text, forwarded unchanged.
 * @param limit Result limit forwarded to the helper; defaults to 50.
 * @param options Optional search options, forwarded unchanged.
 * @returns The rows returned by the query helper.
 */
async searchSubtitleSentences(
query: string,
limit = 50,
options?: SentenceSearchOptions,
): Promise<SentenceSearchResultRow[]> {
return searchSubtitleSentences(this.db, query, limit, options);
}
async getSessionEvents(
sessionId: number,
limit = 500,
@@ -1149,7 +1195,9 @@ export class ImmersionTrackerService {
return;
}
const normalizedPath = buildJellyfinStatsMediaPath(rawPath, metadata.itemId);
this.mediaPathAliases.set(rawPath, normalizedPath);
for (const alias of buildJellyfinMediaPathAliasCandidates(rawPath)) {
this.mediaPathAliases.set(alias, normalizedPath);
}
const displayTitle =
normalizeText(metadata.displayTitle) ||
@@ -1158,6 +1206,8 @@ export class ImmersionTrackerService {
const itemTitle = normalizeText(metadata.itemTitle) || displayTitle;
const seriesTitle = normalizeText(metadata.seriesTitle);
const libraryTitle = seriesTitle || itemTitle;
const seasonNumber = normalizeMetadataInt(metadata.seasonNumber);
const episodeNumber = normalizeMetadataInt(metadata.episodeNumber);
if (!libraryTitle) {
return;
}
@@ -1181,12 +1231,13 @@ export class ImmersionTrackerService {
itemTitle,
seriesTitle: seriesTitle || null,
displayTitle,
seasonNumber: normalizeMetadataInt(metadata.seasonNumber),
episodeNumber: normalizeMetadataInt(metadata.episodeNumber),
seasonNumber,
episodeNumber,
});
const animeId = getOrCreateAnimeRecord(this.db, {
parsedTitle: libraryTitle,
canonicalTitle: libraryTitle,
seasonScope: seasonNumber,
anilistId: null,
titleRomaji: null,
titleEnglish: null,
@@ -1197,8 +1248,8 @@ export class ImmersionTrackerService {
animeId,
parsedBasename: null,
parsedTitle: libraryTitle,
parsedSeason: normalizeMetadataInt(metadata.seasonNumber),
parsedEpisode: normalizeMetadataInt(metadata.episodeNumber),
parsedSeason: seasonNumber,
parsedEpisode: episodeNumber,
parserSource: 'jellyfin',
parserConfidence: 1,
parseMetadataJson: metadataJson,
@@ -1221,7 +1272,10 @@ export class ImmersionTrackerService {
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
const rawPath = normalizeMediaPath(mediaPath);
const normalizedPath = this.mediaPathAliases.get(rawPath) ?? rawPath;
const normalizedPath =
buildJellyfinMediaPathAliasCandidates(rawPath)
.map((alias) => this.mediaPathAliases.get(alias))
.find((alias): alias is string => Boolean(alias)) ?? rawPath;
const normalizedTitle = normalizeText(mediaTitle);
this.logger.info(
`handleMediaChange called with path=${normalizedPath || '<empty>'} title=${normalizedTitle || '<empty>'}`,
@@ -1294,7 +1348,7 @@ export class ImmersionTrackerService {
const cleaned = normalizeText(text);
if (!cleaned) return;
if (!endSec || endSec <= 0) {
if (!Number.isFinite(startSec) || !Number.isFinite(endSec) || endSec <= startSec) {
return;
}
@@ -1826,6 +1880,7 @@ export class ImmersionTrackerService {
const animeId = getOrCreateAnimeRecord(this.db, {
parsedTitle: parsed.parsedTitle,
canonicalTitle: parsed.parsedTitle,
seasonScope: parsed.parsedSeason,
anilistId: null,
titleRomaji: null,
titleEnglish: null,
@@ -0,0 +1,18 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
import test from 'node:test';
/**
 * Source-level guard: the body of `getRollupGroupsForSessions` in maintenance.ts
 * must derive rollup keys with SQLite's 'unixepoch', 'localtime' modifiers and
 * must not contain a UNION or the 86400000 ms-per-day constant.
 *
 * The function body is located by slicing the file between two export markers.
 */
test('getRollupGroupsForSessions uses only localtime rollup keys', () => {
  const source = fs.readFileSync(
    path.join(process.cwd(), 'src/core/services/immersion-tracker/maintenance.ts'),
    'utf8',
  );

  const start = source.indexOf('export function getRollupGroupsForSessions');
  const end = source.indexOf('export function refreshRollupsForGroupsInTransaction');

  // indexOf returns -1 for a missing marker. Without these checks, slice() would
  // quietly select the wrong text (an empty string, or everything up to the end
  // of the file), so the pattern assertions below would inspect unrelated code.
  assert.notEqual(start, -1, 'getRollupGroupsForSessions was not found in maintenance.ts');
  assert.ok(
    end > start,
    'refreshRollupsForGroupsInTransaction must follow getRollupGroupsForSessions in maintenance.ts',
  );

  const functionSource = source.slice(start, end);
  assert.match(functionSource, /'unixepoch', 'localtime'/);
  assert.doesNotMatch(functionSource, /UNION/);
  assert.doesNotMatch(functionSource, /86400000/);
});
@@ -356,6 +356,81 @@ test('split session and lexical helpers return distinct-headword, detail, appear
}
});
/**
 * Seeds one session with a handful of words and checks which of them
 * getSimilarWords reports for 荒い: the word sharing its reading (洗い) and the
 * word sharing its kanji (荒波) are expected, in that order, while the two
 * remaining words (良い, お構いなく) are expected to be left out.
 */
test('similar words use same reading and shared kanji without kana suffix noise', () => {
  const { db, dbPath, stmts } = createDb();

  try {
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Similar Words Anime',
      canonicalTitle: 'Similar Words Anime',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: null,
    });
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/similar-words.mkv', {
      canonicalTitle: 'Similar Words Episode',
      sourcePath: '/tmp/similar-words.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const { sessionId } = startSessionRecord(db, videoId, 1_000_000);

    // Every fixture word uses the same string for `headword` and `word`.
    const addOccurrence = (lineIndex: number, text: string, headword: string, reading: string) =>
      insertWordOccurrence(db, stmts, {
        sessionId,
        videoId,
        animeId,
        lineIndex,
        text,
        word: { headword, word: headword, reading },
      });

    const araiId = addOccurrence(1, '荒い息', '荒い', 'あらい');
    addOccurrence(2, '洗い物', '洗い', 'あらい');
    addOccurrence(3, '荒波', '荒波', 'あらなみ');

    // 良い is recorded five times, on lines 4 through 8.
    [4, 5, 6, 7, 8].forEach((lineIndex) => {
      addOccurrence(lineIndex, '良い', '良い', 'よい');
    });

    addOccurrence(9, 'お構いなく', 'お構いなく', 'おかまいなく');

    const similarHeadwords = getSimilarWords(db, araiId, 10).map((row) => row.headword);
    assert.deepEqual(similarHeadwords, ['洗い', '荒波']);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
test('split library helpers return anime/media session and analytics rows', () => {
const { db, dbPath, stmts } = createDb();
@@ -605,6 +680,79 @@ test('split maintenance helpers update anime metadata and watched state', () =>
}
});
/**
 * Seeds daily and monthly rollups for two videos, deletes the session of one
 * of them, and expects only the other video's rollup rows to remain, with
 * their card totals unchanged.
 */
test('deleteSessions refreshes only rollups affected by deleted sessions', () => {
  type DailyRow = { rollup_day: number; video_id: number; total_cards: number };
  type MonthlyRow = { rollup_month: number; video_id: number; total_cards: number };

  const { db, dbPath } = createDb();

  try {
    const keptVideo = getOrCreateVideoRecord(db, 'local:/tmp/rollup-keep.mkv', {
      canonicalTitle: 'Rollup Keep',
      sourcePath: '/tmp/rollup-keep.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const droppedVideo = getOrCreateVideoRecord(db, 'local:/tmp/rollup-drop.mkv', {
      canonicalTitle: 'Rollup Drop',
      sourcePath: '/tmp/rollup-drop.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    // The dropped session starts exactly 86 400 000 ms after the kept one.
    const keptStart = 1_700_000_000_000;
    const droppedStart = 1_700_086_400_000;
    const { sessionId: keptSession } = startSessionRecord(db, keptVideo, keptStart);
    const { sessionId: droppedSession } = startSessionRecord(db, droppedVideo, droppedStart);

    finalizeSessionMetrics(db, keptSession, keptStart, {
      activeWatchedMs: 30_000,
      cardsMined: 1,
    });
    finalizeSessionMetrics(db, droppedSession, droppedStart, {
      activeWatchedMs: 60_000,
      cardsMined: 2,
    });

    const keptDay = getLocalEpochDay(db, keptStart);
    const droppedDay = getLocalEpochDay(db, droppedStart);
    // Both sessions use the same month key.
    const keptMonth = 202311;
    const droppedMonth = 202311;

    const dailyInsert = db.prepare(`
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
    const monthlyInsert = db.prepare(`
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

    // Pre-existing rollup rows for both videos, daily first and then monthly.
    dailyInsert.run(keptDay, keptVideo, 1, 0.5, 3, 6, 1, keptStart, keptStart);
    dailyInsert.run(droppedDay, droppedVideo, 1, 1, 3, 6, 2, droppedStart, droppedStart);
    monthlyInsert.run(keptMonth, keptVideo, 1, 0.5, 3, 6, 1, keptStart, keptStart);
    monthlyInsert.run(droppedMonth, droppedVideo, 1, 1, 3, 6, 2, droppedStart, droppedStart);

    deleteSessions(db, [droppedSession]);

    const remainingDaily = db
      .prepare('SELECT rollup_day, video_id, total_cards FROM imm_daily_rollups ORDER BY video_id')
      .all() as DailyRow[];
    const remainingMonthly = db
      .prepare(
        'SELECT rollup_month, video_id, total_cards FROM imm_monthly_rollups ORDER BY video_id',
      )
      .all() as MonthlyRow[];

    const expectedDaily = [{ rollup_day: keptDay, video_id: keptVideo, total_cards: 1 }];
    const expectedMonthly = [{ rollup_month: keptMonth, video_id: keptVideo, total_cards: 1 }];
    assert.deepEqual(remainingDaily, expectedDaily);
    assert.deepEqual(remainingMonthly, expectedMonthly);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
test('split maintenance helpers delete multiple sessions and whole videos with dependent rows', () => {
const { db, dbPath, stmts } = createDb();
@@ -35,9 +35,11 @@ import {
getSessionTimeline,
getSessionWordsByLine,
getWordOccurrences,
searchSubtitleSentences,
upsertCoverArt,
} from '../query.js';
import {
getLocalEpochDay,
getShiftedLocalDaySec,
getStartOfLocalDayTimestamp,
toDbTimestamp,
@@ -759,6 +761,10 @@ test('getTrendsDashboard returns chart-ready aggregated series', () => {
assert.equal(dashboard.progress.watchTime[1]?.value, 75);
assert.equal(dashboard.progress.lookups[1]?.value, 18);
assert.equal(dashboard.ratios.lookupsPerHundred[0]?.value, +((8 / 120) * 100).toFixed(1));
assert.equal(dashboard.ratios.cardsPerHour[0]?.value, +(2 / (30 / 60)).toFixed(1));
assert.equal(dashboard.ratios.cardsPerHour[1]?.value, +(3 / (45 / 60)).toFixed(1));
assert.equal(dashboard.ratios.readingSpeed[0]?.value, +(120 / 30).toFixed(1));
assert.equal(dashboard.ratios.readingSpeed[1]?.value, +(140 / 45).toFixed(1));
assert.equal(dashboard.librarySummary[0]?.title, 'Trend Dashboard Anime');
assert.equal(dashboard.animeCumulative.watchTime[1]?.value, 75);
assert.equal(
@@ -771,6 +777,84 @@ test('getTrendsDashboard returns chart-ready aggregated series', () => {
}
});
// Builds a legacy-style record whose video and anime titles are a raw Jellyfin stream
// query string (API key included) and checks that every title surfaced by the trends
// dashboard is the placeholder 'Jellyfin Video' instead.
test('getTrendsDashboard redacts legacy Jellyfin stream titles', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// Title in the space-separated form ('api key secret-token' rather than 'api_key=...').
const rawStreamTitle =
'stream?static true&api key secret-token&MediaSourceId ms-1&AudioStreamIndex 3&SubtitleStreamIndex 4';
const videoId = getOrCreateVideoRecord(
db,
'remote:http://jellyfin.local/Videos/item-1/stream?static=true&api_key=secret-token&MediaSourceId=ms-1&AudioStreamIndex=3&SubtitleStreamIndex=4',
{
canonicalTitle: rawStreamTitle,
sourcePath: null,
sourceUrl:
'http://jellyfin.local/Videos/item-1/stream?static=true&api_key=secret-token&MediaSourceId=ms-1&AudioStreamIndex=3&SubtitleStreamIndex=4',
sourceType: SOURCE_TYPE_REMOTE,
},
);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: rawStreamTitle,
canonicalTitle: rawStreamTitle,
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename:
'stream?static=true&api_key=secret-token&MediaSourceId=ms-1&AudioStreamIndex=3&SubtitleStreamIndex=4',
parsedTitle: rawStreamTitle,
parsedSeason: null,
parsedEpisode: null,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: null,
});
// One finished 30-minute session with 120 tokens seen.
const startedAtMs = 1_700_000_000_000;
const session = startSessionRecord(db, videoId, startedAtMs);
db.prepare(
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
tokens_seen = ?
WHERE session_id = ?
`,
).run(`${startedAtMs + 30 * 60_000}`, 30 * 60_000, 30 * 60_000, 120, session.sessionId);
// NOTE(review): rollup_day is derived from the UTC epoch day here, whereas other tests
// use getLocalEpochDay(db, ms). The assertions below only inspect titles, so this is
// presumably harmless - confirm if day-bucket assertions are ever added to this test.
db.prepare(
`
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards
) VALUES (?, ?, ?, ?, ?, ?, ?)
`,
).run(Math.floor(startedAtMs / 86_400_000), videoId, 1, 30, 10, 120, 0);
const dashboard = getTrendsDashboard(db, 'all', 'day');
// Collect every title exposed by the two dashboard sections under test.
const titles = [
...dashboard.animeCumulative.watchTime.map((point) => point.animeTitle),
...dashboard.librarySummary.map((row) => row.title),
];
assert.deepEqual([...new Set(titles)], ['Jellyfin Video']);
// Neither the URL form nor the space-separated form of the key may leak.
assert.equal(titles.some((title) => title.includes('api_key=')), false);
assert.equal(titles.some((title) => title.includes('api key')), false);
assert.equal(titles.some((title) => title.includes('secret-token')), false);
assert.equal(titles.some((title) => title.includes('stream?')), false);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('getTrendsDashboard keeps local-midnight session buckets separate', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -3686,6 +3770,187 @@ test('getWordOccurrences maps a normalized word back to anime, video, and subtit
}
});
// Inserts two subtitle lines for one linked video and checks that a two-term query
// returns only the matching line together with its anime, video and session context.
// Also covers a query that only matches the secondary text and unusual limit values.
test('searchSubtitleSentences searches known subtitle lines and returns media context', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Dungeon Meshi',
canonicalTitle: 'Dungeon Meshi',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"test"}',
});
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/dungeon-meshi-01.mkv', {
canonicalTitle: 'Episode 1',
sourcePath: '/tmp/Dungeon Meshi 01.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'Dungeon Meshi 01.mkv',
parsedTitle: 'Dungeon Meshi',
parsedSeason: 1,
parsedEpisode: 1,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":1}',
});
const { sessionId } = startSessionRecord(db, videoId, 3_000_000);
// Line 7: contains both query terms used below (魔物 and 食べる).
db.prepare(
`INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms,
text, secondary_text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run(
sessionId,
null,
videoId,
animeId,
7,
4_000,
5_500,
'魔物を食べるなんて信じられない',
'I cannot believe we are eating monsters',
3_000,
3_000,
);
// Line 8: an unrelated line that must not appear in the results.
db.prepare(
`INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms,
text, secondary_text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run(
sessionId,
null,
videoId,
animeId,
8,
6_000,
7_000,
'これは別の行です',
'Another line',
2_000,
2_000,
);
// The two terms are separated by a space in the query but are not adjacent in the line.
const rows = searchSubtitleSentences(db, '魔物 食べる', 10);
assert.deepEqual(rows, [
{
animeId,
animeTitle: 'Dungeon Meshi',
sourcePath: '/tmp/Dungeon Meshi 01.mkv',
secondaryText: 'I cannot believe we are eating monsters',
videoId,
videoTitle: 'Episode 1',
sessionId,
lineIndex: 7,
segmentStartMs: 4_000,
segmentEndMs: 5_500,
text: '魔物を食べるなんて信じられない',
},
]);
// 'monsters' occurs only in the secondary text of line 7; no result is expected.
assert.deepEqual(searchSubtitleSentences(db, 'monsters', 10), []);
// Out-of-range limits: an infinite limit must not throw, and a negative limit
// still yields the single matching row.
assert.doesNotThrow(() => searchSubtitleSentences(db, '魔物', Number.POSITIVE_INFINITY));
assert.equal(searchSubtitleSentences(db, '魔物', -1).length, 1);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Seeds a subtitle line whose text contains the inflected form 知らねえ and a word-occurrence
// row linking that line to the headword 知る, then checks that:
//   - a plain text search for 知らない finds nothing,
//   - the same search finds the line once the caller supplies 知る as a headword candidate,
//   - a literal search for the surface form 知らねえ still matches the text directly.
test('searchSubtitleSentences searches subtitle lines by resolved headword candidates', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"test"}',
});
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-05.mkv', {
canonicalTitle: 'Episode 5',
sourcePath: '/tmp/Little Witch Academia S01E05.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'Little Witch Academia S01E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 1,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":5}',
});
const { sessionId } = startSessionRecord(db, videoId, 4_000_000);
// Keep the insert result so the new line's rowid can be linked to the word below.
const lineResult = db
.prepare(
`INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms,
text, secondary_text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run(
sessionId,
null,
videoId,
animeId,
20,
247_000,
250_000,
'ああ、名無しが何だか知らねえが',
null,
4_000,
4_000,
);
// Word row: surface form 知らねえ stored under headword 知る.
const wordResult = db
.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run('知る', '知らねえ', 'しらねえ', 'verb', '動詞', '自立', '', 4_000, 4_000, 1);
db.prepare(
`INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
VALUES (?, ?, ?)`,
).run(Number(lineResult.lastInsertRowid), Number(wordResult.lastInsertRowid), 1);
// Without headword candidates the query text does not occur in the line.
assert.deepEqual(searchSubtitleSentences(db, '知らない', 10), []);
// With the headword supplied, the occurrence table links the query to the line.
const rows = searchSubtitleSentences(db, '知らない', 10, {
headwordTerms: [{ term: '知らない', headwords: ['知る'] }],
});
assert.deepEqual(
rows.map((row) => row.text),
['ああ、名無しが何だか知らねえが'],
);
// The literal surface form is a substring of the line text, so no headwords are needed.
assert.deepEqual(
searchSubtitleSentences(db, '知らねえ', 10).map((row) => row.text),
['ああ、名無しが何だか知らねえが'],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('getKanjiOccurrences maps a kanji back to anime, video, and subtitle line context', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -4100,8 +4365,14 @@ test('deleteSession removes zero-session media from library and trends', () => {
const startedAtMs = 9_000_000;
const endedAtMs = startedAtMs + 120_000;
const rollupDay = Math.floor(startedAtMs / 86_400_000);
const rollupMonth = 197001;
const rollupDay = getLocalEpochDay(db, startedAtMs);
const rollupMonth = (
db
.prepare(
"SELECT CAST(strftime('%Y%m', CAST(? AS REAL) / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollupMonth",
)
.get(startedAtMs) as { rollupMonth: number }
).rollupMonth;
const { sessionId } = startSessionRecord(db, videoId, startedAtMs);
db.prepare(
@@ -0,0 +1,413 @@
import type { DatabaseSync } from './sqlite';
import { normalizeText } from './reducer';
import { normalizeAnimeIdentityKey } from './storage';
import { nowMs } from './time';
import { toDbTimestamp } from './query-shared';
import type { JellyfinLinkRepairSummary } from './types';
/**
 * Columns read from `imm_videos` for rows that repairJellyfinStreamVideoLinks treats as
 * candidates, i.e. rows whose key, source URL, or title contains an API-key marker.
 */
type LegacyJellyfinVideoRow = {
video_id: number;
video_key: string;
source_url: string | null;
canonical_title: string;
};
/**
 * Columns read from an already-sanitized `imm_videos` row for the same Jellyfin item.
 * repairJellyfinStreamVideoLinks copies these values onto the legacy row it is repairing.
 */
type JellyfinTargetVideoRow = {
video_id: number;
anime_id: number | null;
canonical_title: string;
parsed_basename: string | null;
parsed_title: string | null;
parsed_season: number | null;
parsed_episode: number | null;
parser_source: string | null;
parser_confidence: number | null;
parse_metadata_json: string | null;
};
/**
 * An `imm_anime` row whose title or normalized key looks like a leaked stream URL.
 * The row also carries the fallback titles considered when choosing a replacement.
 */
type LeakedAnimeTitleRow = {
anime_id: number;
canonical_title: string;
normalized_title_key: string;
title_romaji: string | null;
title_english: string | null;
title_native: string | null;
// Title of the most recently updated linked video whose title has no API-key marker, if any.
linked_video_title: string | null;
};
/**
 * Reports whether a stored title appears to be a Jellyfin stream URL (or a normalized
 * form of one) rather than a real title.
 *
 * A value is flagged only when it has both an API-key marker ("api_key=", "api key ",
 * "apikey" at end of string, ...) and at least one stream-URL marker.
 *
 * @param value Title text to inspect; `null` and empty strings are never flagged.
 * @returns `true` when the value looks like a leaked stream URL.
 */
function looksLikeLeakedJellyfinTitle(value: string | null): boolean {
  if (!value) {
    return false;
  }
  if (!/api[\s_-]*key(?:\s|=|$)/i.test(value)) {
    return false;
  }
  const haystack = value.toLowerCase();
  const streamMarkers = ['stream?', '/stream?', '/videos/', 'mediasourceid'];
  return streamMarkers.some((marker) => haystack.includes(marker));
}
/**
 * Picks a replacement title for an anime row whose current title leaked a stream URL.
 *
 * Candidates are tried in order: English title, romaji title, native title, then the
 * linked video title with a leading "[Jellyfin/direct]" tag removed. Each candidate is
 * trimmed; the first non-empty one that does not itself look leaked is used.
 *
 * @param row Anime row with its fallback title columns.
 * @returns The chosen title, or `null` when no candidate is usable.
 */
function chooseSafeAnimeTitle(row: LeakedAnimeTitleRow): string | null {
  const untaggedVideoTitle = row.linked_video_title?.replace(/^\[Jellyfin\/direct]\s*/i, '');
  const rankedTitles = [row.title_english, row.title_romaji, row.title_native, untaggedVideoTitle];
  const firstSafeTitle = rankedTitles
    .map((title) => title?.trim())
    .find((title) => Boolean(title) && !looksLikeLeakedJellyfinTitle(title ?? null));
  return firstSafeTitle ?? null;
}
/**
 * Parses a stored value as a legacy Jellyfin stream URL.
 *
 * The value may carry a leading "remote:" prefix, which is removed before parsing. It is
 * accepted only when its path contains `videos/<segment>/stream` (segment names compared
 * case-insensitively) and its query string has an `api_key` parameter.
 *
 * @param value Raw `source_url` or `video_key` text; may be `null`.
 * @returns The parsed URL, or `null` when the value is missing, unparsable, or not a
 *   legacy stream URL.
 */
function parseLegacyJellyfinStreamUrl(value: string | null): URL | null {
  if (!value) {
    return null;
  }
  const remotePrefix = 'remote:';
  const cleaned = value.trim();
  const candidateText = cleaned.startsWith(remotePrefix)
    ? cleaned.slice(remotePrefix.length)
    : cleaned;

  let parsed: URL;
  try {
    parsed = new URL(candidateText);
  } catch {
    return null;
  }

  const segments = parsed.pathname.split('/').filter((segment) => segment.length > 0);
  const videosAt = segments.findIndex((segment) => segment.toLowerCase() === 'videos');
  if (videosAt < 0) {
    return null;
  }
  if (segments[videosAt + 1] === undefined) {
    return null;
  }
  if (segments[videosAt + 2]?.toLowerCase() !== 'stream') {
    return null;
  }
  return parsed.searchParams.has('api_key') ? parsed : null;
}
/**
 * Builds the credential-free `jellyfin://<host>/item/<itemId>` URL for a legacy stream URL.
 *
 * The item id is the path segment that follows the (case-insensitive) `videos` segment.
 *
 * @param url A URL previously accepted by parseLegacyJellyfinStreamUrl.
 * @returns The stats URL, or `null` when the path has no `videos` segment or the item id
 *   normalizes to an empty value.
 */
function buildJellyfinStatsUrlFromLegacyStream(url: URL): string | null {
  const pathSegments = url.pathname.split('/').filter(Boolean);
  const videosIndex = pathSegments.findIndex((segment) => segment.toLowerCase() === 'videos');
  // Without this guard a -1 index would make `videosIndex + 1` point at the first path
  // segment, which would then be treated as the item id.
  if (videosIndex < 0) return null;
  const itemId = normalizeText(pathSegments[videosIndex + 1]);
  if (!itemId) return null;
  return `jellyfin://${url.host}/item/${encodeURIComponent(itemId)}`;
}
/**
 * Chooses the `video_key` a repaired legacy row should use.
 *
 * The preferred key is `remote:<statsUrl>`. If another video row already holds that key,
 * a `#legacy-<videoId>` suffix is appended so the two keys stay distinct.
 *
 * @param db Open tracker database.
 * @param videoId Id of the row being repaired.
 * @param statsUrl Credential-free Jellyfin URL for the item.
 * @returns The key to store on the repaired row.
 */
function buildSanitizedJellyfinVideoKey(
  db: DatabaseSync,
  videoId: number,
  statsUrl: string,
): string {
  const preferredKey = `remote:${statsUrl}`;
  const keyOwner = db
    .prepare('SELECT video_id FROM imm_videos WHERE video_key = ?')
    .get(preferredKey) as { video_id: number } | null;
  const heldByAnotherVideo = keyOwner ? keyOwner.video_id !== videoId : false;
  return heldByAnotherVideo ? `${preferredKey}#legacy-${videoId}` : preferredKey;
}
/**
 * Repairs `imm_anime` rows whose title or normalized key looks like a leaked Jellyfin
 * stream URL.
 *
 * For each such row a safe replacement title is chosen. If another anime row already owns
 * the replacement's normalized key, the leaked row's videos and subtitle lines are moved to
 * that row and the leaked row is deleted once nothing references it. Otherwise the leaked
 * row is renamed in place. Rows with no usable replacement are left untouched.
 *
 * @param db Open tracker database.
 * @param currentTimestamp Value written to `LAST_UPDATE_DATE` on every touched row.
 * @returns Number of leaked anime rows that caused at least one row change.
 */
function repairLeakedJellyfinAnimeTitles(db: DatabaseSync, currentTimestamp: string): number {
  // The SQL LIKE filters are a coarse prefilter; the stricter check runs in JS below.
  const suspectRows = db
    .prepare(
      `
        SELECT
          a.anime_id,
          a.normalized_title_key,
          a.canonical_title,
          a.title_romaji,
          a.title_english,
          a.title_native,
          (
            SELECT v.canonical_title
            FROM imm_videos v
            WHERE v.anime_id = a.anime_id
              AND v.canonical_title NOT LIKE '%api_key=%'
              AND lower(v.canonical_title) NOT LIKE '%api key%'
            ORDER BY v.LAST_UPDATE_DATE DESC, v.video_id DESC
            LIMIT 1
          ) AS linked_video_title
        FROM imm_anime a
        WHERE a.canonical_title LIKE '%api_key=%'
          OR lower(a.canonical_title) LIKE '%api key%'
          OR lower(a.normalized_title_key) LIKE '%api key%'
      `,
    )
    .all() as LeakedAnimeTitleRow[];
  const leakedRows = suspectRows.filter(
    (row) =>
      looksLikeLeakedJellyfinTitle(row.canonical_title) ||
      looksLikeLeakedJellyfinTitle(row.normalized_title_key),
  );

  let repairedCount = 0;
  for (const leaked of leakedRows) {
    const safeTitle = chooseSafeAnimeTitle(leaked);
    if (!safeTitle) {
      continue;
    }
    const safeKey = normalizeAnimeIdentityKey(safeTitle);
    if (!safeKey) {
      continue;
    }

    const keyOwner = db
      .prepare(
        `
          SELECT anime_id
          FROM imm_anime
          WHERE normalized_title_key = ?
            AND anime_id != ?
        `,
      )
      .get(safeKey, leaked.anime_id) as { anime_id: number } | null;

    if (!keyOwner) {
      // No other anime owns the clean key: rename the leaked row in place.
      const renamed = db
        .prepare(
          `
            UPDATE imm_anime
            SET
              normalized_title_key = ?,
              canonical_title = ?,
              LAST_UPDATE_DATE = ?
            WHERE anime_id = ?
          `,
        )
        .run(safeKey, safeTitle, currentTimestamp, leaked.anime_id) as {
        changes: number;
      };
      if (renamed.changes > 0) {
        repairedCount += 1;
      }
      continue;
    }

    // Another anime already owns the clean key: move references to it, then drop the
    // leaked row once nothing points at it.
    const movedVideos = db
      .prepare(
        `
          UPDATE imm_videos
          SET anime_id = ?, LAST_UPDATE_DATE = ?
          WHERE anime_id = ?
        `,
      )
      .run(keyOwner.anime_id, currentTimestamp, leaked.anime_id) as { changes: number };
    const movedLines = db
      .prepare(
        `
          UPDATE imm_subtitle_lines
          SET anime_id = ?, LAST_UPDATE_DATE = ?
          WHERE anime_id = ?
        `,
      )
      .run(keyOwner.anime_id, currentTimestamp, leaked.anime_id) as { changes: number };
    const removedAnime = db
      .prepare(
        `
          DELETE FROM imm_anime
          WHERE anime_id = ?
            AND NOT EXISTS (SELECT 1 FROM imm_videos WHERE anime_id = ?)
            AND NOT EXISTS (SELECT 1 FROM imm_subtitle_lines WHERE anime_id = ?)
        `,
      )
      .run(leaked.anime_id, leaked.anime_id, leaked.anime_id) as { changes: number };

    // Each leaked anime counts once, however many rows were touched.
    const touchedAnything =
      movedVideos.changes > 0 || movedLines.changes > 0 || removedAnime.changes > 0;
    if (touchedAnything) {
      repairedCount += 1;
    }
  }
  return repairedCount;
}
/**
 * Clears parser output on `source_type = 2` video rows where the parsed basename, parsed
 * title, or parse metadata JSON contains an API-key marker.
 *
 * The three parse columns are set to NULL, and a `parser_source` of 'guessit' is rewritten
 * to 'jellyfin'; other parser sources are kept.
 *
 * @param db Open tracker database.
 * @param currentTimestamp Value written to `LAST_UPDATE_DATE` on every touched row.
 * @returns Number of video rows updated.
 */
function repairLeakedJellyfinVideoParseMetadata(
  db: DatabaseSync,
  currentTimestamp: string,
): number {
  const scrubStatement = db.prepare(
    `
      UPDATE imm_videos
      SET
        parsed_basename = NULL,
        parsed_title = NULL,
        parse_metadata_json = NULL,
        parser_source = CASE
          WHEN parser_source = 'guessit' THEN 'jellyfin'
          ELSE parser_source
        END,
        LAST_UPDATE_DATE = ?
      WHERE source_type = 2
        AND (
          parsed_basename LIKE '%api_key=%'
          OR lower(parsed_basename) LIKE '%api key%'
          OR parsed_title LIKE '%api_key=%'
          OR lower(parsed_title) LIKE '%api key%'
          OR parse_metadata_json LIKE '%api_key=%'
          OR lower(parse_metadata_json) LIKE '%api key%'
        )
    `,
  );
  const outcome = scrubStatement.run(currentTimestamp) as { changes: number };
  return outcome.changes;
}
/**
 * Rewrites legacy Jellyfin video rows that stored a credential-bearing stream URL, then
 * cleans up anime titles and parse metadata that leaked the same URL.
 *
 * For each candidate row (`source_type = 2` with an API-key marker in its key, source URL,
 * or title):
 *  - rows that cannot be parsed as a legacy stream URL are skipped;
 *  - if no other row already represents the same Jellyfin item, the row's key, source URL,
 *    and (when leaked) title are replaced with sanitized values;
 *  - otherwise the row keeps its identity but adopts the anime link and parse columns of
 *    the existing sanitized row, and its subtitle lines are re-pointed at that anime.
 *
 * All writes, including the anime-title and parse-metadata passes, run inside a single
 * `BEGIN IMMEDIATE` transaction whether or not any video candidates were found, so a
 * failure part-way through never leaves a half-merged anime behind.
 *
 * @param db Open tracker database; must not already be inside a transaction.
 * @returns `scanned` is the number of candidate video rows; `repaired` counts repaired
 *   video rows plus rows fixed by the anime-title and parse-metadata passes.
 * @throws Rethrows any database error after rolling the transaction back.
 */
export function repairJellyfinStreamVideoLinks(db: DatabaseSync): JellyfinLinkRepairSummary {
  const candidates = db
    .prepare(
      `
        SELECT video_id, video_key, source_url, canonical_title
        FROM imm_videos
        WHERE source_type = 2
          AND (
            video_key LIKE '%api_key=%'
            OR lower(video_key) LIKE '%api key%'
            OR source_url LIKE '%api_key=%'
            OR lower(source_url) LIKE '%api key%'
            OR canonical_title LIKE '%api_key=%'
            OR lower(canonical_title) LIKE '%api key%'
          )
      `,
    )
    .all() as LegacyJellyfinVideoRow[];
  const summary: JellyfinLinkRepairSummary = {
    scanned: candidates.length,
    repaired: 0,
  };
  const currentTimestamp = toDbTimestamp(nowMs());

  db.exec('BEGIN IMMEDIATE');
  try {
    for (const candidate of candidates) {
      // Prefer the stored source URL; fall back to the key, which may embed the same URL.
      const legacyUrl =
        parseLegacyJellyfinStreamUrl(candidate.source_url) ??
        parseLegacyJellyfinStreamUrl(candidate.video_key);
      if (!legacyUrl) {
        continue;
      }
      const statsUrl = buildJellyfinStatsUrlFromLegacyStream(legacyUrl);
      if (!statsUrl) {
        continue;
      }
      const sanitizedVideoKey = buildSanitizedJellyfinVideoKey(db, candidate.video_id, statsUrl);
      const sanitizedCanonicalTitle = looksLikeLeakedJellyfinTitle(candidate.canonical_title)
        ? 'Jellyfin Video'
        : candidate.canonical_title;

      // Look for another row that already represents this Jellyfin item.
      const target = db
        .prepare(
          `
            SELECT
              video_id,
              anime_id,
              canonical_title,
              parsed_basename,
              parsed_title,
              parsed_season,
              parsed_episode,
              parser_source,
              parser_confidence,
              parse_metadata_json
            FROM imm_videos
            WHERE video_id != ?
              AND (video_key = ? OR source_url = ?)
            ORDER BY parser_source = 'jellyfin' DESC, video_id DESC
            LIMIT 1
          `,
        )
        .get(candidate.video_id, `remote:${statsUrl}`, statsUrl) as JellyfinTargetVideoRow | null;

      if (!target) {
        // No sanitized counterpart: sanitize this row in place.
        const updated = db
          .prepare(
            `
              UPDATE imm_videos
              SET
                video_key = ?,
                source_url = ?,
                canonical_title = ?,
                parser_source = COALESCE(parser_source, 'jellyfin'),
                LAST_UPDATE_DATE = ?
              WHERE video_id = ?
                AND (video_key != ? OR source_url != ? OR canonical_title != ?)
            `,
          )
          .run(
            sanitizedVideoKey,
            statsUrl,
            sanitizedCanonicalTitle,
            currentTimestamp,
            candidate.video_id,
            sanitizedVideoKey,
            statsUrl,
            sanitizedCanonicalTitle,
          ) as { changes: number };
        if (updated.changes > 0) {
          summary.repaired += 1;
        }
        continue;
      }

      // A sanitized counterpart exists: adopt its anime link, title, and parse columns.
      db.prepare(
        `
          UPDATE imm_videos
          SET
            video_key = ?,
            anime_id = ?,
            canonical_title = ?,
            source_url = ?,
            parsed_basename = ?,
            parsed_title = ?,
            parsed_season = ?,
            parsed_episode = ?,
            parser_source = ?,
            parser_confidence = ?,
            parse_metadata_json = ?,
            LAST_UPDATE_DATE = ?
          WHERE video_id = ?
        `,
      ).run(
        sanitizedVideoKey,
        target.anime_id,
        target.canonical_title,
        statsUrl,
        target.parsed_basename,
        target.parsed_title,
        target.parsed_season,
        target.parsed_episode,
        target.parser_source,
        target.parser_confidence,
        target.parse_metadata_json,
        currentTimestamp,
        candidate.video_id,
      );
      if (target.anime_id !== null) {
        // Keep the denormalized anime id on subtitle lines in step with the video row.
        db.prepare(
          `
            UPDATE imm_subtitle_lines
            SET anime_id = ?, LAST_UPDATE_DATE = ?
            WHERE video_id = ?
          `,
        ).run(target.anime_id, currentTimestamp, candidate.video_id);
      }
      summary.repaired += 1;
    }

    // These passes run even when there were no video candidates, and always inside the
    // transaction, because the anime-title pass issues several dependent statements.
    summary.repaired += repairLeakedJellyfinAnimeTitles(db, currentTimestamp);
    summary.repaired += repairLeakedJellyfinVideoParseMetadata(db, currentTimestamp);
    db.exec('COMMIT');
  } catch (error) {
    db.exec('ROLLBACK');
    throw error;
  }
  return summary;
}
+168 -44
View File
@@ -60,6 +60,34 @@ interface RetainedSessionRow {
mediaBufferEvents: number;
}
/**
 * SQL fragment defining the `retained_sessions` common table expression; callers place it
 * after `WITH` in their own statement.
 *
 * One row per ended session (`ended_at_ms IS NOT NULL`), joined to its video. Metrics come
 * from the session's latest telemetry sample (highest `sample_ms`, ties broken by
 * `telemetry_id`), fall back to the session row's own columns, and are clamped to be
 * non-negative by the two-argument `MAX(..., 0)`. `completed` is 1 when the video's
 * `watched` value is greater than zero.
 */
const RETAINED_SESSION_METRICS_CTE = `
retained_sessions AS (
SELECT
s.session_id,
s.video_id,
v.anime_id,
s.started_at_ms,
s.ended_at_ms,
MAX(COALESCE(t.active_watched_ms, s.active_watched_ms, 0), 0) AS active_ms,
MAX(COALESCE(t.cards_mined, s.cards_mined, 0), 0) AS cards_mined,
MAX(COALESCE(t.lines_seen, s.lines_seen, 0), 0) AS lines_seen,
MAX(COALESCE(t.tokens_seen, s.tokens_seen, 0), 0) AS tokens_seen,
CASE WHEN v.watched > 0 THEN 1 ELSE 0 END AS completed
FROM imm_sessions s
JOIN imm_videos v
ON v.video_id = s.video_id
LEFT JOIN imm_session_telemetry t
ON t.telemetry_id = (
SELECT telemetry_id
FROM imm_session_telemetry
WHERE session_id = s.session_id
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
)
WHERE s.ended_at_ms IS NOT NULL
)
`;
function hasRetainedPriorSession(
db: DatabaseSync,
videoId: number,
@@ -154,54 +182,150 @@ function rebuildLifetimeSummariesInternal(
db: DatabaseSync,
rebuiltAtMs: number,
): LifetimeRebuildSummary {
const rows = db
.prepare(
`
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
ended_media_ms AS lastMediaMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
)
.all() as Array<
Omit<RetainedSessionRow, 'startedAtMs' | 'endedAtMs' | 'lastMediaMs'> & {
startedAtMs: number | string;
endedAtMs: number | string;
lastMediaMs: number | string | null;
}
>;
const sessions = rows.map((row) => ({
...row,
startedAtMs: row.startedAtMs,
endedAtMs: row.endedAtMs,
lastMediaMs: row.lastMediaMs === null ? null : Number(row.lastMediaMs),
})) as RetainedSessionRow[];
const rebuiltAtDbMs = toDbTimestamp(rebuiltAtMs);
const appliedSessions = Number(
(
db
.prepare('SELECT COUNT(*) AS total FROM imm_sessions WHERE ended_at_ms IS NOT NULL')
.get() as { total: number }
).total,
);
resetLifetimeSummaries(db, rebuiltAtMs);
for (const session of sessions) {
applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
}
db.prepare(
`
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
SELECT
session_id,
ended_at_ms,
?,
?
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
`,
).run(rebuiltAtDbMs, rebuiltAtDbMs);
db.prepare(
`
WITH ${RETAINED_SESSION_METRICS_CTE}
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
SELECT
video_id,
COUNT(*) AS total_sessions,
COALESCE(SUM(active_ms), 0) AS total_active_ms,
COALESCE(SUM(cards_mined), 0) AS total_cards,
COALESCE(SUM(lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(tokens_seen), 0) AS total_tokens_seen,
MAX(completed) AS completed,
MIN(started_at_ms) AS first_watched_ms,
MAX(ended_at_ms) AS last_watched_ms,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM retained_sessions
GROUP BY video_id
`,
).run(rebuiltAtDbMs, rebuiltAtDbMs);
db.prepare(
`
WITH ${RETAINED_SESSION_METRICS_CTE}
INSERT INTO imm_lifetime_anime (
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
SELECT
anime_id,
COUNT(*) AS total_sessions,
COALESCE(SUM(active_ms), 0) AS total_active_ms,
COALESCE(SUM(cards_mined), 0) AS total_cards,
COALESCE(SUM(lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(tokens_seen), 0) AS total_tokens_seen,
COUNT(DISTINCT video_id) AS episodes_started,
COUNT(DISTINCT CASE WHEN completed > 0 THEN video_id END) AS episodes_completed,
MIN(started_at_ms) AS first_watched_ms,
MAX(ended_at_ms) AS last_watched_ms,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM retained_sessions
WHERE anime_id IS NOT NULL
GROUP BY anime_id
`,
).run(rebuiltAtDbMs, rebuiltAtDbMs);
db.prepare(
`
WITH ${RETAINED_SESSION_METRICS_CTE},
anime_completion AS (
SELECT
rs.anime_id,
MAX(a.episodes_total) AS episodes_total,
COUNT(DISTINCT CASE WHEN rs.completed > 0 THEN rs.video_id END) AS completed_videos
FROM retained_sessions rs
JOIN imm_anime a
ON a.anime_id = rs.anime_id
WHERE rs.anime_id IS NOT NULL
GROUP BY rs.anime_id
)
UPDATE imm_lifetime_global
SET
total_sessions = (SELECT COUNT(*) FROM retained_sessions),
total_active_ms = (SELECT COALESCE(SUM(active_ms), 0) FROM retained_sessions),
total_cards = (SELECT COALESCE(SUM(cards_mined), 0) FROM retained_sessions),
active_days = (
SELECT COUNT(DISTINCT CAST(
julianday(CAST(started_at_ms AS REAL) / 1000, 'unixepoch', 'localtime') - 2440587.5
AS INTEGER
))
FROM retained_sessions
),
episodes_started = (SELECT COUNT(DISTINCT video_id) FROM retained_sessions),
episodes_completed = (
SELECT COUNT(DISTINCT CASE WHEN completed > 0 THEN video_id END)
FROM retained_sessions
),
anime_completed = (
SELECT COUNT(*)
FROM anime_completion
WHERE episodes_total IS NOT NULL
AND episodes_total > 0
AND completed_videos >= episodes_total
),
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(rebuiltAtDbMs, rebuiltAtDbMs);
return {
appliedSessions: sessions.length,
appliedSessions,
rebuiltAtMs,
};
}
@@ -1,6 +1,6 @@
import type { DatabaseSync } from './sqlite';
import { nowMs } from './time';
import { subtractDbTimestamp, toDbTimestamp } from './query-shared';
import { makePlaceholders, subtractDbTimestamp, toDbTimestamp } from './query-shared';
const ROLLUP_STATE_KEY = 'last_rollup_sample_ms';
const DAILY_MS = 86_400_000;
@@ -20,6 +20,12 @@ interface RollupTelemetryResult {
maxSampleMs: number | null;
}
/** Identifies the daily and monthly rollup buckets that a session of one video falls into. */
export interface RollupGroup {
// Local-time day number of the session start, as produced by the rollup SQL
// (julianday(..., 'localtime') - 2440587.5, truncated to an integer).
rollupDay: number;
// Local-time month of the session start encoded as the integer YYYYMM.
rollupMonth: number;
// `video_id` of the session's video.
videoId: number;
}
interface RawRetentionResult {
deletedSessionEvents: number;
deletedTelemetryRows: number;
@@ -164,6 +170,26 @@ function upsertDailyRollupsForGroups(
}
const upsertStmt = db.prepare(`
WITH matching_sessions AS (
SELECT *
FROM imm_sessions
WHERE CAST(julianday(CAST(started_at_ms AS REAL) / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ?
AND video_id = ?
),
session_metrics AS (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
JOIN matching_sessions s
ON s.session_id = t.session_id
GROUP BY t.session_id
)
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, cards_per_hour,
@@ -197,20 +223,8 @@ function upsertDailyRollupsForGroups(
END AS lookup_hit_rate,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
LEFT JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
FROM matching_sessions s
LEFT JOIN session_metrics sm ON s.session_id = sm.session_id
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
@@ -226,7 +240,7 @@ function upsertDailyRollupsForGroups(
`);
for (const { rollupDay, videoId } of groups) {
upsertStmt.run(rollupNowMs, rollupNowMs, rollupDay, videoId);
upsertStmt.run(rollupDay, videoId, rollupNowMs, rollupNowMs);
}
}
@@ -240,6 +254,24 @@ function upsertMonthlyRollupsForGroups(
}
const upsertStmt = db.prepare(`
WITH matching_sessions AS (
SELECT *
FROM imm_sessions
WHERE CAST(strftime('%Y%m', CAST(started_at_ms AS REAL) / 1000, 'unixepoch', 'localtime') AS INTEGER) = ?
AND video_id = ?
),
session_metrics AS (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
JOIN matching_sessions s
ON s.session_id = t.session_id
GROUP BY t.session_id
)
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
@@ -254,18 +286,8 @@ function upsertMonthlyRollupsForGroups(
COALESCE(SUM(COALESCE(sm.max_cards, s.cards_mined)), 0) AS total_cards,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
LEFT JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
FROM matching_sessions s
LEFT JOIN session_metrics sm ON s.session_id = sm.session_id
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
@@ -278,10 +300,75 @@ function upsertMonthlyRollupsForGroups(
`);
for (const { rollupMonth, videoId } of groups) {
upsertStmt.run(rollupNowMs, rollupNowMs, rollupMonth, videoId);
upsertStmt.run(rollupMonth, videoId, rollupNowMs, rollupNowMs);
}
}
/**
 * Looks up the distinct (local day, local month, video) rollup buckets that the given
 * sessions belong to. Call it before the sessions are deleted so the affected buckets can
 * be recomputed afterwards.
 *
 * @param db Open tracker database.
 * @param sessionIds Session ids to inspect; an empty list performs no query.
 * @returns One entry per distinct bucket; empty when no ids were given or none matched.
 */
export function getRollupGroupsForSessions(db: DatabaseSync, sessionIds: number[]): RollupGroup[] {
  if (sessionIds.length === 0) {
    return [];
  }

  const bucketQuery = `
    SELECT DISTINCT
      CAST(julianday(CAST(started_at_ms AS REAL) / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
      CAST(strftime('%Y%m', CAST(started_at_ms AS REAL) / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
      video_id
    FROM imm_sessions
    WHERE session_id IN (${makePlaceholders(sessionIds)})
  `;
  const bucketRows = db.prepare(bucketQuery).all(...sessionIds) as RollupGroupRow[];

  const groups: RollupGroup[] = [];
  for (const bucket of bucketRows) {
    groups.push({
      rollupDay: bucket.rollup_day,
      rollupMonth: bucket.rollup_month,
      videoId: bucket.video_id,
    });
  }
  return groups;
}
/**
 * Recomputes the daily and monthly rollup rows for the given buckets only.
 *
 * Existing rollup rows for each bucket are deleted first and then rebuilt from the
 * remaining sessions, so a bucket whose sessions are all gone ends up with no row. The
 * function opens no transaction of its own.
 *
 * @param db Open tracker database.
 * @param groups Buckets to refresh; an empty list is a no-op.
 */
export function refreshRollupsForGroupsInTransaction(
  db: DatabaseSync,
  groups: RollupGroup[],
): void {
  if (groups.length === 0) {
    return;
  }

  const rollupNowMs = toDbTimestamp(nowMs());
  // Several sessions can share a bucket, so collapse duplicates per granularity.
  const dailyGroups = dedupeGroups(
    groups.map(({ rollupDay, videoId }) => ({ rollupDay, videoId })),
  );
  const monthlyGroups = dedupeGroups(
    groups.map(({ rollupMonth, videoId }) => ({ rollupMonth, videoId })),
  );

  const deleteDailyStmt = db.prepare(
    'DELETE FROM imm_daily_rollups WHERE rollup_day = ? AND video_id = ?',
  );
  const deleteMonthlyStmt = db.prepare(
    'DELETE FROM imm_monthly_rollups WHERE rollup_month = ? AND video_id = ?',
  );

  for (const dailyGroup of dailyGroups) {
    deleteDailyStmt.run(dailyGroup.rollupDay, dailyGroup.videoId);
  }
  for (const monthlyGroup of monthlyGroups) {
    deleteMonthlyStmt.run(monthlyGroup.rollupMonth, monthlyGroup.videoId);
  }

  upsertDailyRollupsForGroups(db, dailyGroups, rollupNowMs);
  upsertMonthlyRollupsForGroups(db, monthlyGroups, rollupNowMs);
}
function getAffectedRollupGroups(
db: DatabaseSync,
lastRollupSampleMs: number | string,
@@ -179,6 +179,32 @@ test('guessAnimeVideoMetadata uses guessit basename output first when available'
});
});
// guessit is stubbed to return only a title. The expected result keeps season 2, which
// appears only in the "Season 2" directory name of the path, while the episode stays null
// and the parser source is still reported as 'guessit'.
test('guessAnimeVideoMetadata keeps season directory scope when guessit omits season', async () => {
const parsed = await guessAnimeVideoMetadata(
'/tmp/KonoSuba/Season 2/KonoSuba - 05.mkv',
'Episode 5',
{
runGuessit: async () =>
JSON.stringify({
title: 'KonoSuba',
}),
},
);
assert.deepEqual(parsed, {
parsedBasename: 'KonoSuba - 05.mkv',
parsedTitle: 'KonoSuba',
parsedSeason: 2,
parsedEpisode: null,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: 'KonoSuba - 05.mkv',
source: 'guessit',
}),
});
});
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
@@ -7,6 +7,8 @@ import type {
KanjiOccurrenceRow,
KanjiStatsRow,
KanjiWordRow,
SentenceSearchOptions,
SentenceSearchResultRow,
SessionEventRow,
SimilarWordRow,
StatsExcludedWordRow,
@@ -20,6 +22,56 @@ import { nowMs } from './time';
const VOCABULARY_STATS_FILTER_OVERSAMPLE_FACTOR = 4;
const VOCABULARY_STATS_FILTER_OVERSAMPLE_MIN = 100;
// Row limit used when the caller gives none, or gives one that is not a positive finite number.
const SENTENCE_SEARCH_DEFAULT_LIMIT = 50;
// Upper bound on rows returned by a single sentence search.
const SENTENCE_SEARCH_MAX_LIMIT = 100;
// Matches each Han-script character; global so String.prototype.match returns every one.
const KANJI_PATTERN = /\p{Script=Han}/gu;

/**
 * Normalizes a caller-supplied row limit for sentence search.
 *
 * @param limit Requested limit; may be fractional, non-positive, or non-finite.
 * @returns The floored limit capped at SENTENCE_SEARCH_MAX_LIMIT, or
 *   SENTENCE_SEARCH_DEFAULT_LIMIT when the request is non-finite or floors to zero or less.
 */
function resolveSentenceSearchLimit(limit: number): number {
  const wholeLimit = Number.isFinite(limit) ? Math.floor(limit) : 0;
  return wholeLimit > 0
    ? Math.min(wholeLimit, SENTENCE_SEARCH_MAX_LIMIT)
    : SENTENCE_SEARCH_DEFAULT_LIMIT;
}
/**
 * Splits a free-text query into individual search terms.
 *
 * Terms are separated by any run of whitespace; empty pieces are dropped and at most the
 * first eight terms are kept.
 *
 * @param query Raw query text.
 * @returns Up to eight non-empty terms in their original order.
 */
export function splitSentenceSearchTerms(query: string): string[] {
  const maxTerms = 8;
  const terms: string[] = [];
  for (const piece of query.trim().split(/\s+/)) {
    if (terms.length >= maxTerms) {
      break;
    }
    const term = piece.trim();
    if (term) {
      terms.push(term);
    }
  }
  return terms;
}
/**
 * Escapes the characters that are special in a SQL LIKE pattern using a backslash escape
 * character: the backslash itself, `%`, and `_`.
 *
 * @param term Literal text to embed in a LIKE pattern.
 * @returns The text with each special character prefixed by a backslash.
 */
function escapeLikeTerm(term: string): string {
  // `$&` in the replacement string stands for the matched character.
  return term.replace(/[\\%_]/g, '\\$&');
}
/**
 * Trims each value, drops the ones that become empty, and removes duplicates while keeping
 * first-seen order.
 *
 * @param values Candidate strings; `undefined` is treated as an empty list.
 * @returns The distinct, trimmed, non-empty values.
 */
function uniqueNonEmptyTerms(values: readonly string[] | undefined): string[] {
  const trimmedTerms = (values ?? [])
    .map((value) => value.trim())
    .filter((term) => term !== '');
  // A Set iterates in insertion order, so the first occurrence of each term wins.
  return Array.from(new Set(trimmedTerms));
}
/**
 * Collects the headword candidates the caller supplied for one search term.
 *
 * @param term Search term exactly as produced by splitSentenceSearchTerms.
 * @param options Search options; only entries whose `term` equals `term` are used.
 * @returns The distinct, trimmed, non-empty headwords for the term; empty when none apply.
 */
function getHeadwordCandidatesForSentenceSearchTerm(
  term: string,
  options: SentenceSearchOptions | undefined,
): string[] {
  const collected: string[] = [];
  for (const entry of options?.headwordTerms ?? []) {
    if (entry.term === term) {
      collected.push(...entry.headwords);
    }
  }
  return uniqueNonEmptyTerms(collected);
}
/**
 * Lists the distinct Han-script characters in a string, in order of first appearance.
 *
 * @param text Text to scan.
 * @returns Distinct characters matched by KANJI_PATTERN; empty when there are none.
 */
function uniqueKanji(text: string): string[] {
  const matches = text.match(KANJI_PATTERN);
  if (matches === null) {
    return [];
  }
  return [...new Set(matches)];
}
function toVocabularyToken(row: VocabularyStatsRow): MergedToken {
const partOfSpeech =
@@ -211,6 +263,70 @@ export function getKanjiOccurrences(
.all(kanji, limit, offset) as unknown as KanjiOccurrenceRow[];
}
/**
 * Searches stored subtitle lines and returns each hit with its video and anime context.
 *
 * The query is split into terms, and a line must satisfy every term. A term is satisfied
 * when it is a substring of the line text, the video title, or the anime title, or, if the
 * caller supplied headword candidates for that term, when the line has a recorded
 * occurrence of a word with one of those headwords. Secondary text is returned but not
 * searched. Results are ordered newest first by `CREATED_DATE`, then `line_id`.
 *
 * @param db Open tracker database.
 * @param query Free-text query; a query with no terms yields an empty result.
 * @param limit Maximum rows to return; normalized by resolveSentenceSearchLimit.
 * @param options Optional per-term headword candidates.
 * @returns Matching subtitle lines with media context.
 */
export function searchSubtitleSentences(
  db: DatabaseSync,
  query: string,
  limit = SENTENCE_SEARCH_DEFAULT_LIMIT,
  options?: SentenceSearchOptions,
): SentenceSearchResultRow[] {
  const searchTerms = splitSentenceSearchTerms(query);
  if (searchTerms.length === 0) {
    return [];
  }
  const rowLimit = resolveSentenceSearchLimit(limit);

  // Bindings are appended in the same order the placeholders appear in the predicates.
  const bindings: string[] = [];
  const termPredicates = searchTerms.map((searchTerm) => {
    const pattern = `%${escapeLikeTerm(searchTerm)}%`;
    const headwordCandidates = getHeadwordCandidatesForSentenceSearchTerm(searchTerm, options);
    bindings.push(pattern, pattern, pattern, ...headwordCandidates);

    let headwordPredicate = '';
    if (headwordCandidates.length > 0) {
      headwordPredicate = `
        OR EXISTS (
          SELECT 1
          FROM imm_word_line_occurrences o
          JOIN imm_words w ON w.id = o.word_id
          WHERE o.line_id = l.line_id
            AND w.headword IN (${headwordCandidates.map(() => '?').join(', ')})
        )
      `;
    }

    return `
      (
        l.text LIKE ? ESCAPE '\\'
        OR v.canonical_title LIKE ? ESCAPE '\\'
        OR COALESCE(a.canonical_title, '') LIKE ? ESCAPE '\\'
        ${headwordPredicate}
      )
    `;
  });

  const searchSql = `
    SELECT
      l.anime_id AS animeId,
      a.canonical_title AS animeTitle,
      l.video_id AS videoId,
      v.canonical_title AS videoTitle,
      v.source_path AS sourcePath,
      l.secondary_text AS secondaryText,
      l.session_id AS sessionId,
      l.line_index AS lineIndex,
      l.segment_start_ms AS segmentStartMs,
      l.segment_end_ms AS segmentEndMs,
      l.text AS text
    FROM imm_subtitle_lines l
    JOIN imm_videos v ON v.video_id = l.video_id
    LEFT JOIN imm_anime a ON a.anime_id = l.anime_id
    WHERE ${termPredicates.join(' AND ')}
    ORDER BY l.CREATED_DATE DESC, l.line_id DESC
    LIMIT ?
  `;
  return db
    .prepare(searchSql)
    .all(...bindings, rowLimit) as unknown as SentenceSearchResultRow[];
}
export function getSessionEvents(
db: DatabaseSync,
sessionId: number,
@@ -287,24 +403,38 @@ export function getSimilarWords(db: DatabaseSync, wordId: number, limit = 10): S
reading: string;
} | null;
if (!word || word.headword.trim() === '') return [];
const clauses: string[] = [];
const params: string[] = [];
const reading = word.reading.trim();
if (reading !== '') {
clauses.push('reading = ?');
params.push(word.reading);
}
for (const kanji of uniqueKanji(word.headword)) {
clauses.push("headword LIKE ? ESCAPE '\\'");
params.push(`%${escapeLikeTerm(kanji)}%`);
}
if (clauses.length === 0) return [];
const orderBy =
reading !== '' ? 'CASE WHEN reading = ? THEN 0 ELSE 1 END, frequency DESC' : 'frequency DESC';
const orderParams = reading !== '' ? [word.reading] : [];
return db
.prepare(
`
SELECT id AS wordId, headword, word, reading, frequency
FROM imm_words
WHERE id != ?
AND (reading = ? OR headword LIKE ? OR headword LIKE ?)
ORDER BY frequency DESC
AND (${clauses.join(' OR ')})
ORDER BY ${orderBy}
LIMIT ?
`,
)
.all(
wordId,
word.reading,
`%${word.headword.charAt(0)}%`,
`%${word.headword.charAt(word.headword.length - 1)}%`,
limit,
) as SimilarWordRow[];
.all(wordId, ...params, ...orderParams, limit) as SimilarWordRow[];
}
export function getKanjiDetail(db: DatabaseSync, kanjiId: number): KanjiDetailRow | null {
@@ -2,7 +2,7 @@ import { createHash } from 'node:crypto';
import type { DatabaseSync } from './sqlite';
import { buildCoverBlobReference, normalizeCoverBlobBytes } from './storage';
import { rebuildLifetimeSummariesInTransaction } from './lifetime';
import { rebuildRollupsInTransaction } from './maintenance';
import { getRollupGroupsForSessions, refreshRollupsForGroupsInTransaction } from './maintenance';
import { nowMs } from './time';
import { PartOfSpeech, type MergedToken } from '../../../types';
import { shouldExcludeTokenFromVocabularyPersistence } from '../tokenizer/annotation-stage';
@@ -474,13 +474,14 @@ export function deleteSession(db: DatabaseSync, sessionId: number): void {
const sessionIds = [sessionId];
const affectedWordIds = getAffectedWordIdsForSessions(db, sessionIds);
const affectedKanjiIds = getAffectedKanjiIdsForSessions(db, sessionIds);
const affectedRollupGroups = getRollupGroupsForSessions(db, sessionIds);
db.exec('BEGIN IMMEDIATE');
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
refreshRollupsForGroupsInTransaction(db, affectedRollupGroups);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -492,13 +493,14 @@ export function deleteSessions(db: DatabaseSync, sessionIds: number[]): void {
if (sessionIds.length === 0) return;
const affectedWordIds = getAffectedWordIdsForSessions(db, sessionIds);
const affectedKanjiIds = getAffectedKanjiIdsForSessions(db, sessionIds);
const affectedRollupGroups = getRollupGroupsForSessions(db, sessionIds);
db.exec('BEGIN IMMEDIATE');
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
refreshRollupsForGroupsInTransaction(db, affectedRollupGroups);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -536,7 +538,6 @@ export function deleteVideo(db: DatabaseSync, videoId: number): void {
db.prepare('DELETE FROM imm_videos WHERE video_id = ?').run(videoId);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -74,6 +74,8 @@ export interface TrendsDashboardQueryResult {
};
ratios: {
lookupsPerHundred: TrendChartPoint[];
cardsPerHour: TrendChartPoint[];
readingSpeed: TrendChartPoint[];
};
animeCumulative: {
watchTime: TrendPerAnimePoint[];
@@ -176,11 +178,31 @@ function getTrendSessionWordCount(session: Pick<TrendSessionMetricRow, 'tokensSe
return session.tokensSeen;
}
/**
 * Heuristic check for titles that are really raw Jellyfin stream URLs.
 *
 * A title qualifies only when it carries an API-key marker (`api key`, `api_key=`,
 * `ApiKey=` ...) and at least one stream-URL fragment.
 */
function looksLikeJellyfinStreamTitle(title: string): boolean {
  if (!/api[\s_-]*key(?:\s|=|$)/i.test(title)) {
    return false;
  }
  const haystack = title.toLowerCase();
  const streamMarkers = ['stream?', '/stream?', '/videos/', 'mediasourceid'];
  return streamMarkers.some((marker) => haystack.includes(marker));
}
/**
 * Produces a display-safe title for trend charts: blank titles become `'Unknown'`
 * and titles that look like Jellyfin stream URLs become `'Jellyfin Video'`.
 * Any other title is returned trimmed.
 */
function sanitizeTrendTitle(title: string): string {
  const trimmed = title.trim();
  if (trimmed.length === 0) return 'Unknown';
  if (looksLikeJellyfinStreamTitle(trimmed)) return 'Jellyfin Video';
  return trimmed;
}
function resolveTrendAnimeTitle(value: {
animeTitle: string | null;
canonicalTitle: string | null;
}): string {
return value.animeTitle ?? value.canonicalTitle ?? 'Unknown';
return sanitizeTrendTitle(value.animeTitle ?? value.canonicalTitle ?? 'Unknown');
}
function accumulatePoints(points: TrendChartPoint[]): TrendChartPoint[] {
@@ -225,6 +247,26 @@ function buildAggregatedTrendRows(rollups: ImmersionSessionRollupRow[]) {
}));
}
/**
 * Derives per-bucket efficiency series from aggregated trend rows.
 *
 * - `cardsPerHour`: cards divided by active hours (`activeMin / 60`).
 * - `readingSpeed`: words divided by active minutes.
 *
 * Both values are rounded to one decimal place; buckets without active time yield 0.
 */
function buildEfficiencyRates(rows: ReturnType<typeof buildAggregatedTrendRows>): {
  cardsPerHour: TrendChartPoint[];
  readingSpeed: TrendChartPoint[];
} {
  const roundToTenth = (value: number): number => +value.toFixed(1);

  const cardsPerHour = rows.map((row): TrendChartPoint => {
    const hours = row.activeMin / 60;
    return { label: row.label, value: hours > 0 ? roundToTenth(row.cards / hours) : 0 };
  });

  const readingSpeed = rows.map(
    (row): TrendChartPoint => ({
      label: row.label,
      value: row.activeMin > 0 ? roundToTenth(row.words / row.activeMin) : 0,
    }),
  );

  return { cardsPerHour, readingSpeed };
}
function buildWatchTimeByDayOfWeek(sessions: TrendSessionMetricRow[]): TrendChartPoint[] {
const totals = new Array(7).fill(0);
for (const session of sessions) {
@@ -449,7 +491,7 @@ function getVideoAnimeTitleMap(
)
.all(...uniqueIds) as Array<{ videoId: number; animeTitle: string }>;
return new Map(rows.map((row) => [row.videoId, row.animeTitle]));
return new Map(rows.map((row) => [row.videoId, sanitizeTrendTitle(row.animeTitle)]));
}
function resolveVideoAnimeTitle(
@@ -675,6 +717,7 @@ export function getTrendsDashboard(
);
const aggregatedRows = buildAggregatedTrendRows(chartRollups);
const efficiency = buildEfficiencyRates(aggregatedRows);
const activity = {
watchTime: aggregatedRows.map((row) => ({ label: row.label, value: row.activeMin })),
cards: aggregatedRows.map((row) => ({ label: row.label, value: row.cards })),
@@ -724,6 +767,8 @@ export function getTrendsDashboard(
},
ratios: {
lookupsPerHundred: buildLookupsPerHundredWords(sessions, groupBy),
cardsPerHour: efficiency.cardsPerHour,
readingSpeed: efficiency.readingSpeed,
},
animeCumulative: {
watchTime: buildCumulativePerAnime(animePerDay.watchTime),
@@ -813,7 +813,7 @@ test('ensureSchema migrates legacy videos and backfills anime metadata from file
.all() as Array<{ canonical_title: string }>;
assert.deepEqual(
animeRows.map((row) => row.canonical_title),
['Frieren', 'Little Witch Academia'],
['Frieren', 'Little Witch Academia Season 2'],
);
const littleWitchRows = db
@@ -855,7 +855,7 @@ test('ensureSchema migrates legacy videos and backfills anime metadata from file
})),
[
{
animeTitle: 'Little Witch Academia',
animeTitle: 'Little Witch Academia Season 2',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedSeason: 2,
@@ -863,7 +863,7 @@ test('ensureSchema migrates legacy videos and backfills anime metadata from file
parserSource: 'fallback',
},
{
animeTitle: 'Little Witch Academia',
animeTitle: 'Little Witch Academia Season 2',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedSeason: 2,
+35 -3
View File
@@ -23,6 +23,7 @@ export interface TrackerPreparedStatements {
export interface AnimeRecordInput {
parsedTitle: string;
canonicalTitle: string;
seasonScope?: number | null;
anilistId: number | null;
titleRomaji: string | null;
titleEnglish: string | null;
@@ -300,6 +301,31 @@ export function normalizeAnimeIdentityKey(title: string): string {
.replace(/\s+/g, ' ');
}
/**
 * Accepts a season number only when it is a positive safe integer.
 * Anything else (null, undefined, fractions, zero, negatives, NaN) becomes `null`.
 */
function normalizeSeasonScope(value: number | null | undefined): number | null {
  if (typeof value === 'number' && value > 0 && Number.isSafeInteger(value)) {
    return value;
  }
  return null;
}
/**
 * Reports whether `title` already names the given season, as a whole word:
 * `Season N` / `Season 0N` (whitespace optional), `SN` / `S0N`, or `S` followed by
 * the two-digit zero-padded season. The title is NFKC-normalized and lower-cased first.
 */
function titleAlreadyHasSeasonScope(title: string, season: number): boolean {
  const haystack = title.normalize('NFKC').toLowerCase();
  const zeroPadded = String(season).padStart(2, '0');
  const seasonMarkers = [`season\\s*0?${season}`, `s0?${season}`, `s${zeroPadded}`];
  return seasonMarkers.some((marker) => new RegExp(`\\b${marker}\\b`, 'i').test(haystack));
}
/**
 * Appends ` Season N` to a trimmed title unless the title is empty, no season is
 * given, or the title already mentions that season.
 */
function buildSeasonScopedAnimeTitle(title: string, season: number | null): string {
  const baseTitle = title.trim();
  const needsSuffix =
    baseTitle.length > 0 && season !== null && !titleAlreadyHasSeasonScope(baseTitle, season);
  return needsSuffix ? `${baseTitle} Season ${season}` : baseTitle;
}
function looksLikeEpisodeOnlyTitle(title: string): boolean {
const normalized = title.normalize('NFKC').toLowerCase().replace(/\s+/g, ' ').trim();
return /^(episode|ep)\s*\d{1,3}$/.test(normalized) || /^第\s*\d{1,3}\s*話$/.test(normalized);
@@ -478,7 +504,12 @@ function ensureStatsExcludedWordsTable(db: DatabaseSync): void {
}
export function getOrCreateAnimeRecord(db: DatabaseSync, input: AnimeRecordInput): number {
const normalizedTitleKey = normalizeAnimeIdentityKey(input.parsedTitle);
const seasonScope = normalizeSeasonScope(input.seasonScope);
const identityTitle = buildSeasonScopedAnimeTitle(input.parsedTitle, seasonScope);
const canonicalTitle =
buildSeasonScopedAnimeTitle(input.canonicalTitle || input.parsedTitle, seasonScope) ||
identityTitle;
const normalizedTitleKey = normalizeAnimeIdentityKey(identityTitle);
if (!normalizedTitleKey) {
throw new Error('parsedTitle is required to create or update an anime record');
}
@@ -508,7 +539,7 @@ export function getOrCreateAnimeRecord(db: DatabaseSync, input: AnimeRecordInput
WHERE anime_id = ?
`,
).run(
input.canonicalTitle,
canonicalTitle,
input.anilistId,
input.titleRomaji,
input.titleEnglish,
@@ -539,7 +570,7 @@ export function getOrCreateAnimeRecord(db: DatabaseSync, input: AnimeRecordInput
)
.run(
normalizedTitleKey,
input.canonicalTitle,
canonicalTitle,
input.anilistId,
input.titleRomaji,
input.titleEnglish,
@@ -648,6 +679,7 @@ function migrateLegacyAnimeMetadata(db: DatabaseSync): void {
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: parsed.title,
canonicalTitle: parsed.title,
seasonScope: parsed.season,
anilistId: null,
titleRomaji: null,
titleEnglish: null,
@@ -52,6 +52,11 @@ export interface ImmersionTrackerPolicy {
};
}
/**
 * Counters describing one Jellyfin link repair pass.
 * NOTE(review): presumably produced by `repairJellyfinStreamVideoLinks`; confirm against that module.
 */
export interface JellyfinLinkRepairSummary {
// Number of records examined (assumed from the name — TODO confirm).
scanned: number;
// Number of records that were changed (assumed from the name — TODO confirm).
repaired: number;
}
export interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
@@ -367,6 +372,29 @@ export interface KanjiOccurrenceRow {
occurrenceCount: number;
}
/**
 * One subtitle line returned by a sentence search, together with the video,
 * anime and session it belongs to.
 * NOTE(review): field meanings below are inferred from names; confirm against the query that fills them.
 */
export interface SentenceSearchResultRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
// Secondary-language text for the line, when one is available.
secondaryText: string | null;
sessionId: number;
lineIndex: number;
// Cue timing in milliseconds; null when no timing is available.
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
}
/** A search term paired with the headwords it resolved to. */
export interface SentenceSearchHeadwordTerm {
// The term as split out of the search query.
term: string;
// Headwords resolved for `term`.
headwords: string[];
}
/** Optional refinements for a sentence search. */
export interface SentenceSearchOptions {
// When present, per-term headword lists to search by.
headwordTerms?: SentenceSearchHeadwordTerm[];
}
export interface SessionEventRow {
eventType: number;
tsMs: number;
+21
View File
@@ -235,6 +235,27 @@ test('dispatchMpvProtocolMessage prefers the already selected matching secondary
assert.deepEqual(state.commands, [{ command: ['set_property', 'secondary-sid', 3] }]);
});
// Two English tracks are offered: the first is a "Signs & Songs" track, the second is dialogue.
// The dispatcher is expected to set secondary-sid to the dialogue track (id 3), not the first match.
test('dispatchMpvProtocolMessage skips signs and songs when choosing secondary subtitles', async () => {
const { deps, state } = createDeps({
getResolvedConfig: () => ({
secondarySub: { secondarySubLanguages: ['eng', 'en'] },
}),
});
await dispatchMpvProtocolMessage(
{
request_id: MPV_REQUEST_ID_TRACK_LIST_SECONDARY,
data: [
{ type: 'sub', id: 2, lang: 'eng', title: 'English Signs & Songs' },
{ type: 'sub', id: 3, lang: 'eng', title: 'English Dialogue' },
],
},
deps,
);
assert.deepEqual(state.commands, [{ command: ['set_property', 'secondary-sid', 3] }]);
});
test('dispatchMpvProtocolMessage restores secondary visibility on shutdown', async () => {
const { deps, state } = createDeps();
+14 -2
View File
@@ -149,6 +149,11 @@ function getSubtitleTrackIdentity(track: SubtitleTrackCandidate): string {
return `id:${track.id}`;
}
/**
 * Detects "signs"/"songs" subtitle tracks by looking for the whole words
 * `sign(s)` or `song(s)` in the track title or external filename (case-insensitive).
 */
function isSignsOrSongsSubtitleTrack(track: SubtitleTrackCandidate): boolean {
  const signsOrSongsPattern = /\b(signs?|songs?)\b/;
  const description = `${track.title} ${track.externalFilename ?? ''}`;
  return signsOrSongsPattern.test(description.toLowerCase());
}
function pickSecondarySubtitleTrackId(
tracks: Array<Record<string, unknown>>,
preferredLanguages: string[],
@@ -177,12 +182,19 @@ function pickSecondarySubtitleTrackId(
const uniqueTracks = [...dedupedTracks.values()];
for (const language of normalizedLanguages) {
const selectedMatch = uniqueTracks.find((track) => track.selected && track.lang === language);
const languageTracks = uniqueTracks.filter((track) => track.lang === language);
if (languageTracks.length === 0) {
continue;
}
const cleanTracks = languageTracks.filter((track) => !isSignsOrSongsSubtitleTrack(track));
const candidateTracks = cleanTracks.length > 0 ? cleanTracks : languageTracks;
const selectedMatch = candidateTracks.find((track) => track.selected);
if (selectedMatch) {
return selectedMatch.id;
}
const match = uniqueTracks.find((track) => track.lang === language);
const match = candidateTracks[0];
if (match) {
return match.id;
}
@@ -0,0 +1,484 @@
import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync, statSync } from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { runCommand, type CommandResult } from '../../subsync/utils';
import { parseSubtitleCues, type SubtitleCue } from './subtitle-cue-parser.js';
import { isEnglishYoutubeLang, normalizeYoutubeLangCode } from './youtube/labels.js';
// Language codes tried for the secondary sidecar when the caller supplies none; also
// appended whenever a requested language is recognised as English.
const DEFAULT_SECONDARY_SUBTITLE_LANGUAGES = ['en', 'eng', 'english', 'en-us', 'enus'];
// Language codes tried for the primary (alignment reference) sidecar when none are supplied.
const DEFAULT_PRIMARY_SUBTITLE_LANGUAGES = ['ja', 'jpn', 'jp', 'japanese'];
// Sidecar file extensions that are considered at all.
const SUPPORTED_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass', '.ssa']);
// Slack (in seconds) added around the requested window when matching cues.
const TIMING_TOLERANCE_SECONDS = 0.25;
// Two cues count as having the same timing when start and end differ by at most this much.
const SAME_TIMING_EPSILON_SECONDS = 0.001;
// Timeout passed to runCommand for one alass invocation.
const RETIMED_SUBTITLE_TIMEOUT_MS = 30_000;
// Well-known alass install locations checked in addition to PATH.
const FALLBACK_ALASS_PATHS = [
'/opt/homebrew/bin/alass-cli',
'/opt/homebrew/bin/alass',
'/usr/local/bin/alass-cli',
'/usr/local/bin/alass',
'/usr/bin/alass',
];
// A sidecar subtitle file that matched a preferred language, plus the keys used to order candidates.
type SidecarCandidate = {
path: string;
// Index of the first preferred language matched by the filename suffix (lower sorts first).
languageRank: number;
// Value from extensionRank() for the file extension (lower sorts first).
extensionRank: number;
// Directory entry name; used as the final sort tie-breaker.
name: string;
};
// Cache record for one retimed secondary subtitle.
type RetimedSubtitleCacheEntry = {
// Output path of the retimed subtitle file.
path: string;
// Temporary directory holding `path`; removed on cleanup.
cleanupDir: string;
// Set while the alass run is in flight; cleared once it succeeds.
promise?: Promise<string>;
};
// Runs alass: `referencePath` is the primary subtitle, `inputPath` the secondary one to retime.
export type RetimedSubtitleCommandRunner = (
alassPath: string,
referencePath: string,
inputPath: string,
outputPath: string,
) => Promise<CommandResult>;
// Input for resolveRetimedSecondarySubtitleTextFromSidecar.
export type RetimedSecondarySubtitleInput = {
// Path of the video file whose sidecar subtitles are searched.
sourcePath: string;
// Requested window, in milliseconds.
startMs: number;
endMs: number;
// Preferred secondary languages; defaults apply when omitted or empty.
languages?: readonly string[];
// Preferred primary (reference) languages; defaults apply when omitted or empty.
primaryLanguages?: readonly string[];
// Explicit alass executable; when blank, alass is looked up on PATH and fallback locations.
alassPath?: string | null;
// Override for the alass runner (defaults to defaultRunAlass).
runAlass?: RetimedSubtitleCommandRunner;
};
// Retimed subtitle outputs keyed by retimedCacheKey() (alass path + both file paths and signatures).
const retimedSubtitleCache = new Map<string, RetimedSubtitleCacheEntry>();
// Guards against registering the process 'exit' cleanup hook more than once.
let retimedSubtitleCleanupRegistered = false;
/**
 * Returns the non-empty strings of `values` with duplicates removed, keeping the
 * position of each first occurrence.
 *
 * Uses a `Set` (which preserves insertion order) instead of a per-element
 * `indexOf` scan, so the work is linear rather than quadratic in the input size.
 */
function unique(values: string[]): string[] {
  return Array.from(new Set(values)).filter((value) => value.length > 0);
}
/**
 * Builds the ordered, de-duplicated list of language codes to look for.
 *
 * The requested languages are normalized; when none remain, `fallback` is used.
 * Every English entry is followed by all default secondary (English) aliases.
 */
function expandPreferredLanguages(
  languages: readonly string[] | undefined,
  fallback: readonly string[],
): string[] {
  const requested = unique(
    (languages ?? []).map((language) => normalizeYoutubeLangCode(language)).filter(Boolean),
  );
  const seeds = requested.length > 0 ? requested : [...fallback];
  return unique(
    seeds.flatMap((language) =>
      isEnglishYoutubeLang(language)
        ? [language, ...DEFAULT_SECONDARY_SUBTITLE_LANGUAGES]
        : [language],
    ),
  );
}
/**
 * Returns true when `filePath` exists and is a regular file.
 * Note: despite the name, execute permission is not checked here.
 */
function isExecutableFile(filePath: string): boolean {
  let isRegularFile = false;
  try {
    isRegularFile = statSync(filePath).isFile();
  } catch {
    // Missing or unreadable paths are treated as "not usable".
  }
  return isRegularFile;
}
/**
 * Lists the directories to search for executables: every non-blank `PATH` entry,
 * followed by the directories of the known fallback alass locations, de-duplicated.
 */
function pathEntries(): string[] {
  const fromEnvironment: string[] = [];
  for (const rawEntry of (process.env.PATH ?? '').split(path.delimiter)) {
    const entry = rawEntry.trim();
    if (entry) fromEnvironment.push(entry);
  }
  const fallbackDirs = FALLBACK_ALASS_PATHS.map((candidate) => path.dirname(candidate));
  return unique([...fromEnvironment, ...fallbackDirs]);
}
/**
 * Expands an executable name into the file names to probe.
 *
 * Outside Windows, or when the name already has an extension, that is just `[name]`.
 * On Windows an extension-less name is followed by one variant per `PATHEXT` entry
 * (defaulting to `.EXE;.CMD;.BAT`).
 */
function executableNames(name: string): string[] {
  const needsWindowsVariants = process.platform === 'win32' && !path.extname(name);
  if (!needsWindowsVariants) return [name];

  const variants = [name];
  for (const rawExtension of (process.env.PATHEXT ?? '.EXE;.CMD;.BAT').split(';')) {
    const extension = rawExtension.trim();
    if (extension) variants.push(`${name}${extension}`);
  }
  return variants;
}
/**
 * Locates the first usable executable among `names`.
 *
 * If any name contains a directory component, only the first such name is
 * considered and it is returned when it is an existing file (otherwise `''`).
 * Bare names are probed in every search directory, then the hard-coded fallback
 * alass locations are tried. Returns `''` when nothing is found.
 */
function findExecutable(names: readonly string[]): string {
  const explicitPath = names.find((name) => path.dirname(name) !== '.');
  if (explicitPath !== undefined) {
    return isExecutableFile(explicitPath) ? explicitPath : '';
  }

  for (const dir of pathEntries()) {
    for (const name of names) {
      const hit = executableNames(name)
        .map((executableName) => path.join(dir, executableName))
        .find((candidate) => isExecutableFile(candidate));
      if (hit !== undefined) return hit;
    }
  }

  return FALLBACK_ALASS_PATHS.find((candidate) => isExecutableFile(candidate)) ?? '';
}
/**
 * Resolves the alass executable: a non-blank configured path is looked up as-is,
 * otherwise the standard names `alass` and `alass-cli` are searched.
 * Returns `''` when no executable is found.
 */
function resolveAlassPath(configuredPath: string | null | undefined): string {
  const configured = configuredPath?.trim() ?? '';
  const names = configured ? [configured] : ['alass', 'alass-cli'];
  return findExecutable(names);
}
/**
 * Builds a change-detection signature (`"<size>:<mtimeMs>"`) for a regular file.
 * Returns null when the path cannot be stat'ed or is not a regular file.
 */
function fileSignature(filePath: string): string | null {
  try {
    const info = statSync(filePath);
    return info.isFile() ? `${info.size}:${info.mtimeMs}` : null;
  } catch {
    return null;
  }
}
/**
 * Builds the cache key for a retiming job from the alass path plus the path and
 * current signature of both subtitle files, joined with NUL characters.
 * Returns null when either file has no signature (missing or not a regular file).
 */
function retimedCacheKey(
  alassPath: string,
  primaryPath: string,
  secondaryPath: string,
): string | null {
  const primarySignature = fileSignature(primaryPath);
  const secondarySignature = fileSignature(secondaryPath);
  if (primarySignature && secondarySignature) {
    const parts = [alassPath, primaryPath, primarySignature, secondaryPath, secondarySignature];
    return parts.join('\0');
  }
  return null;
}
/**
 * Empties the retimed-subtitle cache and removes every temporary directory it
 * referenced. Directory removal is best-effort: failures are ignored.
 */
function cleanupRetimedSubtitleCache(): void {
  const tempDirs = Array.from(retimedSubtitleCache.values(), (entry) => entry.cleanupDir);
  retimedSubtitleCache.clear();
  for (const tempDir of tempDirs) {
    try {
      rmSync(tempDir, { recursive: true, force: true });
    } catch {
      // Best-effort temp cleanup.
    }
  }
}
/**
 * Installs, at most once, a process `exit` hook that clears the retimed-subtitle
 * cache and its temporary directories.
 */
function registerRetimedSubtitleCleanup(): void {
  if (!retimedSubtitleCleanupRegistered) {
    retimedSubtitleCleanupRegistered = true;
    process.once('exit', cleanupRetimedSubtitleCache);
  }
}
/**
 * Public entry point for dropping all cached retimed subtitles and deleting
 * their temporary directories (delegates to cleanupRetimedSubtitleCache).
 */
export function clearRetimedSecondarySubtitleCache(): void {
cleanupRetimedSubtitleCache();
}
/**
 * Turns a sidecar filename suffix into candidate language tokens: the normalized
 * whole suffix first, then each normalized piece obtained by splitting on any
 * run of characters other than ASCII letters, digits and `-`. Empty results
 * and duplicates are dropped.
 */
function splitLanguageSuffix(value: string): string[] {
  const pieces: string[] = [];
  for (const rawPiece of value.split(/[^A-Za-z0-9-]+/g)) {
    const normalizedPiece = normalizeYoutubeLangCode(rawPiece);
    if (normalizedPiece) pieces.push(normalizedPiece);
  }
  return unique([normalizeYoutubeLangCode(value), ...pieces]);
}
/**
 * Decides whether a filename language token satisfies a preferred language:
 * exact match, one being a `-`-separated extension of the other
 * (e.g. `en-us` vs `en`), or both being recognised as English.
 */
function languageTokenMatches(token: string, preferredLanguage: string): boolean {
  const isExact = token === preferredLanguage;
  const isRegionVariant =
    token.startsWith(`${preferredLanguage}-`) || preferredLanguage.startsWith(`${token}-`);
  if (isExact || isRegionVariant) {
    return true;
  }
  return isEnglishYoutubeLang(token) && isEnglishYoutubeLang(preferredLanguage);
}
/**
 * Ranks a filename suffix by the index of the first preferred language that any
 * of its tokens matches. Returns positive infinity when nothing matches.
 */
function resolveLanguageRank(suffix: string, preferredLanguages: string[]): number {
  const tokens = splitLanguageSuffix(suffix);
  const rank = preferredLanguages.findIndex((preferredLanguage) =>
    tokens.some((token) => languageTokenMatches(token, preferredLanguage)),
  );
  return rank === -1 ? Number.POSITIVE_INFINITY : rank;
}
/**
 * Orders subtitle formats by preference: `.srt` (0), `.vtt` (1), `.ass` (2),
 * `.ssa` (3); anything else ranks last (4). The extension is expected lower-cased.
 */
function extensionRank(ext: string): number {
  const preferenceOrder = ['.srt', '.vtt', '.ass', '.ssa'];
  const position = preferenceOrder.indexOf(ext);
  return position === -1 ? preferenceOrder.length : position;
}
/**
 * Finds subtitle files that sit next to `sourcePath` and carry a preferred
 * language in their name (`<video name>.<suffix>.<ext>`).
 *
 * @param sourcePath Path of the video file.
 * @param preferredLanguages Language codes in priority order.
 * @returns Matching candidates sorted by language rank, then extension rank,
 *   then file name. Empty when the directory cannot be read.
 */
function findSidecarSubtitleCandidates(
  sourcePath: string,
  preferredLanguages: string[],
): SidecarCandidate[] {
  const source = path.parse(sourcePath);
  // path.parse() reports dir === '' for a bare filename; readdirSync('') would
  // throw, so treat that case as the current directory.
  const directory = source.dir || '.';
  let entries: string[];
  try {
    entries = readdirSync(directory);
  } catch {
    return [];
  }

  const prefix = `${source.name}.`;
  const candidates: SidecarCandidate[] = [];
  for (const entry of entries) {
    const parsed = path.parse(entry);
    const ext = parsed.ext.toLowerCase();
    if (!SUPPORTED_SUBTITLE_EXTENSIONS.has(ext) || !parsed.name.startsWith(prefix)) {
      continue;
    }

    // Whatever sits between the video name and the extension is the language suffix.
    const suffix = parsed.name.slice(prefix.length);
    const languageRank = resolveLanguageRank(suffix, preferredLanguages);
    if (!Number.isFinite(languageRank)) {
      continue;
    }

    candidates.push({
      path: path.join(directory, entry),
      languageRank,
      extensionRank: extensionRank(ext),
      name: entry,
    });
  }

  return candidates.sort((left, right) => {
    if (left.languageRank !== right.languageRank) return left.languageRank - right.languageRank;
    if (left.extensionRank !== right.extensionRank) {
      return left.extensionRank - right.extensionRank;
    }
    return left.name.localeCompare(right.name);
  });
}
/**
 * Joins the distinct, non-blank, trimmed texts of the given cues with newlines.
 */
function combineCueText(cues: SubtitleCue[]): string {
  const texts: string[] = [];
  for (const cue of cues) {
    const text = cue.text.trim();
    if (text) texts.push(text);
  }
  return unique(texts).join('\n').trim();
}
/**
 * Length in seconds of the intersection between a cue and the requested window,
 * after widening the window by the timing tolerance on both sides.
 * The result is zero or negative when they do not intersect.
 */
function overlapSeconds(cue: SubtitleCue, startSeconds: number, endSeconds: number): number {
  const windowStart = startSeconds - TIMING_TOLERANCE_SECONDS;
  const windowEnd = endSeconds + TIMING_TOLERANCE_SECONDS;
  const overlapEnd = Math.min(cue.endTime, windowEnd);
  const overlapStart = Math.max(cue.startTime, windowStart);
  return overlapEnd - overlapStart;
}
/**
 * Two cues share a timing when both their start and end times differ by no more
 * than the same-timing epsilon.
 */
function isSameCueTiming(left: SubtitleCue, right: SubtitleCue): boolean {
  const startGap = Math.abs(left.startTime - right.startTime);
  if (!(startGap <= SAME_TIMING_EPSILON_SECONDS)) {
    return false;
  }
  const endGap = Math.abs(left.endTime - right.endTime);
  return endGap <= SAME_TIMING_EPSILON_SECONDS;
}
/**
 * Sort comparator that orders cue matches from best to worst:
 * larger overlap first, then start time closest to `startSeconds`, then end
 * time closest to `endSeconds`, and finally the earlier-starting cue.
 */
function compareCueTimingMatch(
  startSeconds: number,
  endSeconds: number,
  left: { cue: SubtitleCue; overlap: number },
  right: { cue: SubtitleCue; overlap: number },
): number {
  if (left.overlap !== right.overlap) {
    return right.overlap - left.overlap;
  }

  const startGap = (match: { cue: SubtitleCue }): number =>
    Math.abs(match.cue.startTime - startSeconds);
  const endGap = (match: { cue: SubtitleCue }): number =>
    Math.abs(match.cue.endTime - endSeconds);

  const leftStartGap = startGap(left);
  const rightStartGap = startGap(right);
  if (leftStartGap !== rightStartGap) {
    return leftStartGap - rightStartGap;
  }

  const leftEndGap = endGap(left);
  const rightEndGap = endGap(right);
  if (leftEndGap !== rightEndGap) {
    return leftEndGap - rightEndGap;
  }

  return left.cue.startTime - right.cue.startTime;
}
/**
 * Picks the subtitle text that best corresponds to the window `[startMs, endMs]`.
 *
 * First pass: cues that cover the window's midpoint (within tolerance) are ranked;
 * the text of the best one is returned, combined with any other midpoint cue that
 * has the same timing. Second pass, used when the first yields no text: the single
 * best cue with a positive overlap. Returns `''` when nothing matches.
 */
function findCueTextAtTiming(cues: SubtitleCue[], startMs: number, endMs: number): string {
  const startSeconds = startMs / 1000;
  const endSeconds = endMs / 1000;
  const midpointSeconds = (startSeconds + endSeconds) / 2;

  type RankedCue = { cue: SubtitleCue; overlap: number };
  const rank = (cue: SubtitleCue): RankedCue => ({
    cue,
    overlap: overlapSeconds(cue, startSeconds, endSeconds),
  });
  const bestFirst = (left: RankedCue, right: RankedCue): number =>
    compareCueTimingMatch(startSeconds, endSeconds, left, right);

  const coveringMidpoint = cues
    .filter(
      (cue) =>
        cue.startTime - TIMING_TOLERANCE_SECONDS <= midpointSeconds &&
        cue.endTime + TIMING_TOLERANCE_SECONDS >= midpointSeconds,
    )
    .map((cue) => rank(cue))
    .sort(bestFirst);

  const topMidpoint = coveringMidpoint[0];
  if (topMidpoint) {
    const sameTimingCues = coveringMidpoint
      .filter((match) => isSameCueTiming(match.cue, topMidpoint.cue))
      .map((match) => match.cue);
    const combined = combineCueText(sameTimingCues);
    if (combined) {
      return combined;
    }
  }

  const overlapping = cues
    .map((cue) => rank(cue))
    .filter((match) => match.overlap > 0)
    .sort(bestFirst);
  const topOverlap = overlapping[0];
  return topOverlap ? topOverlap.cue.text.trim() : '';
}
/**
 * Reads a subtitle file as UTF-8, parses its cues and returns the text that best
 * matches the window `[startMs, endMs]`. Read and parse errors propagate to the caller.
 */
function readCueTextAtTiming(filePath: string, startMs: number, endMs: number): string {
  const parsedCues = parseSubtitleCues(readFileSync(filePath, 'utf8'), filePath);
  return findCueTextAtTiming(parsedCues, startMs, endMs);
}
/**
 * Default alass runner: invokes `alassPath` with the reference, input and output
 * paths as arguments, bounded by the retiming timeout.
 */
async function defaultRunAlass(
  alassPath: string,
  referencePath: string,
  inputPath: string,
  outputPath: string,
): Promise<CommandResult> {
  const alassArgs = [referencePath, inputPath, outputPath];
  return runCommand(alassPath, alassArgs, RETIMED_SUBTITLE_TIMEOUT_MS);
}
/**
 * Retimes the secondary subtitle against the primary one with alass and returns
 * the path of the retimed file, or `''` when retiming is not possible or fails.
 *
 * Results are cached per (alass path, both files and their signatures):
 * - an in-flight run is shared between concurrent callers;
 * - a finished run is reused while its output file still exists;
 * - failed runs are not cached, so a later call retries.
 *
 * Cleanup of the temporary directory is best-effort and never prevents the
 * cache entry of a failed run from being dropped.
 */
async function retimeSecondarySubtitle(input: {
  alassPath: string;
  primaryPath: string;
  secondaryPath: string;
  runAlass: RetimedSubtitleCommandRunner;
}): Promise<string> {
  const key = retimedCacheKey(input.alassPath, input.primaryPath, input.secondaryPath);
  if (!key) return '';

  const removeDirBestEffort = (dir: string): void => {
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      // Best-effort temp cleanup; a leftover directory is harmless.
    }
  };

  const cached = retimedSubtitleCache.get(key);
  if (cached?.promise) {
    return cached.promise;
  }
  if (cached && existsSync(cached.path)) {
    return cached.path;
  }
  if (cached) {
    // The cached output vanished from disk: forget it and retime again.
    retimedSubtitleCache.delete(key);
    removeDirBestEffort(cached.cleanupDir);
  }

  registerRetimedSubtitleCleanup();
  const cleanupDir = mkdtempSync(path.join(os.tmpdir(), 'subminer-retimed-secondary-'));
  const parsedSecondary = path.parse(input.secondaryPath);
  const outputPath = path.join(
    cleanupDir,
    `${parsedSecondary.name}.retimed${parsedSecondary.ext || '.srt'}`,
  );
  const entry: RetimedSubtitleCacheEntry = { path: outputPath, cleanupDir };

  // Drops this run's cache entry and temp directory. The entry is removed first
  // and only when it is still the one stored under the key, so a failed cleanup
  // cannot leave a stale entry behind and a newer entry is never evicted.
  const discard = (): string => {
    if (retimedSubtitleCache.get(key) === entry) {
      retimedSubtitleCache.delete(key);
    }
    removeDirBestEffort(cleanupDir);
    return '';
  };

  entry.promise = input
    .runAlass(input.alassPath, input.primaryPath, input.secondaryPath, outputPath)
    .then((result) => {
      if (!result.ok || !existsSync(outputPath)) {
        return discard();
      }
      // Clearing the promise marks the entry as completed.
      entry.promise = undefined;
      return outputPath;
    })
    .catch(() => discard());
  retimedSubtitleCache.set(key, entry);
  return entry.promise;
}
/**
 * Looks up the secondary-language text for the window `[startMs, endMs]` in the
 * sidecar subtitle files next to `sourcePath`.
 *
 * Candidates are tried in preference order; the first one that yields text wins.
 * Returns `''` when the source is not an existing regular file, no sidecar
 * matches, or no candidate has text for the window. Unreadable or unparsable
 * sidecars are skipped.
 */
export function resolveSecondarySubtitleTextFromSidecar(input: {
  sourcePath: string;
  startMs: number;
  endMs: number;
  languages?: readonly string[];
}): string {
  const { sourcePath, startMs, endMs, languages } = input;
  if (!sourcePath || !existsSync(sourcePath)) {
    return '';
  }

  let sourceIsFile = false;
  try {
    sourceIsFile = statSync(sourcePath).isFile();
  } catch {
    // Treated like a missing source below.
  }
  if (!sourceIsFile) {
    return '';
  }

  const preferredLanguages = expandPreferredLanguages(
    languages,
    DEFAULT_SECONDARY_SUBTITLE_LANGUAGES,
  );
  for (const { path: sidecarPath } of findSidecarSubtitleCandidates(
    sourcePath,
    preferredLanguages,
  )) {
    let text = '';
    try {
      text = readCueTextAtTiming(sidecarPath, startMs, endMs);
    } catch {
      // Try the next matching sidecar.
    }
    if (text) {
      return text;
    }
  }

  return '';
}
/**
 * Like the plain sidecar lookup, but first retimes each secondary sidecar against
 * a primary-language sidecar with alass, then reads the text for `[startMs, endMs]`
 * from the retimed file.
 *
 * Every (primary, secondary) pair is tried in preference order, skipping pairs
 * that point at the same file; the first pair that yields text wins. Returns `''`
 * when the source is not an existing regular file, alass cannot be found, or no
 * pair produces text. Failures of individual pairs are ignored.
 */
export async function resolveRetimedSecondarySubtitleTextFromSidecar(
  input: RetimedSecondarySubtitleInput,
): Promise<string> {
  const { sourcePath, startMs, endMs } = input;
  if (!sourcePath || !existsSync(sourcePath)) {
    return '';
  }

  let sourceIsFile = false;
  try {
    sourceIsFile = statSync(sourcePath).isFile();
  } catch {
    // Treated like a missing source below.
  }
  if (!sourceIsFile) {
    return '';
  }

  const alassPath = resolveAlassPath(input.alassPath);
  if (!alassPath) return '';

  const primaryLanguages = expandPreferredLanguages(
    input.primaryLanguages,
    DEFAULT_PRIMARY_SUBTITLE_LANGUAGES,
  );
  const secondaryLanguages = expandPreferredLanguages(
    input.languages,
    DEFAULT_SECONDARY_SUBTITLE_LANGUAGES,
  );
  const primaryCandidates = findSidecarSubtitleCandidates(sourcePath, primaryLanguages);
  const secondaryCandidates = findSidecarSubtitleCandidates(sourcePath, secondaryLanguages);
  const runAlass = input.runAlass ?? defaultRunAlass;

  for (const primary of primaryCandidates) {
    for (const secondary of secondaryCandidates) {
      if (secondary.path === primary.path) continue;
      try {
        const retimedPath = await retimeSecondarySubtitle({
          alassPath,
          primaryPath: primary.path,
          secondaryPath: secondary.path,
          runAlass,
        });
        const text = retimedPath ? readCueTextAtTiming(retimedPath, startMs, endMs) : '';
        if (text) return text;
      } catch {
        // Try the next sidecar pair.
      }
    }
  }

  return '';
}
+487 -56
View File
@@ -1,5 +1,6 @@
import { Hono } from 'hono';
import type { ImmersionTrackerService } from './immersion-tracker-service.js';
import { splitSentenceSearchTerms } from './immersion-tracker/query-lexical.js';
import http, { type IncomingMessage, type ServerResponse } from 'node:http';
import { basename, extname, resolve, sep } from 'node:path';
import { readFileSync, existsSync, statSync } from 'node:fs';
@@ -7,6 +8,7 @@ import { Readable } from 'node:stream';
import { MediaGenerator } from '../../media-generator.js';
import { AnkiConnectClient } from '../../anki-connect.js';
import type { AnkiConnectConfig } from '../../types.js';
import { createLogger } from '../../logger.js';
import {
getConfiguredSentenceFieldName,
getConfiguredTranslationFieldName,
@@ -15,18 +17,50 @@ import {
} from '../../anki-field-config.js';
import { resolveAnimatedImageLeadInSeconds } from '../../anki-integration/animated-image-sync.js';
import type { AnilistRateLimiter } from './anilist/rate-limiter.js';
import {
resolveRetimedSecondarySubtitleTextFromSidecar,
resolveSecondarySubtitleTextFromSidecar,
type RetimedSecondarySubtitleInput,
} from './secondary-subtitle-sidecar.js';
// Minimal shape of an Anki note as used by the stats server: its id and field values by name.
type StatsServerNoteInfo = {
noteId: number;
fields: Record<string, { value: string }>;
};
// Subset of MediaGenerator used by the stats server; each method mirrors the
// MediaGenerator argument list and resolves to a Buffer or null.
type StatsServerMediaGenerator = {
generateAudio: (...args: Parameters<MediaGenerator['generateAudio']>) => Promise<Buffer | null>;
generateScreenshot: (
...args: Parameters<MediaGenerator['generateScreenshot']>
) => Promise<Buffer | null>;
generateAnimatedImage: (
...args: Parameters<MediaGenerator['generateAnimatedImage']>
) => Promise<Buffer | null>;
};
// Timing sample for a mining operation; delivered through the `onMiningTiming` option.
export type StatsMiningTimingEvent = {
mode: 'word' | 'sentence' | 'audio';
// Name of the measured phase.
phase: string;
elapsedMs: number;
noteId?: number;
};
// One excluded-word entry as exchanged with the stats client.
type StatsExcludedWordPayload = {
headword: string;
word: string;
reading: string;
};
// Cover art as a data URL plus its detected content type; null when no cover is stored.
type StatsCoverImagePayload = {
contentType: string;
dataUrl: string;
} | null;
// Request body for batched cover lookups; both lists are untyped until validated.
// NOTE(review): presumably validated with parsePositiveIdList — confirm at the route handler.
type StatsCoverBatchBody = {
animeIds?: unknown;
videoIds?: unknown;
};
function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number {
if (raw === undefined) return fallback;
const n = Number(raw);
@@ -73,6 +107,62 @@ function parseExcludedWordsBody(body: unknown): StatsExcludedWordPayload[] | nul
return words;
}
/**
 * Extracts a sorted, de-duplicated list of positive integer ids from untrusted input.
 *
 * Numbers and numeric strings are accepted; fractional values are floored.
 * Positivity is checked after flooring, so values such as `0.5` are rejected
 * instead of producing id `0`.
 *
 * @param raw Untrusted value; anything that is not an array yields `[]`.
 * @param maxItems Maximum number of distinct ids collected (in input order)
 *   before the rest of the input is ignored. Values `<= 0` yield `[]`.
 * @returns The collected ids in ascending order.
 */
function parsePositiveIdList(raw: unknown, maxItems = 100): number[] {
  if (!Array.isArray(raw)) return [];
  const ids = new Set<number>();
  for (const rawId of raw) {
    if (ids.size >= maxItems) break;
    const parsed =
      typeof rawId === 'number' ? rawId : typeof rawId === 'string' ? Number(rawId) : NaN;
    if (!Number.isFinite(parsed)) continue;
    const id = Math.floor(parsed);
    if (id > 0) {
      ids.add(id);
    }
  }
  return Array.from(ids).sort((a, b) => a - b);
}
/**
 * Converts stored cover art into a payload with a sniffed content type and a
 * base64 `data:` URL. Returns null when there is no cover blob.
 */
function coverImagePayload(
  art: { coverBlob?: Uint8Array | null } | null | undefined,
): StatsCoverImagePayload {
  const blob = art?.coverBlob;
  if (!blob) return null;

  const bytes = new Uint8Array(blob);
  const contentType = detectImageContentType(bytes);
  const base64 = Buffer.from(bytes).toString('base64');
  return { contentType, dataUrl: `data:${contentType};base64,${base64}` };
}
/**
 * Sniffs the image MIME type from leading magic bytes.
 *
 * Recognises PNG (first four signature bytes, at least 8 bytes long), JPEG
 * (`FF D8 FF`) and WebP (`RIFF` at offset 0 and `WEBP` at offset 8).
 * Anything else is reported as `application/octet-stream`.
 */
function detectImageContentType(bytes: Uint8Array): string {
  const hasSignatureAt = (offset: number, signature: readonly number[]): boolean =>
    signature.every((expected, index) => bytes[offset + index] === expected);

  if (bytes.length >= 8 && hasSignatureAt(0, [0x89, 0x50, 0x4e, 0x47])) {
    return 'image/png';
  }
  if (bytes.length >= 3 && hasSignatureAt(0, [0xff, 0xd8, 0xff])) {
    return 'image/jpeg';
  }
  const isWebp =
    bytes.length >= 12 &&
    hasSignatureAt(0, [0x52, 0x49, 0x46, 0x46]) &&
    hasSignatureAt(8, [0x57, 0x45, 0x42, 0x50]);
  return isWebp ? 'image/webp' : 'application/octet-stream';
}
function resolveStatsNoteFieldName(
noteInfo: StatsServerNoteInfo,
...preferredNames: (string | undefined)[]
@@ -87,6 +177,57 @@ function resolveStatsNoteFieldName(
return null;
}
/**
 * Returns the trimmed, non-blank field names with case-insensitive duplicates
 * removed. The first spelling of each name wins and input order is preserved.
 */
function uniqueFieldNames(...fieldNames: (string | null | undefined)[]): string[] {
  const firstSpellingByKey = new Map<string, string>();
  for (const fieldName of fieldNames) {
    const trimmed = fieldName?.trim();
    if (!trimmed) continue;
    const key = trimmed.toLowerCase();
    if (!firstSpellingByKey.has(key)) {
      firstSpellingByKey.set(key, trimmed);
    }
  }
  return Array.from(firstSpellingByKey.values());
}
/**
 * Chooses the audio field for word mining.
 *
 * With note info available, the note's `SentenceAudio` field or the configured
 * audio field is used when resolvable on the note; otherwise the configured audio
 * field, and finally `'ExpressionAudio'`.
 */
function getStatsWordMiningAudioFieldName(
  ankiConfig: AnkiConnectConfig,
  noteInfo: StatsServerNoteInfo | null,
): string {
  const resolvedOnNote = noteInfo
    ? resolveStatsNoteFieldName(noteInfo, 'SentenceAudio', ankiConfig.fields?.audio)
    : null;
  if (resolvedOnNote !== null && resolvedOnNote !== undefined) {
    return resolvedOnNote;
  }
  return ankiConfig.fields?.audio ?? 'ExpressionAudio';
}
/**
 * Chooses the audio field(s) to fill for direct sentence/audio mining.
 *
 * Without Lapis or Kiku enabled, only the configured audio field
 * (default `'ExpressionAudio'`) is used. Otherwise the sentence-audio field is
 * used, and in `'audio'` mode the expression-audio field is added as well;
 * duplicate and unresolved names are dropped.
 */
function getStatsDirectMiningAudioFieldNames(
  ankiConfig: AnkiConnectConfig,
  noteInfo: StatsServerNoteInfo | null,
  mode: 'sentence' | 'audio',
): string[] {
  const configuredAudioField = ankiConfig.fields?.audio ?? 'ExpressionAudio';
  const usesSentenceAudioLayout = Boolean(ankiConfig.isLapis?.enabled || ankiConfig.isKiku?.enabled);
  if (!usesSentenceAudioLayout) {
    return [configuredAudioField];
  }

  // Without note info the names cannot be checked against the note, so use them as-is.
  const sentenceAudioField =
    noteInfo === null
      ? 'SentenceAudio'
      : resolveStatsNoteFieldName(noteInfo, 'SentenceAudio', configuredAudioField);
  const expressionAudioField =
    noteInfo === null
      ? configuredAudioField
      : resolveStatsNoteFieldName(noteInfo, configuredAudioField);

  return mode === 'sentence'
    ? uniqueFieldNames(sentenceAudioField)
    : uniqueFieldNames(sentenceAudioField, expressionAudioField);
}
function toFetchHeaders(headers: IncomingMessage['headers']): Headers {
const fetchHeaders = new Headers();
for (const [name, value] of Object.entries(headers)) {
@@ -256,9 +397,19 @@ export interface StatsServerConfig {
knownWordCachePath?: string;
mpvSocketPath?: string;
ankiConnectConfig?: AnkiConnectConfig;
getAnkiConnectConfig?: () => AnkiConnectConfig | undefined;
getYomitanAnkiDeckName?: () => Promise<string | null | undefined> | string | null | undefined;
secondarySubtitleLanguages?: string[];
getSecondarySubtitleLanguages?: () => string[] | undefined;
statsMiningAlassPath?: string;
getStatsMiningAlassPath?: () => string | null | undefined;
resolveRetimedSecondarySubtitleText?: (
input: RetimedSecondarySubtitleInput,
) => Promise<string> | string;
anilistRateLimiter?: AnilistRateLimiter;
addYomitanNote?: (word: string) => Promise<number | null>;
resolveAnkiNoteId?: (noteId: number) => number;
resolveSentenceSearchHeadwords?: (term: string) => Promise<string[]> | string[];
}
const STATS_STATIC_CONTENT_TYPES: Record<string, string> = {
@@ -279,6 +430,52 @@ const STATS_STATIC_CONTENT_TYPES: Record<string, string> = {
'.woff2': 'font/woff2',
};
// Timeout in milliseconds for AnkiConnect fetches (usage is outside this excerpt — TODO confirm call sites).
const ANKI_CONNECT_FETCH_TIMEOUT_MS = 3_000;
// Logger for stats mining, created under the 'stats:mining' scope.
const statsMiningLogger = createLogger('stats:mining');
/** Default clock: current wall-clock time in milliseconds, used when no `nowMs` option is supplied. */
function defaultNowMs(): number {
return Date.now();
}
/**
 * Parses a boolean query-string flag.
 *
 * A missing or blank value yields `fallback`. `0`, `false`, `no` and `off`
 * (case-insensitive, surrounding whitespace ignored) mean false; any other
 * value means true.
 */
function parseBooleanQuery(raw: string | undefined, fallback: boolean): boolean {
  const token = raw?.trim().toLowerCase();
  if (!token) return fallback;
  return token !== '0' && token !== 'false' && token !== 'no' && token !== 'off';
}
/**
 * Trims every value and returns the distinct non-empty results, in order of
 * first appearance. Comparison is case-sensitive.
 */
function uniqueNonEmptyStrings(values: readonly string[]): string[] {
  const trimmedValues = values.map((value) => value.trim());
  return Array.from(new Set(trimmedValues)).filter((value) => value !== '');
}
/**
 * Builds headword-based sentence search options for a query.
 *
 * Returns undefined when headword search is off or no term resolves to a
 * headword. Otherwise each term of the query is resolved, one after another,
 * through `resolveSentenceSearchHeadwords` (or mapped to itself when no resolver
 * is given) and kept when at least one non-blank headword results.
 */
async function buildSentenceSearchOptions(
  query: string,
  searchByHeadword: boolean,
  resolveSentenceSearchHeadwords: ((term: string) => Promise<string[]> | string[]) | undefined,
): Promise<{ headwordTerms: Array<{ term: string; headwords: string[] }> } | undefined> {
  if (!searchByHeadword) return undefined;

  const headwordTerms: Array<{ term: string; headwords: string[] }> = [];
  for (const term of splitSentenceSearchTerms(query)) {
    const candidates = resolveSentenceSearchHeadwords
      ? await resolveSentenceSearchHeadwords(term)
      : [term];
    const headwords = uniqueNonEmptyStrings(candidates);
    if (headwords.length === 0) continue;
    headwordTerms.push({ term, headwords });
  }

  if (headwordTerms.length === 0) return undefined;
  return { headwordTerms };
}
function buildAnkiNotePreview(
fields: Record<string, { value: string }>,
@@ -340,12 +537,81 @@ export function createStatsApp(
knownWordCachePath?: string;
mpvSocketPath?: string;
ankiConnectConfig?: AnkiConnectConfig;
getAnkiConnectConfig?: () => AnkiConnectConfig | undefined;
getYomitanAnkiDeckName?: () => Promise<string | null | undefined> | string | null | undefined;
secondarySubtitleLanguages?: string[];
getSecondarySubtitleLanguages?: () => string[] | undefined;
statsMiningAlassPath?: string;
getStatsMiningAlassPath?: () => string | null | undefined;
resolveRetimedSecondarySubtitleText?: (
input: RetimedSecondarySubtitleInput,
) => Promise<string> | string;
anilistRateLimiter?: AnilistRateLimiter;
addYomitanNote?: (word: string) => Promise<number | null>;
resolveAnkiNoteId?: (noteId: number) => number;
resolveSentenceSearchHeadwords?: (term: string) => Promise<string[]> | string[];
createMediaGenerator?: () => StatsServerMediaGenerator;
onMiningTiming?: (event: StatsMiningTimingEvent) => void;
nowMs?: () => number;
},
) {
const app = new Hono();
const nowMs = options?.nowMs ?? defaultNowMs;
// Prefers the value from the `getAnkiConnectConfig` callback; falls back to the
// static `ankiConnectConfig` option when the callback is absent or returns nullish.
const getAnkiConnectConfig = (): AnkiConnectConfig | undefined => {
  const liveConfig = options?.getAnkiConnectConfig?.();
  return liveConfig ?? options?.ankiConnectConfig;
};
// Resolution order: callback result, then the static option, then an empty list.
const getSecondarySubtitleLanguages = (): string[] => {
  const liveLanguages = options?.getSecondarySubtitleLanguages?.();
  const resolvedLanguages = liveLanguages ?? options?.secondarySubtitleLanguages;
  return resolvedLanguages ?? [];
};
// A nullish callback result (including an explicit `null`) falls back to the
// static `statsMiningAlassPath` option.
const getStatsMiningAlassPath = (): string | null | undefined => {
  const liveAlassPath = options?.getStatsMiningAlassPath?.();
  return liveAlassPath ?? options?.statsMiningAlassPath;
};
// Picks the deck for stats mining: the trimmed `deck` from the AnkiConnect
// config when it is non-empty, otherwise the trimmed deck name reported by the
// `getYomitanAnkiDeckName` callback. Resolves to '' when neither yields a
// string or when the callback throws (the failure is logged as a warning).
const getEffectiveMiningDeckName = async (ankiConfig: AnkiConnectConfig): Promise<string> => {
  const explicitDeckName = ankiConfig.deck?.trim() ?? '';
  if (explicitDeckName.length > 0) {
    return explicitDeckName;
  }

  try {
    const fallbackDeckName = await options?.getYomitanAnkiDeckName?.();
    if (typeof fallbackDeckName !== 'string') {
      return '';
    }
    return fallbackDeckName.trim();
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    statsMiningLogger.warn('Failed to resolve Yomitan Anki deck for stats mining:', reason);
    return '';
  }
};
// Forwards a timing event to the optional `onMiningTiming` observer, then
// writes a debug log line with the elapsed time rounded to whole milliseconds.
const recordMiningTiming = (event: StatsMiningTimingEvent): void => {
  options?.onMiningTiming?.(event);

  const roundedElapsedMs = Math.round(event.elapsedMs);
  const summary = `[stats:mining] ${event.mode} ${event.phase} ${roundedElapsedMs}ms`;
  statsMiningLogger.debug(summary, event);
};
/**
 * Runs `fn` and reports how long it took as a single mining timing event.
 *
 * Elapsed time is measured with the injected `nowMs` clock. When `fn` rejects,
 * a timing event without extra details is recorded and the original error is
 * rethrown. When `fn` resolves, the event is enriched with whatever `details`
 * derives from the resolved value.
 *
 * The success-path report deliberately lives outside the `try` block: if the
 * timing observer or `details` throws after `fn` succeeded, that error must
 * not be mistaken for a failure of `fn` and produce a second timing event for
 * the same phase.
 *
 * @param mode - Mining mode the phase belongs to.
 * @param phase - Label of the phase being timed.
 * @param fn - The asynchronous work to time.
 * @param details - Optional extra event fields derived from the resolved value.
 * @returns The value `fn` resolved with.
 * @throws Whatever `fn` rejects with, or an error raised while reporting success.
 */
const timeMiningPhase = async <T>(
  mode: StatsMiningTimingEvent['mode'],
  phase: string,
  fn: () => Promise<T>,
  details?: (value: T) => Partial<StatsMiningTimingEvent>,
): Promise<T> => {
  const startedAtMs = nowMs();

  let value: T;
  try {
    value = await fn();
  } catch (err) {
    recordMiningTiming({
      mode,
      phase,
      elapsedMs: nowMs() - startedAtMs,
    });
    throw err;
  }

  recordMiningTiming({
    mode,
    phase,
    elapsedMs: nowMs() - startedAtMs,
    ...details?.(value),
  });
  return value;
};
app.get('/api/stats/overview', async (c) => {
const [rawSessions, rollups, hints] = await Promise.all([
@@ -509,6 +775,20 @@ export function createStatsApp(
return c.json(occurrences);
});
// GET /api/stats/sentences/search
// Query parameters: `q` (search text; blank yields an empty JSON array),
// `limit` (parsed by parseIntQuery with arguments 50 and 100), and `headword`
// (boolean flag, enabled unless explicitly switched off).
app.get('/api/stats/sentences/search', async (c) => {
  const searchText = (c.req.query('q') ?? '').trim();
  if (searchText.length === 0) {
    return c.json([]);
  }

  const maxRows = parseIntQuery(c.req.query('limit'), 50, 100);
  const headwordMode = parseBooleanQuery(c.req.query('headword'), true);
  const headwordOptions = await buildSentenceSearchOptions(
    searchText,
    headwordMode,
    options?.resolveSentenceSearchHeadwords,
  );

  const matches = await tracker.searchSubtitleSentences(searchText, maxRows, headwordOptions);
  return c.json(matches);
});
app.get('/api/stats/kanji', async (c) => {
const limit = parseIntQuery(c.req.query('limit'), 100, 500);
const kanji = await tracker.getKanjiStats(limit);
@@ -707,14 +987,36 @@ export function createStatsApp(
return c.json({ ok: true });
});
// POST /api/stats/covers
// Batch cover lookup. The JSON body may carry `animeIds` and `videoIds`; a body
// that fails to parse is treated as null. Responds with
// `{ anime, media }`, each keyed by the requested id. Anime covers are all
// fetched before the media covers start.
app.post('/api/stats/covers', async (c) => {
  const requestBody = (await c.req.json().catch(() => null)) as StatsCoverBatchBody | null;
  const requestedAnimeIds = parsePositiveIdList(requestBody?.animeIds);
  const requestedVideoIds = parsePositiveIdList(requestBody?.videoIds);

  const coversByAnimeId: Record<number, StatsCoverImagePayload> = {};
  const coversByVideoId: Record<number, StatsCoverImagePayload> = {};

  await Promise.all(
    requestedAnimeIds.map(async (id) => {
      const art = await tracker.getAnimeCoverArt(id);
      coversByAnimeId[id] = coverImagePayload(art);
    }),
  );
  await Promise.all(
    requestedVideoIds.map(async (id) => {
      const art = await tracker.getCoverArt(id);
      coversByVideoId[id] = coverImagePayload(art);
    }),
  );

  return c.json({ anime: coversByAnimeId, media: coversByVideoId });
});
app.get('/api/stats/anime/:animeId/cover', async (c) => {
const animeId = parseIntQuery(c.req.param('animeId'), 0);
if (animeId <= 0) return c.body(null, 404);
const art = await tracker.getAnimeCoverArt(animeId);
if (!art?.coverBlob) return c.body(null, 404);
return new Response(new Uint8Array(art.coverBlob), {
const bytes = new Uint8Array(art.coverBlob);
return new Response(bytes, {
headers: {
'Content-Type': 'image/jpeg',
'Content-Type': detectImageContentType(bytes),
'Cache-Control': 'public, max-age=86400',
},
});
@@ -729,9 +1031,10 @@ export function createStatsApp(
art = await tracker.getCoverArt(videoId);
}
if (!art?.coverBlob) return c.body(null, 404);
return new Response(new Uint8Array(art.coverBlob), {
const bytes = new Uint8Array(art.coverBlob);
return new Response(bytes, {
headers: {
'Content-Type': 'image/jpeg',
'Content-Type': detectImageContentType(bytes),
'Cache-Control': 'public, max-age=604800',
},
});
@@ -754,8 +1057,9 @@ export function createStatsApp(
app.post('/api/stats/anki/browse', async (c) => {
const noteId = parseIntQuery(c.req.query('noteId'), 0);
if (noteId <= 0) return c.body(null, 400);
const ankiConfig = getAnkiConnectConfig();
try {
const response = await fetch('http://127.0.0.1:8765', {
const response = await fetch(ankiConfig?.url ?? 'http://127.0.0.1:8765', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
signal: AbortSignal.timeout(ANKI_CONNECT_FETCH_TIMEOUT_MS),
@@ -791,7 +1095,8 @@ export function createStatsApp(
),
);
try {
const response = await fetch('http://127.0.0.1:8765', {
const ankiConfig = getAnkiConnectConfig();
const response = await fetch(ankiConfig?.url ?? 'http://127.0.0.1:8765', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
signal: AbortSignal.timeout(ANKI_CONNECT_FETCH_TIMEOUT_MS),
@@ -807,7 +1112,7 @@ export function createStatsApp(
return c.json(
(result.result ?? []).map((note) => ({
...note,
preview: buildAnkiNotePreview(note.fields, options?.ankiConnectConfig),
preview: buildAnkiNotePreview(note.fields, ankiConfig),
})),
);
} catch {
@@ -822,7 +1127,8 @@ export function createStatsApp(
const endMs = typeof body?.endMs === 'number' ? body.endMs : NaN;
const sentence = typeof body?.sentence === 'string' ? body.sentence.trim() : '';
const word = typeof body?.word === 'string' ? body.word.trim() : '';
const secondaryText = typeof body?.secondaryText === 'string' ? body.secondaryText.trim() : '';
const bodySecondaryText =
typeof body?.secondaryText === 'string' ? body.secondaryText.trim() : '';
const videoTitle = typeof body?.videoTitle === 'string' ? body.videoTitle.trim() : '';
const rawMode = c.req.query('mode');
const mode = rawMode === 'audio' ? 'audio' : rawMode === 'word' ? 'word' : 'sentence';
@@ -830,18 +1136,51 @@ export function createStatsApp(
if (!sourcePath || !sentence || !Number.isFinite(startMs) || !Number.isFinite(endMs)) {
return c.json({ error: 'sourcePath, sentence, startMs, and endMs are required' }, 400);
}
if (endMs <= startMs) {
return c.json({ error: 'endMs must be greater than startMs' }, 400);
}
if (!existsSync(sourcePath)) {
return c.json({ error: 'File not found' }, 404);
}
const ankiConfig = options?.ankiConnectConfig;
const ankiConfig = getAnkiConnectConfig();
if (!ankiConfig) {
return c.json({ error: 'AnkiConnect is not configured' }, 500);
}
const secondarySubtitleLanguages = getSecondarySubtitleLanguages();
let retimedSecondaryText = '';
if (mode === 'sentence' && !bodySecondaryText) {
try {
retimedSecondaryText = await (
options?.resolveRetimedSecondarySubtitleText ??
resolveRetimedSecondarySubtitleTextFromSidecar
)({
sourcePath,
startMs,
endMs,
languages: secondarySubtitleLanguages,
alassPath: getStatsMiningAlassPath(),
});
} catch (error) {
statsMiningLogger.warn(
'Failed to resolve retimed secondary subtitle for stats mining:',
error instanceof Error ? error.message : String(error),
);
}
}
const secondaryText =
bodySecondaryText ||
retimedSecondaryText ||
resolveSecondarySubtitleTextFromSidecar({
sourcePath,
startMs,
endMs,
languages: secondarySubtitleLanguages,
});
const client = new AnkiConnectClient(ankiConfig.url ?? 'http://127.0.0.1:8765');
const mediaGen = new MediaGenerator();
const mediaGen = options?.createMediaGenerator?.() ?? new MediaGenerator();
const audioPadding = ankiConfig.media?.audioPadding ?? 0;
const maxMediaDuration = ankiConfig.media?.maxMediaDuration ?? 30;
@@ -865,7 +1204,9 @@ export function createStatsApp(
imageType === 'avif' && ankiConfig.media?.syncAnimatedImageToWordAudio !== false;
const audioPromise = generateAudio
? mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding)
? timeMiningPhase(mode, 'generateAudio', () =>
mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding),
)
: Promise.resolve(null);
const createImagePromise = (animatedLeadInSeconds = 0): Promise<Buffer | null> => {
@@ -874,22 +1215,26 @@ export function createStatsApp(
}
if (imageType === 'avif') {
return mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, {
fps: ankiConfig.media?.animatedFps ?? 10,
maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640,
maxHeight: ankiConfig.media?.animatedMaxHeight,
crf: ankiConfig.media?.animatedCrf ?? 35,
leadingStillDuration: animatedLeadInSeconds,
});
return timeMiningPhase(mode, 'generateAnimatedImage', () =>
mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, {
fps: ankiConfig.media?.animatedFps ?? 10,
maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640,
maxHeight: ankiConfig.media?.animatedMaxHeight,
crf: ankiConfig.media?.animatedCrf ?? 35,
leadingStillDuration: animatedLeadInSeconds,
}),
);
}
const midpointSec = (startSec + clampedEndSec) / 2;
return mediaGen.generateScreenshot(sourcePath, midpointSec, {
format: ankiConfig.media?.imageFormat ?? 'jpg',
quality: ankiConfig.media?.imageQuality ?? 92,
maxWidth: ankiConfig.media?.imageMaxWidth,
maxHeight: ankiConfig.media?.imageMaxHeight,
});
return timeMiningPhase(mode, 'generateScreenshot', () =>
mediaGen.generateScreenshot(sourcePath, midpointSec, {
format: ankiConfig.media?.imageFormat ?? 'jpg',
quality: ankiConfig.media?.imageQuality ?? 92,
maxWidth: ankiConfig.media?.imageMaxWidth,
maxHeight: ankiConfig.media?.imageMaxHeight,
}),
);
};
const imagePromise =
@@ -899,6 +1244,25 @@ export function createStatsApp(
const errors: string[] = [];
let noteId: number;
let effectiveDeckNamePromise: Promise<string> | null = null;
const getEffectiveDeckNameForRequest = (): Promise<string> => {
effectiveDeckNamePromise ??= getEffectiveMiningDeckName(ankiConfig);
return effectiveDeckNamePromise;
};
const moveNoteToConfiguredDeck = async (id: number): Promise<void> => {
const deckName = await getEffectiveDeckNameForRequest();
if (!deckName) {
return;
}
try {
const cardIds = await timeMiningPhase(mode, 'findCards', () =>
client.findCards(`nid:${id}`),
);
await timeMiningPhase(mode, 'changeDeck', () => client.changeDeck(cardIds, deckName));
} catch (err) {
errors.push(`deck: ${(err as Error).message}`);
}
};
if (mode === 'word') {
if (!options?.addYomitanNote) {
@@ -906,7 +1270,12 @@ export function createStatsApp(
}
const [yomitanResult, audioResult, imageResult] = await Promise.allSettled([
options.addYomitanNote(word),
timeMiningPhase(
'word',
'addYomitanNote',
() => options.addYomitanNote!(word),
(noteId) => (typeof noteId === 'number' ? { noteId } : {}),
),
audioPromise,
imagePromise,
]);
@@ -921,6 +1290,7 @@ export function createStatsApp(
}
noteId = yomitanResult.value;
await moveNoteToConfiguredDeck(noteId);
const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
if (audioResult.status === 'rejected')
errors.push(`audio: ${(audioResult.reason as Error).message}`);
@@ -928,10 +1298,19 @@ export function createStatsApp(
errors.push(`image: ${(imageResult.reason as Error).message}`);
let imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
if (syncAnimatedImageToWordAudio && generateImage) {
let noteInfo: StatsServerNoteInfo | null = null;
if (audioBuffer || (syncAnimatedImageToWordAudio && generateImage)) {
try {
const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[];
const noteInfo = noteInfoResult[0] ?? null;
noteInfo = noteInfoResult[0] ?? null;
} catch (err) {
if (syncAnimatedImageToWordAudio && generateImage) {
errors.push(`image: ${(err as Error).message}`);
}
}
}
if (syncAnimatedImageToWordAudio && generateImage) {
try {
const animatedLeadInSeconds = noteInfo
? await resolveAnimatedImageLeadInSeconds({
config: ankiConfig,
@@ -946,22 +1325,27 @@ export function createStatsApp(
errors.push(`image: ${(err as Error).message}`);
}
}
if (generateAudio && !audioBuffer && audioResult.status === 'fulfilled') {
errors.push('audio: no audio generated');
}
if (generateImage && !imageBuffer) {
errors.push('image: no image generated');
}
const mediaFields: Record<string, string> = {};
const timestamp = Date.now();
const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence';
const audioFieldName = ankiConfig.fields?.audio ?? 'ExpressionAudio';
const audioFieldName = getStatsWordMiningAudioFieldName(ankiConfig, noteInfo);
const imageFieldName = ankiConfig.fields?.image ?? 'Picture';
mediaFields[sentenceFieldName] = highlightedSentence;
if (secondaryText) {
mediaFields[ankiConfig.fields?.translation ?? 'SelectionText'] = secondaryText;
}
if (audioBuffer) {
const audioFilename = `subminer_audio_${timestamp}.mp3`;
try {
await client.storeMediaFile(audioFilename, audioBuffer);
await timeMiningPhase('word', 'uploadAudio', () =>
client.storeMediaFile(audioFilename, audioBuffer),
);
mediaFields[audioFieldName] = `[sound:${audioFilename}]`;
} catch (err) {
errors.push(`audio upload: ${(err as Error).message}`);
@@ -972,7 +1356,9 @@ export function createStatsApp(
const imageExt = imageType === 'avif' ? 'avif' : (ankiConfig.media?.imageFormat ?? 'jpg');
const imageFilename = `subminer_image_${timestamp}.${imageExt}`;
try {
await client.storeMediaFile(imageFilename, imageBuffer);
await timeMiningPhase('word', 'uploadImage', () =>
client.storeMediaFile(imageFilename, imageBuffer),
);
mediaFields[imageFieldName] = `<img src="${imageFilename}">`;
} catch (err) {
errors.push(`image upload: ${(err as Error).message}`);
@@ -1000,7 +1386,9 @@ export function createStatsApp(
if (Object.keys(mediaFields).length > 0) {
try {
await client.updateNoteFields(noteId, mediaFields);
await timeMiningPhase('word', 'updateNoteFields', () =>
client.updateNoteFields(noteId, mediaFields),
);
} catch (err) {
errors.push(`update fields: ${(err as Error).message}`);
}
@@ -1009,32 +1397,24 @@ export function createStatsApp(
return c.json({ noteId, ...(errors.length > 0 ? { errors } : {}) });
}
const [audioResult, imageResult] = await Promise.allSettled([audioPromise, imagePromise]);
const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
if (audioResult.status === 'rejected')
errors.push(`audio: ${(audioResult.reason as Error).message}`);
if (imageResult.status === 'rejected')
errors.push(`image: ${(imageResult.reason as Error).message}`);
const wordFieldName = getConfiguredWordFieldName(ankiConfig);
const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence';
const translationFieldName = ankiConfig.fields?.translation ?? 'SelectionText';
const audioFieldName = ankiConfig.fields?.audio ?? 'ExpressionAudio';
const imageFieldName = ankiConfig.fields?.image ?? 'Picture';
const miscInfoFieldName = ankiConfig.fields?.miscInfo ?? '';
const fields: Record<string, string> = {
[sentenceFieldName]: highlightedSentence,
[sentenceFieldName]: mode === 'sentence' ? sentence : highlightedSentence,
};
if (secondaryText) {
if (mode === 'sentence' && secondaryText) {
fields[translationFieldName] = secondaryText;
}
if (ankiConfig.isLapis?.enabled || ankiConfig.isKiku?.enabled) {
if (word) {
if (mode === 'sentence') {
fields[wordFieldName] = sentence;
} else if (word) {
fields[wordFieldName] = word;
}
if (mode === 'sentence') {
@@ -1045,23 +1425,62 @@ export function createStatsApp(
}
const model = ankiConfig.isLapis?.sentenceCardModel || 'Basic';
const deck = ankiConfig.deck ?? 'Default';
const tags = ankiConfig.tags ?? ['SubMiner'];
try {
noteId = await client.addNote(deck, model, fields, tags);
} catch (err) {
return c.json({ error: `Failed to add note: ${(err as Error).message}` }, 502);
const addNotePromise = timeMiningPhase(
mode,
'addNote',
async () =>
client.addNote((await getEffectiveDeckNameForRequest()) || 'Default', model, fields, tags),
(id) => ({
noteId: id,
}),
);
const [audioResult, imageResult, addNoteResult] = await Promise.allSettled([
audioPromise,
imagePromise,
addNotePromise,
]);
const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
if (audioResult.status === 'rejected')
errors.push(`audio: ${(audioResult.reason as Error).message}`);
if (imageResult.status === 'rejected')
errors.push(`image: ${(imageResult.reason as Error).message}`);
if (addNoteResult.status === 'rejected') {
return c.json(
{ error: `Failed to add note: ${(addNoteResult.reason as Error).message}` },
502,
);
}
noteId = addNoteResult.value;
await moveNoteToConfiguredDeck(noteId);
const mediaFields: Record<string, string> = {};
const timestamp = Date.now();
let noteInfo: StatsServerNoteInfo | null = null;
if (audioBuffer) {
try {
const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[];
noteInfo = noteInfoResult[0] ?? null;
} catch {
noteInfo = null;
}
}
if (audioBuffer) {
const audioFilename = `subminer_audio_${timestamp}.mp3`;
try {
await client.storeMediaFile(audioFilename, audioBuffer);
mediaFields[audioFieldName] = `[sound:${audioFilename}]`;
await timeMiningPhase(mode, 'uploadAudio', () =>
client.storeMediaFile(audioFilename, audioBuffer),
);
const audioValue = `[sound:${audioFilename}]`;
for (const fieldName of getStatsDirectMiningAudioFieldNames(ankiConfig, noteInfo, mode)) {
mediaFields[fieldName] = audioValue;
}
} catch (err) {
errors.push(`audio upload: ${(err as Error).message}`);
}
@@ -1071,7 +1490,9 @@ export function createStatsApp(
const imageExt = imageType === 'avif' ? 'avif' : (ankiConfig.media?.imageFormat ?? 'jpg');
const imageFilename = `subminer_image_${timestamp}.${imageExt}`;
try {
await client.storeMediaFile(imageFilename, imageBuffer);
await timeMiningPhase(mode, 'uploadImage', () =>
client.storeMediaFile(imageFilename, imageBuffer),
);
mediaFields[imageFieldName] = `<img src="${imageFilename}">`;
} catch (err) {
errors.push(`image upload: ${(err as Error).message}`);
@@ -1099,7 +1520,9 @@ export function createStatsApp(
if (Object.keys(mediaFields).length > 0) {
try {
await client.updateNoteFields(noteId, mediaFields);
await timeMiningPhase(mode, 'updateNoteFields', () =>
client.updateNoteFields(noteId, mediaFields),
);
} catch (err) {
errors.push(`update fields: ${(err as Error).message}`);
}
@@ -1139,9 +1562,17 @@ export function startStatsServer(config: StatsServerConfig): { close: () => void
knownWordCachePath: config.knownWordCachePath,
mpvSocketPath: config.mpvSocketPath,
ankiConnectConfig: config.ankiConnectConfig,
getAnkiConnectConfig: config.getAnkiConnectConfig,
getYomitanAnkiDeckName: config.getYomitanAnkiDeckName,
secondarySubtitleLanguages: config.secondarySubtitleLanguages,
getSecondarySubtitleLanguages: config.getSecondarySubtitleLanguages,
statsMiningAlassPath: config.statsMiningAlassPath,
getStatsMiningAlassPath: config.getStatsMiningAlassPath,
resolveRetimedSecondarySubtitleText: config.resolveRetimedSecondarySubtitleText,
anilistRateLimiter: config.anilistRateLimiter,
addYomitanNote: config.addYomitanNote,
resolveAnkiNoteId: config.resolveAnkiNoteId,
resolveSentenceSearchHeadwords: config.resolveSentenceSearchHeadwords,
});
const bunRuntime = globalThis as typeof globalThis & {
@@ -151,6 +151,56 @@ test('syncYomitanDefaultAnkiServer injects force override when enabled', async (
assert.match(scriptValue, /forceOverride = true/);
});
// With a `deck` option, the sync must rewrite the deck of the active profile's
// term card format and of the legacy `terms` entry, while leaving the kanji
// card format untouched. The server URL already matches, so the deck change
// alone has to trigger the save.
test('syncYomitanDefaultAnkiServer updates the active profile Anki deck', async () => {
  const initialOptions = {
    profileCurrent: 0,
    profiles: [
      {
        options: {
          anki: {
            server: 'http://127.0.0.1:8766',
            cardFormats: [
              { type: 'term', deck: 'Default', model: 'Mining Note', fields: {} },
              { type: 'kanji', deck: 'Kanji', model: 'Kanji Note', fields: {} },
            ],
            terms: { deck: 'Default', model: 'Legacy Note', fields: {} },
          },
        },
      },
    ],
  };
  type YomitanOptions = typeof initialOptions;

  // Captures whatever the injected script passes to `setAllSettings`.
  let persisted: YomitanOptions | null = null;
  const silentLogger = {
    error: () => undefined,
    info: () => undefined,
  };

  const deps = createDeps((script) =>
    runInjectedYomitanScript(script, (action, params) => {
      switch (action) {
        case 'optionsGetFull':
          // Hand out a deep copy so the fixture itself is never mutated.
          return JSON.parse(JSON.stringify(initialOptions));
        case 'setAllSettings':
          persisted = (params as { value: YomitanOptions }).value;
          return true;
        default:
          throw new Error(`Unexpected action: ${action}`);
      }
    }),
  );

  const synced = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, silentLogger, {
    deck: 'Minecraft',
    forceOverride: true,
  });

  assert.equal(synced, true);
  assert.ok(persisted);
  const activeProfile = (persisted as YomitanOptions).profiles[0];
  assert.equal(activeProfile?.options.anki.cardFormats[0]?.deck, 'Minecraft');
  assert.equal(activeProfile?.options.anki.cardFormats[1]?.deck, 'Kanji');
  assert.equal(activeProfile?.options.anki.terms.deck, 'Minecraft');
});
test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
const deps = createDeps(async () => {
throw new Error('execute failed');
@@ -1783,6 +1783,7 @@ export async function syncYomitanDefaultAnkiServer(
logger: LoggerLike,
options?: {
forceOverride?: boolean;
deck?: string;
},
): Promise<boolean> {
const normalizedTargetServer = serverUrl.trim();
@@ -1790,6 +1791,7 @@ export async function syncYomitanDefaultAnkiServer(
return false;
}
const forceOverride = options?.forceOverride === true;
const normalizedTargetDeck = options?.deck?.trim() ?? '';
const isReady = await ensureYomitanParserWindow(deps, logger);
const parserWindow = deps.getYomitanParserWindow();
@@ -1819,6 +1821,7 @@ export async function syncYomitanDefaultAnkiServer(
});
const targetServer = ${JSON.stringify(normalizedTargetServer)};
const targetDeck = ${JSON.stringify(normalizedTargetDeck)};
const forceOverride = ${forceOverride ? 'true' : 'false'};
const optionsFull = await invoke("optionsGetFull", undefined);
const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
@@ -1843,18 +1846,54 @@ export async function syncYomitanDefaultAnkiServer(
const currentServerRaw = targetProfile.options.anki.server;
const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
if (currentServer === targetServer) {
return { updated: false, matched: true, reason: "already-target", currentServer, targetServer };
}
const canReplaceCurrent =
forceOverride || currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
if (!canReplaceCurrent) {
return { updated: false, matched: false, reason: "blocked-existing-server", currentServer, targetServer };
let changed = false;
if (currentServer !== targetServer) {
const canReplaceCurrent =
forceOverride || currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
if (!canReplaceCurrent) {
return { updated: false, matched: false, reason: "blocked-existing-server", currentServer, targetServer };
}
targetProfile.options.anki.server = targetServer;
changed = true;
}
if (targetDeck) {
const cardFormats = Array.isArray(targetProfile.options.anki.cardFormats)
? targetProfile.options.anki.cardFormats
: [];
for (const cardFormat of cardFormats) {
if (
!cardFormat ||
typeof cardFormat !== "object" ||
cardFormat.type !== "term" ||
cardFormat.enabled === false
) {
continue;
}
const currentDeck = typeof cardFormat.deck === "string" ? cardFormat.deck.trim() : "";
if (currentDeck !== targetDeck) {
cardFormat.deck = targetDeck;
changed = true;
}
}
const terms = targetProfile.options.anki.terms;
if (terms && typeof terms === "object") {
const currentTermDeck = typeof terms.deck === "string" ? terms.deck.trim() : "";
if (currentTermDeck !== targetDeck) {
terms.deck = targetDeck;
changed = true;
}
}
}
if (!changed) {
return { updated: false, matched: true, reason: "already-target", currentServer, targetServer, targetDeck };
}
targetProfile.options.anki.server = targetServer;
await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
return { updated: true, matched: true, currentServer, targetServer };
return { updated: true, matched: true, currentServer, targetServer, targetDeck };
})();
`;