feat(immersion): add anime metadata, occurrence tracking, and schema upgrades

- Add imm_anime table with AniList integration
- Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences
- Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words
- Add anime metadata parsing with guessit fallback
- Add video duration tracking and watched status
- Add episode, streak, trend, and word/kanji detail queries
- Deduplicate subtitle line recording within sessions
- Pass Anki note IDs through card mining callback chain
This commit is contained in:
2026-03-14 22:13:42 -07:00
parent cc5d270b8e
commit fe8bb167c4
19 changed files with 5231 additions and 122 deletions

View File

@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
});
assert.deepEqual(result, {
title: 'Guessit Title',
season: null,
episode: 7,
source: 'guessit',
});
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
});
assert.deepEqual(result, {
title: 'My Anime',
season: 1,
episode: 3,
source: 'fallback',
});
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
]);
assert.deepEqual(result, {
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
});
assert.deepEqual(result, {
title: 'Rascal Does not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});

View File

@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
export interface AnilistMediaGuess {
title: string;
season: number | null;
episode: number | null;
source: 'guessit' | 'fallback';
}
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
};
}
function runGuessit(target: string): Promise<string> {
export function runGuessit(target: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.execFile(
'guessit',
@@ -73,7 +74,7 @@ function runGuessit(target: string): Promise<string> {
});
}
type GuessAnilistMediaInfoDeps = {
export interface GuessAnilistMediaInfoDeps {
runGuessit: (target: string) => Promise<string>;
};
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
const parsed = JSON.parse(stdout) as Record<string, unknown>;
const title = readGuessitTitle(parsed.title);
const episode = firstPositiveInteger(parsed.episode);
const season = firstPositiveInteger(parsed.season);
if (title) {
return { title, episode, source: 'guessit' };
return { title, season, episode, source: 'guessit' };
}
} catch {
// Ignore guessit failures and fall back to internal parser.
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
}
return {
title: parsed.title.trim(),
season: parsed.season,
episode: parsed.episode,
source: 'fallback',
};

View File

@@ -0,0 +1,239 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
import { Database } from '../immersion-tracker/sqlite.js';
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-cover-art-test-'));
return path.join(dir, 'immersion.sqlite');
}
function cleanupDbPath(dbPath: string): void {
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
}
test('stripFilenameTags normalizes common media-title formats', () => {
assert.equal(
stripFilenameTags('[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...'),
'The Eminence in Shadow',
);
assert.equal(
stripFilenameTags(
'[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags('Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure'),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags(
'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
),
'Little Witch Academia',
);
});
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
canonicalTitle: 'Cover Fetcher Test',
sourcePath: '/tmp/cover-fetcher-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
upsertCoverArt(db, videoId, {
anilistId: 7,
coverUrl: 'https://images.test/cover.jpg',
coverBlob: null,
titleRomaji: 'Test Title',
titleEnglish: 'Test Title',
episodesTotal: 12,
});
const fetchCalls: string[] = [];
const originalFetch = globalThis.fetch;
globalThis.fetch = (async (input: RequestInfo | URL) => {
const url = String(input);
fetchCalls.push(url);
assert.equal(url, 'https://images.test/cover.jpg');
return new Response(new Uint8Array([1, 2, 3, 4]), {
status: 200,
headers: { 'Content-Type': 'image/jpeg' },
});
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
);
const fetched = await fetcher.fetchIfMissing(
db,
videoId,
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
);
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(fetchCalls.length, 1);
assert.equal(stored?.coverBlob?.length, 4);
assert.equal(stored?.titleEnglish, 'Test Title');
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
function createJsonResponse(payload: unknown): Response {
return new Response(JSON.stringify(payload), {
status: 200,
headers: { 'content-type': 'application/json' },
});
}
test('fetchIfMissing uses guessit primary title and season when available', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
canonicalTitle: '[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
sourcePath: '/tmp/cover-fetcher-season-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const searchCalls: Array<{ search: string }> = [];
const originalFetch = globalThis.fetch;
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
const search = payload.variables.search;
searchCalls.push({ search });
if (search.includes('Season 2')) {
return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
}
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 19,
episodes: 24,
coverImage: { large: 'https://images.test/cover.jpg', medium: null },
title: { romaji: 'Little Witch Academia', english: 'Little Witch Academia', native: null },
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
runGuessit: async () =>
JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
},
);
const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(searchCalls.length, 2);
assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
assert.equal(stored?.anilistId, 19);
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
canonicalTitle: 'School Vlog S01E01',
sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
let requestCount = 0;
const originalFetch = globalThis.fetch;
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
requestCount += 1;
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
assert.equal(payload.variables.search, 'School Vlog');
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 21,
episodes: 12,
coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(requestCount, 1);
assert.equal(stored?.anilistId, 21);
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});

View File

@@ -0,0 +1,405 @@
import type { AnilistRateLimiter } from './rate-limiter';
import type { DatabaseSync } from '../immersion-tracker/sqlite';
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
import { guessAnilistMediaInfo, runGuessit, type GuessAnilistMediaInfoDeps } from './anilist-updater';
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
const SEARCH_QUERY = `
query ($search: String!) {
Page(perPage: 5) {
media(search: $search, type: ANIME) {
id
episodes
season
seasonYear
coverImage { large medium }
title { romaji english native }
}
}
}
`;
interface AnilistMedia {
id: number;
episodes: number | null;
season: string | null;
seasonYear: number | null;
coverImage: { large: string | null; medium: string | null } | null;
title: { romaji: string | null; english: string | null; native: string | null } | null;
}
interface AnilistSearchResponse {
data?: {
Page?: {
media?: AnilistMedia[];
};
};
errors?: Array<{ message?: string }>;
}
export interface CoverArtFetcher {
fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
}
interface Logger {
info(msg: string, ...args: unknown[]): void;
warn(msg: string, ...args: unknown[]): void;
error(msg: string, ...args: unknown[]): void;
}
interface CoverArtCandidate {
title: string;
source: 'guessit' | 'fallback';
season: number | null;
episode: number | null;
}
interface CoverArtFetcherOptions {
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
export function stripFilenameTags(raw: string): string {
let title = raw.replace(/\.[A-Za-z0-9]{2,4}$/, '');
title = title.replace(/^(?:\s*\[[^\]]*\]\s*)+/, '');
title = title.replace(/[._]+/g, ' ');
// Remove everything from " - S##E##" or " - ###" onward (season/episode markers)
title = title.replace(/\s+-\s+S\d+E\d+.*$/i, '');
title = title.replace(/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, '');
title = title.replace(/\s+S\d+E\d+.*$/i, '');
title = title.replace(/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, '');
title = title.replace(/\s+E\d+[: -].*$/i, '');
title = title.replace(/^S\d+E\d+\s*[- ]\s*/i, '');
// Remove bracketed/parenthesized tags: [WEBDL-1080p], (2022), etc.
title = title.replace(/\s*\[[^\]]*\]\s*/g, ' ');
title = title.replace(/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' ');
// Remove common codec/source tags that may appear without brackets
title = title.replace(
/\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
'',
);
// Remove trailing dashes and group tags like "-Retr0"
title = title.replace(/\s*-\s*[\w]+$/, '');
return title.trim().replace(/\s{2,}/g, ' ');
}
function removeSeasonHint(title: string): string {
return title.replace(/\bseason\s*\d+\b/gi, '').replace(/\s{2,}/g, ' ').trim();
}
function normalizeTitle(text: string): string {
return text.trim().toLowerCase().replace(/\s+/g, ' ');
}
function extractCandidateSeasonHints(text: string): Set<number> {
const normalized = normalizeTitle(text);
const matches = [
...normalized.matchAll(/\bseason\s*(\d{1,2})\b/gi),
...normalized.matchAll(/\bs(\d{1,2})(?:\b|\D)/gi),
];
const values = new Set<number>();
for (const match of matches) {
const value = Number.parseInt(match[1]!, 10);
if (Number.isInteger(value)) {
values.add(value);
}
}
return values;
}
function isSeasonMentioned(titles: string[], season: number | null): boolean {
if (!season) {
return false;
}
const hints = titles.flatMap((title) => [...extractCandidateSeasonHints(title)]);
return hints.includes(season);
}
function pickBestSearchResult(
title: string,
episode: number | null,
season: number | null,
media: AnilistMedia[],
): { id: number; title: string } | null {
const cleanedTitle = removeSeasonHint(title);
const targets = [title, cleanedTitle]
.map(normalizeTitle)
.map((value) => value.trim())
.filter((value, index, all) => value.length > 0 && all.indexOf(value) === index);
const filtered = episode === null
? media
: media.filter((item) => {
const total = item.episodes;
return total === null || total >= episode;
});
const candidates = filtered.length > 0 ? filtered : media;
if (candidates.length === 0) {
return null;
}
const scored = candidates.map((item) => {
const candidateTitles = [
item.title?.romaji,
item.title?.english,
item.title?.native,
]
.filter((value): value is string => typeof value === 'string')
.map((value) => normalizeTitle(value));
let score = 0;
for (const target of targets) {
if (candidateTitles.includes(target)) {
score += 120;
continue;
}
if (candidateTitles.some((itemTitle) => itemTitle.includes(target))) {
score += 30;
}
if (candidateTitles.some((itemTitle) => target.includes(itemTitle))) {
score += 10;
}
}
if (episode !== null && item.episodes === episode) {
score += 20;
}
if (season !== null && isSeasonMentioned(candidateTitles, season)) {
score += 15;
}
return { item, score };
});
scored.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
return b.item.id - a.item.id;
});
const selected = scored[0]!;
const selectedTitle = selected.item.title?.english ?? selected.item.title?.romaji ?? selected.item.title?.native ?? title;
return { id: selected.item.id, title: selectedTitle };
}
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
const candidateTitles = [
parsed.title,
...(parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1
? [`${parsed.title} Season ${parsed.season}`]
: []),
];
return candidateTitles
.map((title) => title.trim())
.filter((title, index, all) => title.length > 0 && all.indexOf(title) === index);
}
async function searchAnilist(
rateLimiter: AnilistRateLimiter,
title: string,
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
await rateLimiter.acquire();
const res = await fetch(ANILIST_GRAPHQL_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
body: JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } }),
});
rateLimiter.recordResponse(res.headers);
if (res.status === 429) {
return { media: [], rateLimited: true };
}
if (!res.ok) {
throw new Error(`Anilist search failed: ${res.status} ${res.statusText}`);
}
const json = (await res.json()) as AnilistSearchResponse;
const mediaList = json.data?.Page?.media;
if (!mediaList || mediaList.length === 0) {
return { media: [], rateLimited: false };
}
return { media: mediaList, rateLimited: false };
}
async function downloadImage(url: string): Promise<Buffer | null> {
try {
const res = await fetch(url);
if (!res.ok) return null;
const arrayBuf = await res.arrayBuffer();
return Buffer.from(arrayBuf);
} catch {
return null;
}
}
export function createCoverArtFetcher(
rateLimiter: AnilistRateLimiter,
logger: Logger,
options: CoverArtFetcherOptions = {},
): CoverArtFetcher {
const resolveMediaInfo = async (canonicalTitle: string): Promise<CoverArtCandidate | null> => {
const parsed = await guessAnilistMediaInfo(null, canonicalTitle, {
runGuessit: options.runGuessit ?? runGuessit,
});
if (!parsed) {
return null;
}
return {
title: parsed.title,
season: parsed.season,
episode: parsed.episode,
source: parsed.source,
};
};
return {
async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
const existing = getCoverArt(db, videoId);
if (existing?.coverBlob) {
return true;
}
if (existing?.coverUrl) {
const coverBlob = await downloadImage(existing.coverUrl);
if (coverBlob) {
upsertCoverArt(db, videoId, {
anilistId: existing.anilistId,
coverUrl: existing.coverUrl,
coverBlob,
titleRomaji: existing.titleRomaji,
titleEnglish: existing.titleEnglish,
episodesTotal: existing.episodesTotal,
});
return true;
}
}
if (
existing &&
existing.coverUrl === null &&
existing.anilistId === null &&
Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
) {
return false;
}
const cleaned = stripFilenameTags(canonicalTitle);
if (!cleaned) {
logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
upsertCoverArt(db, videoId, {
anilistId: null,
coverUrl: null,
coverBlob: null,
titleRomaji: null,
titleEnglish: null,
episodesTotal: null,
});
return false;
}
const parsedInfo = await resolveMediaInfo(canonicalTitle);
const searchBase = parsedInfo?.title ?? cleaned;
const searchCandidates = parsedInfo
? buildSearchCandidates(parsedInfo)
: [cleaned];
const effectiveCandidates = searchCandidates.includes(cleaned)
? searchCandidates
: [...searchCandidates, cleaned];
let selected: AnilistMedia | null = null;
let rateLimited = false;
for (const candidate of effectiveCandidates) {
logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);
try {
const result = await searchAnilist(rateLimiter, candidate);
rateLimited = result.rateLimited;
if (result.media.length === 0) {
continue;
}
const picked = pickBestSearchResult(
searchBase,
parsedInfo?.episode ?? null,
parsedInfo?.season ?? null,
result.media,
);
if (picked) {
const match = result.media.find((media) => media.id === picked.id);
if (match) {
selected = match;
break;
}
}
} catch (err) {
logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
return false;
}
}
if (rateLimited) {
logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
return false;
}
if (!selected) {
logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
upsertCoverArt(db, videoId, {
anilistId: null,
coverUrl: null,
coverBlob: null,
titleRomaji: null,
titleEnglish: null,
episodesTotal: null,
});
return false;
}
const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
let coverBlob: Buffer | null = null;
if (coverUrl) {
coverBlob = await downloadImage(coverUrl);
}
upsertCoverArt(db, videoId, {
anilistId: selected.id,
coverUrl,
coverBlob,
titleRomaji: selected.title?.romaji ?? null,
titleEnglish: selected.title?.english ?? null,
episodesTotal: selected.episodes ?? null,
});
updateAnimeAnilistInfo(db, videoId, {
anilistId: selected.id,
titleRomaji: selected.title?.romaji ?? null,
titleEnglish: selected.title?.english ?? null,
titleNative: selected.title?.native ?? null,
episodesTotal: selected.episodes ?? null,
});
logger.info(
'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
videoId,
selected.id,
selected.title?.romaji ?? searchBase,
);
return true;
},
};
}