mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 12:11:28 -07:00
feat(immersion): add anime metadata, occurrence tracking, and schema upgrades
- Add imm_anime table with AniList integration - Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences - Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words - Add anime metadata parsing with guessit fallback - Add video duration tracking and watched status - Add episode, streak, trend, and word/kanji detail queries - Deduplicate subtitle line recording within sessions - Pass Anki note IDs through card mining callback chain
This commit is contained in:
@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'Guessit Title',
|
||||
season: null,
|
||||
episode: 7,
|
||||
source: 'guessit',
|
||||
});
|
||||
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'My Anime',
|
||||
season: 1,
|
||||
episode: 3,
|
||||
source: 'fallback',
|
||||
});
|
||||
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
|
||||
]);
|
||||
assert.deepEqual(result, {
|
||||
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
|
||||
season: null,
|
||||
episode: 1,
|
||||
source: 'guessit',
|
||||
});
|
||||
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'Rascal Does not Dream of Bunny Girl Senpai',
|
||||
season: null,
|
||||
episode: 1,
|
||||
source: 'guessit',
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
|
||||
|
||||
export interface AnilistMediaGuess {
|
||||
title: string;
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
source: 'guessit' | 'fallback';
|
||||
}
|
||||
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
|
||||
};
|
||||
}
|
||||
|
||||
function runGuessit(target: string): Promise<string> {
|
||||
export function runGuessit(target: string): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
childProcess.execFile(
|
||||
'guessit',
|
||||
@@ -73,7 +74,7 @@ function runGuessit(target: string): Promise<string> {
|
||||
});
|
||||
}
|
||||
|
||||
type GuessAnilistMediaInfoDeps = {
|
||||
export interface GuessAnilistMediaInfoDeps {
|
||||
runGuessit: (target: string) => Promise<string>;
|
||||
};
|
||||
|
||||
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
|
||||
const parsed = JSON.parse(stdout) as Record<string, unknown>;
|
||||
const title = readGuessitTitle(parsed.title);
|
||||
const episode = firstPositiveInteger(parsed.episode);
|
||||
const season = firstPositiveInteger(parsed.season);
|
||||
if (title) {
|
||||
return { title, episode, source: 'guessit' };
|
||||
return { title, season, episode, source: 'guessit' };
|
||||
}
|
||||
} catch {
|
||||
// Ignore guessit failures and fall back to internal parser.
|
||||
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
|
||||
}
|
||||
return {
|
||||
title: parsed.title.trim(),
|
||||
season: parsed.season,
|
||||
episode: parsed.episode,
|
||||
source: 'fallback',
|
||||
};
|
||||
|
||||
239
src/core/services/anilist/cover-art-fetcher.test.ts
Normal file
239
src/core/services/anilist/cover-art-fetcher.test.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
|
||||
import { Database } from '../immersion-tracker/sqlite.js';
|
||||
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
|
||||
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
|
||||
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
|
||||
|
||||
/**
 * Create a fresh temporary directory for one test and return the path of the
 * SQLite file to create inside it. The file itself is not created here.
 */
function makeDbPath(): string {
  const prefix = path.join(os.tmpdir(), 'subminer-cover-art-test-');
  const tempDir = fs.mkdtempSync(prefix);
  return path.join(tempDir, 'immersion.sqlite');
}
|
||||
|
||||
/**
 * Delete the directory that contains `dbPath`, together with everything in it.
 * A directory that no longer exists is ignored (`force: true`).
 */
function cleanupDbPath(dbPath: string): void {
  const tempDir = path.dirname(dbPath);
  fs.rmSync(tempDir, { force: true, recursive: true });
}
|
||||
|
||||
test('stripFilenameTags normalizes common media-title formats', () => {
  // Each entry is [raw player/file title, expected cleaned series title].
  const cases: Array<[string, string]> = [
    ['[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...', 'The Eminence in Shadow'],
    [
      '[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
      'Kono Subarashii Sekai ni Shukufuku wo!',
    ],
    [
      'Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure',
      'Kono Subarashii Sekai ni Shukufuku wo!',
    ],
    [
      'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
      'Little Witch Academia',
    ],
  ];

  for (const [raw, expected] of cases) {
    assert.equal(stripFilenameTags(raw), expected);
  }
});
|
||||
|
||||
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
|
||||
canonicalTitle: 'Cover Fetcher Test',
|
||||
sourcePath: '/tmp/cover-fetcher-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: 7,
|
||||
coverUrl: 'https://images.test/cover.jpg',
|
||||
coverBlob: null,
|
||||
titleRomaji: 'Test Title',
|
||||
titleEnglish: 'Test Title',
|
||||
episodesTotal: 12,
|
||||
});
|
||||
|
||||
const fetchCalls: string[] = [];
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = (async (input: RequestInfo | URL) => {
|
||||
const url = String(input);
|
||||
fetchCalls.push(url);
|
||||
assert.equal(url, 'https://images.test/cover.jpg');
|
||||
return new Response(new Uint8Array([1, 2, 3, 4]), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'image/jpeg' },
|
||||
});
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(
|
||||
db,
|
||||
videoId,
|
||||
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
|
||||
);
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(fetchCalls.length, 1);
|
||||
assert.equal(stored?.coverBlob?.length, 4);
|
||||
assert.equal(stored?.titleEnglish, 'Test Title');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Wrap `payload` in a 200 `Response` whose body is its JSON serialization,
 * for use as a stubbed `fetch` result.
 */
function createJsonResponse(payload: unknown): Response {
  const body = JSON.stringify(payload);
  const init = {
    status: 200,
    headers: { 'content-type': 'application/json' },
  };
  return new Response(body, init);
}
|
||||
|
||||
test('fetchIfMissing uses guessit primary title and season when available', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
|
||||
canonicalTitle: '[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
|
||||
sourcePath: '/tmp/cover-fetcher-season-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
const searchCalls: Array<{ search: string }> = [];
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const raw = (init?.body as string | undefined) ?? '';
|
||||
const payload = JSON.parse(raw) as { variables: { search: string } };
|
||||
const search = payload.variables.search;
|
||||
searchCalls.push({ search });
|
||||
|
||||
if (search.includes('Season 2')) {
|
||||
return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
createJsonResponse({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 19,
|
||||
episodes: 24,
|
||||
coverImage: { large: 'https://images.test/cover.jpg', medium: null },
|
||||
title: { romaji: 'Little Witch Academia', english: 'Little Witch Academia', native: null },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
{
|
||||
runGuessit: async () =>
|
||||
JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
|
||||
},
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(searchCalls.length, 2);
|
||||
assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
|
||||
assert.equal(stored?.anilistId, 19);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
|
||||
canonicalTitle: 'School Vlog S01E01',
|
||||
sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
let requestCount = 0;
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
|
||||
requestCount += 1;
|
||||
const raw = (init?.body as string | undefined) ?? '';
|
||||
const payload = JSON.parse(raw) as { variables: { search: string } };
|
||||
assert.equal(payload.variables.search, 'School Vlog');
|
||||
|
||||
return Promise.resolve(
|
||||
createJsonResponse({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 21,
|
||||
episodes: 12,
|
||||
coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
|
||||
title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
{
|
||||
runGuessit: async () => {
|
||||
throw new Error('guessit unavailable');
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(requestCount, 1);
|
||||
assert.equal(stored?.anilistId, 21);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
405
src/core/services/anilist/cover-art-fetcher.ts
Normal file
405
src/core/services/anilist/cover-art-fetcher.ts
Normal file
@@ -0,0 +1,405 @@
|
||||
import type { AnilistRateLimiter } from './rate-limiter';
|
||||
import type { DatabaseSync } from '../immersion-tracker/sqlite';
|
||||
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
|
||||
import { guessAnilistMediaInfo, runGuessit, type GuessAnilistMediaInfoDeps } from './anilist-updater';
|
||||
|
||||
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
|
||||
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
|
||||
|
||||
const SEARCH_QUERY = `
|
||||
query ($search: String!) {
|
||||
Page(perPage: 5) {
|
||||
media(search: $search, type: ANIME) {
|
||||
id
|
||||
episodes
|
||||
season
|
||||
seasonYear
|
||||
coverImage { large medium }
|
||||
title { romaji english native }
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
interface AnilistMedia {
|
||||
id: number;
|
||||
episodes: number | null;
|
||||
season: string | null;
|
||||
seasonYear: number | null;
|
||||
coverImage: { large: string | null; medium: string | null } | null;
|
||||
title: { romaji: string | null; english: string | null; native: string | null } | null;
|
||||
}
|
||||
|
||||
interface AnilistSearchResponse {
|
||||
data?: {
|
||||
Page?: {
|
||||
media?: AnilistMedia[];
|
||||
};
|
||||
};
|
||||
errors?: Array<{ message?: string }>;
|
||||
}
|
||||
|
||||
export interface CoverArtFetcher {
|
||||
fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
|
||||
}
|
||||
|
||||
interface Logger {
|
||||
info(msg: string, ...args: unknown[]): void;
|
||||
warn(msg: string, ...args: unknown[]): void;
|
||||
error(msg: string, ...args: unknown[]): void;
|
||||
}
|
||||
|
||||
interface CoverArtCandidate {
|
||||
title: string;
|
||||
source: 'guessit' | 'fallback';
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
}
|
||||
|
||||
interface CoverArtFetcherOptions {
|
||||
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
|
||||
}
|
||||
|
||||
/**
 * Reduce a media filename or player title to a bare series title suitable for
 * an AniList search.
 *
 * The substitutions run in a fixed order, and later patterns rely on earlier
 * ones having already run (for example, dots are turned into spaces before
 * the season/episode markers are matched).
 *
 * @param raw - Filename or display title.
 * @returns The cleaned title with runs of whitespace collapsed; may be empty.
 */
export function stripFilenameTags(raw: string): string {
  const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
    // Trailing file extension (2-4 alphanumerics after the last dot).
    [/\.[A-Za-z0-9]{2,4}$/, ''],
    // Leading "[Group]" / "[Source]" tags.
    [/^(?:\s*\[[^\]]*\]\s*)+/, ''],
    // Dot and underscore separators become spaces.
    [/[._]+/g, ' '],
    // Season/episode markers, and everything that follows them.
    [/\s+-\s+S\d+E\d+.*$/i, ''],
    [/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, ''],
    [/\s+S\d+E\d+.*$/i, ''],
    [/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, ''],
    [/\s+E\d+[: -].*$/i, ''],
    // A marker at the very start only loses the marker itself.
    [/^S\d+E\d+\s*[- ]\s*/i, ''],
    // Remaining bracketed tags and parenthesized groups containing a year.
    [/\s*\[[^\]]*\]\s*/g, ' '],
    [/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' '],
    // Bare codec/source/resolution tags plus anything glued to them.
    [
      /\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
      '',
    ],
    // Trailing "-Group" style suffix.
    [/\s*-\s*[\w]+$/, ''],
  ];

  let cleaned = raw;
  for (const [pattern, replacement] of rewrites) {
    cleaned = cleaned.replace(pattern, replacement);
  }

  return cleaned.trim().replace(/\s{2,}/g, ' ');
}
|
||||
|
||||
/**
 * Drop every "season N" phrase (case-insensitive) from `title`, then collapse
 * the leftover whitespace and trim the ends.
 */
function removeSeasonHint(title: string): string {
  const withoutSeason = title.replace(/\bseason\s*\d+\b/gi, '');
  const collapsed = withoutSeason.replace(/\s{2,}/g, ' ');
  return collapsed.trim();
}
|
||||
|
||||
/**
 * Canonical form used for title comparison: trimmed, lower-cased, and with
 * every whitespace run replaced by a single space.
 */
function normalizeTitle(text: string): string {
  const lowered = text.trim().toLowerCase();
  return lowered.replace(/\s+/g, ' ');
}
|
||||
|
||||
/**
 * Collect the season numbers that `text` appears to mention.
 *
 * Two spellings are recognised after normalization: "season N" and a
 * word-initial "sN" (which also catches forms such as "s01e05"). N is limited
 * to one or two digits.
 *
 * @returns The distinct season numbers found; empty when there are none.
 */
function extractCandidateSeasonHints(text: string): Set<number> {
  const normalized = normalizeTitle(text);
  const seasonPatterns = [/\bseason\s*(\d{1,2})\b/gi, /\bs(\d{1,2})(?:\b|\D)/gi];

  const hints = new Set<number>();
  for (const pattern of seasonPatterns) {
    for (const found of normalized.matchAll(pattern)) {
      const seasonNumber = Number.parseInt(found[1]!, 10);
      if (Number.isInteger(seasonNumber)) {
        hints.add(seasonNumber);
      }
    }
  }
  return hints;
}
|
||||
|
||||
/**
 * Report whether any of `titles` mentions the given season number.
 * A `null` (or zero) season never counts as mentioned.
 */
function isSeasonMentioned(titles: string[], season: number | null): boolean {
  if (!season) {
    return false;
  }
  return titles.some((title) => extractCandidateSeasonHints(title).has(season));
}
|
||||
|
||||
/**
 * Choose the AniList search hit that best matches a parsed title.
 *
 * Hits whose known episode count is lower than `episode` are excluded, unless
 * that would exclude every hit. Each remaining hit is then scored:
 *   - per target title (the title as given and with any "season N" removed):
 *     +120 for an exact normalized match, otherwise +30 if a hit title
 *     contains the target and +10 if the target contains a hit title;
 *   - +20 when the hit's episode count equals `episode`;
 *   - +15 when one of the hit's titles mentions `season`.
 * The highest score wins; ties go to the larger AniList id.
 *
 * @returns The winning id and a display title (english, then romaji, then
 *   native, then the input `title`), or `null` when `media` is empty.
 */
function pickBestSearchResult(
  title: string,
  episode: number | null,
  season: number | null,
  media: AnilistMedia[],
): { id: number; title: string } | null {
  // Distinct, non-empty comparison targets in first-seen order.
  const targets = [
    ...new Set([title, removeSeasonHint(title)].map((value) => normalizeTitle(value).trim())),
  ].filter((value) => value.length > 0);

  let pool = media;
  if (episode !== null) {
    const plausible = media.filter(
      (entry) => entry.episodes === null || entry.episodes >= episode,
    );
    if (plausible.length > 0) {
      pool = plausible;
    }
  }
  if (pool.length === 0) {
    return null;
  }

  const rank = (entry: AnilistMedia): number => {
    const names = [entry.title?.romaji, entry.title?.english, entry.title?.native]
      .filter((name): name is string => typeof name === 'string')
      .map((name) => normalizeTitle(name));

    let points = 0;
    for (const target of targets) {
      if (names.includes(target)) {
        points += 120;
        continue;
      }
      if (names.some((name) => name.includes(target))) {
        points += 30;
      }
      if (names.some((name) => target.includes(name))) {
        points += 10;
      }
    }
    if (episode !== null && entry.episodes === episode) {
      points += 20;
    }
    if (season !== null && isSeasonMentioned(names, season)) {
      points += 15;
    }
    return points;
  };

  const ranked = pool.map((entry) => ({ entry, points: rank(entry) }));
  ranked.sort((left, right) => right.points - left.points || right.entry.id - left.entry.id);

  const winner = ranked[0]!.entry;
  return {
    id: winner.id,
    title: winner.title?.english ?? winner.title?.romaji ?? winner.title?.native ?? title,
  };
}
|
||||
|
||||
/**
 * Build the ordered list of AniList search strings for a parsed title.
 *
 * When guessit reported a season greater than 1, the season-qualified query
 * ("<title> Season <n>") is tried FIRST and the plain title second. Callers
 * stop at the first query that yields a usable hit, so the more specific
 * query has to lead; otherwise the plain title always matches first and the
 * season query is never issued.
 *
 * @param parsed - Title, season and parser source of the current media.
 * @returns Trimmed, non-empty, de-duplicated search strings in search order.
 */
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
  const wantsSeasonQuery =
    parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1;

  const ordered = [
    ...(wantsSeasonQuery ? [`${parsed.title} Season ${parsed.season}`] : []),
    parsed.title,
  ];

  return ordered
    .map((title) => title.trim())
    .filter((title, index, all) => title.length > 0 && all.indexOf(title) === index);
}
|
||||
|
||||
/**
 * Run one AniList anime search for `title`.
 *
 * Waits on the rate limiter before sending and hands it the response headers
 * afterwards. HTTP 429 is reported through `rateLimited` rather than thrown.
 *
 * @returns The media list (empty when nothing matched) and whether the
 *   request was rejected with HTTP 429.
 * @throws Error for any other non-OK HTTP status.
 */
async function searchAnilist(
  rateLimiter: AnilistRateLimiter,
  title: string,
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
  await rateLimiter.acquire();

  const requestBody = JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } });
  const response = await fetch(ANILIST_GRAPHQL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
    body: requestBody,
  });

  rateLimiter.recordResponse(response.headers);

  if (response.status === 429) {
    return { media: [], rateLimited: true };
  }
  if (!response.ok) {
    throw new Error(`Anilist search failed: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as AnilistSearchResponse;
  const found = payload.data?.Page?.media;
  return { media: found && found.length > 0 ? found : [], rateLimited: false };
}
|
||||
|
||||
/**
 * Download `url` and return its body as a Buffer.
 * Best-effort: a non-OK status or any thrown error yields `null`.
 */
async function downloadImage(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url);
    return response.ok ? Buffer.from(await response.arrayBuffer()) : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Create a fetcher that looks up AniList metadata and cover art for a video
 * and stores the result through `upsertCoverArt` / `updateAnimeAnilistInfo`.
 *
 * @param rateLimiter - Gate awaited before every AniList search.
 * @param logger - Sink for progress, warning and error messages.
 * @param options - Optional overrides; `runGuessit` replaces the default
 *   guessit runner (used by tests).
 */
export function createCoverArtFetcher(
  rateLimiter: AnilistRateLimiter,
  logger: Logger,
  options: CoverArtFetcherOptions = {},
): CoverArtFetcher {
  // Parse title/season/episode from the canonical title via guessAnilistMediaInfo.
  const resolveMediaInfo = async (canonicalTitle: string): Promise<CoverArtCandidate | null> => {
    const parsed = await guessAnilistMediaInfo(null, canonicalTitle, {
      runGuessit: options.runGuessit ?? runGuessit,
    });
    if (!parsed) {
      return null;
    }
    return {
      title: parsed.title,
      season: parsed.season,
      episode: parsed.episode,
      source: parsed.source,
    };
  };

  // Store an all-null row; fetchIfMissing uses it to skip re-searching for NO_MATCH_RETRY_MS.
  const cacheNoMatch = (db: DatabaseSync, videoId: number): void => {
    upsertCoverArt(db, videoId, {
      anilistId: null,
      coverUrl: null,
      coverBlob: null,
      titleRomaji: null,
      titleEnglish: null,
      episodesTotal: null,
    });
  };

  return {
    /**
     * Ensure cover art is stored for `videoId`.
     *
     * @returns `true` when a blob was already stored, was backfilled from a
     *   stored URL, or an AniList match was found and stored (even if the
     *   image download itself failed); `false` on no match, a recent cached
     *   no-match, rate limiting, or a search error.
     */
    async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
      const existing = getCoverArt(db, videoId);
      if (existing?.coverBlob) {
        return true;
      }

      // A URL is known but the image is missing: try to backfill only the blob.
      if (existing?.coverUrl) {
        const coverBlob = await downloadImage(existing.coverUrl);
        if (coverBlob) {
          upsertCoverArt(db, videoId, {
            anilistId: existing.anilistId,
            coverUrl: existing.coverUrl,
            coverBlob,
            titleRomaji: existing.titleRomaji,
            titleEnglish: existing.titleEnglish,
            episodesTotal: existing.episodesTotal,
          });
          return true;
        }
      }

      // A cached no-match younger than the retry window suppresses a new search.
      if (
        existing &&
        existing.coverUrl === null &&
        existing.anilistId === null &&
        Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
      ) {
        return false;
      }

      const cleaned = stripFilenameTags(canonicalTitle);
      if (!cleaned) {
        logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
        cacheNoMatch(db, videoId);
        return false;
      }

      const parsedInfo = await resolveMediaInfo(canonicalTitle);
      const searchBase = parsedInfo?.title ?? cleaned;
      const searchCandidates = parsedInfo ? buildSearchCandidates(parsedInfo) : [cleaned];
      // The regex-cleaned title is always tried as the last resort.
      const effectiveCandidates = searchCandidates.includes(cleaned)
        ? searchCandidates
        : [...searchCandidates, cleaned];

      let selected: AnilistMedia | null = null;
      // Sticky: once any query is rate-limited, a later empty result must not
      // be mistaken for a definitive "no match".
      let rateLimited = false;

      for (const candidate of effectiveCandidates) {
        logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);

        try {
          const result = await searchAnilist(rateLimiter, candidate);
          if (result.rateLimited) {
            rateLimited = true;
          }
          if (result.media.length === 0) {
            continue;
          }

          const picked = pickBestSearchResult(
            searchBase,
            parsedInfo?.episode ?? null,
            parsedInfo?.season ?? null,
            result.media,
          );
          if (picked) {
            const match = result.media.find((media) => media.id === picked.id);
            if (match) {
              selected = match;
              break;
            }
          }
        } catch (err) {
          logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
          return false;
        }
      }

      if (!selected) {
        if (rateLimited) {
          // Incomplete search: do not cache a no-match, so the next call retries.
          logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
          return false;
        }
        logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
        cacheNoMatch(db, videoId);
        return false;
      }

      const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
      let coverBlob: Buffer | null = null;
      if (coverUrl) {
        coverBlob = await downloadImage(coverUrl);
      }

      upsertCoverArt(db, videoId, {
        anilistId: selected.id,
        coverUrl,
        coverBlob,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        episodesTotal: selected.episodes ?? null,
      });

      updateAnimeAnilistInfo(db, videoId, {
        anilistId: selected.id,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        titleNative: selected.title?.native ?? null,
        episodesTotal: selected.episodes ?? null,
      });

      logger.info(
        'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
        videoId,
        selected.id,
        selected.title?.romaji ?? searchBase,
      );

      return true;
    },
  };
}
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
resolveBoundedInt,
|
||||
} from './immersion-tracker/reducer';
|
||||
import type { QueuedWrite } from './immersion-tracker/types';
|
||||
import { PartOfSpeech, type MergedToken } from '../../types';
|
||||
|
||||
type ImmersionTrackerService = import('./immersion-tracker-service').ImmersionTrackerService;
|
||||
type ImmersionTrackerServiceCtor =
|
||||
@@ -26,6 +27,34 @@ async function loadTrackerCtor(): Promise<ImmersionTrackerServiceCtor> {
|
||||
return trackerCtor;
|
||||
}
|
||||
|
||||
/**
 * Test helper: wait for the anime-metadata update registered for the
 * tracker's active video, if any. Reaches into the tracker's private
 * `sessionState` and `pendingAnimeMetadataUpdates` fields.
 */
async function waitForPendingAnimeMetadata(tracker: ImmersionTrackerService): Promise<void> {
  type TrackerInternals = {
    sessionState: { videoId: number } | null;
    pendingAnimeMetadataUpdates?: Map<number, Promise<void>>;
  };
  const internals = tracker as unknown as TrackerInternals;

  const activeVideoId = internals.sessionState?.videoId;
  if (activeVideoId) {
    await internals.pendingAnimeMetadataUpdates?.get(activeVideoId);
  }
}
|
||||
|
||||
/**
 * Test helper: build a `MergedToken` from blank defaults, with any field in
 * `overrides` taking precedence.
 */
function makeMergedToken(overrides: Partial<MergedToken>): MergedToken {
  const blankToken: MergedToken = {
    surface: '',
    reading: '',
    headword: '',
    startPos: 0,
    endPos: 0,
    partOfSpeech: PartOfSpeech.other,
    pos1: '',
    pos2: '',
    pos3: '',
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  return { ...blankToken, ...overrides };
}
|
||||
|
||||
function makeDbPath(): string {
|
||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-immersion-test-'));
|
||||
return path.join(dir, 'immersion.sqlite');
|
||||
@@ -222,6 +251,308 @@ test('persists and retrieves minimum immersion tracking fields', async () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('recordSubtitleLine persists counted allowed tokenized vocabulary rows and subtitle-line occurrences', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
let tracker: ImmersionTrackerService | null = null;
|
||||
|
||||
try {
|
||||
const Ctor = await loadTrackerCtor();
|
||||
tracker = new Ctor({ dbPath });
|
||||
|
||||
tracker.handleMediaChange('/tmp/Little Witch Academia S02E04.mkv', 'Episode 4');
|
||||
await waitForPendingAnimeMetadata(tracker);
|
||||
tracker.recordSubtitleLine('猫 猫 日 日 は 知っている', 0, 1, [
|
||||
makeMergedToken({
|
||||
surface: '猫',
|
||||
headword: '猫',
|
||||
reading: 'ねこ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '一般',
|
||||
}),
|
||||
makeMergedToken({
|
||||
surface: '猫',
|
||||
headword: '猫',
|
||||
reading: 'ねこ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '一般',
|
||||
}),
|
||||
makeMergedToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
reading: 'は',
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
pos2: '係助詞',
|
||||
}),
|
||||
makeMergedToken({
|
||||
surface: '知っている',
|
||||
headword: '知る',
|
||||
reading: 'しっている',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '動詞',
|
||||
pos2: '自立',
|
||||
}),
|
||||
]);
|
||||
|
||||
const privateApi = tracker as unknown as {
|
||||
flushTelemetry: (force?: boolean) => void;
|
||||
flushNow: () => void;
|
||||
};
|
||||
privateApi.flushTelemetry(true);
|
||||
privateApi.flushNow();
|
||||
|
||||
const db = new Database(dbPath);
|
||||
const rows = db
|
||||
.prepare(
|
||||
`SELECT headword, word, reading, part_of_speech, pos1, pos2, frequency
|
||||
FROM imm_words
|
||||
ORDER BY id ASC`,
|
||||
)
|
||||
.all() as Array<{
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
part_of_speech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
frequency: number;
|
||||
}>;
|
||||
const lineRows = db
|
||||
.prepare(
|
||||
`SELECT video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text
|
||||
FROM imm_subtitle_lines
|
||||
ORDER BY line_id ASC`,
|
||||
)
|
||||
.all() as Array<{
|
||||
video_id: number;
|
||||
anime_id: number | null;
|
||||
line_index: number;
|
||||
segment_start_ms: number | null;
|
||||
segment_end_ms: number | null;
|
||||
text: string;
|
||||
}>;
|
||||
const wordOccurrenceRows = db
|
||||
.prepare(
|
||||
`SELECT o.occurrence_count, w.headword, w.word, w.reading
|
||||
FROM imm_word_line_occurrences o
|
||||
JOIN imm_words w ON w.id = o.word_id
|
||||
ORDER BY o.line_id ASC, o.word_id ASC`,
|
||||
)
|
||||
.all() as Array<{
|
||||
occurrence_count: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
}>;
|
||||
const kanjiOccurrenceRows = db
|
||||
.prepare(
|
||||
`SELECT o.occurrence_count, k.kanji
|
||||
FROM imm_kanji_line_occurrences o
|
||||
JOIN imm_kanji k ON k.id = o.kanji_id
|
||||
ORDER BY o.line_id ASC, k.kanji ASC`,
|
||||
)
|
||||
.all() as Array<{
|
||||
occurrence_count: number;
|
||||
kanji: string;
|
||||
}>;
|
||||
db.close();
|
||||
|
||||
assert.deepEqual(rows, [
|
||||
{
|
||||
headword: '猫',
|
||||
word: '猫',
|
||||
reading: 'ねこ',
|
||||
part_of_speech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '一般',
|
||||
frequency: 2,
|
||||
},
|
||||
{
|
||||
headword: '知る',
|
||||
word: '知っている',
|
||||
reading: 'しっている',
|
||||
part_of_speech: PartOfSpeech.verb,
|
||||
pos1: '動詞',
|
||||
pos2: '自立',
|
||||
frequency: 1,
|
||||
},
|
||||
]);
|
||||
assert.equal(lineRows.length, 1);
|
||||
assert.equal(lineRows[0]?.line_index, 1);
|
||||
assert.equal(lineRows[0]?.segment_start_ms, 0);
|
||||
assert.equal(lineRows[0]?.segment_end_ms, 1000);
|
||||
assert.equal(lineRows[0]?.text, '猫 猫 日 日 は 知っている');
|
||||
assert.ok(lineRows[0]?.video_id);
|
||||
assert.ok(lineRows[0]?.anime_id);
|
||||
assert.deepEqual(wordOccurrenceRows, [
|
||||
{
|
||||
occurrence_count: 2,
|
||||
headword: '猫',
|
||||
word: '猫',
|
||||
reading: 'ねこ',
|
||||
},
|
||||
{
|
||||
occurrence_count: 1,
|
||||
headword: '知る',
|
||||
word: '知っている',
|
||||
reading: 'しっている',
|
||||
},
|
||||
]);
|
||||
assert.deepEqual(kanjiOccurrenceRows, [
|
||||
{
|
||||
occurrence_count: 2,
|
||||
kanji: '日',
|
||||
},
|
||||
{
|
||||
occurrence_count: 2,
|
||||
kanji: '猫',
|
||||
},
|
||||
{
|
||||
occurrence_count: 1,
|
||||
kanji: '知',
|
||||
},
|
||||
]);
|
||||
} finally {
|
||||
tracker?.destroy();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('handleMediaChange links parsed anime metadata on the active video row', async () => {
  // Shape of the joined imm_videos / imm_anime row inspected below.
  type LinkedVideoRow = {
    anime_id: number | null;
    parsed_basename: string | null;
    parsed_title: string | null;
    parsed_season: number | null;
    parsed_episode: number | null;
    parser_source: string | null;
    anime_title: string | null;
    anilist_id: number | null;
  };

  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;

  try {
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });

    // Start a session and wait for the background metadata parse to settle.
    tracker.handleMediaChange('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5');
    await waitForPendingAnimeMetadata(tracker);

    // Reach into private state to find the video row backing the active session.
    const internals = tracker as unknown as {
      db: DatabaseSync;
      sessionState: { videoId: number } | null;
    };
    const videoId = internals.sessionState?.videoId;
    assert.ok(videoId);

    const linkedVideoStmt = internals.db.prepare(
      `
        SELECT
          v.anime_id,
          v.parsed_basename,
          v.parsed_title,
          v.parsed_season,
          v.parsed_episode,
          v.parser_source,
          a.canonical_title AS anime_title,
          a.anilist_id
        FROM imm_videos v
        LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
        WHERE v.video_id = ?
      `,
    );
    const row = linkedVideoStmt.get(videoId) as LinkedVideoRow | null;

    assert.ok(row);
    assert.ok(row?.anime_id);
    assert.equal(row?.parsed_basename, 'Little Witch Academia S02E05.mkv');
    assert.equal(row?.parsed_title, 'Little Witch Academia');
    assert.equal(row?.parsed_season, 2);
    assert.equal(row?.parsed_episode, 5);

    // Either parser may have produced the result depending on the environment.
    const parserSource = row?.parser_source;
    assert.ok(parserSource === 'guessit' || parserSource === 'fallback');

    assert.equal(row?.anime_title, 'Little Witch Academia');
    assert.equal(row?.anilist_id, null);
  } finally {
    tracker?.destroy();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('handleMediaChange reuses the same provisional anime row across matching files', async () => {
  // Joined imm_videos / imm_anime columns read back for both episodes.
  type EpisodeRow = {
    source_path: string | null;
    anime_id: number | null;
    parsed_episode: number | null;
    anime_title: string | null;
    anilist_id: number | null;
  };

  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;

  try {
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });

    // Play two episodes of the same show back to back, letting each parse finish.
    tracker.handleMediaChange('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5');
    await waitForPendingAnimeMetadata(tracker);

    tracker.handleMediaChange('/tmp/Little Witch Academia S02E06.mkv', 'Episode 6');
    await waitForPendingAnimeMetadata(tracker);

    const internals = tracker as unknown as {
      db: DatabaseSync;
    };
    const episodeStmt = internals.db.prepare(
      `
        SELECT
          v.source_path,
          v.anime_id,
          v.parsed_episode,
          a.canonical_title AS anime_title,
          a.anilist_id
        FROM imm_videos v
        LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
        WHERE v.source_path IN (?, ?)
        ORDER BY v.source_path
      `,
    );
    const rows = episodeStmt.all(
      '/tmp/Little Witch Academia S02E05.mkv',
      '/tmp/Little Witch Academia S02E06.mkv',
    ) as Array<EpisodeRow>;

    // Both videos must exist and point at one shared anime row.
    assert.equal(rows.length, 2);
    assert.ok(rows[0]?.anime_id);
    assert.equal(rows[0]?.anime_id, rows[1]?.anime_id);

    const summarized = rows.map((row) => ({
      sourcePath: row.source_path,
      parsedEpisode: row.parsed_episode,
      animeTitle: row.anime_title,
      anilistId: row.anilist_id,
    }));
    assert.deepEqual(summarized, [
      {
        sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
        parsedEpisode: 5,
        animeTitle: 'Little Witch Academia',
        anilistId: null,
      },
      {
        sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
        parsedEpisode: 6,
        animeTitle: 'Little Witch Academia',
        anilistId: null,
      },
    ]);
  } finally {
    tracker?.destroy();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('applies configurable queue, flush, and retention policy', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
let tracker: ImmersionTrackerService | null = null;
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import path from 'node:path';
|
||||
import * as fs from 'node:fs';
|
||||
import { createLogger } from '../../logger';
|
||||
import { getLocalVideoMetadata } from './immersion-tracker/metadata';
|
||||
import type { CoverArtFetcher } from './anilist/cover-art-fetcher';
|
||||
import { getLocalVideoMetadata, guessAnimeVideoMetadata } from './immersion-tracker/metadata';
|
||||
import { pruneRetention, runRollupMaintenance } from './immersion-tracker/maintenance';
|
||||
import { Database, type DatabaseSync } from './immersion-tracker/sqlite';
|
||||
import { finalizeSessionRecord, startSessionRecord } from './immersion-tracker/session';
|
||||
@@ -10,23 +11,58 @@ import {
|
||||
createTrackerPreparedStatements,
|
||||
ensureSchema,
|
||||
executeQueuedWrite,
|
||||
getOrCreateAnimeRecord,
|
||||
getOrCreateVideoRecord,
|
||||
linkVideoToAnimeRecord,
|
||||
type TrackerPreparedStatements,
|
||||
updateVideoMetadataRecord,
|
||||
updateVideoTitleRecord,
|
||||
} from './immersion-tracker/storage';
|
||||
import {
|
||||
cleanupVocabularyStats,
|
||||
getAnimeCoverArt,
|
||||
getAnimeDailyRollups,
|
||||
getAnimeAnilistEntries,
|
||||
getAnimeDetail,
|
||||
getAnimeEpisodes,
|
||||
getAnimeLibrary,
|
||||
getAnimeWords,
|
||||
getEpisodeCardEvents,
|
||||
getEpisodeSessions,
|
||||
getEpisodeWords,
|
||||
getCoverArt,
|
||||
getDailyRollups,
|
||||
getEpisodesPerDay,
|
||||
getKanjiAnimeAppearances,
|
||||
getKanjiDetail,
|
||||
getKanjiWords,
|
||||
getNewAnimePerDay,
|
||||
getSimilarWords,
|
||||
getStreakCalendar,
|
||||
getKanjiOccurrences,
|
||||
getKanjiStats,
|
||||
getMediaDailyRollups,
|
||||
getMediaDetail,
|
||||
getMediaLibrary,
|
||||
getMediaSessions,
|
||||
getMonthlyRollups,
|
||||
getQueryHints,
|
||||
getSessionEvents,
|
||||
getSessionSummaries,
|
||||
getSessionTimeline,
|
||||
getVocabularyStats,
|
||||
getWatchTimePerAnime,
|
||||
getWordAnimeAppearances,
|
||||
getWordDetail,
|
||||
getWordOccurrences,
|
||||
getVideoDurationMs,
|
||||
markVideoWatched,
|
||||
} from './immersion-tracker/query';
|
||||
import {
|
||||
buildVideoKey,
|
||||
calculateTextMetrics,
|
||||
extractLineVocabulary,
|
||||
deriveCanonicalTitle,
|
||||
isKanji,
|
||||
isRemoteSource,
|
||||
normalizeMediaPath,
|
||||
normalizeText,
|
||||
@@ -57,19 +93,73 @@ import {
|
||||
SOURCE_TYPE_LOCAL,
|
||||
SOURCE_TYPE_REMOTE,
|
||||
type ImmersionSessionRollupRow,
|
||||
type EpisodeCardEventRow,
|
||||
type EpisodesPerDayRow,
|
||||
type ImmersionTrackerOptions,
|
||||
type KanjiAnimeAppearanceRow,
|
||||
type KanjiDetailRow,
|
||||
type KanjiOccurrenceRow,
|
||||
type KanjiStatsRow,
|
||||
type KanjiWordRow,
|
||||
type LegacyVocabularyPosResolution,
|
||||
type LegacyVocabularyPosRow,
|
||||
type AnimeAnilistEntryRow,
|
||||
type AnimeDetailRow,
|
||||
type AnimeEpisodeRow,
|
||||
type AnimeLibraryRow,
|
||||
type AnimeWordRow,
|
||||
type MediaArtRow,
|
||||
type MediaDetailRow,
|
||||
type MediaLibraryRow,
|
||||
type NewAnimePerDayRow,
|
||||
type QueuedWrite,
|
||||
type SessionEventRow,
|
||||
type SessionState,
|
||||
type SessionSummaryQueryRow,
|
||||
type SessionTimelineRow,
|
||||
type SimilarWordRow,
|
||||
type StreakCalendarRow,
|
||||
type VocabularyCleanupSummary,
|
||||
type WatchTimePerAnimeRow,
|
||||
type WordAnimeAppearanceRow,
|
||||
type WordDetailRow,
|
||||
type WordOccurrenceRow,
|
||||
type VocabularyStatsRow,
|
||||
} from './immersion-tracker/types';
|
||||
import type { MergedToken } from '../../types';
|
||||
import { shouldExcludeTokenFromVocabularyPersistence } from './tokenizer/annotation-stage';
|
||||
import { deriveStoredPartOfSpeech } from './tokenizer/part-of-speech';
|
||||
|
||||
export type {
|
||||
AnimeAnilistEntryRow,
|
||||
AnimeDetailRow,
|
||||
AnimeEpisodeRow,
|
||||
AnimeLibraryRow,
|
||||
AnimeWordRow,
|
||||
EpisodeCardEventRow,
|
||||
EpisodesPerDayRow,
|
||||
ImmersionSessionRollupRow,
|
||||
ImmersionTrackerOptions,
|
||||
ImmersionTrackerPolicy,
|
||||
KanjiAnimeAppearanceRow,
|
||||
KanjiDetailRow,
|
||||
KanjiOccurrenceRow,
|
||||
KanjiStatsRow,
|
||||
KanjiWordRow,
|
||||
MediaArtRow,
|
||||
MediaDetailRow,
|
||||
MediaLibraryRow,
|
||||
NewAnimePerDayRow,
|
||||
SessionEventRow,
|
||||
SessionSummaryQueryRow,
|
||||
SessionTimelineRow,
|
||||
SimilarWordRow,
|
||||
StreakCalendarRow,
|
||||
WatchTimePerAnimeRow,
|
||||
WordAnimeAppearanceRow,
|
||||
WordDetailRow,
|
||||
WordOccurrenceRow,
|
||||
VocabularyStatsRow,
|
||||
} from './immersion-tracker/types';
|
||||
|
||||
export class ImmersionTrackerService {
|
||||
@@ -98,9 +188,17 @@ export class ImmersionTrackerService {
|
||||
private currentVideoKey = '';
|
||||
private currentMediaPathOrUrl = '';
|
||||
private readonly preparedStatements: TrackerPreparedStatements;
|
||||
private coverArtFetcher: CoverArtFetcher | null = null;
|
||||
private readonly pendingCoverFetches = new Map<number, Promise<boolean>>();
|
||||
private readonly recordedSubtitleKeys = new Set<string>();
|
||||
private readonly pendingAnimeMetadataUpdates = new Map<number, Promise<void>>();
|
||||
private readonly resolveLegacyVocabularyPos:
|
||||
| ((row: LegacyVocabularyPosRow) => Promise<LegacyVocabularyPosResolution | null>)
|
||||
| undefined;
|
||||
|
||||
constructor(options: ImmersionTrackerOptions) {
|
||||
this.dbPath = options.dbPath;
|
||||
this.resolveLegacyVocabularyPos = options.resolveLegacyVocabularyPos;
|
||||
const parentDir = path.dirname(this.dbPath);
|
||||
if (!fs.existsSync(parentDir)) {
|
||||
fs.mkdirSync(parentDir, { recursive: true });
|
||||
@@ -198,6 +296,8 @@ export class ImmersionTrackerService {
|
||||
/**
 * Returns the summary counters produced by the query module's `getQueryHints`
 * for this tracker's database.
 */
async getQueryHints(): Promise<{
  totalSessions: number;
  activeSessions: number;
  episodesToday: number;
  activeAnimeCount: number;
}> {
  const hints = getQueryHints(this.db);
  return hints;
}
|
||||
@@ -210,6 +310,180 @@ export class ImmersionTrackerService {
|
||||
return getMonthlyRollups(this.db, limit);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// Read facade: each method below forwards to the same-named helper from
// './immersion-tracker/query', passing this tracker's database handle.
// ---------------------------------------------------------------------------

/** Vocabulary statistics rows; `limit` defaults to 100, `excludePos` is forwarded as-is. */
async getVocabularyStats(limit = 100, excludePos?: string[]): Promise<VocabularyStatsRow[]> {
  return getVocabularyStats(this.db, limit, excludePos);
}

/** Runs the vocabulary cleanup, handing over the legacy POS resolver given at construction (may be undefined). */
async cleanupVocabularyStats(): Promise<VocabularyCleanupSummary> {
  const resolveLegacyPos = this.resolveLegacyVocabularyPos;
  return cleanupVocabularyStats(this.db, { resolveLegacyPos });
}

/** Kanji statistics rows; `limit` defaults to 100. */
async getKanjiStats(limit = 100): Promise<KanjiStatsRow[]> {
  return getKanjiStats(this.db, limit);
}

/** Occurrence rows for one (headword, word, reading) triple, paged by `limit` / `offset`. */
async getWordOccurrences(
  headword: string,
  word: string,
  reading: string,
  limit = 100,
  offset = 0,
): Promise<WordOccurrenceRow[]> {
  return getWordOccurrences(this.db, headword, word, reading, limit, offset);
}

/** Occurrence rows for one kanji, paged by `limit` / `offset`. */
async getKanjiOccurrences(kanji: string, limit = 100, offset = 0): Promise<KanjiOccurrenceRow[]> {
  return getKanjiOccurrences(this.db, kanji, limit, offset);
}

/** Event rows for a session; `limit` defaults to 500. */
async getSessionEvents(sessionId: number, limit = 500): Promise<SessionEventRow[]> {
  return getSessionEvents(this.db, sessionId, limit);
}

/** Media library rows. */
async getMediaLibrary(): Promise<MediaLibraryRow[]> {
  return getMediaLibrary(this.db);
}

/** Detail row for one video, or null. */
async getMediaDetail(videoId: number): Promise<MediaDetailRow | null> {
  return getMediaDetail(this.db, videoId);
}

/** Session summaries for one video; `limit` defaults to 100. */
async getMediaSessions(videoId: number, limit = 100): Promise<SessionSummaryQueryRow[]> {
  return getMediaSessions(this.db, videoId, limit);
}

/** Daily rollup rows for one video; `limit` defaults to 90. */
async getMediaDailyRollups(videoId: number, limit = 90): Promise<ImmersionSessionRollupRow[]> {
  return getMediaDailyRollups(this.db, videoId, limit);
}

/** Cover art row for one video, or null. */
async getCoverArt(videoId: number): Promise<MediaArtRow | null> {
  return getCoverArt(this.db, videoId);
}

/** Anime library rows. */
async getAnimeLibrary(): Promise<AnimeLibraryRow[]> {
  return getAnimeLibrary(this.db);
}

/** Detail row for one anime, or null. */
async getAnimeDetail(animeId: number): Promise<AnimeDetailRow | null> {
  return getAnimeDetail(this.db, animeId);
}

/** Episode rows for one anime. */
async getAnimeEpisodes(animeId: number): Promise<AnimeEpisodeRow[]> {
  return getAnimeEpisodes(this.db, animeId);
}

/** AniList entry rows for one anime. */
async getAnimeAnilistEntries(animeId: number): Promise<AnimeAnilistEntryRow[]> {
  return getAnimeAnilistEntries(this.db, animeId);
}

/** Cover art row for one anime, or null. */
async getAnimeCoverArt(animeId: number): Promise<MediaArtRow | null> {
  return getAnimeCoverArt(this.db, animeId);
}

/** Word rows for one anime; `limit` defaults to 50. */
async getAnimeWords(animeId: number, limit = 50): Promise<AnimeWordRow[]> {
  return getAnimeWords(this.db, animeId, limit);
}

/** Word rows for one video; `limit` defaults to 50. */
async getEpisodeWords(videoId: number, limit = 50): Promise<AnimeWordRow[]> {
  return getEpisodeWords(this.db, videoId, limit);
}

/** Session summaries for one video. */
async getEpisodeSessions(videoId: number): Promise<SessionSummaryQueryRow[]> {
  return getEpisodeSessions(this.db, videoId);
}

/** Forwards the watched flag for one video to `markVideoWatched`. */
async setVideoWatched(videoId: number, watched: boolean): Promise<void> {
  markVideoWatched(this.db, videoId, watched);
}

/** Card event rows for one video. */
async getEpisodeCardEvents(videoId: number): Promise<EpisodeCardEventRow[]> {
  return getEpisodeCardEvents(this.db, videoId);
}

/** Daily rollup rows for one anime; `limit` defaults to 90. */
async getAnimeDailyRollups(animeId: number, limit = 90): Promise<ImmersionSessionRollupRow[]> {
  return getAnimeDailyRollups(this.db, animeId, limit);
}

/** Streak calendar rows; `days` defaults to 90. */
async getStreakCalendar(days = 90): Promise<StreakCalendarRow[]> {
  return getStreakCalendar(this.db, days);
}

/** Episodes-per-day rows; `limit` defaults to 90. */
async getEpisodesPerDay(limit = 90): Promise<EpisodesPerDayRow[]> {
  return getEpisodesPerDay(this.db, limit);
}

/** New-anime-per-day rows; `limit` defaults to 90. */
async getNewAnimePerDay(limit = 90): Promise<NewAnimePerDayRow[]> {
  return getNewAnimePerDay(this.db, limit);
}

/** Watch-time-per-anime rows; `limit` defaults to 90. */
async getWatchTimePerAnime(limit = 90): Promise<WatchTimePerAnimeRow[]> {
  return getWatchTimePerAnime(this.db, limit);
}

/** Detail row for one word id, or null. */
async getWordDetail(wordId: number): Promise<WordDetailRow | null> {
  return getWordDetail(this.db, wordId);
}

/** Anime appearance rows for one word id. */
async getWordAnimeAppearances(wordId: number): Promise<WordAnimeAppearanceRow[]> {
  return getWordAnimeAppearances(this.db, wordId);
}

/** Similar-word rows for one word id; `limit` defaults to 10. */
async getSimilarWords(wordId: number, limit = 10): Promise<SimilarWordRow[]> {
  return getSimilarWords(this.db, wordId, limit);
}

/** Detail row for one kanji id, or null. */
async getKanjiDetail(kanjiId: number): Promise<KanjiDetailRow | null> {
  return getKanjiDetail(this.db, kanjiId);
}

/** Anime appearance rows for one kanji id. */
async getKanjiAnimeAppearances(kanjiId: number): Promise<KanjiAnimeAppearanceRow[]> {
  return getKanjiAnimeAppearances(this.db, kanjiId);
}

/** Word rows for one kanji id; `limit` defaults to 20. */
async getKanjiWords(kanjiId: number, limit = 20): Promise<KanjiWordRow[]> {
  return getKanjiWords(this.db, kanjiId, limit);
}
|
||||
|
||||
/**
 * Installs (or, with `null`, removes) the fetcher that `ensureCoverArt`
 * consults when a video has no stored cover blob.
 */
setCoverArtFetcher(fetcher: CoverArtFetcher | null): void {
  this.coverArtFetcher = fetcher;
}
|
||||
|
||||
/**
 * Makes sure cover art exists for a video, fetching it if necessary.
 *
 * @param videoId - Video whose cover art is wanted.
 * @returns `true` when a cover blob is already stored; `false` when no fetcher
 *   is installed or the video has no non-empty canonical title; otherwise the
 *   result of the fetcher's `fetchIfMissing`. Concurrent calls for the same
 *   video share one in-flight fetch.
 */
async ensureCoverArt(videoId: number): Promise<boolean> {
  if (getCoverArt(this.db, videoId)?.coverBlob) {
    return true;
  }

  const fetcher = this.coverArtFetcher;
  if (!fetcher) {
    return false;
  }

  // Piggyback on a fetch that is already running for this video.
  const pending = this.pendingCoverFetches.get(videoId);
  if (pending) {
    return await pending;
  }

  const runFetch = async (): Promise<boolean> => {
    const title = getMediaDetail(this.db, videoId)?.canonicalTitle?.trim();
    if (!title) {
      return false;
    }
    return await fetcher.fetchIfMissing(this.db, videoId, title);
  };

  const task = runFetch();
  this.pendingCoverFetches.set(videoId, task);
  try {
    return await task;
  } finally {
    // Always clear the in-flight marker, whether the fetch resolved or threw.
    this.pendingCoverFetches.delete(videoId);
  }
}
|
||||
|
||||
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
|
||||
const normalizedPath = normalizeMediaPath(mediaPath);
|
||||
const normalizedTitle = normalizeText(mediaTitle);
|
||||
@@ -254,6 +528,7 @@ export class ImmersionTrackerService {
|
||||
`Starting immersion session for path=${normalizedPath} videoId=${sessionInfo.videoId}`,
|
||||
);
|
||||
this.startSession(sessionInfo.videoId, sessionInfo.startedAtMs);
|
||||
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
|
||||
this.captureVideoMetadataAsync(sessionInfo.videoId, sourceType, normalizedPath);
|
||||
}
|
||||
|
||||
@@ -265,40 +540,110 @@ export class ImmersionTrackerService {
|
||||
this.updateVideoTitleForActiveSession(normalizedTitle);
|
||||
}
|
||||
|
||||
recordSubtitleLine(text: string, startSec: number, endSec: number): void {
|
||||
recordSubtitleLine(
|
||||
text: string,
|
||||
startSec: number,
|
||||
endSec: number,
|
||||
tokens?: MergedToken[] | null,
|
||||
): void {
|
||||
if (!this.sessionState || !text.trim()) return;
|
||||
const cleaned = normalizeText(text);
|
||||
if (!cleaned) return;
|
||||
|
||||
if (!endSec || endSec <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const startMs = secToMs(startSec);
|
||||
const subtitleKey = `${startMs}:${cleaned}`;
|
||||
if (this.recordedSubtitleKeys.has(subtitleKey)) {
|
||||
return;
|
||||
}
|
||||
this.recordedSubtitleKeys.add(subtitleKey);
|
||||
|
||||
const nowMs = Date.now();
|
||||
const nowSec = nowMs / 1000;
|
||||
|
||||
const metrics = calculateTextMetrics(cleaned);
|
||||
const extractedVocabulary = extractLineVocabulary(cleaned);
|
||||
this.sessionState.currentLineIndex += 1;
|
||||
this.sessionState.linesSeen += 1;
|
||||
this.sessionState.wordsSeen += metrics.words;
|
||||
this.sessionState.tokensSeen += metrics.tokens;
|
||||
this.sessionState.pendingTelemetry = true;
|
||||
|
||||
for (const { headword, word, reading } of extractedVocabulary.words) {
|
||||
this.recordWrite({
|
||||
kind: 'word',
|
||||
const wordOccurrences = new Map<
|
||||
string,
|
||||
{
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
>();
|
||||
for (const token of tokens ?? []) {
|
||||
if (shouldExcludeTokenFromVocabularyPersistence(token)) {
|
||||
continue;
|
||||
}
|
||||
const headword = normalizeText(token.headword || token.surface);
|
||||
const word = normalizeText(token.surface || token.headword);
|
||||
const reading = normalizeText(token.reading);
|
||||
if (!headword || !word) {
|
||||
continue;
|
||||
}
|
||||
const wordKey = [
|
||||
headword,
|
||||
word,
|
||||
reading,
|
||||
firstSeen: nowSec,
|
||||
lastSeen: nowSec,
|
||||
].join('\u0000');
|
||||
const storedPartOfSpeech = deriveStoredPartOfSpeech({
|
||||
partOfSpeech: token.partOfSpeech,
|
||||
pos1: token.pos1 ?? '',
|
||||
});
|
||||
const existing = wordOccurrences.get(wordKey);
|
||||
if (existing) {
|
||||
existing.occurrenceCount += 1;
|
||||
continue;
|
||||
}
|
||||
wordOccurrences.set(wordKey, {
|
||||
headword,
|
||||
word,
|
||||
reading,
|
||||
partOfSpeech: storedPartOfSpeech,
|
||||
pos1: token.pos1 ?? '',
|
||||
pos2: token.pos2 ?? '',
|
||||
pos3: token.pos3 ?? '',
|
||||
occurrenceCount: 1,
|
||||
});
|
||||
}
|
||||
|
||||
for (const kanji of extractedVocabulary.kanji) {
|
||||
const kanjiCounts = new Map<string, number>();
|
||||
for (const char of cleaned) {
|
||||
if (!isKanji(char)) {
|
||||
continue;
|
||||
}
|
||||
kanjiCounts.set(char, (kanjiCounts.get(char) ?? 0) + 1);
|
||||
}
|
||||
|
||||
this.recordWrite({
|
||||
kind: 'kanji',
|
||||
kind: 'subtitleLine',
|
||||
sessionId: this.sessionState.sessionId,
|
||||
videoId: this.sessionState.videoId,
|
||||
lineIndex: this.sessionState.currentLineIndex,
|
||||
segmentStartMs: secToMs(startSec),
|
||||
segmentEndMs: secToMs(endSec),
|
||||
text: cleaned,
|
||||
wordOccurrences: Array.from(wordOccurrences.values()),
|
||||
kanjiOccurrences: Array.from(kanjiCounts.entries()).map(([kanji, occurrenceCount]) => ({
|
||||
kanji,
|
||||
occurrenceCount,
|
||||
})),
|
||||
firstSeen: nowSec,
|
||||
lastSeen: nowSec,
|
||||
});
|
||||
}
|
||||
|
||||
this.recordWrite({
|
||||
kind: 'event',
|
||||
@@ -321,6 +666,16 @@ export class ImmersionTrackerService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Stores the duration of the active session's video.
 *
 * Does nothing without an active session or when `durationSec` is not a
 * finite positive number. The row is rewritten only when the stored duration
 * is 0 or differs from the new value by more than 1000 ms.
 *
 * @param durationSec - Media duration in seconds.
 */
recordMediaDuration(durationSec: number): void {
  const session = this.sessionState;
  if (!session) return;
  if (!Number.isFinite(durationSec) || durationSec <= 0) return;

  const durationMs = Math.round(durationSec * 1000);
  const storedMs = getVideoDurationMs(this.db, session.videoId);
  const isUnset = storedMs === 0;
  const hasDrifted = Math.abs(storedMs - durationMs) > 1000;
  if (!isUnset && !hasDrifted) return;

  const updateStmt = this.db.prepare(
    'UPDATE imm_videos SET duration_ms = ?, LAST_UPDATE_DATE = ? WHERE video_id = ?',
  );
  updateStmt.run(durationMs, Date.now(), session.videoId);
}
|
||||
|
||||
recordPlaybackPosition(mediaTimeSec: number | null): void {
|
||||
if (!this.sessionState || mediaTimeSec === null || !Number.isFinite(mediaTimeSec)) {
|
||||
return;
|
||||
@@ -391,6 +746,14 @@ export class ImmersionTrackerService {
|
||||
this.sessionState.lastWallClockMs = nowMs;
|
||||
this.sessionState.lastMediaMs = mediaMs;
|
||||
this.sessionState.pendingTelemetry = true;
|
||||
|
||||
if (!this.sessionState.markedWatched) {
|
||||
const durationMs = getVideoDurationMs(this.db, this.sessionState.videoId);
|
||||
if (durationMs > 0 && mediaMs >= durationMs * 0.98) {
|
||||
markVideoWatched(this.db, this.sessionState.videoId, true);
|
||||
this.sessionState.markedWatched = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordPauseState(isPaused: boolean): void {
|
||||
@@ -454,7 +817,7 @@ export class ImmersionTrackerService {
|
||||
});
|
||||
}
|
||||
|
||||
recordCardsMined(count = 1): void {
|
||||
recordCardsMined(count = 1, noteIds?: number[]): void {
|
||||
if (!this.sessionState) return;
|
||||
this.sessionState.cardsMined += count;
|
||||
this.sessionState.pendingTelemetry = true;
|
||||
@@ -465,7 +828,10 @@ export class ImmersionTrackerService {
|
||||
eventType: EVENT_CARD_MINED,
|
||||
wordsDelta: 0,
|
||||
cardsDelta: count,
|
||||
payloadJson: sanitizePayload({ cardsMined: count }, this.maxPayloadBytes),
|
||||
payloadJson: sanitizePayload(
|
||||
{ cardsMined: count, ...(noteIds?.length ? { noteIds } : {}) },
|
||||
this.maxPayloadBytes,
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -615,6 +981,7 @@ export class ImmersionTrackerService {
|
||||
private startSession(videoId: number, startedAtMs?: number): void {
|
||||
const { sessionId, state } = startSessionRecord(this.db, videoId, startedAtMs);
|
||||
this.sessionState = state;
|
||||
this.recordedSubtitleKeys.clear();
|
||||
this.recordWrite({
|
||||
kind: 'telemetry',
|
||||
sessionId,
|
||||
@@ -673,6 +1040,48 @@ export class ImmersionTrackerService {
|
||||
})();
|
||||
}
|
||||
|
||||
/**
 * Parses anime metadata for a video in the background and links the video to
 * an anime record.
 *
 * The parse result from `guessAnimeVideoMetadata` is dropped when the tracker
 * has been destroyed in the meantime or when no non-blank title was parsed.
 * Otherwise an anime record is looked up or created from the parsed title
 * (with no AniList id or localized titles) and the video row is linked to it
 * together with the parser details. Failures are logged as warnings and never
 * propagate to the caller.
 *
 * The in-flight promise is registered in `pendingAnimeMetadataUpdates` under
 * `videoId` and removed once it settles.
 *
 * @param videoId - Video row to link.
 * @param mediaPath - Media path handed to the parser (may be null).
 * @param mediaTitle - Media title handed to the parser (may be null).
 */
private captureAnimeMetadataAsync(
  videoId: number,
  mediaPath: string | null,
  mediaTitle: string | null,
): void {
  const updatePromise = (async () => {
    try {
      const parsed = await guessAnimeVideoMetadata(mediaPath, mediaTitle);
      if (this.isDestroyed || !parsed?.parsedTitle.trim()) {
        return;
      }

      const animeId = getOrCreateAnimeRecord(this.db, {
        parsedTitle: parsed.parsedTitle,
        canonicalTitle: parsed.parsedTitle,
        anilistId: null,
        titleRomaji: null,
        titleEnglish: null,
        titleNative: null,
        metadataJson: parsed.parseMetadataJson,
      });
      linkVideoToAnimeRecord(this.db, videoId, {
        animeId,
        parsedBasename: parsed.parsedBasename,
        parsedTitle: parsed.parsedTitle,
        parsedSeason: parsed.parsedSeason,
        parsedEpisode: parsed.parsedEpisode,
        parserSource: parsed.parserSource,
        parserConfidence: parsed.parserConfidence,
        parseMetadataJson: parsed.parseMetadataJson,
      });
    } catch (error) {
      // Anything can be thrown; only read `.message` from real Error objects.
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.warn('Unable to capture anime metadata', reason);
    }
  })();

  this.pendingAnimeMetadataUpdates.set(videoId, updatePromise);
  void updatePromise.finally(() => {
    // A newer update for the same video may have replaced this entry while we
    // were running; only remove the entry if it is still ours, so the newer
    // in-flight update stays tracked.
    if (this.pendingAnimeMetadataUpdates.get(videoId) === updatePromise) {
      this.pendingAnimeMetadataUpdates.delete(videoId);
    }
  });
}
|
||||
|
||||
private updateVideoTitleForActiveSession(canonicalTitle: string): void {
|
||||
if (!this.sessionState) return;
|
||||
updateVideoTitleRecord(this.db, this.sessionState.videoId, canonicalTitle);
|
||||
|
||||
976
src/core/services/immersion-tracker/__tests__/query.test.ts
Normal file
976
src/core/services/immersion-tracker/__tests__/query.test.ts
Normal file
@@ -0,0 +1,976 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
import { Database } from '../sqlite.js';
|
||||
import {
|
||||
createTrackerPreparedStatements,
|
||||
ensureSchema,
|
||||
getOrCreateAnimeRecord,
|
||||
getOrCreateVideoRecord,
|
||||
linkVideoToAnimeRecord,
|
||||
} from '../storage.js';
|
||||
import { startSessionRecord } from '../session.js';
|
||||
import {
|
||||
cleanupVocabularyStats,
|
||||
getAnimeDetail,
|
||||
getAnimeEpisodes,
|
||||
getAnimeLibrary,
|
||||
getKanjiOccurrences,
|
||||
getSessionSummaries,
|
||||
getVocabularyStats,
|
||||
getKanjiStats,
|
||||
getSessionEvents,
|
||||
getWordOccurrences,
|
||||
} from '../query.js';
|
||||
import { SOURCE_TYPE_LOCAL, EVENT_SUBTITLE_LINE } from '../types.js';
|
||||
|
||||
/**
 * Creates a fresh temporary directory and returns the path of an
 * `immersion.sqlite` file inside it (the file itself is not created).
 */
function makeDbPath(): string {
  const prefix = path.join(os.tmpdir(), 'subminer-imm-query-test-');
  const tempDir = fs.mkdtempSync(prefix);
  return path.join(tempDir, 'immersion.sqlite');
}
|
||||
|
||||
/**
 * Removes the directory that contains `dbPath`, recursively.
 *
 * Returns silently when the directory does not exist. Removal is attempted up
 * to three times: an `EBUSY` failure on Windows triggers a Bun garbage
 * collection (when running under Bun) and a 25 ms blocking wait before the
 * next attempt; any other failure is rethrown immediately. If every attempt
 * fails, the last error is thrown.
 */
function cleanupDbPath(dbPath: string): void {
  const dir = path.dirname(dbPath);
  if (!fs.existsSync(dir)) {
    return;
  }

  const runtime = globalThis as typeof globalThis & {
    Bun?: {
      gc?: (force?: boolean) => void;
    };
  };

  let failure: NodeJS.ErrnoException | null = null;
  let attemptsLeft = 3;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      fs.rmSync(dir, { recursive: true, force: true });
      return;
    } catch (error) {
      failure = error as NodeJS.ErrnoException;
      const retryable = process.platform === 'win32' && failure.code === 'EBUSY';
      if (!retryable) {
        throw error;
      }
      // Give lingering handles a chance to be released before retrying.
      runtime.Bun?.gc?.(true);
      Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 25);
    }
  }

  if (failure) {
    throw failure;
  }
}
|
||||
|
||||
test('getSessionSummaries returns sessionId and canonicalTitle', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const stmts = createTrackerPreparedStatements(db);

    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/query-test.mkv', {
      canonicalTitle: 'Query Test Episode',
      sourcePath: '/tmp/query-test.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    const startedAtMs = 1_000_000;
    const { sessionId } = startSessionRecord(db, videoId, startedAtMs);

    // One telemetry sample taken a second after the session started; the
    // positional values follow the prepared statement's column order.
    const sampleAtMs = startedAtMs + 1_000;
    stmts.telemetryInsertStmt.run(
      sessionId,
      sampleAtMs,
      3_000,
      2_500,
      5,
      10,
      10,
      1,
      2,
      1,
      0,
      0,
      0,
      0,
      0,
      sampleAtMs,
      sampleAtMs,
    );

    const summaries = getSessionSummaries(db, 10);
    assert.ok(summaries.length >= 1);

    const row = summaries.find((candidate) => candidate.sessionId === sessionId);
    assert.ok(row, 'expected to find a row for the created session');
    assert.equal(typeof row.sessionId, 'number');
    assert.equal(row.sessionId, sessionId);
    assert.equal(row.canonicalTitle, 'Query Test Episode');
    assert.equal(row.videoId, videoId);
    assert.ok(row.linesSeen >= 5);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('getSessionSummaries with no telemetry returns zero aggregates', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);

    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/no-telemetry.mkv', {
      canonicalTitle: 'No Telemetry',
      sourcePath: '/tmp/no-telemetry.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    // Start a session but never write a telemetry sample for it.
    const { sessionId } = startSessionRecord(db, videoId, 3_000_000);

    const summary = getSessionSummaries(db, 10).find(
      (candidate) => candidate.sessionId === sessionId,
    );
    assert.ok(summary, 'expected to find the session with no telemetry');
    assert.equal(summary.canonicalTitle, 'No Telemetry');
    assert.equal(summary.totalWatchedMs, 0);
    assert.equal(summary.linesSeen, 0);
    assert.equal(summary.cardsMined, 0);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('getVocabularyStats returns rows ordered by frequency descending', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const { wordUpsertStmt } = createTrackerPreparedStatements(db);

    // Upsert 猫 twice and 犬 once.
    const upserts: Array<[string, string, string, number, number]> = [
      ['猫', '猫', 'ねこ', 1_000, 2_000],
      ['猫', '猫', 'ねこ', 1_000, 3_000],
      ['犬', '犬', 'いぬ', 1_500, 1_500],
    ];
    for (const [headword, word, reading, firstSeen, lastSeen] of upserts) {
      wordUpsertStmt.run(headword, word, reading, 'noun', '名詞', '一般', '', firstSeen, lastSeen);
    }

    const rows = getVocabularyStats(db, 10);
    assert.ok(rows.length >= 2);

    const catRow = rows.find((row) => row.headword === '猫');
    const dogRow = rows.find((row) => row.headword === '犬');
    assert.ok(catRow, 'expected 猫 row');
    assert.ok(dogRow, 'expected 犬 row');

    // The two upserts of 猫 must have accumulated into one row.
    assert.equal(catRow.headword, '猫');
    assert.equal(catRow.word, '猫');
    assert.equal(catRow.reading, 'ねこ');
    assert.equal(catRow.frequency, 2);
    assert.equal(typeof catRow.firstSeen, 'number');
    assert.equal(typeof catRow.lastSeen, 'number');

    // The more frequent word must be listed before the less frequent one.
    const catIndex = rows.indexOf(catRow);
    const dogIndex = rows.indexOf(dogRow);
    assert.ok(catIndex < dogIndex, 'higher frequency word should appear first');
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('getVocabularyStats returns empty array when no words exist', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    // Only the schema exists; no word has been inserted.
    ensureSchema(db);
    assert.deepEqual(getVocabularyStats(db, 10), []);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
test('cleanupVocabularyStats repairs stored POS metadata and removes excluded imm_words rows', async () => {
  // Column order of the seed tuples: headword, word, reading, part_of_speech,
  // pos1, pos2, pos3, first_seen, last_seen, frequency.
  type SeedRow = [string, string, string, string, string, string, string, number, number, number];
  type StoredWord = {
    headword: string;
    word: string;
    reading: string;
    part_of_speech: string;
    pos1: string;
    pos2: string;
  };

  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);

    const insertWord = db.prepare(
      `INSERT INTO imm_words (
        headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    const seeds: SeedRow[] = [
      ['猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 1_500, 3],
      ['知っている', '知っている', '', 'other', '動詞', '自立', '', 1_025, 1_525, 4],
      ['は', 'は', 'は', 'particle', '助詞', '係助詞', '', 1_100, 1_600, 9],
      ['旧', '旧', '', '', '', '', '', 900, 950, 1],
      ['未解決', '未解決', '', '', '', '', '', 901, 951, 1],
    ];
    for (const seed of seeds) {
      insertWord.run(...seed);
    }

    // Resolver answers keyed by the stored headword; anything else stays unresolved.
    const resolutions = new Map([
      [
        '旧',
        {
          partOfSpeech: 'noun',
          headword: '旧',
          reading: 'きゅう',
          pos1: '名詞',
          pos2: '一般',
          pos3: '',
        },
      ],
      [
        '知っている',
        {
          partOfSpeech: 'verb',
          headword: '知る',
          reading: 'しっている',
          pos1: '動詞',
          pos2: '自立',
          pos3: '',
        },
      ],
    ]);

    const result = await cleanupVocabularyStats(db, {
      resolveLegacyPos: async (row) => {
        const resolved = resolutions.get(row.headword);
        // Hand out a copy so every call gets its own object.
        return resolved ? { ...resolved } : null;
      },
    });

    const rows = getVocabularyStats(db, 10);
    const repairedRows = db
      .prepare(
        `SELECT headword, word, reading, part_of_speech, pos1, pos2
         FROM imm_words
         ORDER BY headword ASC, word ASC`,
      )
      .all() as Array<StoredWord>;

    assert.deepEqual(result, { scanned: 5, kept: 3, deleted: 2, repaired: 2 });

    const frequencies = rows.map((row) => ({ headword: row.headword, frequency: row.frequency }));
    assert.deepEqual(frequencies, [
      { headword: '知る', frequency: 4 },
      { headword: '猫', frequency: 3 },
      { headword: '旧', frequency: 1 },
    ]);

    assert.deepEqual(repairedRows, [
      {
        headword: '旧',
        word: '旧',
        reading: 'きゅう',
        part_of_speech: 'noun',
        pos1: '名詞',
        pos2: '一般',
      },
      {
        headword: '猫',
        word: '猫',
        reading: 'ねこ',
        part_of_speech: 'noun',
        pos1: '名詞',
        pos2: '一般',
      },
      {
        headword: '知る',
        word: '知っている',
        reading: 'しっている',
        part_of_speech: 'verb',
        pos1: '動詞',
        pos2: '自立',
      },
    ]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// The legacy row resolves to the same headword/word/reading as an existing row.
// The expectations below check that a single row survives with summed frequency,
// the widest first_seen/last_seen range, and the summed line occurrence count.
test('cleanupVocabularyStats merges repaired duplicates instead of violating the imm_words unique key', async () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cleanup-merge.mkv', {
      canonicalTitle: 'Cleanup Merge',
      sourcePath: '/tmp/cleanup-merge.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const { sessionId } = startSessionRecord(db, videoId, 2_000_000);

    const insertWord = db.prepare(
      `INSERT INTO imm_words (
        headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    // Already-repaired row first, then the legacy row that will collide with it.
    const duplicateId = Number(
      insertWord.run('知る', '知っている', 'しっている', 'verb', '動詞', '自立', '', 2_000, 2_500, 3)
        .lastInsertRowid,
    );
    const legacyId = Number(
      insertWord.run('知っている', '知っている', '', 'other', '動詞', '自立', '', 1_000, 3_000, 4)
        .lastInsertRowid,
    );

    const insertLine = db.prepare(
      `INSERT INTO imm_subtitle_lines (
        session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    const lineId = Number(
      insertLine.run(sessionId, null, videoId, null, 1, 0, 1000, '知っている', 2_000, 2_000)
        .lastInsertRowid,
    );

    // Both word rows occur on the same subtitle line.
    const insertOccurrence = db.prepare(
      `INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
       VALUES (?, ?, ?)`,
    );
    insertOccurrence.run(lineId, duplicateId, 2);
    insertOccurrence.run(lineId, legacyId, 1);

    const result = await cleanupVocabularyStats(db, {
      resolveLegacyPos: async (row) =>
        row.id === legacyId
          ? {
              partOfSpeech: 'verb',
              headword: '知る',
              reading: 'しっている',
              pos1: '動詞',
              pos2: '自立',
              pos3: '',
            }
          : null,
    });

    type MergedWordRow = {
      id: number;
      headword: string;
      word: string;
      reading: string;
      frequency: number;
      first_seen: number;
      last_seen: number;
    };
    const remainingWordsQuery = db.prepare(
      `SELECT id, headword, word, reading, frequency, first_seen, last_seen
       FROM imm_words
       ORDER BY id ASC`,
    );
    const rows = remainingWordsQuery.all() as Array<MergedWordRow>;
    const occurrences = getWordOccurrences(db, '知る', '知っている', 'しっている', 10);

    assert.deepEqual(result, { scanned: 2, kept: 1, deleted: 1, repaired: 1 });
    assert.deepEqual(rows, [
      {
        id: duplicateId,
        headword: '知る',
        word: '知っている',
        reading: 'しっている',
        frequency: 7,
        first_seen: 1_000,
        last_seen: 3_000,
      },
    ]);
    assert.deepEqual(occurrences, [
      {
        animeId: null,
        animeTitle: null,
        videoId,
        videoTitle: 'Cleanup Merge',
        sessionId,
        lineIndex: 1,
        segmentStartMs: 0,
        segmentEndMs: 1000,
        text: '知っている',
        occurrenceCount: 3,
      },
    ]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// Upserts 日 twice and 月 once, then checks that the 日 row reports frequency 2
// and is listed ahead of the 月 row.
test('getKanjiStats returns rows ordered by frequency descending', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const { kanjiUpsertStmt } = createTrackerPreparedStatements(db);

    kanjiUpsertStmt.run('日', 1_000, 2_000);
    kanjiUpsertStmt.run('日', 1_000, 3_000);
    kanjiUpsertStmt.run('月', 1_500, 1_500);

    const rows = getKanjiStats(db, 10);
    assert.ok(rows.length >= 2);

    // Locate both rows by position so the ordering check can reuse the indices.
    const nichiIdx = rows.findIndex((candidate) => candidate.kanji === '日');
    const tsukiIdx = rows.findIndex((candidate) => candidate.kanji === '月');
    const nichiRow = rows[nichiIdx];
    const tsukiRow = rows[tsukiIdx];
    assert.ok(nichiRow, 'expected 日 row');
    assert.ok(tsukiRow, 'expected 月 row');

    assert.equal(nichiRow.kanji, '日');
    assert.equal(nichiRow.frequency, 2);
    assert.equal(typeof nichiRow.firstSeen, 'number');
    assert.equal(typeof nichiRow.lastSeen, 'number');

    assert.ok(nichiIdx < tsukiIdx, 'higher frequency kanji should appear first');
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// No kanji are inserted after ensureSchema, so the stats query is expected to yield [].
test('getKanjiStats returns empty array when no kanji exist', () => {
  const databasePath = makeDbPath();
  const database = new Database(databasePath);

  try {
    ensureSchema(database);
    assert.deepEqual(getKanjiStats(database, 10), []);
  } finally {
    database.close();
    cleanupDbPath(databasePath);
  }
});
|
||||
|
||||
// Inserts the later event first and the earlier one second, then checks that
// getSessionEvents hands them back in ascending ts_ms order.
test('getSessionEvents returns events ordered by ts_ms ascending', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const { eventInsertStmt } = createTrackerPreparedStatements(db);

    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/events-test.mkv', {
      canonicalTitle: 'Events Test',
      sourcePath: '/tmp/events-test.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    const startedAtMs = 5_000_000;
    const { sessionId } = startSessionRecord(db, videoId, startedAtMs);

    const laterTs = startedAtMs + 2_000;
    const earlierTs = startedAtMs + 1_000;

    // Deliberately out of chronological order.
    eventInsertStmt.run(
      sessionId,
      laterTs,
      EVENT_SUBTITLE_LINE,
      1,
      0,
      800,
      2,
      0,
      '{"line":"second"}',
      laterTs,
      laterTs,
    );
    eventInsertStmt.run(
      sessionId,
      earlierTs,
      EVENT_SUBTITLE_LINE,
      0,
      0,
      600,
      3,
      0,
      '{"line":"first"}',
      earlierTs,
      earlierTs,
    );

    const events = getSessionEvents(db, sessionId, 50);
    assert.equal(events.length, 2);

    const [firstEvent, secondEvent] = events;
    assert.equal(firstEvent!.tsMs, earlierTs);
    assert.equal(secondEvent!.tsMs, laterTs);
    assert.equal(firstEvent!.eventType, EVENT_SUBTITLE_LINE);
    assert.equal(firstEvent!.payload, '{"line":"first"}');
    assert.equal(secondEvent!.payload, '{"line":"second"}');
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// Session 9999 is never created in this test, so no events are expected for it.
test('getSessionEvents returns empty array for session with no events', () => {
  const databasePath = makeDbPath();
  const database = new Database(databasePath);

  try {
    ensureSchema(database);
    assert.deepEqual(getSessionEvents(database, 9999, 50), []);
  } finally {
    database.close();
    cleanupDbPath(databasePath);
  }
});
|
||||
|
||||
// Inserts five events one second apart and checks that a limit of 3 returns three rows.
test('getSessionEvents respects limit parameter', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const { eventInsertStmt } = createTrackerPreparedStatements(db);

    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/events-limit.mkv', {
      canonicalTitle: 'Events Limit Test',
      sourcePath: '/tmp/events-limit.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    const startedAtMs = 7_000_000;
    const { sessionId } = startSessionRecord(db, videoId, startedAtMs);

    const eventCount = 5;
    Array.from({ length: eventCount }, (_unused, index) => index).forEach((index) => {
      const tsMs = startedAtMs + index * 1_000;
      eventInsertStmt.run(
        sessionId,
        tsMs,
        EVENT_SUBTITLE_LINE,
        index,
        0,
        500,
        1,
        0,
        null,
        tsMs,
        tsMs,
      );
    });

    const limited = getSessionEvents(db, sessionId, 3);
    assert.equal(limited.length, 3);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// Builds two anime (three videos, four sessions, one telemetry row per session)
// and checks getAnimeLibrary, getAnimeDetail and getAnimeEpisodes against the
// per-anime and per-episode totals derived from those telemetry rows.
test('anime-level queries group by anime_id and preserve episode-level rows', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const { telemetryInsertStmt } = createTrackerPreparedStatements(db);

    const lwaAnimeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Little Witch Academia',
      canonicalTitle: 'Little Witch Academia',
      anilistId: 33_435,
      titleRomaji: 'Little Witch Academia',
      titleEnglish: 'Little Witch Academia',
      titleNative: 'リトルウィッチアカデミア',
      metadataJson: '{"source":"anilist"}',
    });
    const frierenAnimeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Frieren',
      canonicalTitle: 'Frieren',
      anilistId: 52_921,
      titleRomaji: 'Sousou no Frieren',
      titleEnglish: 'Frieren: Beyond Journey\'s End',
      titleNative: '葬送のフリーレン',
      metadataJson: '{"source":"anilist"}',
    });

    const lwaEpisode5 = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
      canonicalTitle: 'Episode 5',
      sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const lwaEpisode6 = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
      canonicalTitle: 'Episode 6',
      sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const frierenEpisode3 = getOrCreateVideoRecord(db, 'local:/tmp/frieren-03.mkv', {
      canonicalTitle: 'Episode 3',
      sourcePath: '/tmp/[SubsPlease] Frieren - 03 - Departure.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });

    linkVideoToAnimeRecord(db, lwaEpisode5, {
      animeId: lwaAnimeId,
      parsedBasename: 'Little Witch Academia S02E05.mkv',
      parsedTitle: 'Little Witch Academia',
      parsedSeason: 2,
      parsedEpisode: 5,
      parserSource: 'fallback',
      parserConfidence: 1,
      parseMetadataJson: '{"episode":5}',
    });
    linkVideoToAnimeRecord(db, lwaEpisode6, {
      animeId: lwaAnimeId,
      parsedBasename: 'Little Witch Academia S02E06.mkv',
      parsedTitle: 'Little Witch Academia',
      parsedSeason: 2,
      parsedEpisode: 6,
      parserSource: 'fallback',
      parserConfidence: 1,
      parseMetadataJson: '{"episode":6}',
    });
    linkVideoToAnimeRecord(db, frierenEpisode3, {
      animeId: frierenAnimeId,
      parsedBasename: '[SubsPlease] Frieren - 03 - Departure.mkv',
      parsedTitle: 'Frieren',
      parsedSeason: 1,
      parsedEpisode: 3,
      parserSource: 'fallback',
      parserConfidence: 0.6,
      parseMetadataJson: '{"episode":3}',
    });

    // Two sessions on LWA episode 5, one on episode 6, one on Frieren episode 3.
    const sessionA = startSessionRecord(db, lwaEpisode5, 1_000_000);
    const sessionB = startSessionRecord(db, lwaEpisode5, 1_010_000);
    const sessionC = startSessionRecord(db, lwaEpisode6, 1_020_000);
    const sessionD = startSessionRecord(db, frierenEpisode3, 1_030_000);

    // One telemetry row per session. `counters` holds the positional values that sit
    // between the sample timestamp and the two trailing timestamp arguments.
    const telemetryRows = [
      {
        sessionId: sessionA.sessionId,
        sampleMs: 1_001_000,
        counters: [4_000, 3_000, 10, 25, 25, 1, 3, 2, 0, 0, 0, 0, 0],
      },
      {
        sessionId: sessionB.sessionId,
        sampleMs: 1_011_000,
        counters: [5_000, 4_000, 11, 27, 27, 2, 4, 2, 0, 0, 0, 0, 0],
      },
      {
        sessionId: sessionC.sessionId,
        sampleMs: 1_021_000,
        counters: [6_000, 5_000, 12, 28, 28, 3, 5, 4, 0, 0, 0, 0, 0],
      },
      {
        sessionId: sessionD.sessionId,
        sampleMs: 1_031_000,
        counters: [4_000, 3_500, 8, 20, 20, 1, 2, 1, 0, 0, 0, 0, 0],
      },
    ];
    for (const { sessionId, sampleMs, counters } of telemetryRows) {
      telemetryInsertStmt.run(sessionId, sampleMs, ...counters, sampleMs, sampleMs);
    }

    const animeLibrary = getAnimeLibrary(db);
    assert.equal(animeLibrary.length, 2);
    const librarySummary = animeLibrary.map((row) => ({
      animeId: row.animeId,
      canonicalTitle: row.canonicalTitle,
      totalSessions: row.totalSessions,
      totalActiveMs: row.totalActiveMs,
      totalCards: row.totalCards,
      episodeCount: row.episodeCount,
    }));
    assert.deepEqual(librarySummary, [
      {
        animeId: lwaAnimeId,
        canonicalTitle: 'Little Witch Academia',
        totalSessions: 3,
        totalActiveMs: 12_000,
        totalCards: 6,
        episodeCount: 2,
      },
      {
        animeId: frierenAnimeId,
        canonicalTitle: 'Frieren',
        totalSessions: 1,
        totalActiveMs: 3_500,
        totalCards: 1,
        episodeCount: 1,
      },
    ]);

    const animeDetail = getAnimeDetail(db, lwaAnimeId);
    assert.ok(animeDetail);
    assert.equal(animeDetail?.animeId, lwaAnimeId);
    assert.equal(animeDetail?.canonicalTitle, 'Little Witch Academia');
    assert.equal(animeDetail?.anilistId, 33_435);
    assert.equal(animeDetail?.totalSessions, 3);
    assert.equal(animeDetail?.totalActiveMs, 12_000);
    assert.equal(animeDetail?.totalCards, 6);
    assert.equal(animeDetail?.totalWordsSeen, 80);
    assert.equal(animeDetail?.totalLinesSeen, 33);
    assert.equal(animeDetail?.totalLookupCount, 12);
    assert.equal(animeDetail?.totalLookupHits, 8);
    assert.equal(animeDetail?.episodeCount, 2);

    const episodes = getAnimeEpisodes(db, lwaAnimeId);
    const episodeSummary = episodes.map((row) => ({
      videoId: row.videoId,
      season: row.season,
      episode: row.episode,
      totalSessions: row.totalSessions,
      totalActiveMs: row.totalActiveMs,
      totalCards: row.totalCards,
    }));
    assert.deepEqual(episodeSummary, [
      {
        videoId: lwaEpisode5,
        season: 2,
        episode: 5,
        totalSessions: 2,
        totalActiveMs: 7_000,
        totalCards: 3,
      },
      {
        videoId: lwaEpisode6,
        season: 2,
        episode: 6,
        totalSessions: 1,
        totalActiveMs: 5_000,
        totalCards: 3,
      },
    ]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// Links one word to one subtitle line of an anime-linked video and checks that
// getWordOccurrences returns that line together with its anime/video/session context.
test('getWordOccurrences maps a normalized word back to anime, video, and subtitle line context', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Little Witch Academia',
      canonicalTitle: 'Little Witch Academia',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: '{"source":"test"}',
    });
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e04.mkv', {
      canonicalTitle: 'Episode 4',
      sourcePath: '/tmp/Little Witch Academia S02E04.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    linkVideoToAnimeRecord(db, videoId, {
      animeId,
      parsedBasename: 'Little Witch Academia S02E04.mkv',
      parsedTitle: 'Little Witch Academia',
      parsedSeason: 2,
      parsedEpisode: 4,
      parserSource: 'fallback',
      parserConfidence: 1,
      parseMetadataJson: '{"episode":4}',
    });
    const { sessionId } = startSessionRecord(db, videoId, 1_000_000);

    const insertWord = db.prepare(
      `INSERT INTO imm_words (
        headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    const wordId = Number(
      insertWord.run('猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 1_500, 4).lastInsertRowid,
    );

    const insertLine = db.prepare(
      `INSERT INTO imm_subtitle_lines (
        session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    const lineId = Number(
      insertLine.run(sessionId, null, videoId, animeId, 1, 0, 1000, '猫 猫 日 日 は', 1_000, 1_000)
        .lastInsertRowid,
    );

    const insertOccurrence = db.prepare(
      `INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
       VALUES (?, ?, ?)`,
    );
    insertOccurrence.run(lineId, wordId, 2);

    const rows = getWordOccurrences(db, '猫', '猫', 'ねこ', 10);

    const expectedOccurrence = {
      animeId,
      animeTitle: 'Little Witch Academia',
      videoId,
      videoTitle: 'Episode 4',
      sessionId,
      lineIndex: 1,
      segmentStartMs: 0,
      segmentEndMs: 1000,
      text: '猫 猫 日 日 は',
      occurrenceCount: 2,
    };
    assert.deepEqual(rows, [expectedOccurrence]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
|
||||
// Links one kanji to one subtitle line of an anime-linked video and checks that
// getKanjiOccurrences returns that line together with its anime/video/session context.
test('getKanjiOccurrences maps a kanji back to anime, video, and subtitle line context', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);

  try {
    ensureSchema(db);
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Frieren',
      canonicalTitle: 'Frieren',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: '{"source":"test"}',
    });
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/frieren-03.mkv', {
      canonicalTitle: 'Episode 3',
      sourcePath: '/tmp/[SubsPlease] Frieren - 03 - Departure.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    linkVideoToAnimeRecord(db, videoId, {
      animeId,
      parsedBasename: '[SubsPlease] Frieren - 03 - Departure.mkv',
      parsedTitle: 'Frieren',
      parsedSeason: 1,
      parsedEpisode: 3,
      parserSource: 'fallback',
      parserConfidence: 1,
      parseMetadataJson: '{"episode":3}',
    });
    const { sessionId } = startSessionRecord(db, videoId, 2_000_000);

    const insertKanji = db.prepare(
      `INSERT INTO imm_kanji (
        kanji, first_seen, last_seen, frequency
      ) VALUES (?, ?, ?, ?)`,
    );
    const kanjiId = Number(insertKanji.run('日', 2_000, 2_500, 8).lastInsertRowid);

    const insertLine = db.prepare(
      `INSERT INTO imm_subtitle_lines (
        session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    );
    const lineId = Number(
      insertLine.run(sessionId, null, videoId, animeId, 3, 5000, 6500, '今日は日曜', 2_000, 2_000)
        .lastInsertRowid,
    );

    const insertOccurrence = db.prepare(
      `INSERT INTO imm_kanji_line_occurrences (line_id, kanji_id, occurrence_count)
       VALUES (?, ?, ?)`,
    );
    insertOccurrence.run(lineId, kanjiId, 2);

    const rows = getKanjiOccurrences(db, '日', 10);

    const expectedOccurrence = {
      animeId,
      animeTitle: 'Frieren',
      videoId,
      videoTitle: 'Episode 3',
      sessionId,
      lineIndex: 3,
      segmentStartMs: 5000,
      segmentEndMs: 6500,
      text: '今日は日曜',
      occurrenceCount: 2,
    };
    assert.deepEqual(rows, [expectedOccurrence]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
|
||||
71
src/core/services/immersion-tracker/legacy-vocabulary-pos.ts
Normal file
71
src/core/services/immersion-tracker/legacy-vocabulary-pos.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import type { Token } from '../../../types';
|
||||
import type { LegacyVocabularyPosResolution } from './types';
|
||||
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
|
||||
|
||||
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
|
||||
/**
 * Coerces an optional lookup value into a trimmed string.
 *
 * @param value - Text to normalize; may be null or undefined.
 * @returns The value with surrounding whitespace removed, or '' when the
 *   input is not a string.
 */
function normalizeLookupText(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
|
||||
|
||||
/**
 * Converts katakana characters in `text` to hiragana.
 *
 * Each code point within [KATAKANA_CODEPOINT_START, KATAKANA_CODEPOINT_END] is
 * shifted down by KATAKANA_TO_HIRAGANA_OFFSET; every other character is copied
 * through unchanged.
 *
 * @param text - Input text, iterated by code point.
 * @returns The converted string.
 */
function katakanaToHiragana(text: string): string {
  const converted = Array.from(text, (char) => {
    const codePoint = char.codePointAt(0);
    if (codePoint === undefined) {
      // Nothing to emit for a character without a code point.
      return '';
    }
    const isKatakana =
      codePoint >= KATAKANA_CODEPOINT_START && codePoint <= KATAKANA_CODEPOINT_END;
    return isKatakana ? String.fromCodePoint(codePoint - KATAKANA_TO_HIRAGANA_OFFSET) : char;
  });
  return converted.join('');
}
|
||||
|
||||
/**
 * Builds a LegacyVocabularyPosResolution from a tokenizer token.
 *
 * - headword: the token's trimmed headword, falling back to its trimmed surface
 *   word when the headword is empty.
 * - reading: the token's trimmed katakana reading converted to hiragana.
 * - partOfSpeech: derived by deriveStoredPartOfSpeech from the token's
 *   partOfSpeech and pos1.
 * - pos1..pos3: trimmed copies of the token's fields ('' when absent).
 */
function toResolution(token: Token): LegacyVocabularyPosResolution {
  const normalizedHeadword = normalizeLookupText(token.headword);
  const headword =
    normalizedHeadword !== '' ? normalizedHeadword : normalizeLookupText(token.word);
  const reading = katakanaToHiragana(normalizeLookupText(token.katakanaReading));
  const partOfSpeech = deriveStoredPartOfSpeech({
    partOfSpeech: token.partOfSpeech,
    pos1: token.pos1,
  });
  const pos1 = normalizeLookupText(token.pos1);
  const pos2 = normalizeLookupText(token.pos2);
  const pos3 = normalizeLookupText(token.pos3);

  return { headword, reading, partOfSpeech, pos1, pos2, pos3 };
}
|
||||
|
||||
/**
 * Picks the token that describes `lookupText` and converts it to a
 * LegacyVocabularyPosResolution.
 *
 * Resolution order, first hit wins:
 * 1. exactly one token whose trimmed surface word equals the trimmed lookup text;
 * 2. exactly one token whose trimmed headword equals the trimmed lookup text;
 * 3. the only token, when the list has a single entry.
 *
 * @param lookupText - Text to resolve; surrounding whitespace is ignored.
 * @param tokens - Candidate tokens, or null.
 * @returns The resolution, or null when the lookup text is blank, there are no
 *   tokens, or no rule above selects a single token.
 */
export function resolveLegacyVocabularyPosFromTokens(
  lookupText: string,
  tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
  const normalizedLookup = normalizeLookupText(lookupText);
  if (!normalizedLookup || !tokens || tokens.length === 0) {
    return null;
  }
  const candidates: Token[] = tokens;

  // Returns the sole token whose selected field matches the lookup, or null
  // when there are zero or several such tokens.
  const findUniqueMatch = (
    selectField: (token: Token) => string | null | undefined,
  ): Token | null => {
    const matches = candidates.filter(
      (token) => normalizeLookupText(selectField(token)) === normalizedLookup,
    );
    return matches.length === 1 ? matches[0]! : null;
  };

  const surfaceMatch = findUniqueMatch((token) => token.word);
  if (surfaceMatch) {
    return toResolution(surfaceMatch);
  }

  const headwordMatch = findUniqueMatch((token) => token.headword);
  if (headwordMatch) {
    return toResolution(headwordMatch);
  }

  return candidates.length === 1 ? toResolution(candidates[0]!) : null;
}
|
||||
@@ -112,35 +112,46 @@ function upsertDailyRollupsForGroups(
|
||||
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
|
||||
s.video_id AS video_id,
|
||||
COUNT(DISTINCT s.session_id) AS total_sessions,
|
||||
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
|
||||
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
|
||||
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(sm.max_words), 0) AS total_words_seen,
|
||||
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
|
||||
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
|
||||
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
|
||||
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
|
||||
ELSE NULL
|
||||
END AS cards_per_hour,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
|
||||
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
|
||||
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
|
||||
THEN COALESCE(SUM(sm.max_words), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
|
||||
ELSE NULL
|
||||
END AS words_per_min,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
|
||||
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
|
||||
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
|
||||
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
|
||||
ELSE NULL
|
||||
END AS lookup_hit_rate,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
|
||||
JOIN (
|
||||
SELECT
|
||||
t.session_id,
|
||||
MAX(t.active_watched_ms) AS max_active_ms,
|
||||
MAX(t.lines_seen) AS max_lines,
|
||||
MAX(t.words_seen) AS max_words,
|
||||
MAX(t.tokens_seen) AS max_tokens,
|
||||
MAX(t.cards_mined) AS max_cards,
|
||||
MAX(t.lookup_count) AS max_lookups,
|
||||
MAX(t.lookup_hits) AS max_hits
|
||||
FROM imm_session_telemetry t
|
||||
GROUP BY t.session_id
|
||||
) sm ON s.session_id = sm.session_id
|
||||
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_day, s.video_id
|
||||
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
@@ -176,20 +187,29 @@ function upsertMonthlyRollupsForGroups(
|
||||
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
|
||||
s.video_id AS video_id,
|
||||
COUNT(DISTINCT s.session_id) AS total_sessions,
|
||||
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
|
||||
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
|
||||
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(sm.max_words), 0) AS total_words_seen,
|
||||
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
|
||||
JOIN (
|
||||
SELECT
|
||||
t.session_id,
|
||||
MAX(t.active_watched_ms) AS max_active_ms,
|
||||
MAX(t.lines_seen) AS max_lines,
|
||||
MAX(t.words_seen) AS max_words,
|
||||
MAX(t.tokens_seen) AS max_tokens,
|
||||
MAX(t.cards_mined) AS max_cards
|
||||
FROM imm_session_telemetry t
|
||||
GROUP BY t.session_id
|
||||
) sm ON s.session_id = sm.session_id
|
||||
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_month, s.video_id
|
||||
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
@@ -216,8 +236,8 @@ function getAffectedRollupGroups(
|
||||
.prepare(
|
||||
`
|
||||
SELECT DISTINCT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
|
||||
s.video_id AS video_id
|
||||
FROM imm_session_telemetry t
|
||||
JOIN imm_sessions s
|
||||
|
||||
@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
|
||||
import test from 'node:test';
|
||||
import type { spawn as spawnFn } from 'node:child_process';
|
||||
import { SOURCE_TYPE_LOCAL } from './types';
|
||||
import { getLocalVideoMetadata, runFfprobe } from './metadata';
|
||||
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
|
||||
|
||||
type Spawn = typeof spawnFn;
|
||||
|
||||
@@ -146,3 +146,79 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
|
||||
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
|
||||
assert.equal(hashFallbackMetadata.hashSha256, null);
|
||||
});
|
||||
|
||||
// The stub runGuessit records what it was called with; the test expects it to
// receive the basename only and its output to be used as the parse result.
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
  const seenTargets: string[] = [];
  const guessitStub = async (target: string): Promise<string> => {
    seenTargets.push(target);
    return JSON.stringify({
      title: 'Little Witch Academia',
      season: 2,
      episode: 5,
    });
  };

  const parsed = await guessAnimeVideoMetadata(
    '/tmp/Little Witch Academia S02E05.mkv',
    'Episode 5',
    { runGuessit: guessitStub },
  );

  assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);

  const expectedMetadataJson = JSON.stringify({
    filename: 'Little Witch Academia S02E05.mkv',
    source: 'guessit',
  });
  assert.deepEqual(parsed, {
    parsedBasename: 'Little Witch Academia S02E05.mkv',
    parsedTitle: 'Little Witch Academia',
    parsedSeason: 2,
    parsedEpisode: 5,
    parserSource: 'guessit',
    parserConfidence: 1,
    parseMetadataJson: expectedMetadataJson,
  });
});
|
||||
|
||||
// runGuessit rejects here, so the result is expected to carry parserSource 'fallback'.
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
  const failingGuessit = async (): Promise<string> => {
    throw new Error('guessit unavailable');
  };

  const parsed = await guessAnimeVideoMetadata(
    '/tmp/Little Witch Academia S02E05.mkv',
    'Episode 5',
    { runGuessit: failingGuessit },
  );

  const expectedMetadataJson = JSON.stringify({
    confidence: 'high',
    filename: 'Little Witch Academia S02E05.mkv',
    rawTitle: 'Little Witch Academia S02E05',
    source: 'fallback',
  });
  assert.deepEqual(parsed, {
    parsedBasename: 'Little Witch Academia S02E05.mkv',
    parsedTitle: 'Little Witch Academia',
    parsedSeason: 2,
    parsedEpisode: 5,
    parserSource: 'fallback',
    parserConfidence: 1,
    parseMetadataJson: expectedMetadataJson,
  });
});
|
||||
|
||||
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
|
||||
const parsed = await guessAnimeVideoMetadata(
|
||||
'/tmp/[SubsPlease] Frieren - 03 (1080p).mkv',
|
||||
null,
|
||||
{
|
||||
runGuessit: async () => JSON.stringify({ episode: 3 }),
|
||||
},
|
||||
);
|
||||
|
||||
assert.deepEqual(parsed, {
|
||||
parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
|
||||
parsedTitle: 'Frieren - 03 (1080p)',
|
||||
parsedSeason: null,
|
||||
parsedEpisode: null,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.2,
|
||||
parseMetadataJson: JSON.stringify({
|
||||
confidence: 'low',
|
||||
filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
|
||||
rawTitle: 'Frieren - 03 (1080p)',
|
||||
source: 'fallback',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import crypto from 'node:crypto';
|
||||
import { spawn as nodeSpawn } from 'node:child_process';
|
||||
import * as fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { parseMediaInfo } from '../../../jimaku/utils';
|
||||
import {
|
||||
guessAnilistMediaInfo,
|
||||
runGuessit,
|
||||
type GuessAnilistMediaInfoDeps,
|
||||
} from '../anilist/anilist-updater';
|
||||
import {
|
||||
deriveCanonicalTitle,
|
||||
emptyMetadata,
|
||||
@@ -8,7 +15,12 @@ import {
|
||||
parseFps,
|
||||
toNullableInt,
|
||||
} from './reducer';
|
||||
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
|
||||
import {
|
||||
SOURCE_TYPE_LOCAL,
|
||||
type ParsedAnimeVideoGuess,
|
||||
type ProbeMetadata,
|
||||
type VideoMetadata,
|
||||
} from './types';
|
||||
|
||||
type SpawnFn = typeof nodeSpawn;
|
||||
|
||||
@@ -24,6 +36,21 @@ interface MetadataDeps {
|
||||
fs?: FsDeps;
|
||||
}
|
||||
|
||||
/**
 * Optional collaborators accepted by `guessAnimeVideoMetadata`.
 */
interface GuessAnimeVideoMetadataDeps {
  /**
   * Replacement guessit runner. When omitted, `guessAnimeVideoMetadata`
   * falls back to the `runGuessit` imported from the AniList updater.
   */
  runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
|
||||
|
||||
/**
 * Converts the fallback parser's qualitative confidence label into the
 * numeric score reported as `parserConfidence`.
 *
 * @param confidence - Confidence label produced by the filename parser.
 * @returns `1` for `'high'`, `0.6` for `'medium'`, and `0.2` for anything else.
 */
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  switch (confidence) {
    case 'high':
      return 1;
    case 'medium':
      return 0.6;
    default:
      // 'low' (and any unexpected runtime value) maps to the lowest score.
      return 0.2;
  }
}
|
||||
|
||||
export async function computeSha256(
|
||||
mediaPath: string,
|
||||
deps: MetadataDeps = {},
|
||||
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
|
||||
metadataJson: null,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Derives anime-oriented metadata (title, season, episode) for a video from
 * its path and/or display title.
 *
 * The guess itself is delegated to `guessAnilistMediaInfo`; this function
 * reshapes the result into a `ParsedAnimeVideoGuess` and records which parser
 * produced it together with a confidence score.
 *
 * @param mediaPath - Path of the media file, or `null` when unknown.
 * @param mediaTitle - Display title of the media, or `null` when unknown.
 * @param deps - Optional overrides; `runGuessit` defaults to the imported runner.
 * @returns The parsed guess, or `null` when `guessAnilistMediaInfo` returns nothing.
 */
export async function guessAnimeVideoMetadata(
  mediaPath: string | null,
  mediaTitle: string | null,
  deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
  const parsed = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
    runGuessit: deps.runGuessit ?? runGuessit,
  });
  if (!parsed) {
    return null;
  }

  const parsedBasename = mediaPath ? path.basename(mediaPath) : null;
  if (parsed.source === 'guessit') {
    return {
      parsedBasename,
      parsedTitle: parsed.title,
      parsedSeason: parsed.season,
      parsedEpisode: parsed.episode,
      parserSource: 'guessit',
      // guessit results are always recorded with full confidence.
      parserConfidence: 1,
      parseMetadataJson: JSON.stringify({
        filename: parsedBasename,
        source: 'guessit',
      }),
    };
  }

  // The guess carries no confidence or raw title, so the fallback parser is
  // run again here purely to recover those diagnostic fields.
  const fallbackInfo = parseMediaInfo(mediaPath ?? mediaTitle);
  return {
    parsedBasename: parsedBasename ?? fallbackInfo.filename ?? null,
    parsedTitle: parsed.title,
    parsedSeason: parsed.season,
    parsedEpisode: parsed.episode,
    parserSource: 'fallback',
    parserConfidence: mapParserConfidenceToScore(fallbackInfo.confidence),
    parseMetadataJson: JSON.stringify({
      confidence: fallbackInfo.confidence,
      filename: fallbackInfo.filename,
      rawTitle: fallbackInfo.rawTitle,
      source: 'fallback',
    }),
  };
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,6 +30,7 @@ export function createInitialSessionState(
|
||||
lastPauseStartMs: null,
|
||||
isPaused: false,
|
||||
pendingTelemetry: true,
|
||||
markedWatched: false,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,9 @@ import {
|
||||
createTrackerPreparedStatements,
|
||||
ensureSchema,
|
||||
executeQueuedWrite,
|
||||
getOrCreateAnimeRecord,
|
||||
getOrCreateVideoRecord,
|
||||
linkVideoToAnimeRecord,
|
||||
} from './storage';
|
||||
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
|
||||
|
||||
@@ -60,6 +62,7 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
const tableNames = new Set(rows.map((row) => row.name));
|
||||
|
||||
assert.ok(tableNames.has('imm_videos'));
|
||||
assert.ok(tableNames.has('imm_anime'));
|
||||
assert.ok(tableNames.has('imm_sessions'));
|
||||
assert.ok(tableNames.has('imm_session_telemetry'));
|
||||
assert.ok(tableNames.has('imm_session_events'));
|
||||
@@ -67,8 +70,28 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
assert.ok(tableNames.has('imm_monthly_rollups'));
|
||||
assert.ok(tableNames.has('imm_words'));
|
||||
assert.ok(tableNames.has('imm_kanji'));
|
||||
assert.ok(tableNames.has('imm_subtitle_lines'));
|
||||
assert.ok(tableNames.has('imm_word_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_rollup_state'));
|
||||
|
||||
const videoColumns = new Set(
|
||||
(
|
||||
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
|
||||
assert.ok(videoColumns.has('anime_id'));
|
||||
assert.ok(videoColumns.has('parsed_basename'));
|
||||
assert.ok(videoColumns.has('parsed_title'));
|
||||
assert.ok(videoColumns.has('parsed_season'));
|
||||
assert.ok(videoColumns.has('parsed_episode'));
|
||||
assert.ok(videoColumns.has('parser_source'));
|
||||
assert.ok(videoColumns.has('parser_confidence'));
|
||||
assert.ok(videoColumns.has('parse_metadata_json'));
|
||||
|
||||
const rollupStateRow = db
|
||||
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
|
||||
.get('last_rollup_sample_ms') as {
|
||||
@@ -82,6 +105,470 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
db.exec(`
|
||||
CREATE TABLE imm_schema_version (
|
||||
schema_version INTEGER PRIMARY KEY,
|
||||
applied_at_ms INTEGER NOT NULL
|
||||
);
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
|
||||
|
||||
CREATE TABLE imm_videos(
|
||||
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
video_key TEXT NOT NULL UNIQUE,
|
||||
canonical_title TEXT NOT NULL,
|
||||
source_type INTEGER NOT NULL,
|
||||
source_path TEXT,
|
||||
source_url TEXT,
|
||||
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
|
||||
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
|
||||
codec_id INTEGER, container_id INTEGER,
|
||||
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
|
||||
bitrate_kbps INTEGER, audio_codec_id INTEGER,
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
`);
|
||||
|
||||
const insertLegacyVideo = db.prepare(`
|
||||
INSERT INTO imm_videos (
|
||||
video_key, canonical_title, source_type, source_path, source_url,
|
||||
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
|
||||
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
|
||||
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/Little Witch Academia S02E05.mkv',
|
||||
'Episode 5',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/Little Witch Academia S02E05.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/Little Witch Academia S02E06.mkv',
|
||||
'Episode 6',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/Little Witch Academia S02E06.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
|
||||
'Episode 3',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/[SubsPlease] Frieren - 03 - Departure.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
|
||||
ensureSchema(db);
|
||||
|
||||
const videoColumns = new Set(
|
||||
(
|
||||
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
assert.ok(videoColumns.has('anime_id'));
|
||||
assert.ok(videoColumns.has('parsed_basename'));
|
||||
assert.ok(videoColumns.has('parsed_title'));
|
||||
assert.ok(videoColumns.has('parsed_season'));
|
||||
assert.ok(videoColumns.has('parsed_episode'));
|
||||
assert.ok(videoColumns.has('parser_source'));
|
||||
assert.ok(videoColumns.has('parser_confidence'));
|
||||
assert.ok(videoColumns.has('parse_metadata_json'));
|
||||
|
||||
const animeRows = db
|
||||
.prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
|
||||
.all() as Array<{ canonical_title: string }>;
|
||||
assert.deepEqual(
|
||||
animeRows.map((row) => row.canonical_title),
|
||||
['Frieren', 'Little Witch Academia'],
|
||||
);
|
||||
|
||||
const littleWitchRows = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
a.canonical_title AS anime_title,
|
||||
v.parsed_title,
|
||||
v.parsed_basename,
|
||||
v.parsed_season,
|
||||
v.parsed_episode,
|
||||
v.parser_source,
|
||||
v.parser_confidence
|
||||
FROM imm_videos v
|
||||
JOIN imm_anime a ON a.anime_id = v.anime_id
|
||||
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
|
||||
ORDER BY v.video_key
|
||||
`,
|
||||
)
|
||||
.all() as Array<{
|
||||
anime_title: string;
|
||||
parsed_title: string | null;
|
||||
parsed_basename: string | null;
|
||||
parsed_season: number | null;
|
||||
parsed_episode: number | null;
|
||||
parser_source: string | null;
|
||||
parser_confidence: number | null;
|
||||
}>;
|
||||
|
||||
assert.equal(littleWitchRows.length, 2);
|
||||
assert.deepEqual(
|
||||
littleWitchRows.map((row) => ({
|
||||
animeTitle: row.anime_title,
|
||||
parsedTitle: row.parsed_title,
|
||||
parsedBasename: row.parsed_basename,
|
||||
parsedSeason: row.parsed_season,
|
||||
parsedEpisode: row.parsed_episode,
|
||||
parserSource: row.parser_source,
|
||||
})),
|
||||
[
|
||||
{
|
||||
animeTitle: 'Little Witch Academia',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'fallback',
|
||||
},
|
||||
{
|
||||
animeTitle: 'Little Witch Academia',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedBasename: 'Little Witch Academia S02E06.mkv',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 6,
|
||||
parserSource: 'fallback',
|
||||
},
|
||||
],
|
||||
);
|
||||
assert.ok(
|
||||
littleWitchRows.every(
|
||||
(row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
|
||||
),
|
||||
);
|
||||
|
||||
const frierenRow = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
a.canonical_title AS anime_title,
|
||||
v.parsed_title,
|
||||
v.parsed_episode,
|
||||
v.parser_source
|
||||
FROM imm_videos v
|
||||
JOIN imm_anime a ON a.anime_id = v.anime_id
|
||||
WHERE v.video_key = ?
|
||||
`,
|
||||
)
|
||||
.get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
|
||||
anime_title: string;
|
||||
parsed_title: string | null;
|
||||
parsed_episode: number | null;
|
||||
parser_source: string | null;
|
||||
} | null;
|
||||
|
||||
assert.ok(frierenRow);
|
||||
assert.equal(frierenRow?.anime_title, 'Frieren');
|
||||
assert.equal(frierenRow?.parsed_title, 'Frieren');
|
||||
assert.equal(frierenRow?.parsed_episode, 3);
|
||||
assert.equal(frierenRow?.parser_source, 'fallback');
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
db.exec(`
|
||||
CREATE TABLE imm_schema_version (
|
||||
schema_version INTEGER PRIMARY KEY,
|
||||
applied_at_ms INTEGER NOT NULL
|
||||
);
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
|
||||
|
||||
CREATE TABLE imm_videos(
|
||||
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
video_key TEXT NOT NULL UNIQUE,
|
||||
anime_id INTEGER,
|
||||
canonical_title TEXT NOT NULL,
|
||||
source_type INTEGER NOT NULL,
|
||||
source_path TEXT,
|
||||
source_url TEXT,
|
||||
parsed_basename TEXT,
|
||||
parsed_title TEXT,
|
||||
parsed_season INTEGER,
|
||||
parsed_episode INTEGER,
|
||||
parser_source TEXT,
|
||||
parser_confidence REAL,
|
||||
parse_metadata_json TEXT,
|
||||
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
|
||||
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
|
||||
codec_id INTEGER, container_id INTEGER,
|
||||
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
|
||||
bitrate_kbps INTEGER, audio_codec_id INTEGER,
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_sessions(
|
||||
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_uuid TEXT NOT NULL UNIQUE,
|
||||
video_id INTEGER NOT NULL,
|
||||
started_at_ms INTEGER NOT NULL,
|
||||
ended_at_ms INTEGER,
|
||||
status INTEGER NOT NULL,
|
||||
locale_id INTEGER,
|
||||
target_lang_id INTEGER,
|
||||
difficulty_tier INTEGER,
|
||||
subtitle_mode INTEGER,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_session_events(
|
||||
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id INTEGER NOT NULL,
|
||||
ts_ms INTEGER NOT NULL,
|
||||
event_type INTEGER NOT NULL,
|
||||
line_index INTEGER,
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
words_delta INTEGER NOT NULL DEFAULT 0,
|
||||
cards_delta INTEGER NOT NULL DEFAULT 0,
|
||||
payload_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_words(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
headword TEXT,
|
||||
word TEXT,
|
||||
reading TEXT,
|
||||
part_of_speech TEXT,
|
||||
pos1 TEXT,
|
||||
pos2 TEXT,
|
||||
pos3 TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(headword, word, reading)
|
||||
);
|
||||
CREATE TABLE imm_kanji(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
kanji TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(kanji)
|
||||
);
|
||||
CREATE TABLE imm_rollup_state(
|
||||
state_key TEXT PRIMARY KEY,
|
||||
state_value INTEGER NOT NULL
|
||||
);
|
||||
`);
|
||||
|
||||
ensureSchema(db);
|
||||
|
||||
const tableNames = new Set(
|
||||
(
|
||||
db.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`).all() as
|
||||
Array<{ name: string }>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
|
||||
assert.ok(tableNames.has('imm_subtitle_lines'));
|
||||
assert.ok(tableNames.has('imm_word_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
|
||||
const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
|
||||
canonicalTitle: 'Episode 5',
|
||||
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
|
||||
canonicalTitle: 'Episode 6',
|
||||
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
const provisionalAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
canonicalTitle: 'Little Witch Academia',
|
||||
anilistId: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
titleNative: null,
|
||||
metadataJson: '{"source":"parsed"}',
|
||||
});
|
||||
linkVideoToAnimeRecord(db, firstVideoId, {
|
||||
animeId: provisionalAnimeId,
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.6,
|
||||
parseMetadataJson: '{"source":"parsed","episode":5}',
|
||||
});
|
||||
|
||||
const reusedAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: ' little witch academia ',
|
||||
canonicalTitle: 'Little Witch Academia',
|
||||
anilistId: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
titleNative: null,
|
||||
metadataJson: '{"source":"parsed"}',
|
||||
});
|
||||
linkVideoToAnimeRecord(db, secondVideoId, {
|
||||
animeId: reusedAnimeId,
|
||||
parsedBasename: 'Little Witch Academia S02E06.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 6,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.6,
|
||||
parseMetadataJson: '{"source":"parsed","episode":6}',
|
||||
});
|
||||
|
||||
assert.equal(reusedAnimeId, provisionalAnimeId);
|
||||
|
||||
const upgradedAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
canonicalTitle: 'Little Witch Academia TV',
|
||||
anilistId: 33_435,
|
||||
titleRomaji: 'Little Witch Academia',
|
||||
titleEnglish: 'Little Witch Academia',
|
||||
titleNative: 'リトルウィッチアカデミア',
|
||||
metadataJson: '{"source":"anilist"}',
|
||||
});
|
||||
|
||||
assert.equal(upgradedAnimeId, provisionalAnimeId);
|
||||
|
||||
const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
|
||||
anime_id: number;
|
||||
normalized_title_key: string;
|
||||
canonical_title: string;
|
||||
anilist_id: number | null;
|
||||
title_romaji: string | null;
|
||||
title_english: string | null;
|
||||
title_native: string | null;
|
||||
metadata_json: string | null;
|
||||
}>;
|
||||
assert.equal(animeRows.length, 1);
|
||||
assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
|
||||
assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
|
||||
assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
|
||||
assert.equal(animeRows[0]?.anilist_id, 33_435);
|
||||
assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
|
||||
assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
|
||||
assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
|
||||
assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
|
||||
|
||||
const linkedVideos = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT anime_id, parsed_title, parsed_season, parsed_episode
|
||||
FROM imm_videos
|
||||
WHERE video_id IN (?, ?)
|
||||
ORDER BY video_id
|
||||
`,
|
||||
)
|
||||
.all(firstVideoId, secondVideoId) as Array<{
|
||||
anime_id: number | null;
|
||||
parsed_title: string | null;
|
||||
parsed_season: number | null;
|
||||
parsed_episode: number | null;
|
||||
}>;
|
||||
|
||||
assert.deepEqual(linkedVideos, [
|
||||
{
|
||||
anime_id: provisionalAnimeId,
|
||||
parsed_title: 'Little Witch Academia',
|
||||
parsed_season: 2,
|
||||
parsed_episode: 5,
|
||||
},
|
||||
{
|
||||
anime_id: provisionalAnimeId,
|
||||
parsed_title: 'Little Witch Academia',
|
||||
parsed_season: 2,
|
||||
parsed_episode: 6,
|
||||
},
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('start/finalize session updates ended_at and status', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
@@ -191,18 +678,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
ensureSchema(db);
|
||||
const stmts = createTrackerPreparedStatements(db);
|
||||
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
|
||||
|
||||
const wordRow = db
|
||||
.prepare(
|
||||
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
|
||||
`SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
|
||||
FROM imm_words WHERE headword = ?`,
|
||||
)
|
||||
.get('猫') as {
|
||||
headword: string;
|
||||
frequency: number;
|
||||
part_of_speech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
first_seen: number;
|
||||
last_seen: number;
|
||||
} | null;
|
||||
@@ -218,6 +709,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
assert.ok(wordRow);
|
||||
assert.ok(kanjiRow);
|
||||
assert.equal(wordRow?.frequency, 2);
|
||||
assert.equal(wordRow?.part_of_speech, 'noun');
|
||||
assert.equal(wordRow?.pos1, '名詞');
|
||||
assert.equal(wordRow?.pos2, '一般');
|
||||
assert.equal(kanjiRow?.frequency, 2);
|
||||
assert.equal(wordRow?.first_seen, 5);
|
||||
assert.equal(wordRow?.last_seen, 15);
|
||||
@@ -228,3 +722,34 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const stmts = createTrackerPreparedStatements(db);
|
||||
|
||||
stmts.wordUpsertStmt.run('知っている', '知っている', 'しっている', 'other', '動詞', '自立', '', 10, 10);
|
||||
stmts.wordUpsertStmt.run('知っている', '知っている', 'しっている', 'verb', '動詞', '自立', '', 11, 12);
|
||||
|
||||
const row = db
|
||||
.prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
|
||||
.get('知っている') as {
|
||||
frequency: number;
|
||||
part_of_speech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
} | null;
|
||||
|
||||
assert.ok(row);
|
||||
assert.equal(row?.frequency, 2);
|
||||
assert.equal(row?.part_of_speech, 'verb');
|
||||
assert.equal(row?.pos1, '動詞');
|
||||
assert.equal(row?.pos2, '自立');
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { parseMediaInfo } from '../../../jimaku/utils';
|
||||
import type { DatabaseSync } from './sqlite';
|
||||
import { SCHEMA_VERSION } from './types';
|
||||
import type { QueuedWrite, VideoMetadata } from './types';
|
||||
@@ -7,6 +8,33 @@ export interface TrackerPreparedStatements {
|
||||
eventInsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
wordUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
kanjiUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
subtitleLineInsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
wordIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
kanjiIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
wordLineOccurrenceUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
kanjiLineOccurrenceUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
videoAnimeIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
}
|
||||
|
||||
/**
 * Values used by `getOrCreateAnimeRecord` to find, create, or update a row in
 * `imm_anime`.
 */
export interface AnimeRecordInput {
  /** Title as parsed from the media; normalized into the row's identity key. */
  parsedTitle: string;
  /** Display title written to `canonical_title`. */
  canonicalTitle: string;
  /** AniList identifier, or `null` when the anime has not been matched. */
  anilistId: number | null;
  /** Romaji title written to `title_romaji`, if known. */
  titleRomaji: string | null;
  /** English title written to `title_english`, if known. */
  titleEnglish: string | null;
  /** Native-script title written to `title_native`, if known. */
  titleNative: string | null;
  /** Serialized JSON written to `metadata_json`, if any. */
  metadataJson: string | null;
}
|
||||
|
||||
/**
 * Values written onto an `imm_videos` row by `linkVideoToAnimeRecord`: the
 * owning anime plus the filename-parse details that produced the link.
 */
export interface VideoAnimeLinkInput {
  /** Target `imm_anime` row id, or `null` to leave the video unlinked. */
  animeId: number | null;
  /** Basename that was parsed, written to `parsed_basename`. */
  parsedBasename: string | null;
  /** Parsed series title, written to `parsed_title`. */
  parsedTitle: string | null;
  /** Parsed season number, written to `parsed_season`. */
  parsedSeason: number | null;
  /** Parsed episode number, written to `parsed_episode`. */
  parsedEpisode: number | null;
  /** Name of the parser that produced the values, written to `parser_source`. */
  parserSource: string | null;
  /** Numeric confidence score, written to `parser_confidence`. */
  parserConfidence: number | null;
  /** Serialized JSON diagnostics, written to `parse_metadata_json`. */
  parseMetadataJson: string | null;
}
|
||||
|
||||
function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boolean {
|
||||
@@ -16,9 +44,14 @@ function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boo
|
||||
.some((row: unknown) => (row as { name: string }).name === columnName);
|
||||
}
|
||||
|
||||
/**
 * Adds a column to a table unless a column with that name already exists.
 *
 * @param db - Open database handle.
 * @param tableName - Table to alter.
 * @param columnName - Column to add when missing.
 * @param columnType - SQL type/constraint clause for the new column;
 *   defaults to `'INTEGER'`.
 */
function addColumnIfMissing(
  db: DatabaseSync,
  tableName: string,
  columnName: string,
  columnType = 'INTEGER',
): void {
  if (!hasColumn(db, tableName, columnName)) {
    // Identifiers cannot be bound as SQL parameters, so they are interpolated
    // directly. NOTE(review): the callers visible in this file pass literals;
    // this must never be given untrusted input.
    db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} ${columnType}`);
  }
}
|
||||
|
||||
@@ -35,6 +68,247 @@ export function applyPragmas(db: DatabaseSync): void {
|
||||
db.exec('PRAGMA busy_timeout = 2500');
|
||||
}
|
||||
|
||||
/**
 * Builds the normalized key used to recognize the same anime across
 * differently formatted titles.
 *
 * The title is NFKC-normalized and lower-cased, every run of characters that
 * is neither a Unicode letter nor a Unicode number becomes a single space,
 * and surrounding whitespace is removed.
 *
 * @param title - Raw title text.
 * @returns The normalized key; an empty string when the title contains no
 *   letters or numbers.
 */
export function normalizeAnimeIdentityKey(title: string): string {
  return title
    .normalize('NFKC')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, ' ')
    .trim()
    .replace(/\s+/g, ' ');
}
|
||||
|
||||
/**
 * Reports whether a title is nothing more than an episode marker such as
 * "Episode 5", "EP12", or "第3話", and therefore carries no series name.
 *
 * Matching is done on the NFKC-normalized, lower-cased title with whitespace
 * collapsed, and accepts episode numbers of one to three digits.
 *
 * @param title - Title text to inspect.
 * @returns `true` when the whole title is an episode marker.
 */
function looksLikeEpisodeOnlyTitle(title: string): boolean {
  const normalized = title
    .normalize('NFKC')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();
  return /^(episode|ep)\s*\d{1,3}$/.test(normalized) || /^第\s*\d{1,3}\s*話$/.test(normalized);
}
|
||||
|
||||
/**
 * Converts the filename parser's qualitative confidence label into the
 * numeric score used as a backfill candidate's `confidenceScore`.
 *
 * @param confidence - Confidence label produced by the filename parser.
 * @returns `1` for `'high'`, `0.6` for `'medium'`, and `0.2` for anything else.
 */
function parserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  switch (confidence) {
    case 'high':
      return 1;
    case 'medium':
      return 0.6;
    default:
      // 'low' (and any unexpected runtime value) maps to the lowest score.
      return 0.2;
  }
}
|
||||
|
||||
/**
 * Parses anime metadata for a legacy `imm_videos` row that has no anime link.
 *
 * The source path is tried first; if it yields no usable series title, the
 * row's canonical title is parsed instead. A title is unusable when it is
 * empty or is only an episode marker (see `looksLikeEpisodeOnlyTitle`).
 *
 * @param sourcePath - Stored source path of the video, if any.
 * @param canonicalTitle - Stored canonical title of the video.
 * @returns The parsed candidate, with `metadataJson.migrationSource` naming
 *   which input was used, or `null` when neither input gives a series title.
 */
function parseLegacyAnimeBackfillCandidate(
  sourcePath: string | null,
  canonicalTitle: string,
): {
  basename: string | null;
  title: string;
  season: number | null;
  episode: number | null;
  source: 'fallback';
  confidenceScore: number;
  metadataJson: string;
} | null {
  // Prefer the path: filenames usually carry the series name, while the
  // stored title may be just "Episode N".
  const fromPath =
    sourcePath && sourcePath.trim().length > 0 ? parseMediaInfo(sourcePath.trim()) : null;
  if (fromPath?.title && !looksLikeEpisodeOnlyTitle(fromPath.title)) {
    return {
      basename: fromPath.filename || null,
      title: fromPath.title,
      season: fromPath.season,
      episode: fromPath.episode,
      source: 'fallback',
      confidenceScore: parserConfidenceToScore(fromPath.confidence),
      metadataJson: JSON.stringify({
        confidence: fromPath.confidence,
        filename: fromPath.filename,
        rawTitle: fromPath.rawTitle,
        migrationSource: 'source_path',
      }),
    };
  }

  const fallbackTitle = canonicalTitle.trim();
  if (!fallbackTitle) return null;
  const fromTitle = parseMediaInfo(fallbackTitle);
  if (!fromTitle.title || looksLikeEpisodeOnlyTitle(fromTitle.title)) {
    return null;
  }

  return {
    // No file was parsed on this path, so there is no basename to record.
    basename: null,
    title: fromTitle.title,
    season: fromTitle.season,
    episode: fromTitle.episode,
    source: 'fallback',
    confidenceScore: parserConfidenceToScore(fromTitle.confidence),
    metadataJson: JSON.stringify({
      confidence: fromTitle.confidence,
      filename: fromTitle.filename,
      rawTitle: fromTitle.rawTitle,
      migrationSource: 'canonical_title',
    }),
  };
}
|
||||
|
||||
/**
 * Returns the id of the `imm_anime` row for the given anime, creating the row
 * if none exists and refreshing its metadata if one does.
 *
 * An existing row is looked up by AniList id first (when provided) and
 * otherwise by the normalized form of `parsedTitle`. On a match, non-null
 * inputs overwrite the stored values while null inputs (and an empty
 * `canonicalTitle`) leave the stored values untouched.
 *
 * @param db - Open database handle.
 * @param input - Identity and metadata for the anime.
 * @returns The `anime_id` of the matched or newly inserted row.
 * @throws Error when `parsedTitle` normalizes to an empty key.
 */
export function getOrCreateAnimeRecord(db: DatabaseSync, input: AnimeRecordInput): number {
  const normalizedTitleKey = normalizeAnimeIdentityKey(input.parsedTitle);
  if (!normalizedTitleKey) {
    throw new Error('parsedTitle is required to create or update an anime record');
  }

  const byAnilistId =
    input.anilistId !== null
      ? (db.prepare('SELECT anime_id FROM imm_anime WHERE anilist_id = ?').get(input.anilistId) as {
          anime_id: number;
        } | null)
      : null;
  const byNormalizedTitle = db
    .prepare('SELECT anime_id FROM imm_anime WHERE normalized_title_key = ?')
    .get(normalizedTitleKey) as { anime_id: number } | null;
  // An AniList match wins over a title match.
  const existing = byAnilistId ?? byNormalizedTitle;
  if (existing?.anime_id) {
    // NOTE(review): when the row was matched by title only and already holds a
    // different AniList id, COALESCE(?, anilist_id) replaces it with the
    // incoming id — confirm this merge is intended.
    db.prepare(
      `
      UPDATE imm_anime
      SET
        canonical_title = COALESCE(NULLIF(?, ''), canonical_title),
        anilist_id = COALESCE(?, anilist_id),
        title_romaji = COALESCE(?, title_romaji),
        title_english = COALESCE(?, title_english),
        title_native = COALESCE(?, title_native),
        metadata_json = COALESCE(?, metadata_json),
        LAST_UPDATE_DATE = ?
      WHERE anime_id = ?
      `,
    ).run(
      input.canonicalTitle,
      input.anilistId,
      input.titleRomaji,
      input.titleEnglish,
      input.titleNative,
      input.metadataJson,
      Date.now(),
      existing.anime_id,
    );
    return existing.anime_id;
  }

  // No match: insert a fresh row with identical created/updated timestamps.
  const nowMs = Date.now();
  const result = db
    .prepare(
      `
      INSERT INTO imm_anime(
        normalized_title_key,
        canonical_title,
        anilist_id,
        title_romaji,
        title_english,
        title_native,
        metadata_json,
        CREATED_DATE,
        LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `,
    )
    .run(
      normalizedTitleKey,
      input.canonicalTitle,
      input.anilistId,
      input.titleRomaji,
      input.titleEnglish,
      input.titleNative,
      input.metadataJson,
      nowMs,
      nowMs,
    );
  return Number(result.lastInsertRowid);
}
|
||||
|
||||
/**
 * Writes the anime link and filename-parse results onto one `imm_videos` row.
 *
 * All eight parse/link columns are overwritten with the values from `input`
 * (including `null`s) and `LAST_UPDATE_DATE` is stamped with the current time.
 * Nothing is reported back if no row matches `videoId`.
 *
 * @param db - Open database handle used to prepare and run the UPDATE.
 * @param videoId - Primary key (`video_id`) of the row to update.
 * @param input - Anime id plus parsed basename/title/season/episode and parser details.
 */
export function linkVideoToAnimeRecord(
  db: DatabaseSync,
  videoId: number,
  input: VideoAnimeLinkInput,
): void {
  const {
    animeId,
    parsedBasename,
    parsedTitle,
    parsedSeason,
    parsedEpisode,
    parserSource,
    parserConfidence,
    parseMetadataJson,
  } = input;

  const updateStatement = db.prepare(
    `
      UPDATE imm_videos
      SET
        anime_id = ?,
        parsed_basename = ?,
        parsed_title = ?,
        parsed_season = ?,
        parsed_episode = ?,
        parser_source = ?,
        parser_confidence = ?,
        parse_metadata_json = ?,
        LAST_UPDATE_DATE = ?
      WHERE video_id = ?
    `,
  );

  // Positional parameters: order must match the placeholders above.
  updateStatement.run(
    animeId,
    parsedBasename,
    parsedTitle,
    parsedSeason,
    parsedEpisode,
    parserSource,
    parserConfidence,
    parseMetadataJson,
    Date.now(),
    videoId,
  );
}
|
||||
|
||||
/**
 * Upgrade step that brings pre-anime `imm_videos` rows up to date.
 *
 * 1. Adds the anime-link and parse-result columns to `imm_videos` when absent.
 * 2. For every video whose `anime_id` is still NULL, parses its source path /
 *    canonical title, gets or creates the matching `imm_anime` row, and links
 *    the video to it.
 *
 * Videos for which the parser returns nothing are left unlinked. Because only
 * rows with `anime_id IS NULL` are selected, re-running the step skips videos
 * that were already linked.
 *
 * @param db - Open database handle to migrate.
 */
function migrateLegacyAnimeMetadata(db: DatabaseSync): void {
  // Column name -> SQL definition, added in this order.
  const videoColumns: ReadonlyArray<readonly [string, string]> = [
    ['anime_id', 'INTEGER REFERENCES imm_anime(anime_id)'],
    ['parsed_basename', 'TEXT'],
    ['parsed_title', 'TEXT'],
    ['parsed_season', 'INTEGER'],
    ['parsed_episode', 'INTEGER'],
    ['parser_source', 'TEXT'],
    ['parser_confidence', 'REAL'],
    ['parse_metadata_json', 'TEXT'],
  ];
  for (const [columnName, columnDefinition] of videoColumns) {
    addColumnIfMissing(db, 'imm_videos', columnName, columnDefinition);
  }

  const selectUnlinkedVideos = db.prepare(
    `
      SELECT video_id, source_path, canonical_title
      FROM imm_videos
      WHERE anime_id IS NULL
    `,
  );
  const unlinkedVideos = selectUnlinkedVideos.all() as Array<{
    video_id: number;
    source_path: string | null;
    canonical_title: string;
  }>;

  for (const video of unlinkedVideos) {
    const parsed = parseLegacyAnimeBackfillCandidate(video.source_path, video.canonical_title);
    if (!parsed) {
      // Nothing usable could be parsed for this video; leave it unlinked.
      continue;
    }

    const { title, metadataJson } = parsed;

    // No AniList data is available during backfill, so only the parsed title
    // and parse metadata are supplied.
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: title,
      canonicalTitle: title,
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson,
    });

    linkVideoToAnimeRecord(db, video.video_id, {
      animeId,
      parsedBasename: parsed.basename,
      parsedTitle: title,
      parsedSeason: parsed.season,
      parsedEpisode: parsed.episode,
      parserSource: parsed.source,
      parserConfidence: parsed.confidenceScore,
      parseMetadataJson: metadataJson,
    });
  }
}
|
||||
|
||||
export function ensureSchema(db: DatabaseSync): void {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_schema_version (
|
||||
@@ -61,14 +335,38 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
return;
|
||||
}
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_anime(
|
||||
anime_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
normalized_title_key TEXT NOT NULL UNIQUE,
|
||||
canonical_title TEXT NOT NULL,
|
||||
anilist_id INTEGER UNIQUE,
|
||||
title_romaji TEXT,
|
||||
title_english TEXT,
|
||||
title_native TEXT,
|
||||
episodes_total INTEGER,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_videos(
|
||||
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
video_key TEXT NOT NULL UNIQUE,
|
||||
anime_id INTEGER,
|
||||
canonical_title TEXT NOT NULL,
|
||||
source_type INTEGER NOT NULL,
|
||||
source_path TEXT,
|
||||
source_url TEXT,
|
||||
parsed_basename TEXT,
|
||||
parsed_title TEXT,
|
||||
parsed_season INTEGER,
|
||||
parsed_episode INTEGER,
|
||||
parser_source TEXT,
|
||||
parser_confidence REAL,
|
||||
parse_metadata_json TEXT,
|
||||
watched INTEGER NOT NULL DEFAULT 0,
|
||||
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
|
||||
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
|
||||
codec_id INTEGER, container_id INTEGER,
|
||||
@@ -77,7 +375,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
@@ -173,6 +472,10 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
headword TEXT,
|
||||
word TEXT,
|
||||
reading TEXT,
|
||||
part_of_speech TEXT,
|
||||
pos1 TEXT,
|
||||
pos2 TEXT,
|
||||
pos3 TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
@@ -189,42 +492,59 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
UNIQUE(kanji)
|
||||
);
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_video_started
|
||||
ON imm_sessions(video_id, started_at_ms DESC)
|
||||
CREATE TABLE IF NOT EXISTS imm_subtitle_lines(
|
||||
line_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id INTEGER NOT NULL,
|
||||
event_id INTEGER,
|
||||
video_id INTEGER NOT NULL,
|
||||
anime_id INTEGER,
|
||||
line_index INTEGER NOT NULL,
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
text TEXT NOT NULL,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(event_id) REFERENCES imm_session_events(event_id) ON DELETE SET NULL,
|
||||
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_status_started
|
||||
ON imm_sessions(status, started_at_ms DESC)
|
||||
CREATE TABLE IF NOT EXISTS imm_word_line_occurrences(
|
||||
line_id INTEGER NOT NULL,
|
||||
word_id INTEGER NOT NULL,
|
||||
occurrence_count INTEGER NOT NULL,
|
||||
PRIMARY KEY(line_id, word_id),
|
||||
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(word_id) REFERENCES imm_words(id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_telemetry_session_sample
|
||||
ON imm_session_telemetry(session_id, sample_ms DESC)
|
||||
CREATE TABLE IF NOT EXISTS imm_kanji_line_occurrences(
|
||||
line_id INTEGER NOT NULL,
|
||||
kanji_id INTEGER NOT NULL,
|
||||
occurrence_count INTEGER NOT NULL,
|
||||
PRIMARY KEY(line_id, kanji_id),
|
||||
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(kanji_id) REFERENCES imm_kanji(id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_events_session_ts
|
||||
ON imm_session_events(session_id, ts_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_events_type_ts
|
||||
ON imm_session_events(event_type, ts_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_rollups_day_video
|
||||
ON imm_daily_rollups(rollup_day, video_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_rollups_month_video
|
||||
ON imm_monthly_rollups(rollup_month, video_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_words_headword_word_reading
|
||||
ON imm_words(headword, word, reading)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_kanji_kanji
|
||||
ON imm_kanji(kanji)
|
||||
CREATE TABLE IF NOT EXISTS imm_media_art(
|
||||
video_id INTEGER PRIMARY KEY,
|
||||
anilist_id INTEGER,
|
||||
cover_url TEXT,
|
||||
cover_blob BLOB,
|
||||
title_romaji TEXT,
|
||||
title_english TEXT,
|
||||
episodes_total INTEGER,
|
||||
fetched_at_ms INTEGER NOT NULL,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
|
||||
if (currentVersion?.schema_version === 1) {
|
||||
@@ -299,6 +619,134 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
dropColumnIfExists(db, 'imm_sessions', 'updated_at_ms');
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < 5) {
|
||||
migrateLegacyAnimeMetadata(db);
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < 6) {
|
||||
addColumnIfMissing(db, 'imm_words', 'part_of_speech', 'TEXT');
|
||||
addColumnIfMissing(db, 'imm_words', 'pos1', 'TEXT');
|
||||
addColumnIfMissing(db, 'imm_words', 'pos2', 'TEXT');
|
||||
addColumnIfMissing(db, 'imm_words', 'pos3', 'TEXT');
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < 7) {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_subtitle_lines(
|
||||
line_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id INTEGER NOT NULL,
|
||||
event_id INTEGER,
|
||||
video_id INTEGER NOT NULL,
|
||||
anime_id INTEGER,
|
||||
line_index INTEGER NOT NULL,
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
text TEXT NOT NULL,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(event_id) REFERENCES imm_session_events(event_id) ON DELETE SET NULL,
|
||||
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
|
||||
)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_word_line_occurrences(
|
||||
line_id INTEGER NOT NULL,
|
||||
word_id INTEGER NOT NULL,
|
||||
occurrence_count INTEGER NOT NULL,
|
||||
PRIMARY KEY(line_id, word_id),
|
||||
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(word_id) REFERENCES imm_words(id) ON DELETE CASCADE
|
||||
)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_kanji_line_occurrences(
|
||||
line_id INTEGER NOT NULL,
|
||||
kanji_id INTEGER NOT NULL,
|
||||
occurrence_count INTEGER NOT NULL,
|
||||
PRIMARY KEY(line_id, kanji_id),
|
||||
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(kanji_id) REFERENCES imm_kanji(id) ON DELETE CASCADE
|
||||
)
|
||||
`);
|
||||
}
|
||||
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_anime_normalized_title
|
||||
ON imm_anime(normalized_title_key)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_anime_anilist_id
|
||||
ON imm_anime(anilist_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_videos_anime_id
|
||||
ON imm_videos(anime_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_video_started
|
||||
ON imm_sessions(video_id, started_at_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_status_started
|
||||
ON imm_sessions(status, started_at_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_telemetry_session_sample
|
||||
ON imm_session_telemetry(session_id, sample_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_events_session_ts
|
||||
ON imm_session_events(session_id, ts_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_events_type_ts
|
||||
ON imm_session_events(event_type, ts_ms DESC)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_rollups_day_video
|
||||
ON imm_daily_rollups(rollup_day, video_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_rollups_month_video
|
||||
ON imm_monthly_rollups(rollup_month, video_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_words_headword_word_reading
|
||||
ON imm_words(headword, word, reading)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_kanji_kanji
|
||||
ON imm_kanji(kanji)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_session_line
|
||||
ON imm_subtitle_lines(session_id, line_index)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_video_line
|
||||
ON imm_subtitle_lines(video_id, line_index)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_anime_line
|
||||
ON imm_subtitle_lines(anime_id, line_index)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_word_line_occurrences_word
|
||||
ON imm_word_line_occurrences(word_id, line_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_kanji_line_occurrences_kanji
|
||||
ON imm_kanji_line_occurrences(kanji_id, line_id)
|
||||
`);
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < SCHEMA_VERSION) {
|
||||
db.exec('DELETE FROM imm_daily_rollups');
|
||||
db.exec('DELETE FROM imm_monthly_rollups');
|
||||
db.exec(`UPDATE imm_rollup_state SET state_value = 0 WHERE state_key = 'last_rollup_sample_ms'`);
|
||||
}
|
||||
|
||||
db.exec(`
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms)
|
||||
VALUES (${SCHEMA_VERSION}, ${Date.now()})
|
||||
@@ -328,12 +776,21 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
|
||||
`),
|
||||
wordUpsertStmt: db.prepare(`
|
||||
INSERT INTO imm_words (
|
||||
headword, word, reading, first_seen, last_seen, frequency
|
||||
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, 1
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, 1
|
||||
)
|
||||
ON CONFLICT(headword, word, reading) DO UPDATE SET
|
||||
frequency = COALESCE(frequency, 0) + 1,
|
||||
part_of_speech = CASE
|
||||
WHEN COALESCE(NULLIF(imm_words.part_of_speech, ''), 'other') = 'other'
|
||||
AND COALESCE(NULLIF(excluded.part_of_speech, ''), '') <> ''
|
||||
THEN excluded.part_of_speech
|
||||
ELSE imm_words.part_of_speech
|
||||
END,
|
||||
pos1 = COALESCE(NULLIF(imm_words.pos1, ''), excluded.pos1),
|
||||
pos2 = COALESCE(NULLIF(imm_words.pos2, ''), excluded.pos2),
|
||||
pos3 = COALESCE(NULLIF(imm_words.pos3, ''), excluded.pos3),
|
||||
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
|
||||
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
|
||||
`),
|
||||
@@ -348,9 +805,93 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
|
||||
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
|
||||
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
|
||||
`),
|
||||
subtitleLineInsertStmt: db.prepare(`
|
||||
INSERT INTO imm_subtitle_lines (
|
||||
session_id, event_id, video_id, anime_id, line_index, segment_start_ms,
|
||||
segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
`),
|
||||
wordIdSelectStmt: db.prepare(`
|
||||
SELECT id FROM imm_words
|
||||
WHERE headword = ? AND word = ? AND reading = ?
|
||||
`),
|
||||
kanjiIdSelectStmt: db.prepare(`
|
||||
SELECT id FROM imm_kanji
|
||||
WHERE kanji = ?
|
||||
`),
|
||||
wordLineOccurrenceUpsertStmt: db.prepare(`
|
||||
INSERT INTO imm_word_line_occurrences (
|
||||
line_id, word_id, occurrence_count
|
||||
) VALUES (
|
||||
?, ?, ?
|
||||
)
|
||||
ON CONFLICT(line_id, word_id) DO UPDATE SET
|
||||
occurrence_count = imm_word_line_occurrences.occurrence_count + excluded.occurrence_count
|
||||
`),
|
||||
kanjiLineOccurrenceUpsertStmt: db.prepare(`
|
||||
INSERT INTO imm_kanji_line_occurrences (
|
||||
line_id, kanji_id, occurrence_count
|
||||
) VALUES (
|
||||
?, ?, ?
|
||||
)
|
||||
ON CONFLICT(line_id, kanji_id) DO UPDATE SET
|
||||
occurrence_count = imm_kanji_line_occurrences.occurrence_count + excluded.occurrence_count
|
||||
`),
|
||||
videoAnimeIdSelectStmt: db.prepare(`
|
||||
SELECT anime_id FROM imm_videos
|
||||
WHERE video_id = ?
|
||||
`),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Applies one counted word occurrence to the `imm_words` aggregate and returns
 * the word's row id.
 *
 * The word upsert statement is executed once per occurrence
 * (`occurrence.occurrenceCount` times), after which the id is looked up by
 * `(headword, word, reading)`.
 *
 * @param stmts - Prepared statements; uses `wordUpsertStmt` and `wordIdSelectStmt`.
 * @param occurrence - Word identity, part-of-speech tags and how often it occurred.
 * @param firstSeen - Timestamp passed to the upsert as `first_seen`.
 * @param lastSeen - Timestamp passed to the upsert as `last_seen`.
 * @returns The `imm_words` id for the word.
 * @throws Error when no id can be resolved after the upserts.
 */
function incrementWordAggregate(
  stmts: TrackerPreparedStatements,
  occurrence: Extract<QueuedWrite, { kind: 'subtitleLine' }>['wordOccurrences'][number],
  firstSeen: number,
  lastSeen: number,
): number {
  const { headword, word, reading, partOfSpeech, pos1, pos2, pos3, occurrenceCount } = occurrence;

  let applied = 0;
  while (applied < occurrenceCount) {
    stmts.wordUpsertStmt.run(
      headword,
      word,
      reading,
      partOfSpeech,
      pos1,
      pos2,
      pos3,
      firstSeen,
      lastSeen,
    );
    applied += 1;
  }

  const match = stmts.wordIdSelectStmt.get(headword, word, reading) as { id: number } | null;
  const wordId = match?.id;
  if (wordId) {
    return wordId;
  }
  throw new Error(`Failed to resolve imm_words id for ${headword}`);
}
|
||||
|
||||
/**
 * Applies one counted kanji occurrence to the `imm_kanji` aggregate and
 * returns the kanji's row id.
 *
 * The kanji upsert statement is executed once per occurrence
 * (`occurrence.occurrenceCount` times), then the id is looked up by character.
 *
 * @param stmts - Prepared statements; uses `kanjiUpsertStmt` and `kanjiIdSelectStmt`.
 * @param occurrence - The kanji and how often it occurred.
 * @param firstSeen - Timestamp passed to the upsert as `first_seen`.
 * @param lastSeen - Timestamp passed to the upsert as `last_seen`.
 * @returns The `imm_kanji` id for the character.
 * @throws Error when no id can be resolved after the upserts.
 */
function incrementKanjiAggregate(
  stmts: TrackerPreparedStatements,
  occurrence: Extract<QueuedWrite, { kind: 'subtitleLine' }>['kanjiOccurrences'][number],
  firstSeen: number,
  lastSeen: number,
): number {
  const { kanji, occurrenceCount } = occurrence;

  let applied = 0;
  while (applied < occurrenceCount) {
    stmts.kanjiUpsertStmt.run(kanji, firstSeen, lastSeen);
    applied += 1;
  }

  const match = stmts.kanjiIdSelectStmt.get(kanji) as { id: number } | null;
  const kanjiId = match?.id;
  if (kanjiId) {
    return kanjiId;
  }
  throw new Error(`Failed to resolve imm_kanji id for ${kanji}`);
}
|
||||
|
||||
export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedStatements): void {
|
||||
if (write.kind === 'telemetry') {
|
||||
stmts.telemetryInsertStmt.run(
|
||||
@@ -379,6 +920,10 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
|
||||
write.headword,
|
||||
write.word,
|
||||
write.reading,
|
||||
write.partOfSpeech,
|
||||
write.pos1,
|
||||
write.pos2,
|
||||
write.pos3,
|
||||
write.firstSeen,
|
||||
write.lastSeen,
|
||||
);
|
||||
@@ -388,6 +933,31 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
|
||||
stmts.kanjiUpsertStmt.run(write.kanji, write.firstSeen, write.lastSeen);
|
||||
return;
|
||||
}
|
||||
if (write.kind === 'subtitleLine') {
|
||||
const animeRow = stmts.videoAnimeIdSelectStmt.get(write.videoId) as { anime_id: number | null } | null;
|
||||
const lineResult = stmts.subtitleLineInsertStmt.run(
|
||||
write.sessionId,
|
||||
null,
|
||||
write.videoId,
|
||||
animeRow?.anime_id ?? null,
|
||||
write.lineIndex,
|
||||
write.segmentStartMs ?? null,
|
||||
write.segmentEndMs ?? null,
|
||||
write.text,
|
||||
Date.now(),
|
||||
Date.now(),
|
||||
);
|
||||
const lineId = Number(lineResult.lastInsertRowid);
|
||||
for (const occurrence of write.wordOccurrences) {
|
||||
const wordId = incrementWordAggregate(stmts, occurrence, write.firstSeen, write.lastSeen);
|
||||
stmts.wordLineOccurrenceUpsertStmt.run(lineId, wordId, occurrence.occurrenceCount);
|
||||
}
|
||||
for (const occurrence of write.kanjiOccurrences) {
|
||||
const kanjiId = incrementKanjiAggregate(stmts, occurrence, write.firstSeen, write.lastSeen);
|
||||
stmts.kanjiLineOccurrenceUpsertStmt.run(lineId, kanjiId, occurrence.occurrenceCount);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
stmts.eventInsertStmt.run(
|
||||
write.sessionId,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/**
 * Current immersion-tracker database schema version.
 *
 * Declared exactly once: a second `export const SCHEMA_VERSION` is a
 * redeclaration error. The value is 7 because the schema setup code applies
 * upgrade steps gated on stored versions below 5, 6 and 7, and finally records
 * this constant in `imm_schema_version`.
 */
export const SCHEMA_VERSION = 7;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
|
||||
@@ -29,6 +29,9 @@ export const EVENT_PAUSE_END = 8;
|
||||
export interface ImmersionTrackerOptions {
|
||||
dbPath: string;
|
||||
policy?: ImmersionTrackerPolicy;
|
||||
resolveLegacyVocabularyPos?: (
|
||||
row: LegacyVocabularyPosRow,
|
||||
) => Promise<LegacyVocabularyPosResolution | null>;
|
||||
}
|
||||
|
||||
export interface ImmersionTrackerPolicy {
|
||||
@@ -72,6 +75,7 @@ export interface SessionState extends TelemetryAccumulator {
|
||||
lastPauseStartMs: number | null;
|
||||
isPaused: boolean;
|
||||
pendingTelemetry: boolean;
|
||||
markedWatched: boolean;
|
||||
}
|
||||
|
||||
interface QueuedTelemetryWrite {
|
||||
@@ -118,6 +122,10 @@ interface QueuedWordWrite {
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
@@ -129,11 +137,42 @@ interface QueuedKanjiWrite {
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
/**
 * A word found in one subtitle line together with how many times it occurred.
 *
 * `headword`, `word` and `reading` identify the `imm_words` row; the
 * part-of-speech fields are passed through to the word upsert.
 */
export interface CountedWordOccurrence {
  headword: string;
  word: string;
  reading: string;
  partOfSpeech: string;
  pos1: string;
  pos2: string;
  pos3: string;
  /** Number of times the word upsert is applied for this line. */
  occurrenceCount: number;
}
|
||||
|
||||
/** A kanji found in one subtitle line together with how many times it occurred. */
export interface CountedKanjiOccurrence {
  /** Character used to upsert and look up the `imm_kanji` row. */
  kanji: string;
  /** Number of times the kanji upsert is applied for this line. */
  occurrenceCount: number;
}
|
||||
|
||||
/**
 * Queued write for one subtitle line.
 *
 * When executed, the line is inserted into `imm_subtitle_lines` and each word
 * and kanji occurrence is applied to its aggregate table and recorded against
 * the new line.
 */
interface QueuedSubtitleLineWrite {
  /** Discriminant used to select the subtitle-line branch when executing queued writes. */
  kind: 'subtitleLine';
  sessionId: number;
  videoId: number;
  lineIndex: number;
  segmentStartMs: number | null;
  segmentEndMs: number | null;
  text: string;
  wordOccurrences: CountedWordOccurrence[];
  kanjiOccurrences: CountedKanjiOccurrence[];
  /** Passed to the word/kanji upserts as `first_seen`. */
  firstSeen: number;
  /** Passed to the word/kanji upserts as `last_seen`. */
  lastSeen: number;
}
|
||||
|
||||
/**
 * Every kind of write that can sit in the tracker's write queue.
 *
 * Each member is listed once. `QueuedSubtitleLineWrite` must be part of the
 * union: code that narrows with `Extract<QueuedWrite, { kind: 'subtitleLine' }>`
 * or checks `write.kind === 'subtitleLine'` depends on it.
 */
export type QueuedWrite =
  | QueuedTelemetryWrite
  | QueuedEventWrite
  | QueuedWordWrite
  | QueuedKanjiWrite
  | QueuedSubtitleLineWrite;
|
||||
|
||||
export interface VideoMetadata {
|
||||
sourceType: number;
|
||||
@@ -152,8 +191,33 @@ export interface VideoMetadata {
|
||||
metadataJson: string | null;
|
||||
}
|
||||
|
||||
export interface ParsedAnimeVideoMetadata {
|
||||
animeId: number | null;
|
||||
parsedBasename: string | null;
|
||||
parsedTitle: string | null;
|
||||
parsedSeason: number | null;
|
||||
parsedEpisode: number | null;
|
||||
parserSource: string | null;
|
||||
parserConfidence: number | null;
|
||||
parseMetadataJson: string | null;
|
||||
}
|
||||
|
||||
export interface ParsedAnimeVideoGuess {
|
||||
parsedBasename: string | null;
|
||||
parsedTitle: string;
|
||||
parsedSeason: number | null;
|
||||
parsedEpisode: number | null;
|
||||
parserSource: 'guessit' | 'fallback';
|
||||
parserConfidence: number;
|
||||
parseMetadataJson: string;
|
||||
}
|
||||
|
||||
export interface SessionSummaryQueryRow {
|
||||
sessionId: number;
|
||||
videoId: number | null;
|
||||
canonicalTitle: string | null;
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
startedAtMs: number;
|
||||
endedAtMs: number | null;
|
||||
totalWatchedMs: number;
|
||||
@@ -166,6 +230,82 @@ export interface SessionSummaryQueryRow {
|
||||
lookupHits: number;
|
||||
}
|
||||
|
||||
export interface VocabularyStatsRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
pos1: string | null;
|
||||
pos2: string | null;
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface VocabularyCleanupSummary {
|
||||
scanned: number;
|
||||
kept: number;
|
||||
deleted: number;
|
||||
repaired: number;
|
||||
}
|
||||
|
||||
export interface LegacyVocabularyPosRow {
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string | null;
|
||||
}
|
||||
|
||||
export interface LegacyVocabularyPosResolution {
|
||||
headword: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
}
|
||||
|
||||
export interface KanjiStatsRow {
|
||||
kanjiId: number;
|
||||
kanji: string;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface WordOccurrenceRow {
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface KanjiOccurrenceRow {
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface SessionEventRow {
|
||||
eventType: number;
|
||||
tsMs: number;
|
||||
payload: string | null;
|
||||
}
|
||||
|
||||
export interface SessionTimelineRow {
|
||||
sampleMs: number;
|
||||
totalWatchedMs: number;
|
||||
@@ -200,3 +340,180 @@ export interface ProbeMetadata {
|
||||
bitrateKbps: number | null;
|
||||
audioCodecId: number | null;
|
||||
}
|
||||
|
||||
export interface MediaArtRow {
|
||||
videoId: number;
|
||||
anilistId: number | null;
|
||||
coverUrl: string | null;
|
||||
coverBlob: Buffer | null;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
episodesTotal: number | null;
|
||||
fetchedAtMs: number;
|
||||
}
|
||||
|
||||
export interface MediaLibraryRow {
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalWordsSeen: number;
|
||||
lastWatchedMs: number;
|
||||
hasCoverArt: number;
|
||||
}
|
||||
|
||||
export interface MediaDetailRow {
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalWordsSeen: number;
|
||||
totalLinesSeen: number;
|
||||
totalLookupCount: number;
|
||||
totalLookupHits: number;
|
||||
}
|
||||
|
||||
export interface AnimeLibraryRow {
|
||||
animeId: number;
|
||||
canonicalTitle: string;
|
||||
anilistId: number | null;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalWordsSeen: number;
|
||||
episodeCount: number;
|
||||
episodesTotal: number | null;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface AnimeDetailRow {
|
||||
animeId: number;
|
||||
canonicalTitle: string;
|
||||
anilistId: number | null;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
titleNative: string | null;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalWordsSeen: number;
|
||||
totalLinesSeen: number;
|
||||
totalLookupCount: number;
|
||||
totalLookupHits: number;
|
||||
episodeCount: number;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface AnimeAnilistEntryRow {
|
||||
anilistId: number;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
season: number | null;
|
||||
}
|
||||
|
||||
export interface AnimeEpisodeRow {
|
||||
animeId: number;
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
parsedTitle: string | null;
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
durationMs: number;
|
||||
watched: number;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalWordsSeen: number;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface StreakCalendarRow {
|
||||
epochDay: number;
|
||||
totalActiveMin: number;
|
||||
}
|
||||
|
||||
export interface AnimeWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface EpisodesPerDayRow {
|
||||
epochDay: number;
|
||||
episodeCount: number;
|
||||
}
|
||||
|
||||
export interface NewAnimePerDayRow {
|
||||
epochDay: number;
|
||||
newAnimeCount: number;
|
||||
}
|
||||
|
||||
export interface WatchTimePerAnimeRow {
|
||||
epochDay: number;
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
totalActiveMin: number;
|
||||
}
|
||||
|
||||
export interface WordDetailRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
pos1: string | null;
|
||||
pos2: string | null;
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface WordAnimeAppearanceRow {
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface SimilarWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface KanjiDetailRow {
|
||||
kanjiId: number;
|
||||
kanji: string;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface KanjiAnimeAppearanceRow {
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface KanjiWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface EpisodeCardEventRow {
|
||||
eventId: number;
|
||||
sessionId: number;
|
||||
tsMs: number;
|
||||
cardsDelta: number;
|
||||
noteIds: number[];
|
||||
}
|
||||
|
||||
@@ -133,6 +133,17 @@ function isFrequencyExcludedByPos(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Tells whether a token should be left out of persisted vocabulary.
 *
 * Uses the same part-of-speech exclusion check as frequency marking
 * (`isFrequencyExcludedByPos`), with the pos1/pos2 exclusion sets resolved
 * from `options`.
 *
 * @param token - Token to test.
 * @param options - Optional pos1/pos2 exclusion overrides; defaults to `{}`.
 * @returns `true` when the token is excluded by its part of speech.
 */
export function shouldExcludeTokenFromVocabularyPersistence(
  token: MergedToken,
  options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
  const pos1Exclusions = resolvePos1Exclusions(options);
  const pos2Exclusions = resolvePos2Exclusions(options);
  return isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions);
}
|
||||
|
||||
function applyFrequencyMarking(
|
||||
tokens: MergedToken[],
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
|
||||
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { PartOfSpeech } from '../../../types';
|
||||
|
||||
/**
 * Normalises a raw part-of-speech tag for comparison.
 *
 * @param value - Tag text, or `null`/`undefined` when absent.
 * @returns The tag with surrounding whitespace removed, or `''` for non-string input.
 */
function normalizePosTag(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
|
||||
|
||||
/**
 * Type guard: is `value` a string that appears among the values of the
 * `PartOfSpeech` enum?
 *
 * @param value - Anything, typically untrusted or stored text.
 * @returns `true` (narrowing to `PartOfSpeech`) when `value` is a string found
 *   in `Object.values(PartOfSpeech)`.
 */
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
  if (typeof value !== 'string') {
    return false;
  }
  const knownValues: readonly unknown[] = Object.values(PartOfSpeech);
  return knownValues.includes(value);
}
|
||||
|
||||
/** Lookup from MeCab `pos1` tag text to the matching `PartOfSpeech` member. */
const MECAB_POS1_TO_PART_OF_SPEECH: ReadonlyMap<string, PartOfSpeech> = new Map<
  string,
  PartOfSpeech
>([
  ['名詞', PartOfSpeech.noun],
  ['動詞', PartOfSpeech.verb],
  ['形容詞', PartOfSpeech.i_adjective],
  ['形状詞', PartOfSpeech.na_adjective],
  ['形容動詞', PartOfSpeech.na_adjective],
  ['助詞', PartOfSpeech.particle],
  ['助動詞', PartOfSpeech.bound_auxiliary],
  ['記号', PartOfSpeech.symbol],
  ['補助記号', PartOfSpeech.symbol],
]);

/**
 * Maps a MeCab `pos1` tag to a `PartOfSpeech` value.
 *
 * The tag is normalised with `normalizePosTag` before lookup, so surrounding
 * whitespace and `null`/`undefined` are tolerated.
 *
 * @param pos1 - Raw `pos1` tag.
 * @returns The matching `PartOfSpeech`, or `PartOfSpeech.other` for any tag
 *   that is not in the table (including empty input).
 */
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
  const tag = normalizePosTag(pos1);
  return MECAB_POS1_TO_PART_OF_SPEECH.get(tag) ?? PartOfSpeech.other;
}
|
||||
|
||||
/**
 * Chooses the `PartOfSpeech` value to store for a word.
 *
 * `pos1` takes precedence. It may hold several tags separated by `|`; each
 * non-empty tag is mapped with `mapMecabPos1ToPartOfSpeech`:
 * - every tag maps to the same value -> that value;
 * - the tags map to different values -> `PartOfSpeech.other`.
 *
 * Only when `pos1` contains no tags at all is `partOfSpeech` consulted; it is
 * returned if it is a valid `PartOfSpeech` value, otherwise the result is
 * `PartOfSpeech.other`.
 *
 * @param input - Optional stored `partOfSpeech` text and raw `pos1` tag(s).
 * @returns The part of speech to persist.
 */
export function deriveStoredPartOfSpeech(input: {
  partOfSpeech?: string | null;
  pos1?: string | null;
}): PartOfSpeech {
  const mappedValues = new Set<PartOfSpeech>();
  let sawTag = false;

  for (const rawTag of normalizePosTag(input.pos1).split('|')) {
    const tag = rawTag.trim();
    if (tag.length === 0) {
      continue;
    }
    sawTag = true;
    mappedValues.add(mapMecabPos1ToPartOfSpeech(tag));
  }

  if (sawTag) {
    // Mixed tags that disagree on the mapped value are stored as "other".
    const [onlyValue] = [...mappedValues];
    return mappedValues.size === 1 && onlyValue !== undefined ? onlyValue : PartOfSpeech.other;
  }

  return isPartOfSpeechValue(input.partOfSpeech) ? input.partOfSpeech : PartOfSpeech.other;
}
|
||||
@@ -19,34 +19,12 @@
|
||||
import * as childProcess from 'child_process';
|
||||
import { PartOfSpeech, Token, MecabStatus } from './types';
|
||||
import { createLogger } from './logger';
|
||||
import { mapMecabPos1ToPartOfSpeech } from './core/services/tokenizer/part-of-speech';
|
||||
|
||||
export { PartOfSpeech };
|
||||
|
||||
const log = createLogger('mecab');
|
||||
|
||||
function mapPartOfSpeech(pos1: string): PartOfSpeech {
|
||||
switch (pos1) {
|
||||
case '名詞':
|
||||
return PartOfSpeech.noun;
|
||||
case '動詞':
|
||||
return PartOfSpeech.verb;
|
||||
case '形容詞':
|
||||
return PartOfSpeech.i_adjective;
|
||||
case '形状詞':
|
||||
case '形容動詞':
|
||||
return PartOfSpeech.na_adjective;
|
||||
case '助詞':
|
||||
return PartOfSpeech.particle;
|
||||
case '助動詞':
|
||||
return PartOfSpeech.bound_auxiliary;
|
||||
case '記号':
|
||||
case '補助記号':
|
||||
return PartOfSpeech.symbol;
|
||||
default:
|
||||
return PartOfSpeech.other;
|
||||
}
|
||||
}
|
||||
|
||||
export function parseMecabLine(line: string): Token | null {
|
||||
if (!line || line === 'EOS' || line.trim() === '') {
|
||||
return null;
|
||||
@@ -73,7 +51,7 @@ export function parseMecabLine(line: string): Token | null {
|
||||
|
||||
return {
|
||||
word: surface,
|
||||
partOfSpeech: mapPartOfSpeech(pos1),
|
||||
partOfSpeech: mapMecabPos1ToPartOfSpeech(pos1),
|
||||
pos1,
|
||||
pos2,
|
||||
pos3,
|
||||
@@ -446,4 +424,4 @@ export class MecabTokenizer {
|
||||
}
|
||||
}
|
||||
|
||||
export { mapPartOfSpeech };
|
||||
export { mapMecabPos1ToPartOfSpeech as mapPartOfSpeech };
|
||||
|
||||
Reference in New Issue
Block a user