feat(immersion): add anime metadata, occurrence tracking, and schema upgrades

- Add imm_anime table with AniList integration
- Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences
- Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words
- Add anime metadata parsing with guessit fallback
- Add video duration tracking and watched status
- Add episode, streak, trend, and word/kanji detail queries
- Deduplicate subtitle line recording within sessions
- Pass Anki note IDs through card mining callback chain
This commit is contained in:
2026-03-14 22:13:42 -07:00
parent ee95e86ad5
commit f005f542a3
19 changed files with 5231 additions and 122 deletions

View File

@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
});
assert.deepEqual(result, {
title: 'Guessit Title',
season: null,
episode: 7,
source: 'guessit',
});
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
});
assert.deepEqual(result, {
title: 'My Anime',
season: 1,
episode: 3,
source: 'fallback',
});
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
]);
assert.deepEqual(result, {
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
});
assert.deepEqual(result, {
title: 'Rascal Does not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});

View File

@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
export interface AnilistMediaGuess {
title: string;
season: number | null;
episode: number | null;
source: 'guessit' | 'fallback';
}
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
};
}
function runGuessit(target: string): Promise<string> {
export function runGuessit(target: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.execFile(
'guessit',
@@ -73,7 +74,7 @@ function runGuessit(target: string): Promise<string> {
});
}
type GuessAnilistMediaInfoDeps = {
export interface GuessAnilistMediaInfoDeps {
runGuessit: (target: string) => Promise<string>;
};
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
const parsed = JSON.parse(stdout) as Record<string, unknown>;
const title = readGuessitTitle(parsed.title);
const episode = firstPositiveInteger(parsed.episode);
const season = firstPositiveInteger(parsed.season);
if (title) {
return { title, episode, source: 'guessit' };
return { title, season, episode, source: 'guessit' };
}
} catch {
// Ignore guessit failures and fall back to internal parser.
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
}
return {
title: parsed.title.trim(),
season: parsed.season,
episode: parsed.episode,
source: 'fallback',
};

View File

@@ -0,0 +1,239 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
import { Database } from '../immersion-tracker/sqlite.js';
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
// Create a fresh temp directory and return a SQLite path inside it, so each
// test gets an isolated database file.
function makeDbPath(): string {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-cover-art-test-'));
  return path.join(tempDir, 'immersion.sqlite');
}
// Remove the temp directory containing the test database (best-effort).
function cleanupDbPath(dbPath: string): void {
  const containingDir = path.dirname(dbPath);
  fs.rmSync(containingDir, { recursive: true, force: true });
}
// Filename-normalization cases drawn from real Jellyfin/scene-release titles:
// leading "[Group]" tags, "S2 - 05:", "E03:", and trailing codec/year/group
// noise must all be stripped down to the bare series title.
test('stripFilenameTags normalizes common media-title formats', () => {
  assert.equal(
    stripFilenameTags('[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...'),
    'The Eminence in Shadow',
  );
  assert.equal(
    stripFilenameTags(
      '[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
    ),
    'Kono Subarashii Sekai ni Shukufuku wo!',
  );
  assert.equal(
    stripFilenameTags('Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure'),
    'Kono Subarashii Sekai ni Shukufuku wo!',
  );
  // Full Sonarr-style name: year parens, absolute episode number, bracketed
  // codec tags, and a "-Group" suffix all removed in one pass.
  assert.equal(
    stripFilenameTags(
      'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
    ),
    'Little Witch Academia',
  );
});
// When a cover row already has a URL but no blob, fetchIfMissing must download
// the bytes from that URL (exactly one fetch, no AniList re-search) and keep
// the previously stored metadata intact.
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  ensureSchema(db);
  const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
    canonicalTitle: 'Cover Fetcher Test',
    sourcePath: '/tmp/cover-fetcher-test.mkv',
    sourceUrl: null,
    sourceType: SOURCE_TYPE_LOCAL,
  });
  // Seed a row with a URL but a null blob — the backfill target state.
  upsertCoverArt(db, videoId, {
    anilistId: 7,
    coverUrl: 'https://images.test/cover.jpg',
    coverBlob: null,
    titleRomaji: 'Test Title',
    titleEnglish: 'Test Title',
    episodesTotal: 12,
  });
  const fetchCalls: string[] = [];
  const originalFetch = globalThis.fetch;
  // Stub fetch: record every URL and serve 4 bytes of fake image data.
  globalThis.fetch = (async (input: RequestInfo | URL) => {
    const url = String(input);
    fetchCalls.push(url);
    assert.equal(url, 'https://images.test/cover.jpg');
    return new Response(new Uint8Array([1, 2, 3, 4]), {
      status: 200,
      headers: { 'Content-Type': 'image/jpeg' },
    });
  }) as typeof fetch;
  try {
    // No-op rate limiter: the backfill path must not need AniList at all.
    const fetcher = createCoverArtFetcher(
      {
        acquire: async () => {},
        recordResponse: () => {},
      },
      console,
    );
    const fetched = await fetcher.fetchIfMissing(
      db,
      videoId,
      '[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
    );
    const stored = getCoverArt(db, videoId);
    assert.equal(fetched, true);
    assert.equal(fetchCalls.length, 1);
    assert.equal(stored?.coverBlob?.length, 4);
    assert.equal(stored?.titleEnglish, 'Test Title');
  } finally {
    // Always restore the real fetch and remove the temp database.
    globalThis.fetch = originalFetch;
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Wrap an arbitrary payload in a 200 JSON Response, mimicking an AniList reply.
function createJsonResponse(payload: unknown): Response {
  const body = JSON.stringify(payload);
  const init: ResponseInit = {
    status: 200,
    headers: { 'content-type': 'application/json' },
  };
  return new Response(body, init);
}
// guessit output should drive the AniList search: the "<title> Season <n>"
// candidate is tried first, and when it returns no media the plain-title
// candidate is used and its best match is persisted.
test('fetchIfMissing uses guessit primary title and season when available', async () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  ensureSchema(db);
  const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
    canonicalTitle: '[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
    sourcePath: '/tmp/cover-fetcher-season-test.mkv',
    sourceUrl: null,
    sourceType: SOURCE_TYPE_LOCAL,
  });
  const searchCalls: Array<{ search: string }> = [];
  const originalFetch = globalThis.fetch;
  // Stub AniList GraphQL: an empty page for the "Season 2" candidate, one
  // media entry (id 19) for the plain title. The subsequent cover-image
  // download hits this stub with no JSON body and fails harmlessly.
  globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
    const raw = (init?.body as string | undefined) ?? '';
    const payload = JSON.parse(raw) as { variables: { search: string } };
    const search = payload.variables.search;
    searchCalls.push({ search });
    if (search.includes('Season 2')) {
      return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
    }
    return Promise.resolve(
      createJsonResponse({
        data: {
          Page: {
            media: [
              {
                id: 19,
                episodes: 24,
                coverImage: { large: 'https://images.test/cover.jpg', medium: null },
                title: { romaji: 'Little Witch Academia', english: 'Little Witch Academia', native: null },
              },
            ],
          },
        },
      }),
    );
  }) as typeof fetch;
  try {
    const fetcher = createCoverArtFetcher(
      {
        acquire: async () => {},
        recordResponse: () => {},
      },
      console,
      {
        // Deterministic guessit stub — avoids spawning the real binary.
        runGuessit: async () =>
          JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
      },
    );
    const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
    const stored = getCoverArt(db, videoId);
    assert.equal(fetched, true);
    assert.equal(searchCalls.length, 2);
    // The season-qualified candidate must be tried before the plain title.
    assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
    assert.equal(stored?.anilistId, 19);
  } finally {
    globalThis.fetch = originalFetch;
    db.close();
    cleanupDbPath(dbPath);
  }
});
// When the guessit dependency throws, fetchIfMissing must fall back to the
// internal parser: exactly one AniList search using the parsed title
// ("School Vlog"), whose match is then persisted.
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  ensureSchema(db);
  const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
    canonicalTitle: 'School Vlog S01E01',
    sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
    sourceUrl: null,
    sourceType: SOURCE_TYPE_LOCAL,
  });
  let requestCount = 0;
  const originalFetch = globalThis.fetch;
  // Stub AniList GraphQL: asserts the fallback-parsed search term and returns
  // a single matching media entry (id 21).
  globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
    requestCount += 1;
    const raw = (init?.body as string | undefined) ?? '';
    const payload = JSON.parse(raw) as { variables: { search: string } };
    assert.equal(payload.variables.search, 'School Vlog');
    return Promise.resolve(
      createJsonResponse({
        data: {
          Page: {
            media: [
              {
                id: 21,
                episodes: 12,
                coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
                title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
              },
            ],
          },
        },
      }),
    );
  }) as typeof fetch;
  try {
    const fetcher = createCoverArtFetcher(
      {
        acquire: async () => {},
        recordResponse: () => {},
      },
      console,
      {
        // Simulate an unavailable guessit binary to force the fallback path.
        runGuessit: async () => {
          throw new Error('guessit unavailable');
        },
      },
    );
    const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
    const stored = getCoverArt(db, videoId);
    assert.equal(fetched, true);
    assert.equal(requestCount, 1);
    assert.equal(stored?.anilistId, 21);
  } finally {
    globalThis.fetch = originalFetch;
    db.close();
    cleanupDbPath(dbPath);
  }
});

View File

@@ -0,0 +1,405 @@
import type { AnilistRateLimiter } from './rate-limiter';
import type { DatabaseSync } from '../immersion-tracker/sqlite';
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
import { guessAnilistMediaInfo, runGuessit, type GuessAnilistMediaInfoDeps } from './anilist-updater';
// AniList GraphQL endpoint used for all cover-art searches.
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
// How long a cached "no match" row suppresses re-querying AniList (5 min).
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
// Search query: top 5 anime matches with episode counts, season info, cover
// images, and all title variants.
const SEARCH_QUERY = `
query ($search: String!) {
Page(perPage: 5) {
media(search: $search, type: ANIME) {
id
episodes
season
seasonYear
coverImage { large medium }
title { romaji english native }
}
}
}
`;
// Shape of a single media entry returned by SEARCH_QUERY.
interface AnilistMedia {
  id: number;
  episodes: number | null;
  season: string | null;
  seasonYear: number | null;
  coverImage: { large: string | null; medium: string | null } | null;
  title: { romaji: string | null; english: string | null; native: string | null } | null;
}
// Envelope of an AniList GraphQL search response (data and/or errors).
interface AnilistSearchResponse {
  data?: {
    Page?: {
      media?: AnilistMedia[];
    };
  };
  errors?: Array<{ message?: string }>;
}
// Public surface of the fetcher created by createCoverArtFetcher.
export interface CoverArtFetcher {
  fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
}
// Minimal logging contract; satisfied by `console`.
interface Logger {
  info(msg: string, ...args: unknown[]): void;
  warn(msg: string, ...args: unknown[]): void;
  error(msg: string, ...args: unknown[]): void;
}
// Title/season/episode parsed from a filename, tagged with which parser won.
interface CoverArtCandidate {
  title: string;
  source: 'guessit' | 'fallback';
  season: number | null;
  episode: number | null;
}
// Optional guessit override, mainly for tests.
interface CoverArtFetcherOptions {
  runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
/**
 * Reduce a raw media filename/title to a bare series title by stripping the
 * file extension, release-group tags, season/episode markers, bare codec
 * tags, and year parentheses.
 */
export function stripFilenameTags(raw: string): string {
  // Ordered (pattern, replacement) passes; order matters because later passes
  // assume earlier normalization (e.g. dots already converted to spaces).
  const passes: Array<[RegExp, string]> = [
    [/\.[A-Za-z0-9]{2,4}$/, ''], // trailing file extension
    [/^(?:\s*\[[^\]]*\]\s*)+/, ''], // leading [Group] tags
    [/[._]+/g, ' '], // dot/underscore word separators
    [/\s+-\s+S\d+E\d+.*$/i, ''], // " - S01E05 ..." onward
    [/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, ''], // " - 005 - ..." onward
    [/\s+S\d+E\d+.*$/i, ''], // " S01E05 ..." onward
    [/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, ''], // " S2 - 05: ..." onward
    [/\s+E\d+[: -].*$/i, ''], // " E03: ..." onward
    [/^S\d+E\d+\s*[- ]\s*/i, ''], // leading "S01E05 - "
    [/\s*\[[^\]]*\]\s*/g, ' '], // remaining bracketed tags
    [/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' '], // "(2017)"-style year parens
    [
      // Codec/source tags that can appear without brackets, plus any glued
      // suffix like "-1080p" or ".x264".
      /\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
      '',
    ],
    [/\s*-\s*[\w]+$/, ''], // trailing "-GroupName"
  ];
  const cleaned = passes.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    raw,
  );
  return cleaned.trim().replace(/\s{2,}/g, ' ');
}
// Drop any "Season N" phrase from a title, collapsing leftover double spaces.
function removeSeasonHint(title: string): string {
  const withoutSeason = title.replace(/\bseason\s*\d+\b/gi, '');
  return withoutSeason.replace(/\s{2,}/g, ' ').trim();
}
// Canonical comparison form: lowercase, trimmed, single-spaced.
function normalizeTitle(text: string): string {
  const collapsed = text.replace(/\s+/g, ' ');
  return collapsed.trim().toLowerCase();
}
// Collect every season number hinted at in a title, via "Season N" phrases
// or "sN"/"S01E05"-style markers.
function extractCandidateSeasonHints(text: string): Set<number> {
  // Inline normalization (trimmed, lowercased, single-spaced) so the patterns
  // below only need to handle lowercase input.
  const normalized = text.trim().toLowerCase().replace(/\s+/g, ' ');
  const seasons = new Set<number>();
  const patterns = [/\bseason\s*(\d{1,2})\b/gi, /\bs(\d{1,2})(?:\b|\D)/gi];
  for (const pattern of patterns) {
    for (const match of normalized.matchAll(pattern)) {
      const value = Number.parseInt(match[1]!, 10);
      if (Number.isInteger(value)) {
        seasons.add(value);
      }
    }
  }
  return seasons;
}
// True when any of the candidate titles hints at exactly this season number.
// Season 0/null never matches.
function isSeasonMentioned(titles: string[], season: number | null): boolean {
  if (!season) {
    return false;
  }
  return titles.some((title) => extractCandidateSeasonHints(title).has(season));
}
/**
 * Score AniList search results against the parsed title/episode/season and
 * return the best match (id plus a display title), or null when there are no
 * candidates at all.
 */
function pickBestSearchResult(
  title: string,
  episode: number | null,
  season: number | null,
  media: AnilistMedia[],
): { id: number; title: string } | null {
  // Compare against both the raw title and a season-hint-free variant,
  // deduplicated and normalized.
  const cleanedTitle = removeSeasonHint(title);
  const targets = [title, cleanedTitle]
    .map(normalizeTitle)
    .map((value) => value.trim())
    .filter((value, index, all) => value.length > 0 && all.indexOf(value) === index);
  // Prefer entries whose known episode count can contain the parsed episode;
  // fall back to the full list if that filter removes everything.
  const filtered = episode === null
    ? media
    : media.filter((item) => {
        const total = item.episodes;
        return total === null || total >= episode;
      });
  const candidates = filtered.length > 0 ? filtered : media;
  if (candidates.length === 0) {
    return null;
  }
  const scored = candidates.map((item) => {
    const candidateTitles = [
      item.title?.romaji,
      item.title?.english,
      item.title?.native,
    ]
      .filter((value): value is string => typeof value === 'string')
      .map((value) => normalizeTitle(value));
    let score = 0;
    for (const target of targets) {
      // Exact title match dominates; partial containment scores lower.
      if (candidateTitles.includes(target)) {
        score += 120;
        continue;
      }
      if (candidateTitles.some((itemTitle) => itemTitle.includes(target))) {
        score += 30;
      }
      if (candidateTitles.some((itemTitle) => target.includes(itemTitle))) {
        score += 10;
      }
    }
    // Small boosts for matching episode totals and season hints in titles.
    if (episode !== null && item.episodes === episode) {
      score += 20;
    }
    if (season !== null && isSeasonMentioned(candidateTitles, season)) {
      score += 15;
    }
    return { item, score };
  });
  scored.sort((a, b) => {
    if (b.score !== a.score) return b.score - a.score;
    // NOTE(review): ties prefer the HIGHER AniList id (typically the newer
    // entry) — confirm this is the intended tie-break direction.
    return b.item.id - a.item.id;
  });
  const selected = scored[0]!;
  const selectedTitle = selected.item.title?.english ?? selected.item.title?.romaji ?? selected.item.title?.native ?? title;
  return { id: selected.item.id, title: selectedTitle };
}
// Build the ordered, deduplicated list of AniList search strings for a parsed
// title. A "<title> Season <n>" variant is added only for guessit results with
// a season greater than 1 (season 1 is usually the unqualified title).
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
  const rawTitles: string[] = [parsed.title];
  if (parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1) {
    rawTitles.push(`${parsed.title} Season ${parsed.season}`);
  }
  const seen = new Set<string>();
  const candidates: string[] = [];
  for (const raw of rawTitles) {
    const trimmed = raw.trim();
    if (trimmed.length > 0 && !seen.has(trimmed)) {
      seen.add(trimmed);
      candidates.push(trimmed);
    }
  }
  return candidates;
}
// Run one rate-limited AniList search. A 429 is reported via the rateLimited
// flag (empty media) rather than thrown; other non-OK statuses throw.
async function searchAnilist(
  rateLimiter: AnilistRateLimiter,
  title: string,
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
  // Wait for the shared limiter before every request, and feed the response
  // headers back so it can track remaining quota.
  await rateLimiter.acquire();
  const response = await fetch(ANILIST_GRAPHQL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
    body: JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } }),
  });
  rateLimiter.recordResponse(response.headers);
  if (response.status === 429) {
    return { media: [], rateLimited: true };
  }
  if (!response.ok) {
    throw new Error(`Anilist search failed: ${response.status} ${response.statusText}`);
  }
  const body = (await response.json()) as AnilistSearchResponse;
  const mediaList = body.data?.Page?.media ?? [];
  return { media: mediaList, rateLimited: false };
}
// Best-effort image download: any network error or non-2xx status yields null.
async function downloadImage(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      return null;
    }
    const bytes = await response.arrayBuffer();
    return Buffer.from(bytes);
  } catch {
    return null;
  }
}
/**
 * Build a CoverArtFetcher that resolves AniList cover art for a video.
 *
 * Resolution order:
 *  1. Return immediately when a cover blob is already cached.
 *  2. Backfill the blob when only a cover URL is cached.
 *  3. Otherwise parse the canonical title (guessit or the fallback parser),
 *     search AniList with up to two title candidates plus the tag-stripped
 *     title, download the art, and persist both the cover-art row and the
 *     anime AniList metadata.
 *
 * No-match results are cached and only retried after NO_MATCH_RETRY_MS.
 * A rate-limited search never caches a no-match.
 *
 * @param rateLimiter Shared AniList limiter consulted before every request.
 * @param logger Sink for info/warn/error diagnostics (e.g. `console`).
 * @param options Optional guessit override, used by tests.
 */
export function createCoverArtFetcher(
  rateLimiter: AnilistRateLimiter,
  logger: Logger,
  options: CoverArtFetcherOptions = {},
): CoverArtFetcher {
  // Parse the canonical title via guessit (or the injected stub) with the
  // internal parser as fallback; null when nothing usable was extracted.
  const resolveMediaInfo = async (canonicalTitle: string): Promise<CoverArtCandidate | null> => {
    const parsed = await guessAnilistMediaInfo(null, canonicalTitle, {
      runGuessit: options.runGuessit ?? runGuessit,
    });
    if (!parsed) {
      return null;
    }
    return {
      title: parsed.title,
      season: parsed.season,
      episode: parsed.episode,
      source: parsed.source,
    };
  };
  return {
    async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
      const existing = getCoverArt(db, videoId);
      // Already have the image bytes: nothing to do.
      if (existing?.coverBlob) {
        return true;
      }
      // Have a URL but no bytes: backfill the blob without re-searching.
      if (existing?.coverUrl) {
        const coverBlob = await downloadImage(existing.coverUrl);
        if (coverBlob) {
          upsertCoverArt(db, videoId, {
            anilistId: existing.anilistId,
            coverUrl: existing.coverUrl,
            coverBlob,
            titleRomaji: existing.titleRomaji,
            titleEnglish: existing.titleEnglish,
            episodesTotal: existing.episodesTotal,
          });
          return true;
        }
      }
      // Recent cached no-match: skip AniList until the retry window passes.
      if (
        existing &&
        existing.coverUrl === null &&
        existing.anilistId === null &&
        Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
      ) {
        return false;
      }
      const cleaned = stripFilenameTags(canonicalTitle);
      if (!cleaned) {
        logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
        // Cache the failure so we do not reprocess this title on every call.
        upsertCoverArt(db, videoId, {
          anilistId: null,
          coverUrl: null,
          coverBlob: null,
          titleRomaji: null,
          titleEnglish: null,
          episodesTotal: null,
        });
        return false;
      }
      const parsedInfo = await resolveMediaInfo(canonicalTitle);
      const searchBase = parsedInfo?.title ?? cleaned;
      const searchCandidates = parsedInfo
        ? buildSearchCandidates(parsedInfo)
        : [cleaned];
      // Always keep the tag-stripped title as a last-resort candidate.
      const effectiveCandidates = searchCandidates.includes(cleaned)
        ? searchCandidates
        : [...searchCandidates, cleaned];
      let selected: AnilistMedia | null = null;
      let rateLimited = false;
      for (const candidate of effectiveCandidates) {
        logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);
        try {
          const result = await searchAnilist(rateLimiter, candidate);
          if (result.rateLimited) {
            // Fix: previously `rateLimited` was overwritten every iteration,
            // so a 429 on one candidate followed by an empty (non-throttled)
            // result on the next was cached as a permanent no-match. Latch
            // the flag and stop — further candidates would be throttled too.
            rateLimited = true;
            break;
          }
          if (result.media.length === 0) {
            continue;
          }
          const picked = pickBestSearchResult(
            searchBase,
            parsedInfo?.episode ?? null,
            parsedInfo?.season ?? null,
            result.media,
          );
          if (picked) {
            const match = result.media.find((media) => media.id === picked.id);
            if (match) {
              selected = match;
              break;
            }
          }
        } catch (err) {
          logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
          return false;
        }
      }
      if (rateLimited) {
        logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
        return false;
      }
      if (!selected) {
        logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
        upsertCoverArt(db, videoId, {
          anilistId: null,
          coverUrl: null,
          coverBlob: null,
          titleRomaji: null,
          titleEnglish: null,
          episodesTotal: null,
        });
        return false;
      }
      const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
      let coverBlob: Buffer | null = null;
      if (coverUrl) {
        coverBlob = await downloadImage(coverUrl);
      }
      upsertCoverArt(db, videoId, {
        anilistId: selected.id,
        coverUrl,
        coverBlob,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        episodesTotal: selected.episodes ?? null,
      });
      // Mirror the AniList identity onto the anime row for library queries.
      updateAnimeAnilistInfo(db, videoId, {
        anilistId: selected.id,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        titleNative: selected.title?.native ?? null,
        episodesTotal: selected.episodes ?? null,
      });
      logger.info(
        'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
        videoId,
        selected.id,
        selected.title?.romaji ?? searchBase,
      );
      return true;
    },
  };
}

View File

@@ -12,6 +12,7 @@ import {
resolveBoundedInt,
} from './immersion-tracker/reducer';
import type { QueuedWrite } from './immersion-tracker/types';
import { PartOfSpeech, type MergedToken } from '../../types';
type ImmersionTrackerService = import('./immersion-tracker-service').ImmersionTrackerService;
type ImmersionTrackerServiceCtor =
@@ -26,6 +27,34 @@ async function loadTrackerCtor(): Promise<ImmersionTrackerServiceCtor> {
return trackerCtor;
}
// Await the tracker's in-flight anime-metadata update for the active video,
// reaching into private state for test purposes; no-op when no session exists.
async function waitForPendingAnimeMetadata(tracker: ImmersionTrackerService): Promise<void> {
  const internals = tracker as unknown as {
    sessionState: { videoId: number } | null;
    pendingAnimeMetadataUpdates?: Map<number, Promise<void>>;
  };
  const activeVideoId = internals.sessionState?.videoId;
  if (!activeVideoId) {
    return;
  }
  await internals.pendingAnimeMetadataUpdates?.get(activeVideoId);
}
// Build a MergedToken test fixture: a zero-span, unknown, generic-POS token
// with any fields overridden by the caller.
function makeMergedToken(overrides: Partial<MergedToken>): MergedToken {
  const defaults: MergedToken = {
    surface: '',
    reading: '',
    headword: '',
    startPos: 0,
    endPos: 0,
    partOfSpeech: PartOfSpeech.other,
    pos1: '',
    pos2: '',
    pos3: '',
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  return { ...defaults, ...overrides };
}
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-immersion-test-'));
return path.join(dir, 'immersion.sqlite');
@@ -222,6 +251,308 @@ test('persists and retrieves minimum immersion tracking fields', async () => {
}
});
test('recordSubtitleLine persists counted allowed tokenized vocabulary rows and subtitle-line occurrences', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
tracker.handleMediaChange('/tmp/Little Witch Academia S02E04.mkv', 'Episode 4');
await waitForPendingAnimeMetadata(tracker);
tracker.recordSubtitleLine('猫 猫 日 日 は 知っている', 0, 1, [
makeMergedToken({
surface: '猫',
headword: '猫',
reading: 'ねこ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '一般',
}),
makeMergedToken({
surface: '猫',
headword: '猫',
reading: 'ねこ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '一般',
}),
makeMergedToken({
surface: 'は',
headword: 'は',
reading: 'は',
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
pos2: '係助詞',
}),
makeMergedToken({
surface: '知っている',
headword: '知る',
reading: 'しっている',
partOfSpeech: PartOfSpeech.other,
pos1: '動詞',
pos2: '自立',
}),
]);
const privateApi = tracker as unknown as {
flushTelemetry: (force?: boolean) => void;
flushNow: () => void;
};
privateApi.flushTelemetry(true);
privateApi.flushNow();
const db = new Database(dbPath);
const rows = db
.prepare(
`SELECT headword, word, reading, part_of_speech, pos1, pos2, frequency
FROM imm_words
ORDER BY id ASC`,
)
.all() as Array<{
headword: string;
word: string;
reading: string;
part_of_speech: string;
pos1: string;
pos2: string;
frequency: number;
}>;
const lineRows = db
.prepare(
`SELECT video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text
FROM imm_subtitle_lines
ORDER BY line_id ASC`,
)
.all() as Array<{
video_id: number;
anime_id: number | null;
line_index: number;
segment_start_ms: number | null;
segment_end_ms: number | null;
text: string;
}>;
const wordOccurrenceRows = db
.prepare(
`SELECT o.occurrence_count, w.headword, w.word, w.reading
FROM imm_word_line_occurrences o
JOIN imm_words w ON w.id = o.word_id
ORDER BY o.line_id ASC, o.word_id ASC`,
)
.all() as Array<{
occurrence_count: number;
headword: string;
word: string;
reading: string;
}>;
const kanjiOccurrenceRows = db
.prepare(
`SELECT o.occurrence_count, k.kanji
FROM imm_kanji_line_occurrences o
JOIN imm_kanji k ON k.id = o.kanji_id
ORDER BY o.line_id ASC, k.kanji ASC`,
)
.all() as Array<{
occurrence_count: number;
kanji: string;
}>;
db.close();
assert.deepEqual(rows, [
{
headword: '猫',
word: '猫',
reading: 'ねこ',
part_of_speech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '一般',
frequency: 2,
},
{
headword: '知る',
word: '知っている',
reading: 'しっている',
part_of_speech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
frequency: 1,
},
]);
assert.equal(lineRows.length, 1);
assert.equal(lineRows[0]?.line_index, 1);
assert.equal(lineRows[0]?.segment_start_ms, 0);
assert.equal(lineRows[0]?.segment_end_ms, 1000);
assert.equal(lineRows[0]?.text, '猫 猫 日 日 は 知っている');
assert.ok(lineRows[0]?.video_id);
assert.ok(lineRows[0]?.anime_id);
assert.deepEqual(wordOccurrenceRows, [
{
occurrence_count: 2,
headword: '猫',
word: '猫',
reading: 'ねこ',
},
{
occurrence_count: 1,
headword: '知る',
word: '知っている',
reading: 'しっている',
},
]);
assert.deepEqual(kanjiOccurrenceRows, [
{
occurrence_count: 2,
kanji: '日',
},
{
occurrence_count: 2,
kanji: '猫',
},
{
occurrence_count: 1,
kanji: '知',
},
]);
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
test('handleMediaChange links parsed anime metadata on the active video row', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
tracker.handleMediaChange('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5');
await waitForPendingAnimeMetadata(tracker);
const privateApi = tracker as unknown as {
db: DatabaseSync;
sessionState: { videoId: number } | null;
};
const videoId = privateApi.sessionState?.videoId;
assert.ok(videoId);
const row = privateApi.db
.prepare(
`
SELECT
v.anime_id,
v.parsed_basename,
v.parsed_title,
v.parsed_season,
v.parsed_episode,
v.parser_source,
a.canonical_title AS anime_title,
a.anilist_id
FROM imm_videos v
LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_id = ?
`,
)
.get(videoId) as {
anime_id: number | null;
parsed_basename: string | null;
parsed_title: string | null;
parsed_season: number | null;
parsed_episode: number | null;
parser_source: string | null;
anime_title: string | null;
anilist_id: number | null;
} | null;
assert.ok(row);
assert.ok(row?.anime_id);
assert.equal(row?.parsed_basename, 'Little Witch Academia S02E05.mkv');
assert.equal(row?.parsed_title, 'Little Witch Academia');
assert.equal(row?.parsed_season, 2);
assert.equal(row?.parsed_episode, 5);
assert.ok(row?.parser_source === 'guessit' || row?.parser_source === 'fallback');
assert.equal(row?.anime_title, 'Little Witch Academia');
assert.equal(row?.anilist_id, null);
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
test('handleMediaChange reuses the same provisional anime row across matching files', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
tracker.handleMediaChange('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5');
await waitForPendingAnimeMetadata(tracker);
tracker.handleMediaChange('/tmp/Little Witch Academia S02E06.mkv', 'Episode 6');
await waitForPendingAnimeMetadata(tracker);
const privateApi = tracker as unknown as {
db: DatabaseSync;
};
const rows = privateApi.db
.prepare(
`
SELECT
v.source_path,
v.anime_id,
v.parsed_episode,
a.canonical_title AS anime_title,
a.anilist_id
FROM imm_videos v
LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.source_path IN (?, ?)
ORDER BY v.source_path
`,
)
.all('/tmp/Little Witch Academia S02E05.mkv', '/tmp/Little Witch Academia S02E06.mkv') as
Array<{
source_path: string | null;
anime_id: number | null;
parsed_episode: number | null;
anime_title: string | null;
anilist_id: number | null;
}>;
assert.equal(rows.length, 2);
assert.ok(rows[0]?.anime_id);
assert.equal(rows[0]?.anime_id, rows[1]?.anime_id);
assert.deepEqual(
rows.map((row) => ({
sourcePath: row.source_path,
parsedEpisode: row.parsed_episode,
animeTitle: row.anime_title,
anilistId: row.anilist_id,
})),
[
{
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
parsedEpisode: 5,
animeTitle: 'Little Witch Academia',
anilistId: null,
},
{
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
parsedEpisode: 6,
animeTitle: 'Little Witch Academia',
anilistId: null,
},
],
);
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
test('applies configurable queue, flush, and retention policy', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;

View File

@@ -1,7 +1,8 @@
import path from 'node:path';
import * as fs from 'node:fs';
import { createLogger } from '../../logger';
import { getLocalVideoMetadata } from './immersion-tracker/metadata';
import type { CoverArtFetcher } from './anilist/cover-art-fetcher';
import { getLocalVideoMetadata, guessAnimeVideoMetadata } from './immersion-tracker/metadata';
import { pruneRetention, runRollupMaintenance } from './immersion-tracker/maintenance';
import { Database, type DatabaseSync } from './immersion-tracker/sqlite';
import { finalizeSessionRecord, startSessionRecord } from './immersion-tracker/session';
@@ -10,23 +11,58 @@ import {
createTrackerPreparedStatements,
ensureSchema,
executeQueuedWrite,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
type TrackerPreparedStatements,
updateVideoMetadataRecord,
updateVideoTitleRecord,
} from './immersion-tracker/storage';
import {
cleanupVocabularyStats,
getAnimeCoverArt,
getAnimeDailyRollups,
getAnimeAnilistEntries,
getAnimeDetail,
getAnimeEpisodes,
getAnimeLibrary,
getAnimeWords,
getEpisodeCardEvents,
getEpisodeSessions,
getEpisodeWords,
getCoverArt,
getDailyRollups,
getEpisodesPerDay,
getKanjiAnimeAppearances,
getKanjiDetail,
getKanjiWords,
getNewAnimePerDay,
getSimilarWords,
getStreakCalendar,
getKanjiOccurrences,
getKanjiStats,
getMediaDailyRollups,
getMediaDetail,
getMediaLibrary,
getMediaSessions,
getMonthlyRollups,
getQueryHints,
getSessionEvents,
getSessionSummaries,
getSessionTimeline,
getVocabularyStats,
getWatchTimePerAnime,
getWordAnimeAppearances,
getWordDetail,
getWordOccurrences,
getVideoDurationMs,
markVideoWatched,
} from './immersion-tracker/query';
import {
buildVideoKey,
calculateTextMetrics,
extractLineVocabulary,
deriveCanonicalTitle,
isKanji,
isRemoteSource,
normalizeMediaPath,
normalizeText,
@@ -57,19 +93,73 @@ import {
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
type ImmersionSessionRollupRow,
type EpisodeCardEventRow,
type EpisodesPerDayRow,
type ImmersionTrackerOptions,
type KanjiAnimeAppearanceRow,
type KanjiDetailRow,
type KanjiOccurrenceRow,
type KanjiStatsRow,
type KanjiWordRow,
type LegacyVocabularyPosResolution,
type LegacyVocabularyPosRow,
type AnimeAnilistEntryRow,
type AnimeDetailRow,
type AnimeEpisodeRow,
type AnimeLibraryRow,
type AnimeWordRow,
type MediaArtRow,
type MediaDetailRow,
type MediaLibraryRow,
type NewAnimePerDayRow,
type QueuedWrite,
type SessionEventRow,
type SessionState,
type SessionSummaryQueryRow,
type SessionTimelineRow,
type SimilarWordRow,
type StreakCalendarRow,
type VocabularyCleanupSummary,
type WatchTimePerAnimeRow,
type WordAnimeAppearanceRow,
type WordDetailRow,
type WordOccurrenceRow,
type VocabularyStatsRow,
} from './immersion-tracker/types';
import type { MergedToken } from '../../types';
import { shouldExcludeTokenFromVocabularyPersistence } from './tokenizer/annotation-stage';
import { deriveStoredPartOfSpeech } from './tokenizer/part-of-speech';
export type {
AnimeAnilistEntryRow,
AnimeDetailRow,
AnimeEpisodeRow,
AnimeLibraryRow,
AnimeWordRow,
EpisodeCardEventRow,
EpisodesPerDayRow,
ImmersionSessionRollupRow,
ImmersionTrackerOptions,
ImmersionTrackerPolicy,
KanjiAnimeAppearanceRow,
KanjiDetailRow,
KanjiOccurrenceRow,
KanjiStatsRow,
KanjiWordRow,
MediaArtRow,
MediaDetailRow,
MediaLibraryRow,
NewAnimePerDayRow,
SessionEventRow,
SessionSummaryQueryRow,
SessionTimelineRow,
SimilarWordRow,
StreakCalendarRow,
WatchTimePerAnimeRow,
WordAnimeAppearanceRow,
WordDetailRow,
WordOccurrenceRow,
VocabularyStatsRow,
} from './immersion-tracker/types';
export class ImmersionTrackerService {
@@ -98,9 +188,17 @@ export class ImmersionTrackerService {
private currentVideoKey = '';
private currentMediaPathOrUrl = '';
private readonly preparedStatements: TrackerPreparedStatements;
private coverArtFetcher: CoverArtFetcher | null = null;
private readonly pendingCoverFetches = new Map<number, Promise<boolean>>();
private readonly recordedSubtitleKeys = new Set<string>();
private readonly pendingAnimeMetadataUpdates = new Map<number, Promise<void>>();
private readonly resolveLegacyVocabularyPos:
| ((row: LegacyVocabularyPosRow) => Promise<LegacyVocabularyPosResolution | null>)
| undefined;
constructor(options: ImmersionTrackerOptions) {
this.dbPath = options.dbPath;
this.resolveLegacyVocabularyPos = options.resolveLegacyVocabularyPos;
const parentDir = path.dirname(this.dbPath);
if (!fs.existsSync(parentDir)) {
fs.mkdirSync(parentDir, { recursive: true });
@@ -198,6 +296,8 @@ export class ImmersionTrackerService {
/**
 * Returns summary counters — total/active session counts plus today's episode
 * count and the active-anime count — by delegating to the query module
 * against this service's database handle.
 */
async getQueryHints(): Promise<{
totalSessions: number;
activeSessions: number;
episodesToday: number;
activeAnimeCount: number;
}> {
return getQueryHints(this.db);
}
@@ -210,6 +310,180 @@ export class ImmersionTrackerService {
return getMonthlyRollups(this.db, limit);
}
/** Per-word frequency rows, optionally filtering out the given parts of speech. */
async getVocabularyStats(limit = 100, excludePos?: string[]): Promise<VocabularyStatsRow[]> {
return getVocabularyStats(this.db, limit, excludePos);
}
/**
 * Repairs or removes legacy imm_words rows. POS resolution for legacy rows is
 * delegated to the optional callback supplied via ImmersionTrackerOptions
 * (resolveLegacyVocabularyPos); see cleanupVocabularyStats in the query module.
 */
async cleanupVocabularyStats(): Promise<VocabularyCleanupSummary> {
return cleanupVocabularyStats(this.db, {
resolveLegacyPos: this.resolveLegacyVocabularyPos,
});
}
/** Per-kanji frequency rows. */
async getKanjiStats(limit = 100): Promise<KanjiStatsRow[]> {
return getKanjiStats(this.db, limit);
}
/** Subtitle-line occurrences for one (headword, word, reading) triple, paginated. */
async getWordOccurrences(
headword: string,
word: string,
reading: string,
limit = 100,
offset = 0,
): Promise<WordOccurrenceRow[]> {
return getWordOccurrences(this.db, headword, word, reading, limit, offset);
}
/** Subtitle-line occurrences for a single kanji character, paginated. */
async getKanjiOccurrences(
kanji: string,
limit = 100,
offset = 0,
): Promise<KanjiOccurrenceRow[]> {
return getKanjiOccurrences(this.db, kanji, limit, offset);
}
// Read-only query delegates: each method forwards to the corresponding helper
// in the query module with this service's database handle. Declared async for
// a uniform caller-facing API even though the underlying SQLite calls are
// synchronous.
/** Events for one session (subtitle lines, card mines, …), capped at `limit`. */
async getSessionEvents(sessionId: number, limit = 500): Promise<SessionEventRow[]> {
return getSessionEvents(this.db, sessionId, limit);
}
/** All known videos with aggregate stats. */
async getMediaLibrary(): Promise<MediaLibraryRow[]> {
return getMediaLibrary(this.db);
}
/** Detail row for one video, or null when unknown. */
async getMediaDetail(videoId: number): Promise<MediaDetailRow | null> {
return getMediaDetail(this.db, videoId);
}
async getMediaSessions(videoId: number, limit = 100): Promise<SessionSummaryQueryRow[]> {
return getMediaSessions(this.db, videoId, limit);
}
async getMediaDailyRollups(videoId: number, limit = 90): Promise<ImmersionSessionRollupRow[]> {
return getMediaDailyRollups(this.db, videoId, limit);
}
/** Stored cover art for a video; see ensureCoverArt for on-demand fetching. */
async getCoverArt(videoId: number): Promise<MediaArtRow | null> {
return getCoverArt(this.db, videoId);
}
/** All known anime with aggregate stats (grouped across their episodes). */
async getAnimeLibrary(): Promise<AnimeLibraryRow[]> {
return getAnimeLibrary(this.db);
}
async getAnimeDetail(animeId: number): Promise<AnimeDetailRow | null> {
return getAnimeDetail(this.db, animeId);
}
async getAnimeEpisodes(animeId: number): Promise<AnimeEpisodeRow[]> {
return getAnimeEpisodes(this.db, animeId);
}
/** AniList link rows recorded for an anime. */
async getAnimeAnilistEntries(animeId: number): Promise<AnimeAnilistEntryRow[]> {
return getAnimeAnilistEntries(this.db, animeId);
}
async getAnimeCoverArt(animeId: number): Promise<MediaArtRow | null> {
return getAnimeCoverArt(this.db, animeId);
}
/** Top words seen across an anime's episodes. */
async getAnimeWords(animeId: number, limit = 50): Promise<AnimeWordRow[]> {
return getAnimeWords(this.db, animeId, limit);
}
/** Top words seen in one episode (video). */
async getEpisodeWords(videoId: number, limit = 50): Promise<AnimeWordRow[]> {
return getEpisodeWords(this.db, videoId, limit);
}
async getEpisodeSessions(videoId: number): Promise<SessionSummaryQueryRow[]> {
return getEpisodeSessions(this.db, videoId);
}
/**
 * Manually sets a video's watched flag. Note: this is also set automatically
 * when playback passes ~98% of the known duration (see recordPlaybackPosition).
 */
async setVideoWatched(videoId: number, watched: boolean): Promise<void> {
markVideoWatched(this.db, videoId, watched);
}
/** Card-mined events recorded against one episode (video). */
async getEpisodeCardEvents(videoId: number): Promise<EpisodeCardEventRow[]> {
return getEpisodeCardEvents(this.db, videoId);
}
async getAnimeDailyRollups(animeId: number, limit = 90): Promise<ImmersionSessionRollupRow[]> {
return getAnimeDailyRollups(this.db, animeId, limit);
}
// Trend queries: per-day aggregates over the most recent `days`/`limit` days.
async getStreakCalendar(days = 90): Promise<StreakCalendarRow[]> {
return getStreakCalendar(this.db, days);
}
async getEpisodesPerDay(limit = 90): Promise<EpisodesPerDayRow[]> {
return getEpisodesPerDay(this.db, limit);
}
async getNewAnimePerDay(limit = 90): Promise<NewAnimePerDayRow[]> {
return getNewAnimePerDay(this.db, limit);
}
async getWatchTimePerAnime(limit = 90): Promise<WatchTimePerAnimeRow[]> {
return getWatchTimePerAnime(this.db, limit);
}
// Word/kanji drill-down queries keyed by imm_words / imm_kanji row ids.
async getWordDetail(wordId: number): Promise<WordDetailRow | null> {
return getWordDetail(this.db, wordId);
}
async getWordAnimeAppearances(wordId: number): Promise<WordAnimeAppearanceRow[]> {
return getWordAnimeAppearances(this.db, wordId);
}
async getSimilarWords(wordId: number, limit = 10): Promise<SimilarWordRow[]> {
return getSimilarWords(this.db, wordId, limit);
}
async getKanjiDetail(kanjiId: number): Promise<KanjiDetailRow | null> {
return getKanjiDetail(this.db, kanjiId);
}
async getKanjiAnimeAppearances(kanjiId: number): Promise<KanjiAnimeAppearanceRow[]> {
return getKanjiAnimeAppearances(this.db, kanjiId);
}
/** Words that contain the given kanji. */
async getKanjiWords(kanjiId: number, limit = 20): Promise<KanjiWordRow[]> {
return getKanjiWords(this.db, kanjiId, limit);
}
/** Installs (or clears, with null) the cover-art fetcher used by ensureCoverArt. */
setCoverArtFetcher(fetcher: CoverArtFetcher | null): void {
this.coverArtFetcher = fetcher;
}
/**
 * Ensures cover art exists for a video, fetching it on demand.
 *
 * Returns true when art is already stored or a fetch succeeds, false when no
 * fetcher is installed, the video has no canonical title to search by, or the
 * fetch reports failure. Concurrent calls for the same video are de-duplicated
 * through `pendingCoverFetches`.
 */
async ensureCoverArt(videoId: number): Promise<boolean> {
  const cached = getCoverArt(this.db, videoId);
  if (cached?.coverBlob) {
    return true;
  }
  // Capture the fetcher once. The previous code re-read `this.coverArtFetcher!`
  // inside the async closure; if setCoverArtFetcher(null) ran while the fetch
  // was pending, the non-null assertion would mask a TypeError at runtime.
  const fetcher = this.coverArtFetcher;
  if (!fetcher) {
    return false;
  }
  const inFlight = this.pendingCoverFetches.get(videoId);
  if (inFlight) {
    return await inFlight;
  }
  const fetchPromise = (async (): Promise<boolean> => {
    // Only attempt a fetch when we have a non-empty canonical title to search by.
    const canonicalTitle = getMediaDetail(this.db, videoId)?.canonicalTitle?.trim();
    if (!canonicalTitle) {
      return false;
    }
    return await fetcher.fetchIfMissing(this.db, videoId, canonicalTitle);
  })();
  this.pendingCoverFetches.set(videoId, fetchPromise);
  try {
    return await fetchPromise;
  } finally {
    this.pendingCoverFetches.delete(videoId);
  }
}
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
const normalizedPath = normalizeMediaPath(mediaPath);
const normalizedTitle = normalizeText(mediaTitle);
@@ -254,6 +528,7 @@ export class ImmersionTrackerService {
`Starting immersion session for path=${normalizedPath} videoId=${sessionInfo.videoId}`,
);
this.startSession(sessionInfo.videoId, sessionInfo.startedAtMs);
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
this.captureVideoMetadataAsync(sessionInfo.videoId, sourceType, normalizedPath);
}
@@ -265,40 +540,110 @@ export class ImmersionTrackerService {
this.updateVideoTitleForActiveSession(normalizedTitle);
}
recordSubtitleLine(text: string, startSec: number, endSec: number): void {
recordSubtitleLine(
text: string,
startSec: number,
endSec: number,
tokens?: MergedToken[] | null,
): void {
if (!this.sessionState || !text.trim()) return;
const cleaned = normalizeText(text);
if (!cleaned) return;
if (!endSec || endSec <= 0) {
return;
}
const startMs = secToMs(startSec);
const subtitleKey = `${startMs}:${cleaned}`;
if (this.recordedSubtitleKeys.has(subtitleKey)) {
return;
}
this.recordedSubtitleKeys.add(subtitleKey);
const nowMs = Date.now();
const nowSec = nowMs / 1000;
const metrics = calculateTextMetrics(cleaned);
const extractedVocabulary = extractLineVocabulary(cleaned);
this.sessionState.currentLineIndex += 1;
this.sessionState.linesSeen += 1;
this.sessionState.wordsSeen += metrics.words;
this.sessionState.tokensSeen += metrics.tokens;
this.sessionState.pendingTelemetry = true;
for (const { headword, word, reading } of extractedVocabulary.words) {
this.recordWrite({
kind: 'word',
const wordOccurrences = new Map<
string,
{
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
occurrenceCount: number;
}
>();
for (const token of tokens ?? []) {
if (shouldExcludeTokenFromVocabularyPersistence(token)) {
continue;
}
const headword = normalizeText(token.headword || token.surface);
const word = normalizeText(token.surface || token.headword);
const reading = normalizeText(token.reading);
if (!headword || !word) {
continue;
}
const wordKey = [
headword,
word,
reading,
firstSeen: nowSec,
lastSeen: nowSec,
].join('\u0000');
const storedPartOfSpeech = deriveStoredPartOfSpeech({
partOfSpeech: token.partOfSpeech,
pos1: token.pos1 ?? '',
});
const existing = wordOccurrences.get(wordKey);
if (existing) {
existing.occurrenceCount += 1;
continue;
}
wordOccurrences.set(wordKey, {
headword,
word,
reading,
partOfSpeech: storedPartOfSpeech,
pos1: token.pos1 ?? '',
pos2: token.pos2 ?? '',
pos3: token.pos3 ?? '',
occurrenceCount: 1,
});
}
for (const kanji of extractedVocabulary.kanji) {
const kanjiCounts = new Map<string, number>();
for (const char of cleaned) {
if (!isKanji(char)) {
continue;
}
kanjiCounts.set(char, (kanjiCounts.get(char) ?? 0) + 1);
}
this.recordWrite({
kind: 'kanji',
kind: 'subtitleLine',
sessionId: this.sessionState.sessionId,
videoId: this.sessionState.videoId,
lineIndex: this.sessionState.currentLineIndex,
segmentStartMs: secToMs(startSec),
segmentEndMs: secToMs(endSec),
text: cleaned,
wordOccurrences: Array.from(wordOccurrences.values()),
kanjiOccurrences: Array.from(kanjiCounts.entries()).map(([kanji, occurrenceCount]) => ({
kanji,
occurrenceCount,
})),
firstSeen: nowSec,
lastSeen: nowSec,
});
}
this.recordWrite({
kind: 'event',
@@ -321,6 +666,16 @@ export class ImmersionTrackerService {
});
}
/**
 * Persists the player-reported media duration (in seconds) for the active
 * session's video. No-ops when there is no active session or the duration is
 * non-finite / non-positive, and skips the UPDATE when the stored value is
 * already within one second of the incoming one.
 */
recordMediaDuration(durationSec: number): void {
  if (!this.sessionState) return;
  if (!Number.isFinite(durationSec) || durationSec <= 0) return;
  const { videoId } = this.sessionState;
  const nextDurationMs = Math.round(durationSec * 1000);
  const storedDurationMs = getVideoDurationMs(this.db, videoId);
  // Treat a stored zero as "unknown"; otherwise tolerate sub-second jitter.
  const closeEnough =
    storedDurationMs !== 0 && Math.abs(storedDurationMs - nextDurationMs) <= 1000;
  if (closeEnough) {
    return;
  }
  this.db
    .prepare('UPDATE imm_videos SET duration_ms = ?, LAST_UPDATE_DATE = ? WHERE video_id = ?')
    .run(nextDurationMs, Date.now(), videoId);
}
recordPlaybackPosition(mediaTimeSec: number | null): void {
if (!this.sessionState || mediaTimeSec === null || !Number.isFinite(mediaTimeSec)) {
return;
@@ -391,6 +746,14 @@ export class ImmersionTrackerService {
this.sessionState.lastWallClockMs = nowMs;
this.sessionState.lastMediaMs = mediaMs;
this.sessionState.pendingTelemetry = true;
if (!this.sessionState.markedWatched) {
const durationMs = getVideoDurationMs(this.db, this.sessionState.videoId);
if (durationMs > 0 && mediaMs >= durationMs * 0.98) {
markVideoWatched(this.db, this.sessionState.videoId, true);
this.sessionState.markedWatched = true;
}
}
}
recordPauseState(isPaused: boolean): void {
@@ -454,7 +817,7 @@ export class ImmersionTrackerService {
});
}
recordCardsMined(count = 1): void {
recordCardsMined(count = 1, noteIds?: number[]): void {
if (!this.sessionState) return;
this.sessionState.cardsMined += count;
this.sessionState.pendingTelemetry = true;
@@ -465,7 +828,10 @@ export class ImmersionTrackerService {
eventType: EVENT_CARD_MINED,
wordsDelta: 0,
cardsDelta: count,
payloadJson: sanitizePayload({ cardsMined: count }, this.maxPayloadBytes),
payloadJson: sanitizePayload(
{ cardsMined: count, ...(noteIds?.length ? { noteIds } : {}) },
this.maxPayloadBytes,
),
});
}
@@ -615,6 +981,7 @@ export class ImmersionTrackerService {
private startSession(videoId: number, startedAtMs?: number): void {
const { sessionId, state } = startSessionRecord(this.db, videoId, startedAtMs);
this.sessionState = state;
this.recordedSubtitleKeys.clear();
this.recordWrite({
kind: 'telemetry',
sessionId,
@@ -673,6 +1040,48 @@ export class ImmersionTrackerService {
})();
}
/**
 * Fire-and-forget: parses anime metadata (title/season/episode) for a newly
 * started video and links the video to a (possibly new) imm_anime record.
 * Failures are logged as warnings and never surfaced to the caller. The
 * in-flight promise is tracked in `pendingAnimeMetadataUpdates` keyed by
 * videoId and removed when it settles.
 */
private captureAnimeMetadataAsync(
  videoId: number,
  mediaPath: string | null,
  mediaTitle: string | null,
): void {
  const work = (async (): Promise<void> => {
    try {
      const guess = await guessAnimeVideoMetadata(mediaPath, mediaTitle);
      // Bail out if the service was torn down while parsing, or if the
      // parser produced no usable title.
      if (this.isDestroyed) return;
      if (!guess?.parsedTitle.trim()) return;
      const animeId = getOrCreateAnimeRecord(this.db, {
        parsedTitle: guess.parsedTitle,
        canonicalTitle: guess.parsedTitle,
        anilistId: null,
        titleRomaji: null,
        titleEnglish: null,
        titleNative: null,
        metadataJson: guess.parseMetadataJson,
      });
      linkVideoToAnimeRecord(this.db, videoId, {
        animeId,
        parsedBasename: guess.parsedBasename,
        parsedTitle: guess.parsedTitle,
        parsedSeason: guess.parsedSeason,
        parsedEpisode: guess.parsedEpisode,
        parserSource: guess.parserSource,
        parserConfidence: guess.parserConfidence,
        parseMetadataJson: guess.parseMetadataJson,
      });
    } catch (error) {
      this.logger.warn('Unable to capture anime metadata', (error as Error).message);
    }
  })();
  this.pendingAnimeMetadataUpdates.set(videoId, work);
  void work.finally(() => {
    this.pendingAnimeMetadataUpdates.delete(videoId);
  });
}
private updateVideoTitleForActiveSession(canonicalTitle: string): void {
if (!this.sessionState) return;
updateVideoTitleRecord(this.db, this.sessionState.videoId, canonicalTitle);

View File

@@ -0,0 +1,976 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { Database } from '../sqlite.js';
import {
createTrackerPreparedStatements,
ensureSchema,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
} from '../storage.js';
import { startSessionRecord } from '../session.js';
import {
cleanupVocabularyStats,
getAnimeDetail,
getAnimeEpisodes,
getAnimeLibrary,
getKanjiOccurrences,
getSessionSummaries,
getVocabularyStats,
getKanjiStats,
getSessionEvents,
getWordOccurrences,
} from '../query.js';
import { SOURCE_TYPE_LOCAL, EVENT_SUBTITLE_LINE } from '../types.js';
// Returns the path of a database file inside a freshly created temp directory,
// so every test operates on an isolated SQLite file.
function makeDbPath(): string {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-imm-query-test-'));
  return path.join(tempDir, 'immersion.sqlite');
}
// Removes the temp directory that holds a test database. On Windows the
// sqlite file can still be held open (EBUSY) when the handle has not been
// finalized yet, so retry up to three times, nudging Bun's GC and blocking
// ~25ms between attempts via Atomics.wait (permitted on Node's main thread).
function cleanupDbPath(dbPath: string): void {
  const dir = path.dirname(dbPath);
  if (!fs.existsSync(dir)) {
    return;
  }
  const bunRuntime = globalThis as typeof globalThis & {
    Bun?: {
      gc?: (force?: boolean) => void;
    };
  };
  const maxAttempts = 3;
  let lastError: NodeJS.ErrnoException | null = null;
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
      return;
    } catch (error) {
      const err = error as NodeJS.ErrnoException;
      lastError = err;
      const retryable = process.platform === 'win32' && err.code === 'EBUSY';
      if (!retryable) {
        throw error;
      }
      bunRuntime.Bun?.gc?.(true);
      Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 25);
    }
  }
  if (lastError) {
    throw lastError;
  }
}
// Seeds one video + session + a single telemetry row, then verifies that
// getSessionSummaries surfaces the session id and the video's canonical title.
test('getSessionSummaries returns sessionId and canonicalTitle', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/query-test.mkv', {
canonicalTitle: 'Query Test Episode',
sourcePath: '/tmp/query-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 1_000_000;
const { sessionId } = startSessionRecord(db, videoId, startedAtMs);
// Positional telemetry columns; the 5th value (5) is linesSeen, asserted
// below. NOTE(review): remaining column meanings assumed to match
// telemetryInsertStmt's declaration — confirm against storage.ts.
stmts.telemetryInsertStmt.run(
sessionId,
startedAtMs + 1_000,
3_000,
2_500,
5,
10,
10,
1,
2,
1,
0,
0,
0,
0,
0,
startedAtMs + 1_000,
startedAtMs + 1_000,
);
const rows = getSessionSummaries(db, 10);
assert.ok(rows.length >= 1);
const row = rows.find((r) => r.sessionId === sessionId);
assert.ok(row, 'expected to find a row for the created session');
assert.equal(typeof row.sessionId, 'number');
assert.equal(row.sessionId, sessionId);
assert.equal(row.canonicalTitle, 'Query Test Episode');
assert.equal(row.videoId, videoId);
assert.ok(row.linesSeen >= 5);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// A session that never produced telemetry must still be listed, with all
// aggregate counters reported as zero.
test('getSessionSummaries with no telemetry returns zero aggregates', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/no-telemetry.mkv', {
      canonicalTitle: 'No Telemetry',
      sourcePath: '/tmp/no-telemetry.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const { sessionId } = startSessionRecord(db, videoId, 3_000_000);
    const summary = getSessionSummaries(db, 10).find((row) => row.sessionId === sessionId);
    assert.ok(summary, 'expected to find the session with no telemetry');
    assert.equal(summary.canonicalTitle, 'No Telemetry');
    assert.equal(summary.totalWatchedMs, 0);
    assert.equal(summary.linesSeen, 0);
    assert.equal(summary.cardsMined, 0);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Upserting 猫 twice and 犬 once should give them frequencies 2 and 1, and
// getVocabularyStats must order the higher-frequency word first.
test('getVocabularyStats returns rows ordered by frequency descending', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const stmts = createTrackerPreparedStatements(db);
    stmts.wordUpsertStmt.run('猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 2_000);
    stmts.wordUpsertStmt.run('猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 3_000);
    stmts.wordUpsertStmt.run('犬', '犬', 'いぬ', 'noun', '名詞', '一般', '', 1_500, 1_500);
    const rows = getVocabularyStats(db, 10);
    assert.ok(rows.length >= 2);
    const catRow = rows.find((row) => row.headword === '猫');
    const dogRow = rows.find((row) => row.headword === '犬');
    assert.ok(catRow, 'expected 猫 row');
    assert.ok(dogRow, 'expected 犬 row');
    assert.equal(catRow.headword, '猫');
    assert.equal(catRow.word, '猫');
    assert.equal(catRow.reading, 'ねこ');
    assert.equal(catRow.frequency, 2);
    assert.equal(typeof catRow.firstSeen, 'number');
    assert.equal(typeof catRow.lastSeen, 'number');
    assert.ok(
      rows.indexOf(catRow) < rows.indexOf(dogRow),
      'higher frequency word should appear first',
    );
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// With a fresh schema and no word rows, the query yields an empty list.
test('getVocabularyStats returns empty array when no words exist', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    assert.deepEqual(getVocabularyStats(db, 10), []);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
test('cleanupVocabularyStats repairs stored POS metadata and removes excluded imm_words rows', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
db.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run('猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 1_500, 3);
db.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run('知っている', '知っている', '', 'other', '動詞', '自立', '', 1_025, 1_525, 4);
db.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run('は', 'は', 'は', 'particle', '助詞', '係助詞', '', 1_100, 1_600, 9);
db.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run('旧', '旧', '', '', '', '', '', 900, 950, 1);
db.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
).run('未解決', '未解決', '', '', '', '', '', 901, 951, 1);
const result = await cleanupVocabularyStats(db, {
resolveLegacyPos: async (row) => {
if (row.headword === '旧') {
return {
partOfSpeech: 'noun',
headword: '旧',
reading: 'きゅう',
pos1: '名詞',
pos2: '一般',
pos3: '',
};
}
if (row.headword === '知っている') {
return {
partOfSpeech: 'verb',
headword: '知る',
reading: 'しっている',
pos1: '動詞',
pos2: '自立',
pos3: '',
};
}
return null;
},
});
const rows = getVocabularyStats(db, 10);
const repairedRows = db
.prepare(
`SELECT headword, word, reading, part_of_speech, pos1, pos2
FROM imm_words
ORDER BY headword ASC, word ASC`,
)
.all() as Array<{
headword: string;
word: string;
reading: string;
part_of_speech: string;
pos1: string;
pos2: string;
}>;
assert.deepEqual(result, { scanned: 5, kept: 3, deleted: 2, repaired: 2 });
assert.deepEqual(
rows.map((row) => ({ headword: row.headword, frequency: row.frequency })),
[
{ headword: '知る', frequency: 4 },
{ headword: '猫', frequency: 3 },
{ headword: '旧', frequency: 1 },
],
);
assert.deepEqual(
repairedRows,
[
{
headword: '旧',
word: '旧',
reading: 'きゅう',
part_of_speech: 'noun',
pos1: '名詞',
pos2: '一般',
},
{
headword: '猫',
word: '猫',
reading: 'ねこ',
part_of_speech: 'noun',
pos1: '名詞',
pos2: '一般',
},
{
headword: '知る',
word: '知っている',
reading: 'しっている',
part_of_speech: 'verb',
pos1: '動詞',
pos2: '自立',
},
],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('cleanupVocabularyStats merges repaired duplicates instead of violating the imm_words unique key', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cleanup-merge.mkv', {
canonicalTitle: 'Cleanup Merge',
sourcePath: '/tmp/cleanup-merge.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const { sessionId } = startSessionRecord(db, videoId, 2_000_000);
const duplicateResult = db
.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run('知る', '知っている', 'しっている', 'verb', '動詞', '自立', '', 2_000, 2_500, 3);
const legacyResult = db
.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run('知っている', '知っている', '', 'other', '動詞', '自立', '', 1_000, 3_000, 4);
const lineResult = db
.prepare(
`INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run(sessionId, null, videoId, null, 1, 0, 1000, '知っている', 2_000, 2_000);
const lineId = Number(lineResult.lastInsertRowid);
const duplicateId = Number(duplicateResult.lastInsertRowid);
const legacyId = Number(legacyResult.lastInsertRowid);
db.prepare(
`INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
VALUES (?, ?, ?)`,
).run(lineId, duplicateId, 2);
db.prepare(
`INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
VALUES (?, ?, ?)`,
).run(lineId, legacyId, 1);
const result = await cleanupVocabularyStats(db, {
resolveLegacyPos: async (row) => {
if (row.id !== legacyId) {
return null;
}
return {
partOfSpeech: 'verb',
headword: '知る',
reading: 'しっている',
pos1: '動詞',
pos2: '自立',
pos3: '',
};
},
});
const rows = db
.prepare(
`SELECT id, headword, word, reading, frequency, first_seen, last_seen
FROM imm_words
ORDER BY id ASC`,
)
.all() as Array<{
id: number;
headword: string;
word: string;
reading: string;
frequency: number;
first_seen: number;
last_seen: number;
}>;
const occurrences = getWordOccurrences(db, '知る', '知っている', 'しっている', 10);
assert.deepEqual(result, { scanned: 2, kept: 1, deleted: 1, repaired: 1 });
assert.deepEqual(rows, [
{
id: duplicateId,
headword: '知る',
word: '知っている',
reading: 'しっている',
frequency: 7,
first_seen: 1_000,
last_seen: 3_000,
},
]);
assert.deepEqual(occurrences, [
{
animeId: null,
animeTitle: null,
videoId,
videoTitle: 'Cleanup Merge',
sessionId,
lineIndex: 1,
segmentStartMs: 0,
segmentEndMs: 1000,
text: '知っている',
occurrenceCount: 3,
},
]);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Upserting 日 twice and 月 once should give them frequencies 2 and 1, and
// getKanjiStats must order the higher-frequency kanji first.
test('getKanjiStats returns rows ordered by frequency descending', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const stmts = createTrackerPreparedStatements(db);
    stmts.kanjiUpsertStmt.run('日', 1_000, 2_000);
    stmts.kanjiUpsertStmt.run('日', 1_000, 3_000);
    stmts.kanjiUpsertStmt.run('月', 1_500, 1_500);
    const rows = getKanjiStats(db, 10);
    assert.ok(rows.length >= 2);
    const sunRow = rows.find((row) => row.kanji === '日');
    const moonRow = rows.find((row) => row.kanji === '月');
    assert.ok(sunRow, 'expected 日 row');
    assert.ok(moonRow, 'expected 月 row');
    assert.equal(sunRow.kanji, '日');
    assert.equal(sunRow.frequency, 2);
    assert.equal(typeof sunRow.firstSeen, 'number');
    assert.equal(typeof sunRow.lastSeen, 'number');
    assert.ok(
      rows.indexOf(sunRow) < rows.indexOf(moonRow),
      'higher frequency kanji should appear first',
    );
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// With a fresh schema and no kanji rows, the query yields an empty list.
test('getKanjiStats returns empty array when no kanji exist', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    assert.deepEqual(getKanjiStats(db, 10), []);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Inserts two events in REVERSE timestamp order and verifies getSessionEvents
// returns them sorted ascending by ts_ms — proving the ordering comes from the
// query, not from insertion order.
test('getSessionEvents returns events ordered by ts_ms ascending', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/events-test.mkv', {
canonicalTitle: 'Events Test',
sourcePath: '/tmp/events-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 5_000_000;
const { sessionId } = startSessionRecord(db, videoId, startedAtMs);
// Insert two events at different timestamps
// (positional event columns; NOTE(review): order assumed to match
// eventInsertStmt's declaration — confirm against storage.ts)
stmts.eventInsertStmt.run(
sessionId,
startedAtMs + 2_000,
EVENT_SUBTITLE_LINE,
1,
0,
800,
2,
0,
'{"line":"second"}',
startedAtMs + 2_000,
startedAtMs + 2_000,
);
stmts.eventInsertStmt.run(
sessionId,
startedAtMs + 1_000,
EVENT_SUBTITLE_LINE,
0,
0,
600,
3,
0,
'{"line":"first"}',
startedAtMs + 1_000,
startedAtMs + 1_000,
);
const events = getSessionEvents(db, sessionId, 50);
assert.equal(events.length, 2);
// Should be ordered ASC by ts_ms
assert.equal(events[0]!.tsMs, startedAtMs + 1_000);
assert.equal(events[1]!.tsMs, startedAtMs + 2_000);
assert.equal(events[0]!.eventType, EVENT_SUBTITLE_LINE);
assert.equal(events[0]!.payload, '{"line":"first"}');
assert.equal(events[1]!.payload, '{"line":"second"}');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Querying a session id that has no events (or does not exist) returns [].
test('getSessionEvents returns empty array for session with no events', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    assert.deepEqual(getSessionEvents(db, 9999, 50), []);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Inserts five events and verifies that a limit of 3 caps the result set.
test('getSessionEvents respects limit parameter', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const stmts = createTrackerPreparedStatements(db);
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/events-limit.mkv', {
      canonicalTitle: 'Events Limit Test',
      sourcePath: '/tmp/events-limit.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const baseTsMs = 7_000_000;
    const { sessionId } = startSessionRecord(db, videoId, baseTsMs);
    // Five subtitle-line events spaced one second apart.
    for (const offset of [0, 1, 2, 3, 4]) {
      const tsMs = baseTsMs + offset * 1_000;
      stmts.eventInsertStmt.run(
        sessionId,
        tsMs,
        EVENT_SUBTITLE_LINE,
        offset,
        0,
        500,
        1,
        0,
        null,
        tsMs,
        tsMs,
      );
    }
    assert.equal(getSessionEvents(db, sessionId, 3).length, 3);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
test('anime-level queries group by anime_id and preserve episode-level rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
const lwaAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: 33_435,
titleRomaji: 'Little Witch Academia',
titleEnglish: 'Little Witch Academia',
titleNative: 'リトルウィッチアカデミア',
metadataJson: '{"source":"anilist"}',
});
const frierenAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Frieren',
canonicalTitle: 'Frieren',
anilistId: 52_921,
titleRomaji: 'Sousou no Frieren',
titleEnglish: 'Frieren: Beyond Journey\'s End',
titleNative: '葬送のフリーレン',
metadataJson: '{"source":"anilist"}',
});
const lwaEpisode5 = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
canonicalTitle: 'Episode 5',
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const lwaEpisode6 = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
canonicalTitle: 'Episode 6',
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const frierenEpisode3 = getOrCreateVideoRecord(db, 'local:/tmp/frieren-03.mkv', {
canonicalTitle: 'Episode 3',
sourcePath: '/tmp/[SubsPlease] Frieren - 03 - Departure.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, lwaEpisode5, {
animeId: lwaAnimeId,
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":5}',
});
linkVideoToAnimeRecord(db, lwaEpisode6, {
animeId: lwaAnimeId,
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":6}',
});
linkVideoToAnimeRecord(db, frierenEpisode3, {
animeId: frierenAnimeId,
parsedBasename: '[SubsPlease] Frieren - 03 - Departure.mkv',
parsedTitle: 'Frieren',
parsedSeason: 1,
parsedEpisode: 3,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"episode":3}',
});
const sessionA = startSessionRecord(db, lwaEpisode5, 1_000_000);
const sessionB = startSessionRecord(db, lwaEpisode5, 1_010_000);
const sessionC = startSessionRecord(db, lwaEpisode6, 1_020_000);
const sessionD = startSessionRecord(db, frierenEpisode3, 1_030_000);
stmts.telemetryInsertStmt.run(
sessionA.sessionId,
1_001_000,
4_000,
3_000,
10,
25,
25,
1,
3,
2,
0,
0,
0,
0,
0,
1_001_000,
1_001_000,
);
stmts.telemetryInsertStmt.run(
sessionB.sessionId,
1_011_000,
5_000,
4_000,
11,
27,
27,
2,
4,
2,
0,
0,
0,
0,
0,
1_011_000,
1_011_000,
);
stmts.telemetryInsertStmt.run(
sessionC.sessionId,
1_021_000,
6_000,
5_000,
12,
28,
28,
3,
5,
4,
0,
0,
0,
0,
0,
1_021_000,
1_021_000,
);
stmts.telemetryInsertStmt.run(
sessionD.sessionId,
1_031_000,
4_000,
3_500,
8,
20,
20,
1,
2,
1,
0,
0,
0,
0,
0,
1_031_000,
1_031_000,
);
const animeLibrary = getAnimeLibrary(db);
assert.equal(animeLibrary.length, 2);
assert.deepEqual(
animeLibrary.map((row) => ({
animeId: row.animeId,
canonicalTitle: row.canonicalTitle,
totalSessions: row.totalSessions,
totalActiveMs: row.totalActiveMs,
totalCards: row.totalCards,
episodeCount: row.episodeCount,
})),
[
{
animeId: lwaAnimeId,
canonicalTitle: 'Little Witch Academia',
totalSessions: 3,
totalActiveMs: 12_000,
totalCards: 6,
episodeCount: 2,
},
{
animeId: frierenAnimeId,
canonicalTitle: 'Frieren',
totalSessions: 1,
totalActiveMs: 3_500,
totalCards: 1,
episodeCount: 1,
},
],
);
const animeDetail = getAnimeDetail(db, lwaAnimeId);
assert.ok(animeDetail);
assert.equal(animeDetail?.animeId, lwaAnimeId);
assert.equal(animeDetail?.canonicalTitle, 'Little Witch Academia');
assert.equal(animeDetail?.anilistId, 33_435);
assert.equal(animeDetail?.totalSessions, 3);
assert.equal(animeDetail?.totalActiveMs, 12_000);
assert.equal(animeDetail?.totalCards, 6);
assert.equal(animeDetail?.totalWordsSeen, 80);
assert.equal(animeDetail?.totalLinesSeen, 33);
assert.equal(animeDetail?.totalLookupCount, 12);
assert.equal(animeDetail?.totalLookupHits, 8);
assert.equal(animeDetail?.episodeCount, 2);
const episodes = getAnimeEpisodes(db, lwaAnimeId);
assert.deepEqual(
episodes.map((row) => ({
videoId: row.videoId,
season: row.season,
episode: row.episode,
totalSessions: row.totalSessions,
totalActiveMs: row.totalActiveMs,
totalCards: row.totalCards,
})),
[
{
videoId: lwaEpisode5,
season: 2,
episode: 5,
totalSessions: 2,
totalActiveMs: 7_000,
totalCards: 3,
},
{
videoId: lwaEpisode6,
season: 2,
episode: 6,
totalSessions: 1,
totalActiveMs: 5_000,
totalCards: 3,
},
],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// End-to-end fixture for getWordOccurrences: an anime linked to a video, one
// subtitle line, and an imm_word_line_occurrences row tying the word to the
// line with an explicit per-line count. The query must surface anime, video,
// session, and line context for the (headword, word, reading) triple.
test('getWordOccurrences maps a normalized word back to anime, video, and subtitle line context', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"test"}',
});
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e04.mkv', {
canonicalTitle: 'Episode 4',
sourcePath: '/tmp/Little Witch Academia S02E04.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'Little Witch Academia S02E04.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 4,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":4}',
});
const { sessionId } = startSessionRecord(db, videoId, 1_000_000);
// Raw INSERTs bypass the tracker so the row ids can be linked explicitly.
const wordResult = db
.prepare(
`INSERT INTO imm_words (
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run('猫', '猫', 'ねこ', 'noun', '名詞', '一般', '', 1_000, 1_500, 4);
const lineResult = db
.prepare(
`INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
)
.run(sessionId, null, videoId, animeId, 1, 0, 1000, '猫 猫 日 日 は', 1_000, 1_000);
// The occurrence row declares 猫 appears twice in this line; the query
// must report that count, not the word's global frequency (4).
db.prepare(
`INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count)
VALUES (?, ?, ?)`,
).run(Number(lineResult.lastInsertRowid), Number(wordResult.lastInsertRowid), 2);
const rows = getWordOccurrences(db, '猫', '猫', 'ねこ', 10);
assert.deepEqual(rows, [
{
animeId,
animeTitle: 'Little Witch Academia',
videoId,
videoTitle: 'Episode 4',
sessionId,
lineIndex: 1,
segmentStartMs: 0,
segmentEndMs: 1000,
text: '猫 猫 日 日 は',
occurrenceCount: 2,
},
]);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Mirror of the word-occurrence test for kanji: getKanjiOccurrences must join
// imm_kanji_line_occurrences back to the anime, video, session, and line text.
test('getKanjiOccurrences maps a kanji back to anime, video, and subtitle line context', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Frieren',
      canonicalTitle: 'Frieren',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: '{"source":"test"}',
    });
    const videoId = getOrCreateVideoRecord(db, 'local:/tmp/frieren-03.mkv', {
      canonicalTitle: 'Episode 3',
      sourcePath: '/tmp/[SubsPlease] Frieren - 03 - Departure.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    linkVideoToAnimeRecord(db, videoId, {
      animeId,
      parsedBasename: '[SubsPlease] Frieren - 03 - Departure.mkv',
      parsedTitle: 'Frieren',
      parsedSeason: 1,
      parsedEpisode: 3,
      parserSource: 'fallback',
      parserConfidence: 1,
      parseMetadataJson: '{"episode":3}',
    });
    const { sessionId } = startSessionRecord(db, videoId, 2_000_000);
    // Seed kanji, subtitle line, and occurrence join row directly via SQL.
    const kanjiResult = db
      .prepare(
        `INSERT INTO imm_kanji (
kanji, first_seen, last_seen, frequency
) VALUES (?, ?, ?, ?)`,
      )
      .run('日', 2_000, 2_500, 8);
    const lineResult = db
      .prepare(
        `INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms, segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      )
      .run(sessionId, null, videoId, animeId, 3, 5000, 6500, '今日は日曜', 2_000, 2_000);
    // '日' appears twice in the line text; occurrence_count records that.
    db.prepare(
      `INSERT INTO imm_kanji_line_occurrences (line_id, kanji_id, occurrence_count)
VALUES (?, ?, ?)`,
    ).run(Number(lineResult.lastInsertRowid), Number(kanjiResult.lastInsertRowid), 2);
    const rows = getKanjiOccurrences(db, '日', 10);
    assert.deepEqual(rows, [
      {
        animeId,
        animeTitle: 'Frieren',
        videoId,
        videoTitle: 'Episode 3',
        sessionId,
        lineIndex: 3,
        segmentStartMs: 5000,
        segmentEndMs: 6500,
        text: '今日は日曜',
        occurrenceCount: 2,
      },
    ]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});

View File

@@ -0,0 +1,71 @@
import type { Token } from '../../../types';
import type { LegacyVocabularyPosResolution } from './types';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
// Distance between the katakana and hiragana Unicode blocks (ア U+30A2 → あ U+3042).
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
// Inclusive range ァ (U+30A1) .. ヶ (U+30F6) that maps cleanly onto hiragana;
// deliberately excludes the prolonged sound mark ー (U+30FC).
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
/** Trim a lookup string; null/undefined (or any non-string) collapses to ''. */
function normalizeLookupText(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Convert full-width katakana (ァ U+30A1 .. ヶ U+30F6) to hiragana by shifting
 * each code point down 0x60. Every other character — including the prolonged
 * sound mark ー (U+30FC), which has no hiragana counterpart — passes through
 * unchanged. The range is entirely within the BMP, so charCodeAt is safe.
 */
function katakanaToHiragana(text: string): string {
  return text.replace(/[\u30a1-\u30f6]/gu, (kana) =>
    String.fromCharCode(kana.charCodeAt(0) - 0x60),
  );
}
/**
 * Project a tokenizer Token onto the legacy vocabulary POS shape: headword
 * falls back to the surface form when empty, the reading is folded from
 * katakana to hiragana, and POS fields are trimmed strings.
 */
function toResolution(token: Token): LegacyVocabularyPosResolution {
  const surface = normalizeLookupText(token.word);
  const headword = normalizeLookupText(token.headword);
  return {
    // Prefer the dictionary headword; an empty one falls back to the surface.
    headword: headword !== '' ? headword : surface,
    reading: katakanaToHiragana(normalizeLookupText(token.katakanaReading)),
    partOfSpeech: deriveStoredPartOfSpeech({
      partOfSpeech: token.partOfSpeech,
      pos1: token.pos1,
    }),
    pos1: normalizeLookupText(token.pos1),
    pos2: normalizeLookupText(token.pos2),
    pos3: normalizeLookupText(token.pos3),
  };
}
export function resolveLegacyVocabularyPosFromTokens(
lookupText: string,
tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
const normalizedLookup = normalizeLookupText(lookupText);
if (!normalizedLookup || !tokens || tokens.length === 0) {
return null;
}
const exactSurfaceMatches = tokens.filter(
(token) => normalizeLookupText(token.word) === normalizedLookup,
);
if (exactSurfaceMatches.length === 1) {
return toResolution(exactSurfaceMatches[0]!);
}
const exactHeadwordMatches = tokens.filter(
(token) => normalizeLookupText(token.headword) === normalizedLookup,
);
if (exactHeadwordMatches.length === 1) {
return toResolution(exactHeadwordMatches[0]!);
}
if (tokens.length === 1) {
return toResolution(tokens[0]!);
}
return null;
}

View File

@@ -112,35 +112,46 @@ function upsertDailyRollupsForGroups(
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_words), 0) AS total_words_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN COALESCE(SUM(sm.max_words), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
ELSE NULL
END AS lookup_hit_rate,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.words_seen) AS max_words,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
@@ -176,20 +187,29 @@ function upsertMonthlyRollupsForGroups(
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_words), 0) AS total_words_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.words_seen) AS max_words,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
@@ -216,8 +236,8 @@ function getAffectedRollupGroups(
.prepare(
`
SELECT DISTINCT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id
FROM imm_session_telemetry t
JOIN imm_sessions s

View File

@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
import test from 'node:test';
import type { spawn as spawnFn } from 'node:child_process';
import { SOURCE_TYPE_LOCAL } from './types';
import { getLocalVideoMetadata, runFfprobe } from './metadata';
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
type Spawn = typeof spawnFn;
@@ -146,3 +146,79 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
assert.equal(hashFallbackMetadata.hashSha256, null);
});
// Happy path: when guessit returns a full parse, its title/season/episode are
// used verbatim, confidence is pinned to 1, and guessit receives only the
// basename (never the absolute path).
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
  const seenTargets: string[] = [];
  const parsed = await guessAnimeVideoMetadata('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5', {
    runGuessit: async (target) => {
      seenTargets.push(target);
      return JSON.stringify({
        title: 'Little Witch Academia',
        season: 2,
        episode: 5,
      });
    },
  });
  // guessit must be invoked with the basename, not the full path.
  assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);
  assert.deepEqual(parsed, {
    parsedBasename: 'Little Witch Academia S02E05.mkv',
    parsedTitle: 'Little Witch Academia',
    parsedSeason: 2,
    parsedEpisode: 5,
    parserSource: 'guessit',
    parserConfidence: 1,
    parseMetadataJson: JSON.stringify({
      filename: 'Little Witch Academia S02E05.mkv',
      source: 'guessit',
    }),
  });
});
// When guessit rejects (binary missing, etc.) the regex fallback parser must
// still produce a full parse with parserSource 'fallback' and its own
// confidence mapped to a numeric score.
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
  const parsed = await guessAnimeVideoMetadata('/tmp/Little Witch Academia S02E05.mkv', 'Episode 5', {
    runGuessit: async () => {
      throw new Error('guessit unavailable');
    },
  });
  assert.deepEqual(parsed, {
    parsedBasename: 'Little Witch Academia S02E05.mkv',
    parsedTitle: 'Little Witch Academia',
    parsedSeason: 2,
    parsedEpisode: 5,
    parserSource: 'fallback',
    parserConfidence: 1,
    parseMetadataJson: JSON.stringify({
      confidence: 'high',
      filename: 'Little Witch Academia S02E05.mkv',
      rawTitle: 'Little Witch Academia S02E05',
      source: 'fallback',
    }),
  });
});
// guessit output without a title is treated as unusable: the fallback parser
// takes over entirely. Bracketed-release filenames parse with low confidence
// (score 0.2) and no season/episode extracted.
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
  const parsed = await guessAnimeVideoMetadata(
    '/tmp/[SubsPlease] Frieren - 03 (1080p).mkv',
    null,
    {
      runGuessit: async () => JSON.stringify({ episode: 3 }),
    },
  );
  assert.deepEqual(parsed, {
    parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
    parsedTitle: 'Frieren - 03 (1080p)',
    parsedSeason: null,
    parsedEpisode: null,
    parserSource: 'fallback',
    parserConfidence: 0.2,
    parseMetadataJson: JSON.stringify({
      confidence: 'low',
      filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
      rawTitle: 'Frieren - 03 (1080p)',
      source: 'fallback',
    }),
  });
});

View File

@@ -1,6 +1,13 @@
import crypto from 'node:crypto';
import { spawn as nodeSpawn } from 'node:child_process';
import * as fs from 'node:fs';
import path from 'node:path';
import { parseMediaInfo } from '../../../jimaku/utils';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from '../anilist/anilist-updater';
import {
deriveCanonicalTitle,
emptyMetadata,
@@ -8,7 +15,12 @@ import {
parseFps,
toNullableInt,
} from './reducer';
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
import {
SOURCE_TYPE_LOCAL,
type ParsedAnimeVideoGuess,
type ProbeMetadata,
type VideoMetadata,
} from './types';
type SpawnFn = typeof nodeSpawn;
@@ -24,6 +36,21 @@ interface MetadataDeps {
fs?: FsDeps;
}
interface GuessAnimeVideoMetadataDeps {
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
/**
 * Translate the fallback parser's qualitative confidence into the numeric
 * 0..1 score stored on imm_videos.parser_confidence.
 */
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  if (confidence === 'high') {
    return 1;
  }
  // Anything that is not 'high' or 'medium' scores as low confidence.
  return confidence === 'medium' ? 0.6 : 0.2;
}
export async function computeSha256(
mediaPath: string,
deps: MetadataDeps = {},
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
metadataJson: null,
};
}
/**
 * Derive anime parse metadata (title/season/episode plus parser provenance)
 * for a video. Delegates to guessAnilistMediaInfo, which prefers guessit and
 * falls back to the regex parser; returns null when neither can parse.
 *
 * @param mediaPath  absolute path of the media file, or null when unknown
 * @param mediaTitle display title used as a secondary parse source
 * @param deps       test seam for overriding the guessit runner
 */
export async function guessAnimeVideoMetadata(
  mediaPath: string | null,
  mediaTitle: string | null,
  deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
  const guess = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
    runGuessit: deps.runGuessit ?? runGuessit,
  });
  if (!guess) {
    return null;
  }
  const basename = mediaPath ? path.basename(mediaPath) : null;
  if (guess.source === 'guessit') {
    // guessit parses are trusted outright: confidence pinned to 1.
    return {
      parsedBasename: basename,
      parsedTitle: guess.title,
      parsedSeason: guess.season,
      parsedEpisode: guess.episode,
      parserSource: 'guessit',
      parserConfidence: 1,
      parseMetadataJson: JSON.stringify({
        filename: basename,
        source: 'guessit',
      }),
    };
  }
  // Fallback path: re-run the regex parser to recover its confidence grade
  // and raw-title details for the stored parse metadata.
  const fallback = parseMediaInfo(mediaPath ?? mediaTitle);
  return {
    parsedBasename: basename ?? fallback.filename ?? null,
    parsedTitle: guess.title,
    parsedSeason: guess.season,
    parsedEpisode: guess.episode,
    parserSource: 'fallback',
    parserConfidence: mapParserConfidenceToScore(fallback.confidence),
    parseMetadataJson: JSON.stringify({
      confidence: fallback.confidence,
      filename: fallback.filename,
      rawTitle: fallback.rawTitle,
      source: 'fallback',
    }),
  };
}

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,7 @@ export function createInitialSessionState(
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: true,
markedWatched: false,
};
}

View File

@@ -9,7 +9,9 @@ import {
createTrackerPreparedStatements,
ensureSchema,
executeQueuedWrite,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
@@ -60,6 +62,7 @@ test('ensureSchema creates immersion core tables', () => {
const tableNames = new Set(rows.map((row) => row.name));
assert.ok(tableNames.has('imm_videos'));
assert.ok(tableNames.has('imm_anime'));
assert.ok(tableNames.has('imm_sessions'));
assert.ok(tableNames.has('imm_session_telemetry'));
assert.ok(tableNames.has('imm_session_events'));
@@ -67,8 +70,28 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_monthly_rollups'));
assert.ok(tableNames.has('imm_words'));
assert.ok(tableNames.has('imm_kanji'));
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const rollupStateRow = db
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
.get('last_rollup_sample_ms') as {
@@ -82,6 +105,470 @@ test('ensureSchema creates immersion core tables', () => {
}
});
// Migration test: a hand-built schema-version-4 database (imm_videos without
// any anime columns) must be upgraded in place by ensureSchema, with one
// imm_anime row backfilled per distinct series parsed from the legacy videos'
// source-path filenames via the fallback parser.
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    // Recreate the legacy (v4) layout directly instead of calling ensureSchema.
    db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
    const insertLegacyVideo = db.prepare(`
INSERT INTO imm_videos (
video_key, canonical_title, source_type, source_path, source_url,
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
    // Two episodes of one series plus one episode of another, so the backfill
    // must both group and separate correctly.
    insertLegacyVideo.run(
      'local:/library/Little Witch Academia S02E05.mkv',
      'Episode 5',
      SOURCE_TYPE_LOCAL,
      '/library/Little Witch Academia S02E05.mkv',
      null,
      0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1,
      1,
    );
    insertLegacyVideo.run(
      'local:/library/Little Witch Academia S02E06.mkv',
      'Episode 6',
      SOURCE_TYPE_LOCAL,
      '/library/Little Witch Academia S02E06.mkv',
      null,
      0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1,
      1,
    );
    insertLegacyVideo.run(
      'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
      'Episode 3',
      SOURCE_TYPE_LOCAL,
      '/library/[SubsPlease] Frieren - 03 - Departure.mkv',
      null,
      0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1,
      1,
    );
    // Run the migration under test.
    ensureSchema(db);
    // The anime parse columns must now exist on imm_videos.
    const videoColumns = new Set(
      (
        db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
          name: string;
        }>
      ).map((row) => row.name),
    );
    assert.ok(videoColumns.has('anime_id'));
    assert.ok(videoColumns.has('parsed_basename'));
    assert.ok(videoColumns.has('parsed_title'));
    assert.ok(videoColumns.has('parsed_season'));
    assert.ok(videoColumns.has('parsed_episode'));
    assert.ok(videoColumns.has('parser_source'));
    assert.ok(videoColumns.has('parser_confidence'));
    assert.ok(videoColumns.has('parse_metadata_json'));
    // Exactly one anime row per distinct series title.
    const animeRows = db
      .prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
      .all() as Array<{ canonical_title: string }>;
    assert.deepEqual(
      animeRows.map((row) => row.canonical_title),
      ['Frieren', 'Little Witch Academia'],
    );
    const littleWitchRows = db
      .prepare(
        `
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_basename,
v.parsed_season,
v.parsed_episode,
v.parser_source,
v.parser_confidence
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
ORDER BY v.video_key
`,
      )
      .all() as Array<{
        anime_title: string;
        parsed_title: string | null;
        parsed_basename: string | null;
        parsed_season: number | null;
        parsed_episode: number | null;
        parser_source: string | null;
        parser_confidence: number | null;
      }>;
    assert.equal(littleWitchRows.length, 2);
    // Both LWA episodes share one anime row but keep per-episode parse fields.
    assert.deepEqual(
      littleWitchRows.map((row) => ({
        animeTitle: row.anime_title,
        parsedTitle: row.parsed_title,
        parsedBasename: row.parsed_basename,
        parsedSeason: row.parsed_season,
        parsedEpisode: row.parsed_episode,
        parserSource: row.parser_source,
      })),
      [
        {
          animeTitle: 'Little Witch Academia',
          parsedTitle: 'Little Witch Academia',
          parsedBasename: 'Little Witch Academia S02E05.mkv',
          parsedSeason: 2,
          parsedEpisode: 5,
          parserSource: 'fallback',
        },
        {
          animeTitle: 'Little Witch Academia',
          parsedTitle: 'Little Witch Academia',
          parsedBasename: 'Little Witch Academia S02E06.mkv',
          parsedSeason: 2,
          parsedEpisode: 6,
          parserSource: 'fallback',
        },
      ],
    );
    // Confidence value is parser-dependent; assert only that it is a positive number.
    assert.ok(
      littleWitchRows.every(
        (row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
      ),
    );
    // Bracketed-release filename must still be parsed into its own series.
    const frierenRow = db
      .prepare(
        `
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_episode,
v.parser_source
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key = ?
`,
      )
      .get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
        anime_title: string;
        parsed_title: string | null;
        parsed_episode: number | null;
        parser_source: string | null;
      } | null;
    assert.ok(frierenRow);
    assert.equal(frierenRow?.anime_title, 'Frieren');
    assert.equal(frierenRow?.parsed_title, 'Frieren');
    assert.equal(frierenRow?.parsed_episode, 3);
    assert.equal(frierenRow?.parser_source, 'fallback');
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Incremental-upgrade test: a schema-version-6 database (anime columns already
// present, but no per-line occurrence tracking) must gain the subtitle-line
// tables when ensureSchema runs again.
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    // Hand-build the v6 layout: videos/sessions/events/words/kanji exist,
    // the three subtitle-line occurrence tables do not.
    db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_sessions(
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_uuid TEXT NOT NULL UNIQUE,
video_id INTEGER NOT NULL,
started_at_ms INTEGER NOT NULL,
ended_at_ms INTEGER,
status INTEGER NOT NULL,
locale_id INTEGER,
target_lang_id INTEGER,
difficulty_tier INTEGER,
subtitle_mode INTEGER,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_session_events(
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
ts_ms INTEGER NOT NULL,
event_type INTEGER NOT NULL,
line_index INTEGER,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
words_delta INTEGER NOT NULL DEFAULT 0,
cards_delta INTEGER NOT NULL DEFAULT 0,
payload_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_words(
id INTEGER PRIMARY KEY AUTOINCREMENT,
headword TEXT,
word TEXT,
reading TEXT,
part_of_speech TEXT,
pos1 TEXT,
pos2 TEXT,
pos3 TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(headword, word, reading)
);
CREATE TABLE imm_kanji(
id INTEGER PRIMARY KEY AUTOINCREMENT,
kanji TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(kanji)
);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
`);
    // Run the upgrade under test.
    ensureSchema(db);
    const tableNames = new Set(
      (
        db.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`).all() as
          Array<{ name: string }>
      ).map((row) => row.name),
    );
    assert.ok(tableNames.has('imm_subtitle_lines'));
    assert.ok(tableNames.has('imm_word_line_occurrences'));
    assert.ok(tableNames.has('imm_kanji_line_occurrences'));
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
// Identity handling: parsed titles that normalize to the same key must reuse a
// single imm_anime row, and a later AniList-backed call upgrades that row's
// canonical metadata in place without creating a duplicate.
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
      canonicalTitle: 'Episode 5',
      sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
      canonicalTitle: 'Episode 6',
      sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
      sourceUrl: null,
      sourceType: SOURCE_TYPE_LOCAL,
    });
    // First sighting creates a provisional row with no AniList metadata.
    const provisionalAnimeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Little Witch Academia',
      canonicalTitle: 'Little Witch Academia',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: '{"source":"parsed"}',
    });
    linkVideoToAnimeRecord(db, firstVideoId, {
      animeId: provisionalAnimeId,
      parsedBasename: 'Little Witch Academia S02E05.mkv',
      parsedTitle: 'Little Witch Academia',
      parsedSeason: 2,
      parsedEpisode: 5,
      parserSource: 'fallback',
      parserConfidence: 0.6,
      parseMetadataJson: '{"source":"parsed","episode":5}',
    });
    // Different whitespace/casing must normalize to the same identity key and
    // therefore return the same anime_id.
    const reusedAnimeId = getOrCreateAnimeRecord(db, {
      parsedTitle: ' little witch academia ',
      canonicalTitle: 'Little Witch Academia',
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: '{"source":"parsed"}',
    });
    linkVideoToAnimeRecord(db, secondVideoId, {
      animeId: reusedAnimeId,
      parsedBasename: 'Little Witch Academia S02E06.mkv',
      parsedTitle: 'Little Witch Academia',
      parsedSeason: 2,
      parsedEpisode: 6,
      parserSource: 'fallback',
      parserConfidence: 0.6,
      parseMetadataJson: '{"source":"parsed","episode":6}',
    });
    assert.equal(reusedAnimeId, provisionalAnimeId);
    // AniList metadata arriving later upgrades the provisional row in place.
    const upgradedAnimeId = getOrCreateAnimeRecord(db, {
      parsedTitle: 'Little Witch Academia',
      canonicalTitle: 'Little Witch Academia TV',
      anilistId: 33_435,
      titleRomaji: 'Little Witch Academia',
      titleEnglish: 'Little Witch Academia',
      titleNative: 'リトルウィッチアカデミア',
      metadataJson: '{"source":"anilist"}',
    });
    assert.equal(upgradedAnimeId, provisionalAnimeId);
    // Exactly one row survives, carrying the upgraded AniList metadata.
    const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
      anime_id: number;
      normalized_title_key: string;
      canonical_title: string;
      anilist_id: number | null;
      title_romaji: string | null;
      title_english: string | null;
      title_native: string | null;
      metadata_json: string | null;
    }>;
    assert.equal(animeRows.length, 1);
    assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
    assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
    assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
    assert.equal(animeRows[0]?.anilist_id, 33_435);
    assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
    assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
    assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
    assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
    // Both videos remain linked to the single surviving anime row.
    const linkedVideos = db
      .prepare(
        `
SELECT anime_id, parsed_title, parsed_season, parsed_episode
FROM imm_videos
WHERE video_id IN (?, ?)
ORDER BY video_id
`,
      )
      .all(firstVideoId, secondVideoId) as Array<{
        anime_id: number | null;
        parsed_title: string | null;
        parsed_season: number | null;
        parsed_episode: number | null;
      }>;
    assert.deepEqual(linkedVideos, [
      {
        anime_id: provisionalAnimeId,
        parsed_title: 'Little Witch Academia',
        parsed_season: 2,
        parsed_episode: 5,
      },
      {
        anime_id: provisionalAnimeId,
        parsed_title: 'Little Witch Academia',
        parsed_season: 2,
        parsed_episode: 6,
      },
    ]);
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -191,18 +678,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
const wordRow = db
.prepare(
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
`SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
FROM imm_words WHERE headword = ?`,
)
.get('猫') as {
headword: string;
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
first_seen: number;
last_seen: number;
} | null;
@@ -218,6 +709,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
assert.ok(wordRow);
assert.ok(kanjiRow);
assert.equal(wordRow?.frequency, 2);
assert.equal(wordRow?.part_of_speech, 'noun');
assert.equal(wordRow?.pos1, '名詞');
assert.equal(wordRow?.pos2, '一般');
assert.equal(kanjiRow?.frequency, 2);
assert.equal(wordRow?.first_seen, 5);
assert.equal(wordRow?.last_seen, 15);
@@ -228,3 +722,34 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
cleanupDbPath(dbPath);
}
});
// A word first stored with the placeholder POS 'other' must be upgraded in
// place when a later sighting carries a concrete part of speech, while the
// frequency counter still accumulates both sightings.
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
  const dbPath = makeDbPath();
  const db = new Database(dbPath);
  try {
    ensureSchema(db);
    const stmts = createTrackerPreparedStatements(db);
    // First sighting: placeholder 'other'. Second sighting: concrete 'verb'.
    stmts.wordUpsertStmt.run('知っている', '知っている', 'しっている', 'other', '動詞', '自立', '', 10, 10);
    stmts.wordUpsertStmt.run('知っている', '知っている', 'しっている', 'verb', '動詞', '自立', '', 11, 12);
    const row = db
      .prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
      .get('知っている') as {
        frequency: number;
        part_of_speech: string;
        pos1: string;
        pos2: string;
      } | null;
    assert.ok(row);
    // Both sightings counted; POS upgraded from 'other' to 'verb'.
    assert.equal(row?.frequency, 2);
    assert.equal(row?.part_of_speech, 'verb');
    assert.equal(row?.pos1, '動詞');
    assert.equal(row?.pos2, '自立');
  } finally {
    db.close();
    cleanupDbPath(dbPath);
  }
});

View File

@@ -1,3 +1,4 @@
import { parseMediaInfo } from '../../../jimaku/utils';
import type { DatabaseSync } from './sqlite';
import { SCHEMA_VERSION } from './types';
import type { QueuedWrite, VideoMetadata } from './types';
@@ -7,6 +8,33 @@ export interface TrackerPreparedStatements {
eventInsertStmt: ReturnType<DatabaseSync['prepare']>;
wordUpsertStmt: ReturnType<DatabaseSync['prepare']>;
kanjiUpsertStmt: ReturnType<DatabaseSync['prepare']>;
subtitleLineInsertStmt: ReturnType<DatabaseSync['prepare']>;
wordIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
kanjiIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
wordLineOccurrenceUpsertStmt: ReturnType<DatabaseSync['prepare']>;
kanjiLineOccurrenceUpsertStmt: ReturnType<DatabaseSync['prepare']>;
videoAnimeIdSelectStmt: ReturnType<DatabaseSync['prepare']>;
}
/**
 * Input for creating or upgrading an imm_anime row. The parsed title supplies
 * the identity key (see normalizeAnimeIdentityKey); the AniList fields are
 * null until real metadata is fetched.
 */
export interface AnimeRecordInput {
  // Series title as parsed from the filename; used for identity matching.
  parsedTitle: string;
  canonicalTitle: string;
  anilistId: number | null;
  titleRomaji: string | null;
  titleEnglish: string | null;
  titleNative: string | null;
  metadataJson: string | null;
}

/**
 * Parse results written onto an imm_videos row when linking it to an anime.
 * All fields are nullable because a video may fail to parse entirely.
 */
export interface VideoAnimeLinkInput {
  animeId: number | null;
  parsedBasename: string | null;
  parsedTitle: string | null;
  parsedSeason: number | null;
  parsedEpisode: number | null;
  // NOTE(review): observed values are 'guessit' and 'fallback' — confirm no others.
  parserSource: string | null;
  // Presumably a 0..1 score (cf. parserConfidenceToScore) — verify at call sites.
  parserConfidence: number | null;
  parseMetadataJson: string | null;
}
function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boolean {
@@ -16,9 +44,14 @@ function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boo
.some((row: unknown) => (row as { name: string }).name === columnName);
}
function addColumnIfMissing(db: DatabaseSync, tableName: string, columnName: string): void {
function addColumnIfMissing(
db: DatabaseSync,
tableName: string,
columnName: string,
columnType = 'INTEGER',
): void {
if (!hasColumn(db, tableName, columnName)) {
db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} INTEGER`);
db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} ${columnType}`);
}
}
@@ -35,6 +68,247 @@ export function applyPragmas(db: DatabaseSync): void {
db.exec('PRAGMA busy_timeout = 2500');
}
/**
 * Collapse an anime title into a canonical identity key: NFKC-fold and
 * lowercase it, replace every run of non-letter/non-digit characters with a
 * single space, then trim and squeeze whitespace. Used as the UNIQUE lookup
 * key on imm_anime.normalized_title_key.
 */
export function normalizeAnimeIdentityKey(title: string): string {
  const folded = title.normalize('NFKC').toLowerCase();
  const alnumOnly = folded.replace(/[^\p{L}\p{N}]+/gu, ' ');
  return alnumOnly.trim().replace(/\s+/g, ' ');
}
/**
 * True when a parsed "title" is really just an episode label — e.g.
 * "Episode 3", "ep12", or "第3話" — rather than a usable series name.
 * Supports 1–3 digit episode numbers in both English and Japanese forms.
 */
function looksLikeEpisodeOnlyTitle(title: string): boolean {
  const squashed = title.normalize('NFKC').toLowerCase().replace(/\s+/g, ' ').trim();
  if (/^(episode|ep)\s*\d{1,3}$/.test(squashed)) {
    return true;
  }
  return /^第\s*\d{1,3}\s*話$/.test(squashed);
}
/**
 * Map a parser confidence label to the numeric score stored in
 * imm_videos.parser_confidence (high=1, medium=0.6, anything else=0.2).
 */
function parserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  if (confidence === 'high') {
    return 1;
  }
  return confidence === 'medium' ? 0.6 : 0.2;
}
/**
 * Derive an anime backfill candidate for a legacy video row during the
 * schema-v5 migration. The on-disk source path is preferred; when it yields
 * no usable series title (missing, or an episode-only label like "Episode 3")
 * the stored canonical title is parsed instead. Returns null when neither
 * input produces a usable title.
 */
function parseLegacyAnimeBackfillCandidate(
  sourcePath: string | null,
  canonicalTitle: string,
): {
  basename: string | null;
  title: string;
  season: number | null;
  episode: number | null;
  source: 'fallback';
  confidenceScore: number;
  metadataJson: string;
} | null {
  const trimmedPath = sourcePath?.trim() ?? '';
  const fromPath = trimmedPath.length > 0 ? parseMediaInfo(trimmedPath) : null;
  if (fromPath?.title && !looksLikeEpisodeOnlyTitle(fromPath.title)) {
    return {
      basename: fromPath.filename || null,
      title: fromPath.title,
      season: fromPath.season,
      episode: fromPath.episode,
      source: 'fallback',
      confidenceScore: parserConfidenceToScore(fromPath.confidence),
      metadataJson: JSON.stringify({
        confidence: fromPath.confidence,
        filename: fromPath.filename,
        rawTitle: fromPath.rawTitle,
        migrationSource: 'source_path',
      }),
    };
  }
  const trimmedTitle = canonicalTitle.trim();
  if (trimmedTitle.length === 0) {
    return null;
  }
  const fromTitle = parseMediaInfo(trimmedTitle);
  if (!fromTitle.title || looksLikeEpisodeOnlyTitle(fromTitle.title)) {
    return null;
  }
  return {
    basename: null,
    title: fromTitle.title,
    season: fromTitle.season,
    episode: fromTitle.episode,
    source: 'fallback',
    confidenceScore: parserConfidenceToScore(fromTitle.confidence),
    metadataJson: JSON.stringify({
      confidence: fromTitle.confidence,
      filename: fromTitle.filename,
      rawTitle: fromTitle.rawTitle,
      migrationSource: 'canonical_title',
    }),
  };
}
/**
 * Resolve (or lazily create) the imm_anime row for `input` and return its id.
 *
 * Lookup prefers an exact anilist_id match; otherwise it falls back to the
 * normalized title key. On a hit, incoming non-null fields overwrite the
 * stored metadata (COALESCE keeps existing values; canonical_title is only
 * replaced when the incoming value is non-empty). On a miss a new row is
 * inserted with CREATED_DATE/LAST_UPDATE_DATE set to now.
 *
 * @throws Error when parsedTitle normalizes to an empty identity key.
 */
export function getOrCreateAnimeRecord(db: DatabaseSync, input: AnimeRecordInput): number {
  const normalizedTitleKey = normalizeAnimeIdentityKey(input.parsedTitle);
  if (!normalizedTitleKey) {
    throw new Error('parsedTitle is required to create or update an anime record');
  }
  // AniList id is the strongest identity signal; use it first when provided.
  const byAnilistId =
    input.anilistId !== null
      ? (db.prepare('SELECT anime_id FROM imm_anime WHERE anilist_id = ?').get(input.anilistId) as {
          anime_id: number;
        } | null)
      : null;
  // Fallback identity: the normalized-title key (UNIQUE column on imm_anime).
  const byNormalizedTitle = db
    .prepare('SELECT anime_id FROM imm_anime WHERE normalized_title_key = ?')
    .get(normalizedTitleKey) as { anime_id: number } | null;
  const existing = byAnilistId ?? byNormalizedTitle;
  if (existing?.anime_id) {
    // Merge-update: each COALESCE keeps the stored value unless the incoming
    // one is non-null (NULLIF additionally rejects an empty canonical_title).
    db.prepare(
      `
      UPDATE imm_anime
      SET
        canonical_title = COALESCE(NULLIF(?, ''), canonical_title),
        anilist_id = COALESCE(?, anilist_id),
        title_romaji = COALESCE(?, title_romaji),
        title_english = COALESCE(?, title_english),
        title_native = COALESCE(?, title_native),
        metadata_json = COALESCE(?, metadata_json),
        LAST_UPDATE_DATE = ?
      WHERE anime_id = ?
      `,
    ).run(
      input.canonicalTitle,
      input.anilistId,
      input.titleRomaji,
      input.titleEnglish,
      input.titleNative,
      input.metadataJson,
      Date.now(),
      existing.anime_id,
    );
    return existing.anime_id;
  }
  const nowMs = Date.now();
  const result = db
    .prepare(
      `
      INSERT INTO imm_anime(
        normalized_title_key,
        canonical_title,
        anilist_id,
        title_romaji,
        title_english,
        title_native,
        metadata_json,
        CREATED_DATE,
        LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
      `,
    )
    .run(
      normalizedTitleKey,
      input.canonicalTitle,
      input.anilistId,
      input.titleRomaji,
      input.titleEnglish,
      input.titleNative,
      input.metadataJson,
      nowMs,
      nowMs,
    );
  return Number(result.lastInsertRowid);
}
/**
 * Write parsed-filename metadata (and the resolved anime foreign key, when
 * any) onto an existing imm_videos row, refreshing its LAST_UPDATE_DATE.
 * Null fields in `input` clear the corresponding columns.
 */
export function linkVideoToAnimeRecord(
  db: DatabaseSync,
  videoId: number,
  input: VideoAnimeLinkInput,
): void {
  const updateStmt = db.prepare(
    `
    UPDATE imm_videos
    SET
      anime_id = ?,
      parsed_basename = ?,
      parsed_title = ?,
      parsed_season = ?,
      parsed_episode = ?,
      parser_source = ?,
      parser_confidence = ?,
      parse_metadata_json = ?,
      LAST_UPDATE_DATE = ?
    WHERE video_id = ?
    `,
  );
  updateStmt.run(
    input.animeId,
    input.parsedBasename,
    input.parsedTitle,
    input.parsedSeason,
    input.parsedEpisode,
    input.parserSource,
    input.parserConfidence,
    input.parseMetadataJson,
    Date.now(),
    videoId,
  );
}
/**
 * Schema-v5 backfill: add the anime-related columns to imm_videos, then parse
 * each not-yet-linked legacy row's source path / canonical title into an
 * imm_anime record and link the video to it. Rows that yield no usable
 * series title are left unlinked.
 */
function migrateLegacyAnimeMetadata(db: DatabaseSync): void {
  // New columns introduced by this migration, as (name, type) pairs.
  const animeColumns: ReadonlyArray<readonly [string, string]> = [
    ['anime_id', 'INTEGER REFERENCES imm_anime(anime_id)'],
    ['parsed_basename', 'TEXT'],
    ['parsed_title', 'TEXT'],
    ['parsed_season', 'INTEGER'],
    ['parsed_episode', 'INTEGER'],
    ['parser_source', 'TEXT'],
    ['parser_confidence', 'REAL'],
    ['parse_metadata_json', 'TEXT'],
  ];
  for (const [column, columnType] of animeColumns) {
    addColumnIfMissing(db, 'imm_videos', column, columnType);
  }
  const unlinkedVideos = db
    .prepare(
      `
      SELECT video_id, source_path, canonical_title
      FROM imm_videos
      WHERE anime_id IS NULL
      `,
    )
    .all() as Array<{
    video_id: number;
    source_path: string | null;
    canonical_title: string;
  }>;
  for (const video of unlinkedVideos) {
    const candidate = parseLegacyAnimeBackfillCandidate(video.source_path, video.canonical_title);
    if (candidate === null) {
      continue;
    }
    // The parsed title acts as both identity and display title; no AniList
    // data is available at migration time.
    const animeId = getOrCreateAnimeRecord(db, {
      parsedTitle: candidate.title,
      canonicalTitle: candidate.title,
      anilistId: null,
      titleRomaji: null,
      titleEnglish: null,
      titleNative: null,
      metadataJson: candidate.metadataJson,
    });
    linkVideoToAnimeRecord(db, video.video_id, {
      animeId,
      parsedBasename: candidate.basename,
      parsedTitle: candidate.title,
      parsedSeason: candidate.season,
      parsedEpisode: candidate.episode,
      parserSource: candidate.source,
      parserConfidence: candidate.confidenceScore,
      parseMetadataJson: candidate.metadataJson,
    });
  }
}
export function ensureSchema(db: DatabaseSync): void {
db.exec(`
CREATE TABLE IF NOT EXISTS imm_schema_version (
@@ -61,14 +335,38 @@ export function ensureSchema(db: DatabaseSync): void {
return;
}
db.exec(`
CREATE TABLE IF NOT EXISTS imm_anime(
anime_id INTEGER PRIMARY KEY AUTOINCREMENT,
normalized_title_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
anilist_id INTEGER UNIQUE,
title_romaji TEXT,
title_english TEXT,
title_native TEXT,
episodes_total INTEGER,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
watched INTEGER NOT NULL DEFAULT 0,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
@@ -77,7 +375,8 @@ export function ensureSchema(db: DatabaseSync): void {
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
);
`);
db.exec(`
@@ -173,6 +472,10 @@ export function ensureSchema(db: DatabaseSync): void {
headword TEXT,
word TEXT,
reading TEXT,
part_of_speech TEXT,
pos1 TEXT,
pos2 TEXT,
pos3 TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
@@ -189,42 +492,59 @@ export function ensureSchema(db: DatabaseSync): void {
UNIQUE(kanji)
);
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_sessions_video_started
ON imm_sessions(video_id, started_at_ms DESC)
CREATE TABLE IF NOT EXISTS imm_subtitle_lines(
line_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
event_id INTEGER,
video_id INTEGER NOT NULL,
anime_id INTEGER,
line_index INTEGER NOT NULL,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
text TEXT NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
FOREIGN KEY(event_id) REFERENCES imm_session_events(event_id) ON DELETE SET NULL,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE,
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
);
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_sessions_status_started
ON imm_sessions(status, started_at_ms DESC)
CREATE TABLE IF NOT EXISTS imm_word_line_occurrences(
line_id INTEGER NOT NULL,
word_id INTEGER NOT NULL,
occurrence_count INTEGER NOT NULL,
PRIMARY KEY(line_id, word_id),
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
FOREIGN KEY(word_id) REFERENCES imm_words(id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_telemetry_session_sample
ON imm_session_telemetry(session_id, sample_ms DESC)
CREATE TABLE IF NOT EXISTS imm_kanji_line_occurrences(
line_id INTEGER NOT NULL,
kanji_id INTEGER NOT NULL,
occurrence_count INTEGER NOT NULL,
PRIMARY KEY(line_id, kanji_id),
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
FOREIGN KEY(kanji_id) REFERENCES imm_kanji(id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_events_session_ts
ON imm_session_events(session_id, ts_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_events_type_ts
ON imm_session_events(event_type, ts_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_rollups_day_video
ON imm_daily_rollups(rollup_day, video_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_rollups_month_video
ON imm_monthly_rollups(rollup_month, video_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_words_headword_word_reading
ON imm_words(headword, word, reading)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_kanji_kanji
ON imm_kanji(kanji)
CREATE TABLE IF NOT EXISTS imm_media_art(
video_id INTEGER PRIMARY KEY,
anilist_id INTEGER,
cover_url TEXT,
cover_blob BLOB,
title_romaji TEXT,
title_english TEXT,
episodes_total INTEGER,
fetched_at_ms INTEGER NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
);
`);
if (currentVersion?.schema_version === 1) {
@@ -299,6 +619,134 @@ export function ensureSchema(db: DatabaseSync): void {
dropColumnIfExists(db, 'imm_sessions', 'updated_at_ms');
}
if (currentVersion?.schema_version && currentVersion.schema_version < 5) {
migrateLegacyAnimeMetadata(db);
}
if (currentVersion?.schema_version && currentVersion.schema_version < 6) {
addColumnIfMissing(db, 'imm_words', 'part_of_speech', 'TEXT');
addColumnIfMissing(db, 'imm_words', 'pos1', 'TEXT');
addColumnIfMissing(db, 'imm_words', 'pos2', 'TEXT');
addColumnIfMissing(db, 'imm_words', 'pos3', 'TEXT');
}
if (currentVersion?.schema_version && currentVersion.schema_version < 7) {
db.exec(`
CREATE TABLE IF NOT EXISTS imm_subtitle_lines(
line_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
event_id INTEGER,
video_id INTEGER NOT NULL,
anime_id INTEGER,
line_index INTEGER NOT NULL,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
text TEXT NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
FOREIGN KEY(event_id) REFERENCES imm_session_events(event_id) ON DELETE SET NULL,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE,
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
)
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_word_line_occurrences(
line_id INTEGER NOT NULL,
word_id INTEGER NOT NULL,
occurrence_count INTEGER NOT NULL,
PRIMARY KEY(line_id, word_id),
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
FOREIGN KEY(word_id) REFERENCES imm_words(id) ON DELETE CASCADE
)
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_kanji_line_occurrences(
line_id INTEGER NOT NULL,
kanji_id INTEGER NOT NULL,
occurrence_count INTEGER NOT NULL,
PRIMARY KEY(line_id, kanji_id),
FOREIGN KEY(line_id) REFERENCES imm_subtitle_lines(line_id) ON DELETE CASCADE,
FOREIGN KEY(kanji_id) REFERENCES imm_kanji(id) ON DELETE CASCADE
)
`);
}
db.exec(`
CREATE INDEX IF NOT EXISTS idx_anime_normalized_title
ON imm_anime(normalized_title_key)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_anime_anilist_id
ON imm_anime(anilist_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_videos_anime_id
ON imm_videos(anime_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_sessions_video_started
ON imm_sessions(video_id, started_at_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_sessions_status_started
ON imm_sessions(status, started_at_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_telemetry_session_sample
ON imm_session_telemetry(session_id, sample_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_events_session_ts
ON imm_session_events(session_id, ts_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_events_type_ts
ON imm_session_events(event_type, ts_ms DESC)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_rollups_day_video
ON imm_daily_rollups(rollup_day, video_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_rollups_month_video
ON imm_monthly_rollups(rollup_month, video_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_words_headword_word_reading
ON imm_words(headword, word, reading)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_kanji_kanji
ON imm_kanji(kanji)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_session_line
ON imm_subtitle_lines(session_id, line_index)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_video_line
ON imm_subtitle_lines(video_id, line_index)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_subtitle_lines_anime_line
ON imm_subtitle_lines(anime_id, line_index)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_word_line_occurrences_word
ON imm_word_line_occurrences(word_id, line_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_kanji_line_occurrences_kanji
ON imm_kanji_line_occurrences(kanji_id, line_id)
`);
if (currentVersion?.schema_version && currentVersion.schema_version < SCHEMA_VERSION) {
db.exec('DELETE FROM imm_daily_rollups');
db.exec('DELETE FROM imm_monthly_rollups');
db.exec(`UPDATE imm_rollup_state SET state_value = 0 WHERE state_key = 'last_rollup_sample_ms'`);
}
db.exec(`
INSERT INTO imm_schema_version(schema_version, applied_at_ms)
VALUES (${SCHEMA_VERSION}, ${Date.now()})
@@ -328,12 +776,21 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
`),
wordUpsertStmt: db.prepare(`
INSERT INTO imm_words (
headword, word, reading, first_seen, last_seen, frequency
headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency
) VALUES (
?, ?, ?, ?, ?, 1
?, ?, ?, ?, ?, ?, ?, ?, ?, 1
)
ON CONFLICT(headword, word, reading) DO UPDATE SET
frequency = COALESCE(frequency, 0) + 1,
part_of_speech = CASE
WHEN COALESCE(NULLIF(imm_words.part_of_speech, ''), 'other') = 'other'
AND COALESCE(NULLIF(excluded.part_of_speech, ''), '') <> ''
THEN excluded.part_of_speech
ELSE imm_words.part_of_speech
END,
pos1 = COALESCE(NULLIF(imm_words.pos1, ''), excluded.pos1),
pos2 = COALESCE(NULLIF(imm_words.pos2, ''), excluded.pos2),
pos3 = COALESCE(NULLIF(imm_words.pos3, ''), excluded.pos3),
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
`),
@@ -348,9 +805,93 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
`),
subtitleLineInsertStmt: db.prepare(`
INSERT INTO imm_subtitle_lines (
session_id, event_id, video_id, anime_id, line_index, segment_start_ms,
segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?
)
`),
wordIdSelectStmt: db.prepare(`
SELECT id FROM imm_words
WHERE headword = ? AND word = ? AND reading = ?
`),
kanjiIdSelectStmt: db.prepare(`
SELECT id FROM imm_kanji
WHERE kanji = ?
`),
wordLineOccurrenceUpsertStmt: db.prepare(`
INSERT INTO imm_word_line_occurrences (
line_id, word_id, occurrence_count
) VALUES (
?, ?, ?
)
ON CONFLICT(line_id, word_id) DO UPDATE SET
occurrence_count = imm_word_line_occurrences.occurrence_count + excluded.occurrence_count
`),
kanjiLineOccurrenceUpsertStmt: db.prepare(`
INSERT INTO imm_kanji_line_occurrences (
line_id, kanji_id, occurrence_count
) VALUES (
?, ?, ?
)
ON CONFLICT(line_id, kanji_id) DO UPDATE SET
occurrence_count = imm_kanji_line_occurrences.occurrence_count + excluded.occurrence_count
`),
videoAnimeIdSelectStmt: db.prepare(`
SELECT anime_id FROM imm_videos
WHERE video_id = ?
`),
};
}
/**
 * Bump the aggregate imm_words row once per counted occurrence (so frequency
 * accumulates through the upsert's ON CONFLICT path), then resolve the row id
 * so line-level occurrence rows can reference it.
 *
 * @throws Error when the row cannot be found after the upserts.
 */
function incrementWordAggregate(
  stmts: TrackerPreparedStatements,
  occurrence: Extract<QueuedWrite, { kind: 'subtitleLine' }>['wordOccurrences'][number],
  firstSeen: number,
  lastSeen: number,
): number {
  const { headword, word, reading, partOfSpeech, pos1, pos2, pos3, occurrenceCount } = occurrence;
  let remaining = occurrenceCount;
  while (remaining > 0) {
    stmts.wordUpsertStmt.run(
      headword,
      word,
      reading,
      partOfSpeech,
      pos1,
      pos2,
      pos3,
      firstSeen,
      lastSeen,
    );
    remaining -= 1;
  }
  const resolved = stmts.wordIdSelectStmt.get(headword, word, reading) as { id: number } | null;
  if (!resolved?.id) {
    throw new Error(`Failed to resolve imm_words id for ${headword}`);
  }
  return resolved.id;
}
/**
 * Bump the aggregate imm_kanji row once per counted occurrence, then resolve
 * its row id for the line-occurrence table.
 *
 * @throws Error when the row cannot be found after the upserts.
 */
function incrementKanjiAggregate(
  stmts: TrackerPreparedStatements,
  occurrence: Extract<QueuedWrite, { kind: 'subtitleLine' }>['kanjiOccurrences'][number],
  firstSeen: number,
  lastSeen: number,
): number {
  const { kanji, occurrenceCount } = occurrence;
  let remaining = occurrenceCount;
  while (remaining > 0) {
    stmts.kanjiUpsertStmt.run(kanji, firstSeen, lastSeen);
    remaining -= 1;
  }
  const resolved = stmts.kanjiIdSelectStmt.get(kanji) as { id: number } | null;
  if (!resolved?.id) {
    throw new Error(`Failed to resolve imm_kanji id for ${kanji}`);
  }
  return resolved.id;
}
export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedStatements): void {
if (write.kind === 'telemetry') {
stmts.telemetryInsertStmt.run(
@@ -379,6 +920,10 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
write.headword,
write.word,
write.reading,
write.partOfSpeech,
write.pos1,
write.pos2,
write.pos3,
write.firstSeen,
write.lastSeen,
);
@@ -388,6 +933,31 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
stmts.kanjiUpsertStmt.run(write.kanji, write.firstSeen, write.lastSeen);
return;
}
if (write.kind === 'subtitleLine') {
const animeRow = stmts.videoAnimeIdSelectStmt.get(write.videoId) as { anime_id: number | null } | null;
const lineResult = stmts.subtitleLineInsertStmt.run(
write.sessionId,
null,
write.videoId,
animeRow?.anime_id ?? null,
write.lineIndex,
write.segmentStartMs ?? null,
write.segmentEndMs ?? null,
write.text,
Date.now(),
Date.now(),
);
const lineId = Number(lineResult.lastInsertRowid);
for (const occurrence of write.wordOccurrences) {
const wordId = incrementWordAggregate(stmts, occurrence, write.firstSeen, write.lastSeen);
stmts.wordLineOccurrenceUpsertStmt.run(lineId, wordId, occurrence.occurrenceCount);
}
for (const occurrence of write.kanjiOccurrences) {
const kanjiId = incrementKanjiAggregate(stmts, occurrence, write.firstSeen, write.lastSeen);
stmts.kanjiLineOccurrenceUpsertStmt.run(lineId, kanjiId, occurrence.occurrenceCount);
}
return;
}
stmts.eventInsertStmt.run(
write.sessionId,

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 3;
export const SCHEMA_VERSION = 7;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -29,6 +29,9 @@ export const EVENT_PAUSE_END = 8;
export interface ImmersionTrackerOptions {
dbPath: string;
policy?: ImmersionTrackerPolicy;
resolveLegacyVocabularyPos?: (
row: LegacyVocabularyPosRow,
) => Promise<LegacyVocabularyPosResolution | null>;
}
export interface ImmersionTrackerPolicy {
@@ -72,6 +75,7 @@ export interface SessionState extends TelemetryAccumulator {
lastPauseStartMs: number | null;
isPaused: boolean;
pendingTelemetry: boolean;
markedWatched: boolean;
}
interface QueuedTelemetryWrite {
@@ -118,6 +122,10 @@ interface QueuedWordWrite {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
firstSeen: number;
lastSeen: number;
}
@@ -129,11 +137,42 @@ interface QueuedKanjiWrite {
lastSeen: number;
}
/** One distinct word token observed in a subtitle line, with its tally. */
export interface CountedWordOccurrence {
  headword: string;
  word: string;
  reading: string;
  partOfSpeech: string;
  pos1: string;
  pos2: string;
  pos3: string;
  /** Number of times this word appears in the line (>= 1). */
  occurrenceCount: number;
}

/** One distinct kanji character observed in a subtitle line, with its tally. */
export interface CountedKanjiOccurrence {
  kanji: string;
  occurrenceCount: number;
}

/**
 * Queued write for a subtitle line plus its per-line word/kanji occurrence
 * rows (persisted into imm_subtitle_lines and the *_line_occurrences tables).
 */
interface QueuedSubtitleLineWrite {
  kind: 'subtitleLine';
  sessionId: number;
  videoId: number;
  lineIndex: number;
  segmentStartMs: number | null;
  segmentEndMs: number | null;
  text: string;
  wordOccurrences: CountedWordOccurrence[];
  kanjiOccurrences: CountedKanjiOccurrence[];
  /** Timestamps fed into the word/kanji aggregate upserts for this line. */
  firstSeen: number;
  lastSeen: number;
}
export type QueuedWrite =
| QueuedTelemetryWrite
| QueuedEventWrite
| QueuedWordWrite
| QueuedKanjiWrite;
| QueuedKanjiWrite
| QueuedSubtitleLineWrite;
export interface VideoMetadata {
sourceType: number;
@@ -152,8 +191,33 @@ export interface VideoMetadata {
metadataJson: string | null;
}
/**
 * Filename-parse metadata stored on an imm_videos row; all-null when no parse
 * succeeded.
 */
export interface ParsedAnimeVideoMetadata {
  animeId: number | null;
  parsedBasename: string | null;
  parsedTitle: string | null;
  parsedSeason: number | null;
  parsedEpisode: number | null;
  parserSource: string | null;
  parserConfidence: number | null;
  parseMetadataJson: string | null;
}

/**
 * A successful filename parse before it is linked to an anime record —
 * parsedTitle is required and parserSource names the parser that produced it.
 */
export interface ParsedAnimeVideoGuess {
  parsedBasename: string | null;
  parsedTitle: string;
  parsedSeason: number | null;
  parsedEpisode: number | null;
  parserSource: 'guessit' | 'fallback';
  parserConfidence: number;
  parseMetadataJson: string;
}
export interface SessionSummaryQueryRow {
sessionId: number;
videoId: number | null;
canonicalTitle: string | null;
animeId: number | null;
animeTitle: string | null;
startedAtMs: number;
endedAtMs: number | null;
totalWatchedMs: number;
@@ -166,6 +230,82 @@ export interface SessionSummaryQueryRow {
lookupHits: number;
}
/** Aggregate stats for one imm_words row (vocabulary listing queries). */
export interface VocabularyStatsRow {
  wordId: number;
  headword: string;
  word: string;
  reading: string;
  partOfSpeech: string | null;
  pos1: string | null;
  pos2: string | null;
  pos3: string | null;
  frequency: number;
  firstSeen: number;
  lastSeen: number;
}

/** Result counts from a vocabulary cleanup pass. */
export interface VocabularyCleanupSummary {
  scanned: number;
  kept: number;
  deleted: number;
  repaired: number;
}

/** A legacy imm_words row missing POS data, handed to the resolver callback. */
export interface LegacyVocabularyPosRow {
  headword: string;
  word: string;
  reading: string | null;
}

/** Resolved POS metadata returned by the legacy-vocabulary resolver callback. */
export interface LegacyVocabularyPosResolution {
  headword: string;
  reading: string;
  partOfSpeech: string;
  pos1: string;
  pos2: string;
  pos3: string;
}

/** Aggregate stats for one imm_kanji row. */
export interface KanjiStatsRow {
  kanjiId: number;
  kanji: string;
  frequency: number;
  firstSeen: number;
  lastSeen: number;
}

/** One subtitle line in which a given word occurs, with media context. */
export interface WordOccurrenceRow {
  animeId: number | null;
  animeTitle: string | null;
  videoId: number;
  videoTitle: string;
  sessionId: number;
  lineIndex: number;
  segmentStartMs: number | null;
  segmentEndMs: number | null;
  text: string;
  occurrenceCount: number;
}

/** One subtitle line in which a given kanji occurs, with media context. */
export interface KanjiOccurrenceRow {
  animeId: number | null;
  animeTitle: string | null;
  videoId: number;
  videoTitle: string;
  sessionId: number;
  lineIndex: number;
  segmentStartMs: number | null;
  segmentEndMs: number | null;
  text: string;
  occurrenceCount: number;
}

/** A single imm_session_events row (payload is opaque JSON or null). */
export interface SessionEventRow {
  eventType: number;
  tsMs: number;
  payload: string | null;
}
export interface SessionTimelineRow {
sampleMs: number;
totalWatchedMs: number;
@@ -200,3 +340,180 @@ export interface ProbeMetadata {
bitrateKbps: number | null;
audioCodecId: number | null;
}
/** Cached AniList cover art + titles for one video (imm_media_art row). */
export interface MediaArtRow {
  videoId: number;
  anilistId: number | null;
  coverUrl: string | null;
  coverBlob: Buffer | null;
  titleRomaji: string | null;
  titleEnglish: string | null;
  episodesTotal: number | null;
  /** When the art/metadata was last fetched from AniList. */
  fetchedAtMs: number;
}

/** Per-video aggregate row for the media library listing. */
export interface MediaLibraryRow {
  videoId: number;
  canonicalTitle: string;
  totalSessions: number;
  totalActiveMs: number;
  totalCards: number;
  totalWordsSeen: number;
  lastWatchedMs: number;
  /** SQLite boolean-as-integer: 1 when a cover blob/url exists, else 0. */
  hasCoverArt: number;
}

/** Per-video aggregate row for the media detail view. */
export interface MediaDetailRow {
  videoId: number;
  canonicalTitle: string;
  totalSessions: number;
  totalActiveMs: number;
  totalCards: number;
  totalWordsSeen: number;
  totalLinesSeen: number;
  totalLookupCount: number;
  totalLookupHits: number;
}
/** Per-anime aggregate row for the anime library listing. */
export interface AnimeLibraryRow {
  animeId: number;
  canonicalTitle: string;
  anilistId: number | null;
  totalSessions: number;
  totalActiveMs: number;
  totalCards: number;
  totalWordsSeen: number;
  /** Distinct episodes (videos) tracked locally for this anime. */
  episodeCount: number;
  /** Total episodes per AniList metadata; null when unknown. */
  episodesTotal: number | null;
  lastWatchedMs: number;
}

/** Per-anime aggregate row for the anime detail view. */
export interface AnimeDetailRow {
  animeId: number;
  canonicalTitle: string;
  anilistId: number | null;
  titleRomaji: string | null;
  titleEnglish: string | null;
  titleNative: string | null;
  totalSessions: number;
  totalActiveMs: number;
  totalCards: number;
  totalWordsSeen: number;
  totalLinesSeen: number;
  totalLookupCount: number;
  totalLookupHits: number;
  episodeCount: number;
  lastWatchedMs: number;
}

/** Minimal AniList identity for an anime (used when refreshing metadata). */
export interface AnimeAnilistEntryRow {
  anilistId: number;
  titleRomaji: string | null;
  titleEnglish: string | null;
  season: number | null;
}

/** One tracked episode (video) of an anime, with its watch aggregates. */
export interface AnimeEpisodeRow {
  animeId: number;
  videoId: number;
  canonicalTitle: string;
  parsedTitle: string | null;
  season: number | null;
  episode: number | null;
  durationMs: number;
  /** SQLite boolean-as-integer watched flag. */
  watched: number;
  totalSessions: number;
  totalActiveMs: number;
  totalCards: number;
  totalWordsSeen: number;
  lastWatchedMs: number;
}
/** One day of active-watch totals for the streak calendar. */
export interface StreakCalendarRow {
  /** Days since the Unix epoch. */
  epochDay: number;
  totalActiveMin: number;
}

/** A word encountered in a given anime, for the anime detail view. */
export interface AnimeWordRow {
  wordId: number;
  headword: string;
  word: string;
  reading: string;
  partOfSpeech: string | null;
  frequency: number;
}

/** Episodes watched per day (trend chart datum). */
export interface EpisodesPerDayRow {
  epochDay: number;
  episodeCount: number;
}

/** Newly started anime per day (trend chart datum). */
export interface NewAnimePerDayRow {
  epochDay: number;
  newAnimeCount: number;
}

/** Active watch minutes per anime per day (trend chart datum). */
export interface WatchTimePerAnimeRow {
  epochDay: number;
  animeId: number;
  animeTitle: string;
  totalActiveMin: number;
}

/** Full detail for one word, including POS metadata and seen-range. */
export interface WordDetailRow {
  wordId: number;
  headword: string;
  word: string;
  reading: string;
  partOfSpeech: string | null;
  pos1: string | null;
  pos2: string | null;
  pos3: string | null;
  frequency: number;
  firstSeen: number;
  lastSeen: number;
}

/** How often a word appears within a given anime. */
export interface WordAnimeAppearanceRow {
  animeId: number;
  animeTitle: string;
  occurrenceCount: number;
}

/** A related word surfaced on the word detail page. */
export interface SimilarWordRow {
  wordId: number;
  headword: string;
  word: string;
  reading: string;
  frequency: number;
}

/** Full detail for one kanji, including seen-range. */
export interface KanjiDetailRow {
  kanjiId: number;
  kanji: string;
  frequency: number;
  firstSeen: number;
  lastSeen: number;
}

/** How often a kanji appears within a given anime. */
export interface KanjiAnimeAppearanceRow {
  animeId: number;
  animeTitle: string;
  occurrenceCount: number;
}

/** A word containing a given kanji, for the kanji detail page. */
export interface KanjiWordRow {
  wordId: number;
  headword: string;
  word: string;
  reading: string;
  frequency: number;
}

/** A card-mining event within an episode, with the Anki note ids created. */
export interface EpisodeCardEventRow {
  eventId: number;
  sessionId: number;
  tsMs: number;
  cardsDelta: number;
  noteIds: number[];
}

View File

@@ -133,6 +133,17 @@ function isFrequencyExcludedByPos(
);
}
/**
 * True when a merged token's pos1/pos2 tags match the configured exclusion
 * sets and it should therefore not be persisted as vocabulary. Reuses the
 * same predicate that drives frequency-marking exclusion, so the two stay
 * consistent.
 */
export function shouldExcludeTokenFromVocabularyPersistence(
  token: MergedToken,
  options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
  return isFrequencyExcludedByPos(
    token,
    resolvePos1Exclusions(options),
    resolvePos2Exclusions(options),
  );
}
function applyFrequencyMarking(
tokens: MergedToken[],
pos1Exclusions: ReadonlySet<string>,

View File

@@ -0,0 +1,56 @@
import { PartOfSpeech } from '../../../types';
// Trim a MeCab POS tag; null, undefined, and non-string inputs become ''.
function normalizePosTag(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/** Type guard: narrow an arbitrary value to a known PartOfSpeech enum member. */
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
  return typeof value === 'string' && Object.values(PartOfSpeech).includes(value as PartOfSpeech);
}
/**
 * Map a MeCab pos1 tag to the coarse PartOfSpeech enum. Unrecognized or
 * empty tags map to PartOfSpeech.other.
 */
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
  const tag = normalizePosTag(pos1);
  if (tag === '名詞') return PartOfSpeech.noun;
  if (tag === '動詞') return PartOfSpeech.verb;
  if (tag === '形容詞') return PartOfSpeech.i_adjective;
  if (tag === '形状詞' || tag === '形容動詞') return PartOfSpeech.na_adjective;
  if (tag === '助詞') return PartOfSpeech.particle;
  if (tag === '助動詞') return PartOfSpeech.bound_auxiliary;
  if (tag === '記号' || tag === '補助記号') return PartOfSpeech.symbol;
  return PartOfSpeech.other;
}
/**
 * Choose the PartOfSpeech to persist for a word row. A non-empty pos1 (which
 * may be a '|'-joined multi-tag) takes precedence: when every part maps to
 * the same coarse POS that value wins, otherwise 'other'. With no pos1, a
 * stored partOfSpeech that is a valid enum member is kept; anything else
 * falls back to 'other'.
 */
export function deriveStoredPartOfSpeech(input: {
  partOfSpeech?: string | null;
  pos1?: string | null;
}): PartOfSpeech {
  const tags = normalizePosTag(input.pos1)
    .split('|')
    .map((tag) => tag.trim())
    .filter((tag) => tag.length > 0);
  if (tags.length > 0) {
    const mapped = new Set(tags.map((tag) => mapMecabPos1ToPartOfSpeech(tag)));
    if (mapped.size === 1) {
      return [...mapped][0]!;
    }
    // Conflicting tags in a multi-tag pos1 collapse to the catch-all bucket.
    return PartOfSpeech.other;
  }
  if (isPartOfSpeechValue(input.partOfSpeech)) {
    return input.partOfSpeech;
  }
  return PartOfSpeech.other;
}

View File

@@ -19,34 +19,12 @@
import * as childProcess from 'child_process';
import { PartOfSpeech, Token, MecabStatus } from './types';
import { createLogger } from './logger';
import { mapMecabPos1ToPartOfSpeech } from './core/services/tokenizer/part-of-speech';
export { PartOfSpeech };
const log = createLogger('mecab');
function mapPartOfSpeech(pos1: string): PartOfSpeech {
switch (pos1) {
case '名詞':
return PartOfSpeech.noun;
case '動詞':
return PartOfSpeech.verb;
case '形容詞':
return PartOfSpeech.i_adjective;
case '形状詞':
case '形容動詞':
return PartOfSpeech.na_adjective;
case '助詞':
return PartOfSpeech.particle;
case '助動詞':
return PartOfSpeech.bound_auxiliary;
case '記号':
case '補助記号':
return PartOfSpeech.symbol;
default:
return PartOfSpeech.other;
}
}
export function parseMecabLine(line: string): Token | null {
if (!line || line === 'EOS' || line.trim() === '') {
return null;
@@ -73,7 +51,7 @@ export function parseMecabLine(line: string): Token | null {
return {
word: surface,
partOfSpeech: mapPartOfSpeech(pos1),
partOfSpeech: mapMecabPos1ToPartOfSpeech(pos1),
pos1,
pos2,
pos3,
@@ -446,4 +424,4 @@ export class MecabTokenizer {
}
}
export { mapPartOfSpeech };
export { mapMecabPos1ToPartOfSpeech as mapPartOfSpeech };