feat(stats): add v1 immersion stats dashboard (#19)

This commit is contained in:
2026-03-20 02:43:28 -07:00
committed by GitHub
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
});
assert.deepEqual(result, {
title: 'Guessit Title',
season: null,
episode: 7,
source: 'guessit',
});
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
});
assert.deepEqual(result, {
title: 'My Anime',
season: 1,
episode: 3,
source: 'fallback',
});
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
]);
assert.deepEqual(result, {
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
});
assert.deepEqual(result, {
title: 'Rascal Does not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});

View File

@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
export interface AnilistMediaGuess {
title: string;
season: number | null;
episode: number | null;
source: 'guessit' | 'fallback';
}
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
};
}
function runGuessit(target: string): Promise<string> {
export function runGuessit(target: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.execFile(
'guessit',
@@ -73,9 +74,9 @@ function runGuessit(target: string): Promise<string> {
});
}
type GuessAnilistMediaInfoDeps = {
export interface GuessAnilistMediaInfoDeps {
runGuessit: (target: string) => Promise<string>;
};
}
function firstString(value: unknown): string | null {
if (typeof value === 'string') {
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
const parsed = JSON.parse(stdout) as Record<string, unknown>;
const title = readGuessitTitle(parsed.title);
const episode = firstPositiveInteger(parsed.episode);
const season = firstPositiveInteger(parsed.season);
if (title) {
return { title, episode, source: 'guessit' };
return { title, season, episode, source: 'guessit' };
}
} catch {
// Ignore guessit failures and fall back to internal parser.
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
}
return {
title: parsed.title.trim(),
season: parsed.season,
episode: parsed.episode,
source: 'fallback',
};

View File

@@ -0,0 +1,244 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
import { Database } from '../immersion-tracker/sqlite.js';
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-cover-art-test-'));
return path.join(dir, 'immersion.sqlite');
}
function cleanupDbPath(dbPath: string): void {
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
}
test('stripFilenameTags normalizes common media-title formats', () => {
assert.equal(
stripFilenameTags('[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...'),
'The Eminence in Shadow',
);
assert.equal(
stripFilenameTags(
'[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags('Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure'),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags(
'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
),
'Little Witch Academia',
);
});
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
canonicalTitle: 'Cover Fetcher Test',
sourcePath: '/tmp/cover-fetcher-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
upsertCoverArt(db, videoId, {
anilistId: 7,
coverUrl: 'https://images.test/cover.jpg',
coverBlob: null,
titleRomaji: 'Test Title',
titleEnglish: 'Test Title',
episodesTotal: 12,
});
const fetchCalls: string[] = [];
const originalFetch = globalThis.fetch;
globalThis.fetch = (async (input: RequestInfo | URL) => {
const url = String(input);
fetchCalls.push(url);
assert.equal(url, 'https://images.test/cover.jpg');
return new Response(new Uint8Array([1, 2, 3, 4]), {
status: 200,
headers: { 'Content-Type': 'image/jpeg' },
});
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
);
const fetched = await fetcher.fetchIfMissing(
db,
videoId,
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
);
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(fetchCalls.length, 1);
assert.equal(stored?.coverBlob?.length, 4);
assert.equal(stored?.titleEnglish, 'Test Title');
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
function createJsonResponse(payload: unknown): Response {
return new Response(JSON.stringify(payload), {
status: 200,
headers: { 'content-type': 'application/json' },
});
}
test('fetchIfMissing uses guessit primary title and season when available', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
canonicalTitle:
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
sourcePath: '/tmp/cover-fetcher-season-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const searchCalls: Array<{ search: string }> = [];
const originalFetch = globalThis.fetch;
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
const search = payload.variables.search;
searchCalls.push({ search });
if (search.includes('Season 2')) {
return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
}
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 19,
episodes: 24,
coverImage: { large: 'https://images.test/cover.jpg', medium: null },
title: {
romaji: 'Little Witch Academia',
english: 'Little Witch Academia',
native: null,
},
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
runGuessit: async () =>
JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
},
);
const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(searchCalls.length, 2);
assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
assert.equal(stored?.anilistId, 19);
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
canonicalTitle: 'School Vlog S01E01',
sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
let requestCount = 0;
const originalFetch = globalThis.fetch;
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
requestCount += 1;
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
assert.equal(payload.variables.search, 'School Vlog');
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 21,
episodes: 12,
coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(requestCount, 2);
assert.equal(stored?.anilistId, 21);
} finally {
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});

View File

@@ -0,0 +1,435 @@
import type { AnilistRateLimiter } from './rate-limiter';
import type { DatabaseSync } from '../immersion-tracker/sqlite';
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from './anilist-updater';
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
const SEARCH_QUERY = `
query ($search: String!) {
Page(perPage: 5) {
media(search: $search, type: ANIME) {
id
episodes
season
seasonYear
coverImage { large medium }
title { romaji english native }
}
}
}
`;
interface AnilistMedia {
id: number;
episodes: number | null;
season: string | null;
seasonYear: number | null;
coverImage: { large: string | null; medium: string | null } | null;
title: { romaji: string | null; english: string | null; native: string | null } | null;
}
interface AnilistSearchResponse {
data?: {
Page?: {
media?: AnilistMedia[];
};
};
errors?: Array<{ message?: string }>;
}
export interface CoverArtFetcher {
fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
}
interface Logger {
info(msg: string, ...args: unknown[]): void;
warn(msg: string, ...args: unknown[]): void;
error(msg: string, ...args: unknown[]): void;
}
interface CoverArtCandidate {
title: string;
source: 'guessit' | 'fallback';
season: number | null;
episode: number | null;
}
interface CoverArtFetcherOptions {
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
export function stripFilenameTags(raw: string): string {
let title = raw.replace(/\.[A-Za-z0-9]{2,4}$/, '');
title = title.replace(/^(?:\s*\[[^\]]*\]\s*)+/, '');
title = title.replace(/[._]+/g, ' ');
// Remove everything from " - S##E##" or " - ###" onward (season/episode markers)
title = title.replace(/\s+-\s+S\d+E\d+.*$/i, '');
title = title.replace(/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, '');
title = title.replace(/\s+S\d+E\d+.*$/i, '');
title = title.replace(/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, '');
title = title.replace(/\s+E\d+[: -].*$/i, '');
title = title.replace(/^S\d+E\d+\s*[- ]\s*/i, '');
// Remove bracketed/parenthesized tags: [WEBDL-1080p], (2022), etc.
title = title.replace(/\s*\[[^\]]*\]\s*/g, ' ');
title = title.replace(/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' ');
// Remove common codec/source tags that may appear without brackets
title = title.replace(
/\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
'',
);
// Remove trailing dashes and group tags like "-Retr0"
title = title.replace(/\s*-\s*[\w]+$/, '');
return title.trim().replace(/\s{2,}/g, ' ');
}
function removeSeasonHint(title: string): string {
return title
.replace(/\bseason\s*\d+\b/gi, '')
.replace(/\s{2,}/g, ' ')
.trim();
}
function normalizeTitle(text: string): string {
return text.trim().toLowerCase().replace(/\s+/g, ' ');
}
function extractCandidateSeasonHints(text: string): Set<number> {
const normalized = normalizeTitle(text);
const matches = [
...normalized.matchAll(/\bseason\s*(\d{1,2})\b/gi),
...normalized.matchAll(/\bs(\d{1,2})(?:\b|\D)/gi),
];
const values = new Set<number>();
for (const match of matches) {
const value = Number.parseInt(match[1]!, 10);
if (Number.isInteger(value)) {
values.add(value);
}
}
return values;
}
function isSeasonMentioned(titles: string[], season: number | null): boolean {
if (!season) {
return false;
}
const hints = titles.flatMap((title) => [...extractCandidateSeasonHints(title)]);
return hints.includes(season);
}
function pickBestSearchResult(
title: string,
episode: number | null,
season: number | null,
media: AnilistMedia[],
): { id: number; title: string } | null {
const cleanedTitle = removeSeasonHint(title);
const targets = [title, cleanedTitle]
.map(normalizeTitle)
.map((value) => value.trim())
.filter((value, index, all) => value.length > 0 && all.indexOf(value) === index);
const filtered =
episode === null
? media
: media.filter((item) => {
const total = item.episodes;
return total === null || total >= episode;
});
const candidates = filtered.length > 0 ? filtered : media;
if (candidates.length === 0) {
return null;
}
const scored = candidates.map((item) => {
const candidateTitles = [item.title?.romaji, item.title?.english, item.title?.native]
.filter((value): value is string => typeof value === 'string')
.map((value) => normalizeTitle(value));
let score = 0;
for (const target of targets) {
if (candidateTitles.includes(target)) {
score += 120;
continue;
}
if (candidateTitles.some((itemTitle) => itemTitle.includes(target))) {
score += 30;
}
if (candidateTitles.some((itemTitle) => target.includes(itemTitle))) {
score += 10;
}
}
if (episode !== null && item.episodes === episode) {
score += 20;
}
if (season !== null && isSeasonMentioned(candidateTitles, season)) {
score += 15;
}
return { item, score };
});
scored.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
return b.item.id - a.item.id;
});
const selected = scored[0]!;
const selectedTitle =
selected.item.title?.english ??
selected.item.title?.romaji ??
selected.item.title?.native ??
title;
return { id: selected.item.id, title: selectedTitle };
}
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
const candidateTitles = [
...(parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1
? [`${parsed.title} Season ${parsed.season}`]
: []),
parsed.title,
];
return candidateTitles
.map((title) => title.trim())
.filter((title, index, all) => title.length > 0 && all.indexOf(title) === index);
}
async function searchAnilist(
rateLimiter: AnilistRateLimiter,
title: string,
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
await rateLimiter.acquire();
const res = await fetch(ANILIST_GRAPHQL_URL, {
method: 'POST',
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
body: JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } }),
});
rateLimiter.recordResponse(res.headers);
if (res.status === 429) {
return { media: [], rateLimited: true };
}
if (!res.ok) {
throw new Error(`Anilist search failed: ${res.status} ${res.statusText}`);
}
const json = (await res.json()) as AnilistSearchResponse;
const mediaList = json.data?.Page?.media;
if (!mediaList || mediaList.length === 0) {
return { media: [], rateLimited: false };
}
return { media: mediaList, rateLimited: false };
}
async function downloadImage(url: string): Promise<Buffer | null> {
try {
const res = await fetch(url);
if (!res.ok) return null;
const arrayBuf = await res.arrayBuffer();
return Buffer.from(arrayBuf);
} catch {
return null;
}
}
export function createCoverArtFetcher(
rateLimiter: AnilistRateLimiter,
logger: Logger,
options: CoverArtFetcherOptions = {},
): CoverArtFetcher {
const resolveCanonicalTitle = (
db: DatabaseSync,
videoId: number,
fallbackTitle: string,
): string => {
const row = db
.prepare(
`
SELECT canonical_title AS canonicalTitle
FROM imm_videos
WHERE video_id = ?
LIMIT 1
`,
)
.get(videoId) as { canonicalTitle: string | null } | undefined;
return row?.canonicalTitle?.trim() || fallbackTitle;
};
const resolveMediaInfo = async (
db: DatabaseSync,
videoId: number,
canonicalTitle: string,
): Promise<CoverArtCandidate | null> => {
const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
const parsed = await guessAnilistMediaInfo(null, effectiveTitle, {
runGuessit: options.runGuessit ?? runGuessit,
});
if (!parsed) {
return null;
}
return {
title: parsed.title,
season: parsed.season,
episode: parsed.episode,
source: parsed.source,
};
};
return {
async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
const existing = getCoverArt(db, videoId);
if (existing?.coverBlob) {
return true;
}
if (existing?.coverUrl) {
const coverBlob = await downloadImage(existing.coverUrl);
if (coverBlob) {
upsertCoverArt(db, videoId, {
anilistId: existing.anilistId,
coverUrl: existing.coverUrl,
coverBlob,
titleRomaji: existing.titleRomaji,
titleEnglish: existing.titleEnglish,
episodesTotal: existing.episodesTotal,
});
return true;
}
}
if (
existing &&
existing.coverUrl === null &&
existing.anilistId === null &&
Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
) {
return false;
}
const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
const cleaned = stripFilenameTags(effectiveTitle);
if (!cleaned) {
logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
upsertCoverArt(db, videoId, {
anilistId: null,
coverUrl: null,
coverBlob: null,
titleRomaji: null,
titleEnglish: null,
episodesTotal: null,
});
return false;
}
const parsedInfo = await resolveMediaInfo(db, videoId, canonicalTitle);
const searchBase = parsedInfo?.title ?? cleaned;
const searchCandidates = parsedInfo ? buildSearchCandidates(parsedInfo) : [cleaned];
const effectiveCandidates = searchCandidates.includes(cleaned)
? searchCandidates
: [...searchCandidates, cleaned];
let selected: AnilistMedia | null = null;
let rateLimited = false;
for (const candidate of effectiveCandidates) {
logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);
try {
const result = await searchAnilist(rateLimiter, candidate);
rateLimited = result.rateLimited;
if (result.media.length === 0) {
continue;
}
const picked = pickBestSearchResult(
searchBase,
parsedInfo?.episode ?? null,
parsedInfo?.season ?? null,
result.media,
);
if (picked) {
const match = result.media.find((media) => media.id === picked.id);
if (match) {
selected = match;
break;
}
}
} catch (err) {
logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
return false;
}
}
if (rateLimited) {
logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
return false;
}
if (!selected) {
logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
upsertCoverArt(db, videoId, {
anilistId: null,
coverUrl: null,
coverBlob: null,
titleRomaji: null,
titleEnglish: null,
episodesTotal: null,
});
return false;
}
const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
let coverBlob: Buffer | null = null;
if (coverUrl) {
coverBlob = await downloadImage(coverUrl);
}
upsertCoverArt(db, videoId, {
anilistId: selected.id,
coverUrl,
coverBlob,
titleRomaji: selected.title?.romaji ?? null,
titleEnglish: selected.title?.english ?? null,
episodesTotal: selected.episodes ?? null,
});
updateAnimeAnilistInfo(db, videoId, {
anilistId: selected.id,
titleRomaji: selected.title?.romaji ?? null,
titleEnglish: selected.title?.english ?? null,
titleNative: selected.title?.native ?? null,
episodesTotal: selected.episodes ?? null,
});
logger.info(
'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
videoId,
selected.id,
selected.title?.romaji ?? searchBase,
);
return true;
},
};
}

View File

@@ -0,0 +1,72 @@
const DEFAULT_MAX_PER_MINUTE = 20;
const WINDOW_MS = 60_000;
const SAFETY_REMAINING_THRESHOLD = 5;
export interface AnilistRateLimiter {
acquire(): Promise<void>;
recordResponse(headers: Headers): void;
}
export function createAnilistRateLimiter(
maxPerMinute = DEFAULT_MAX_PER_MINUTE,
): AnilistRateLimiter {
const timestamps: number[] = [];
let pauseUntilMs = 0;
function pruneOld(now: number): void {
const cutoff = now - WINDOW_MS;
while (timestamps.length > 0 && timestamps[0]! < cutoff) {
timestamps.shift();
}
}
return {
async acquire(): Promise<void> {
const now = Date.now();
if (now < pauseUntilMs) {
const waitMs = pauseUntilMs - now;
await new Promise((resolve) => setTimeout(resolve, waitMs));
}
pruneOld(Date.now());
if (timestamps.length >= maxPerMinute) {
const oldest = timestamps[0]!;
const waitMs = oldest + WINDOW_MS - Date.now() + 100;
if (waitMs > 0) {
await new Promise((resolve) => setTimeout(resolve, waitMs));
}
pruneOld(Date.now());
}
timestamps.push(Date.now());
},
recordResponse(headers: Headers): void {
const remaining = headers.get('x-ratelimit-remaining');
if (remaining !== null) {
const n = parseInt(remaining, 10);
if (Number.isFinite(n) && n < SAFETY_REMAINING_THRESHOLD) {
const reset = headers.get('x-ratelimit-reset');
if (reset) {
const resetMs = parseInt(reset, 10) * 1000;
if (Number.isFinite(resetMs)) {
pauseUntilMs = Math.max(pauseUntilMs, resetMs);
}
} else {
pauseUntilMs = Math.max(pauseUntilMs, Date.now() + WINDOW_MS);
}
}
}
const retryAfter = headers.get('retry-after');
if (retryAfter) {
const seconds = parseInt(retryAfter, 10);
if (Number.isFinite(seconds) && seconds > 0) {
pauseUntilMs = Math.max(pauseUntilMs, Date.now() + seconds * 1000);
}
}
},
};
}

View File

@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,

View File

@@ -176,6 +176,22 @@ test('runAppReadyRuntime skips heavy startup when shouldSkipHeavyStartup returns
assert.ok(calls.indexOf('handleFirstRunSetup') < calls.indexOf('handleInitialArgs'));
});
test('runAppReadyRuntime uses minimal startup for texthooker-only mode', async () => {
const { deps, calls } = makeDeps({
texthookerOnlyMode: true,
reloadConfig: () => calls.push('reloadConfig'),
handleInitialArgs: () => calls.push('handleInitialArgs'),
});
await runAppReadyRuntime(deps);
assert.deepEqual(calls, [
'ensureDefaultConfigBootstrap',
'reloadConfig',
'handleInitialArgs',
]);
});
test('runAppReadyRuntime skips Jellyfin remote startup when dependency is not wired', async () => {
const { deps, calls } = makeDeps({
startJellyfinRemoteSession: undefined,

View File

@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,
@@ -177,6 +178,9 @@ function createDeps(overrides: Partial<CliCommandServiceDeps> = {}) {
mediaTitle: 'Test',
entryCount: 10,
}),
runStatsCommand: async () => {
calls.push('runStatsCommand');
},
runJellyfinCommand: async () => {
calls.push('runJellyfinCommand');
},
@@ -249,6 +253,21 @@ test('handleCliCommand opens first-run setup window for --setup', () => {
assert.equal(calls.includes('openYomitanSettingsDelayed:1000'), false);
});
test('handleCliCommand dispatches stats command without overlay startup', async () => {
const { deps, calls } = createDeps({
runStatsCommand: async () => {
calls.push('runStatsCommand');
},
});
handleCliCommand(makeArgs({ stats: true }), 'initial', deps);
await Promise.resolve();
assert.ok(calls.includes('runStatsCommand'));
assert.equal(calls.includes('initializeOverlayRuntime'), false);
assert.equal(calls.includes('connectMpvClient'), false);
});
test('handleCliCommand applies cli log level for second-instance commands', () => {
const { deps, calls } = createDeps({
setLogLevel: (level) => {
@@ -520,8 +539,21 @@ test('handleCliCommand runs refresh-known-words command', () => {
assert.ok(calls.includes('refreshKnownWords'));
});
test('handleCliCommand stops app after headless initial refresh-known-words completes', async () => {
const { deps, calls } = createDeps({
hasMainWindow: () => false,
});
handleCliCommand(makeArgs({ refreshKnownWords: true }), 'initial', deps);
await new Promise((resolve) => setImmediate(resolve));
assert.ok(calls.includes('refreshKnownWords'));
assert.ok(calls.includes('stopApp'));
});
test('handleCliCommand reports async refresh-known-words errors to OSD', async () => {
const { deps, calls, osd } = createDeps({
hasMainWindow: () => false,
refreshKnownWords: async () => {
throw new Error('refresh boom');
},
@@ -532,4 +564,5 @@ test('handleCliCommand reports async refresh-known-words errors to OSD', async (
assert.ok(calls.some((value) => value.startsWith('error:refreshKnownWords failed:')));
assert.ok(osd.some((value) => value.includes('Refresh known words failed: refresh boom')));
assert.ok(calls.includes('stopApp'));
});

View File

@@ -61,6 +61,7 @@ export interface CliCommandServiceDeps {
mediaTitle: string;
entryCount: number;
}>;
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
runJellyfinCommand: (args: CliArgs) => Promise<void>;
printHelp: () => void;
hasMainWindow: () => boolean;
@@ -154,6 +155,7 @@ export interface CliCommandDepsRuntimeOptions {
};
jellyfin: {
openSetup: () => void;
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
runCommand: (args: CliArgs) => Promise<void>;
};
ui: UiCliRuntime;
@@ -222,6 +224,7 @@ export function createCliCommandDepsRuntime(
getAnilistQueueStatus: options.anilist.getQueueStatus,
retryAnilistQueue: options.anilist.retryQueueNow,
generateCharacterDictionary: options.dictionary.generate,
runStatsCommand: options.jellyfin.runStatsCommand,
runJellyfinCommand: options.jellyfin.runCommand,
printHelp: options.ui.printHelp,
hasMainWindow: options.app.hasMainWindow,
@@ -331,12 +334,18 @@ export function handleCliCommand(
'Update failed',
);
} else if (args.refreshKnownWords) {
runAsyncWithOsd(
() => deps.refreshKnownWords(),
deps,
'refreshKnownWords',
'Refresh known words failed',
);
const shouldStopAfterRun = source === 'initial' && !deps.hasMainWindow();
deps
.refreshKnownWords()
.catch((err) => {
deps.error('refreshKnownWords failed:', err);
deps.showMpvOsd(`Refresh known words failed: ${(err as Error).message}`);
})
.finally(() => {
if (shouldStopAfterRun) {
deps.stopApp();
}
});
} else if (args.toggleSecondarySub) {
deps.cycleSecondarySubMode();
} else if (args.triggerFieldGrouping) {
@@ -410,6 +419,8 @@ export function handleCliCommand(
deps.stopApp();
}
});
} else if (args.stats) {
void deps.runStatsCommand(args, source);
} else if (args.anilistRetryQueue) {
const queueStatus = deps.getAnilistQueueStatus();
deps.log(

View File

@@ -130,6 +130,56 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r
assert.equal(lookup('高み'), 9933);
});
test('createFrequencyDictionaryLookup uses leading display digits for displayValue strings', async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
fs.writeFileSync(
bankPath,
JSON.stringify([
['潜む', 1, { frequency: { value: 121, displayValue: '118,121' } }],
['例', 2, { frequency: { value: 1234, displayValue: '1,234' } }],
]),
);
const lookup = await createFrequencyDictionaryLookup({
searchPaths: [tempDir],
log: () => undefined,
});
assert.equal(lookup('潜む'), 118);
assert.equal(lookup('例'), 1);
});
test('createFrequencyDictionaryLookup ignores occurrence-based Yomitan dictionaries', async () => {
const logs: string[] = [];
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
fs.writeFileSync(
path.join(tempDir, 'index.json'),
JSON.stringify({
title: 'CC100',
revision: '1',
frequencyMode: 'occurrence-based',
}),
);
fs.writeFileSync(
path.join(tempDir, 'term_meta_bank_1.json'),
JSON.stringify([['潜む', 1, { frequency: { value: 118121 } }]]),
);
const lookup = await createFrequencyDictionaryLookup({
searchPaths: [tempDir],
log: (message) => {
logs.push(message);
},
});
assert.equal(lookup('潜む'), null);
assert.equal(
logs.some((entry) => entry.includes('occurrence-based') && entry.includes('CC100')),
true,
);
});
test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');

View File

@@ -6,6 +6,8 @@ export interface FrequencyDictionaryLookupOptions {
log: (message: string) => void;
}
type FrequencyDictionaryMode = 'occurrence-based' | 'rank-based';
interface FrequencyDictionaryEntry {
rank: number;
term: string;
@@ -29,30 +31,67 @@ function normalizeFrequencyTerm(value: string): string {
return value.trim().toLowerCase();
}
async function readDictionaryMetadata(
dictionaryPath: string,
log: (message: string) => void,
): Promise<{ title: string | null; frequencyMode: FrequencyDictionaryMode | null }> {
const indexPath = path.join(dictionaryPath, 'index.json');
let rawText: string;
try {
rawText = await fs.readFile(indexPath, 'utf-8');
} catch (error) {
if (isErrorCode(error, 'ENOENT')) {
return { title: null, frequencyMode: null };
}
log(`Failed to read frequency dictionary index ${indexPath}: ${String(error)}`);
return { title: null, frequencyMode: null };
}
let rawIndex: unknown;
try {
rawIndex = JSON.parse(rawText) as unknown;
} catch {
log(`Failed to parse frequency dictionary index as JSON: ${indexPath}`);
return { title: null, frequencyMode: null };
}
if (!rawIndex || typeof rawIndex !== 'object') {
return { title: null, frequencyMode: null };
}
const titleRaw = (rawIndex as { title?: unknown }).title;
const frequencyModeRaw = (rawIndex as { frequencyMode?: unknown }).frequencyMode;
return {
title: typeof titleRaw === 'string' && titleRaw.trim().length > 0 ? titleRaw.trim() : null,
frequencyMode:
frequencyModeRaw === 'occurrence-based' || frequencyModeRaw === 'rank-based'
? frequencyModeRaw
: null,
};
}
function parsePositiveFrequencyString(value: string): number | null {
const trimmed = value.trim();
if (!trimmed) {
return null;
}
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
if (!numericPrefix) {
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
if (!numericMatch) {
return null;
}
const chunks = numericPrefix.split(',');
const normalizedNumber =
chunks.length <= 1
? (chunks[0] ?? '')
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
? chunks.join('')
: (chunks[0] ?? '');
const parsed = Number.parseInt(normalizedNumber, 10);
const parsed = Number.parseFloat(numericMatch);
if (!Number.isFinite(parsed) || parsed <= 0) {
return null;
}
return parsed;
const normalized = Math.floor(parsed);
if (!Number.isFinite(normalized) || normalized <= 0) {
return null;
}
return normalized;
}
function parsePositiveFrequencyNumber(value: unknown): number | null {
@@ -68,18 +107,32 @@ function parsePositiveFrequencyNumber(value: unknown): number | null {
return null;
}
function parseDisplayFrequencyNumber(value: unknown): number | null {
if (typeof value === 'string') {
const leadingDigits = value.trim().match(/^\d+/)?.[0];
if (!leadingDigits) {
return null;
}
const parsed = Number.parseInt(leadingDigits, 10);
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
return parsePositiveFrequencyNumber(value);
}
function extractFrequencyDisplayValue(meta: unknown): number | null {
if (!meta || typeof meta !== 'object') return null;
const frequency = (meta as { frequency?: unknown }).frequency;
if (!frequency || typeof frequency !== 'object') return null;
const rawValue = (frequency as { value?: unknown }).value;
const parsedRawValue = parsePositiveFrequencyNumber(rawValue);
const displayValue = (frequency as { displayValue?: unknown }).displayValue;
const parsedDisplayValue = parsePositiveFrequencyNumber(displayValue);
const parsedDisplayValue = parseDisplayFrequencyNumber(displayValue);
if (parsedDisplayValue !== null) {
return parsedDisplayValue;
}
const rawValue = (frequency as { value?: unknown }).value;
return parsePositiveFrequencyNumber(rawValue);
return parsedRawValue;
}
function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null {
@@ -141,6 +194,15 @@ async function collectDictionaryFromPath(
log: (message: string) => void,
): Promise<Map<string, number>> {
const terms = new Map<string, number>();
const metadata = await readDictionaryMetadata(dictionaryPath, log);
if (metadata.frequencyMode === 'occurrence-based') {
log(
`Skipping occurrence-based frequency dictionary ${
metadata.title ?? dictionaryPath
}; SubMiner frequency tags require rank-based values.`,
);
return terms;
}
let fileNames: string[];
try {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,71 @@
import type { Token } from '../../../types';
import type { LegacyVocabularyPosResolution } from './types';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
function normalizeLookupText(value: string | null | undefined): string {
return typeof value === 'string' ? value.trim() : '';
}
function katakanaToHiragana(text: string): string {
let normalized = '';
for (const char of text) {
const code = char.codePointAt(0);
if (code === undefined) {
continue;
}
if (code >= KATAKANA_CODEPOINT_START && code <= KATAKANA_CODEPOINT_END) {
normalized += String.fromCodePoint(code - KATAKANA_TO_HIRAGANA_OFFSET);
continue;
}
normalized += char;
}
return normalized;
}
function toResolution(token: Token): LegacyVocabularyPosResolution {
return {
headword: normalizeLookupText(token.headword) || normalizeLookupText(token.word),
reading: katakanaToHiragana(normalizeLookupText(token.katakanaReading)),
partOfSpeech: deriveStoredPartOfSpeech({
partOfSpeech: token.partOfSpeech,
pos1: token.pos1,
}),
pos1: normalizeLookupText(token.pos1),
pos2: normalizeLookupText(token.pos2),
pos3: normalizeLookupText(token.pos3),
};
}
export function resolveLegacyVocabularyPosFromTokens(
lookupText: string,
tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
const normalizedLookup = normalizeLookupText(lookupText);
if (!normalizedLookup || !tokens || tokens.length === 0) {
return null;
}
const exactSurfaceMatches = tokens.filter(
(token) => normalizeLookupText(token.word) === normalizedLookup,
);
if (exactSurfaceMatches.length === 1) {
return toResolution(exactSurfaceMatches[0]!);
}
const exactHeadwordMatches = tokens.filter(
(token) => normalizeLookupText(token.headword) === normalizedLookup,
);
if (exactHeadwordMatches.length === 1) {
return toResolution(exactHeadwordMatches[0]!);
}
if (tokens.length === 1) {
return toResolution(tokens[0]!);
}
return null;
}

View File

@@ -0,0 +1,569 @@
import type { DatabaseSync } from './sqlite';
import { finalizeSessionRecord } from './session';
import type { LifetimeRebuildSummary, SessionState } from './types';
interface TelemetryRow {
active_watched_ms: number | null;
cards_mined: number | null;
lines_seen: number | null;
tokens_seen: number | null;
}
interface VideoRow {
anime_id: number | null;
watched: number;
}
interface AnimeRow {
episodes_total: number | null;
}
function asPositiveNumber(value: number | null, fallback: number): number {
if (value === null || !Number.isFinite(value)) {
return fallback;
}
return Math.max(0, Math.floor(value));
}
interface ExistenceRow {
count: number;
}
interface LifetimeMediaStateRow {
completed: number;
}
interface LifetimeAnimeStateRow {
episodes_completed: number;
}
interface RetainedSessionRow {
sessionId: number;
videoId: number;
startedAtMs: number;
endedAtMs: number;
lastMediaMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
seekBackwardCount: number;
mediaBufferEvents: number;
}
function hasRetainedPriorSession(
db: DatabaseSync,
videoId: number,
startedAtMs: number,
currentSessionId: number,
): boolean {
return (
Number(
(
db
.prepare(
`
SELECT COUNT(*) AS count
FROM imm_sessions
WHERE video_id = ?
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
)
.get(videoId, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
)?.count ?? 0,
) > 0
);
}
function isFirstSessionForLocalDay(
db: DatabaseSync,
currentSessionId: number,
startedAtMs: number,
): boolean {
return (
(
db
.prepare(
`
SELECT COUNT(*) AS count
FROM imm_sessions
WHERE CAST(strftime('%s', started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
= CAST(strftime('%s', ? / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
)
.get(startedAtMs, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
)?.count === 0
);
}
function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
db.exec(`
DELETE FROM imm_lifetime_anime;
DELETE FROM imm_lifetime_media;
DELETE FROM imm_lifetime_applied_sessions;
`);
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = 0,
total_active_ms = 0,
total_cards = 0,
active_days = 0,
episodes_started = 0,
episodes_completed = 0,
anime_completed = 0,
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(nowMs, nowMs);
}
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
return {
sessionId: row.sessionId,
videoId: row.videoId,
startedAtMs: row.startedAtMs,
currentLineIndex: 0,
lastWallClockMs: row.endedAtMs,
lastMediaMs: row.lastMediaMs,
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: false,
markedWatched: false,
totalWatchedMs: Math.max(0, row.totalWatchedMs),
activeWatchedMs: Math.max(0, row.activeWatchedMs),
linesSeen: Math.max(0, row.linesSeen),
tokensSeen: Math.max(0, row.tokensSeen),
cardsMined: Math.max(0, row.cardsMined),
lookupCount: Math.max(0, row.lookupCount),
lookupHits: Math.max(0, row.lookupHits),
yomitanLookupCount: Math.max(0, row.yomitanLookupCount),
pauseCount: Math.max(0, row.pauseCount),
pauseMs: Math.max(0, row.pauseMs),
seekForwardCount: Math.max(0, row.seekForwardCount),
seekBackwardCount: Math.max(0, row.seekBackwardCount),
mediaBufferEvents: Math.max(0, row.mediaBufferEvents),
};
}
function getRetainedStaleActiveSessions(db: DatabaseSync): RetainedSessionRow[] {
return db
.prepare(
`
SELECT
s.session_id AS sessionId,
s.video_id AS videoId,
s.started_at_ms AS startedAtMs,
COALESCE(t.sample_ms, s.LAST_UPDATE_DATE, s.started_at_ms) AS endedAtMs,
s.ended_media_ms AS lastMediaMs,
COALESCE(t.total_watched_ms, s.total_watched_ms, 0) AS totalWatchedMs,
COALESCE(t.active_watched_ms, s.active_watched_ms, 0) AS activeWatchedMs,
COALESCE(t.lines_seen, s.lines_seen, 0) AS linesSeen,
COALESCE(t.tokens_seen, s.tokens_seen, 0) AS tokensSeen,
COALESCE(t.cards_mined, s.cards_mined, 0) AS cardsMined,
COALESCE(t.lookup_count, s.lookup_count, 0) AS lookupCount,
COALESCE(t.lookup_hits, s.lookup_hits, 0) AS lookupHits,
COALESCE(t.yomitan_lookup_count, s.yomitan_lookup_count, 0) AS yomitanLookupCount,
COALESCE(t.pause_count, s.pause_count, 0) AS pauseCount,
COALESCE(t.pause_ms, s.pause_ms, 0) AS pauseMs,
COALESCE(t.seek_forward_count, s.seek_forward_count, 0) AS seekForwardCount,
COALESCE(t.seek_backward_count, s.seek_backward_count, 0) AS seekBackwardCount,
COALESCE(t.media_buffer_events, s.media_buffer_events, 0) AS mediaBufferEvents
FROM imm_sessions s
LEFT JOIN imm_session_telemetry t
ON t.telemetry_id = (
SELECT telemetry_id
FROM imm_session_telemetry
WHERE session_id = s.session_id
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
)
WHERE s.ended_at_ms IS NULL
ORDER BY s.started_at_ms ASC, s.session_id ASC
`,
)
.all() as RetainedSessionRow[];
}
function upsertLifetimeMedia(
db: DatabaseSync,
videoId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
completed: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_media(
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(video_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
completed = MAX(completed, excluded.completed),
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
).run(
videoId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
completed,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
function upsertLifetimeAnime(
db: DatabaseSync,
animeId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
episodesStartedDelta: number,
episodesCompletedDelta: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_anime(
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(anime_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
episodes_started = episodes_started + excluded.episodes_started,
episodes_completed = episodes_completed + excluded.episodes_completed,
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
).run(
animeId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
episodesStartedDelta,
episodesCompletedDelta,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
export function applySessionLifetimeSummary(
db: DatabaseSync,
session: SessionState,
endedAtMs: number,
): void {
const applyResult = db
.prepare(
`
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?
)
ON CONFLICT(session_id) DO NOTHING
`,
)
.run(session.sessionId, endedAtMs, Date.now(), Date.now());
if ((applyResult.changes ?? 0) <= 0) {
return;
}
const telemetry = db
.prepare(
`
SELECT
active_watched_ms,
cards_mined,
lines_seen,
tokens_seen
FROM imm_session_telemetry
WHERE session_id = ?
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
`,
)
.get(session.sessionId) as TelemetryRow | null;
const video = db
.prepare('SELECT anime_id, watched FROM imm_videos WHERE video_id = ?')
.get(session.videoId) as VideoRow | null;
const mediaLifetime =
(db
.prepare('SELECT completed FROM imm_lifetime_media WHERE video_id = ?')
.get(session.videoId) as LifetimeMediaStateRow | null | undefined) ?? null;
const animeLifetime = video?.anime_id
? ((db
.prepare('SELECT episodes_completed FROM imm_lifetime_anime WHERE anime_id = ?')
.get(video.anime_id) as LifetimeAnimeStateRow | null | undefined) ?? null)
: null;
const anime = video?.anime_id
? ((db
.prepare('SELECT episodes_total FROM imm_anime WHERE anime_id = ?')
.get(video.anime_id) as AnimeRow | null | undefined) ?? null)
: null;
const activeMs = telemetry
? asPositiveNumber(telemetry.active_watched_ms, session.activeWatchedMs)
: session.activeWatchedMs;
const cardsMined = telemetry
? asPositiveNumber(telemetry.cards_mined, session.cardsMined)
: session.cardsMined;
const linesSeen = telemetry
? asPositiveNumber(telemetry.lines_seen, session.linesSeen)
: session.linesSeen;
const tokensSeen = telemetry
? asPositiveNumber(telemetry.tokens_seen, session.tokensSeen)
: session.tokensSeen;
const watched = video?.watched ?? 0;
const isFirstSessionForVideoRun =
mediaLifetime === null &&
!hasRetainedPriorSession(db, session.videoId, session.startedAtMs, session.sessionId);
const isFirstCompletedSessionForVideoRun =
watched > 0 && Number(mediaLifetime?.completed ?? 0) <= 0;
const isFirstSessionForDay = isFirstSessionForLocalDay(
db,
session.sessionId,
session.startedAtMs,
);
const episodesCompletedBefore = Number(animeLifetime?.episodes_completed ?? 0);
const animeEpisodesTotal = anime?.episodes_total ?? null;
const animeCompletedDelta =
watched > 0 &&
isFirstCompletedSessionForVideoRun &&
animeEpisodesTotal !== null &&
animeEpisodesTotal > 0 &&
episodesCompletedBefore < animeEpisodesTotal &&
episodesCompletedBefore + 1 >= animeEpisodesTotal
? 1
: 0;
const nowMs = Date.now();
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + ?,
total_cards = total_cards + ?,
active_days = active_days + ?,
episodes_started = episodes_started + ?,
episodes_completed = episodes_completed + ?,
anime_completed = anime_completed + ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(
activeMs,
cardsMined,
isFirstSessionForDay ? 1 : 0,
isFirstSessionForVideoRun ? 1 : 0,
isFirstCompletedSessionForVideoRun ? 1 : 0,
animeCompletedDelta,
nowMs,
);
upsertLifetimeMedia(
db,
session.videoId,
nowMs,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
watched > 0 ? 1 : 0,
session.startedAtMs,
endedAtMs,
);
if (video?.anime_id) {
upsertLifetimeAnime(
db,
video.anime_id,
nowMs,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
isFirstSessionForVideoRun ? 1 : 0,
isFirstCompletedSessionForVideoRun ? 1 : 0,
session.startedAtMs,
endedAtMs,
);
}
}
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
const rebuiltAtMs = Date.now();
const sessions = db
.prepare(
`
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
)
.all() as RetainedSessionRow[];
db.exec('BEGIN');
try {
resetLifetimeSummaries(db, rebuiltAtMs);
for (const session of sessions) {
applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
}
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return {
appliedSessions: sessions.length,
rebuiltAtMs,
};
}
export function reconcileStaleActiveSessions(db: DatabaseSync): number {
const sessions = getRetainedStaleActiveSessions(db);
if (sessions.length === 0) {
return 0;
}
db.exec('BEGIN');
try {
for (const session of sessions) {
const state = toRebuildSessionState(session);
finalizeSessionRecord(db, state, session.endedAtMs);
applySessionLifetimeSummary(db, state, session.endedAtMs);
}
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return sessions.length;
}
export function shouldBackfillLifetimeSummaries(db: DatabaseSync): boolean {
const globalRow = db
.prepare('SELECT total_sessions AS totalSessions FROM imm_lifetime_global WHERE global_id = 1')
.get() as { totalSessions: number } | null;
const appliedRow = db
.prepare('SELECT COUNT(*) AS count FROM imm_lifetime_applied_sessions')
.get() as ExistenceRow | null;
const endedRow = db
.prepare('SELECT COUNT(*) AS count FROM imm_sessions WHERE ended_at_ms IS NOT NULL')
.get() as ExistenceRow | null;
const totalSessions = Number(globalRow?.totalSessions ?? 0);
const appliedSessions = Number(appliedRow?.count ?? 0);
const retainedEndedSessions = Number(endedRow?.count ?? 0);
return retainedEndedSessions > 0 && (appliedSessions === 0 || totalSessions === 0);
}

View File

@@ -0,0 +1,200 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { Database } from './sqlite';
import {
pruneRawRetention,
pruneRollupRetention,
runOptimizeMaintenance,
toMonthKey,
} from './maintenance';
import { ensureSchema } from './storage';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-maintenance-test-'));
return path.join(dir, 'tracker.db');
}
function cleanupDbPath(dbPath: string): void {
try {
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
} catch {
// best effort
}
}
test('pruneRawRetention uses session retention separately from telemetry retention', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const nowMs = 90 * 86_400_000;
const staleEndedAtMs = nowMs - 40 * 86_400_000;
const keptEndedAtMs = nowMs - 5 * 86_400_000;
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, 'session-1', 1, ${staleEndedAtMs - 1_000}, ${staleEndedAtMs}, 2, ${staleEndedAtMs}, ${staleEndedAtMs}),
(2, 'session-2', 1, ${keptEndedAtMs - 1_000}, ${keptEndedAtMs}, 2, ${keptEndedAtMs}, ${keptEndedAtMs});
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, ${nowMs - 2 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}),
(2, ${nowMs - 12 * 60 * 60 * 1000}, 0, 0, ${nowMs}, ${nowMs});
`);
const result = pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 1 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const remainingSessions = db
.prepare('SELECT session_id FROM imm_sessions ORDER BY session_id')
.all() as Array<{ session_id: number }>;
const remainingTelemetry = db
.prepare('SELECT session_id FROM imm_session_telemetry ORDER BY session_id')
.all() as Array<{ session_id: number }>;
assert.equal(result.deletedTelemetryRows, 1);
assert.equal(result.deletedEndedSessions, 1);
assert.deepEqual(
remainingSessions.map((row) => row.session_id),
[2],
);
assert.deepEqual(
remainingTelemetry.map((row) => row.session_id),
[2],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('raw retention keeps rollups and rollup retention prunes them separately', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const nowMs = Date.UTC(2026, 2, 16, 12, 0, 0, 0);
const oldDay = Math.floor((nowMs - 90 * 86_400_000) / 86_400_000);
const oldMonth = toMonthKey(nowMs - 400 * 86_400_000);
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'session-1', 1, ${nowMs - 90 * 86_400_000}, ${nowMs - 90 * 86_400_000 + 1_000}, 2, ${nowMs}, ${nowMs}
);
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, ${nowMs - 90 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards
) VALUES (
${oldDay}, 1, 1, 10, 1, 1, 1
);
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
${oldMonth}, 1, 1, 10, 1, 1, 1, ${nowMs}, ${nowMs}
);
`);
pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 30 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const rollupsAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupsAfterRawPrune?.total, 1);
assert.equal(monthlyAfterRawPrune?.total, 1);
const rollupPrune = pruneRollupRetention(db, nowMs, {
dailyRollupRetentionMs: 30 * 86_400_000,
monthlyRollupRetentionMs: 365 * 86_400_000,
});
const rollupsAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupPrune.deletedDailyRows, 1);
assert.equal(rollupPrune.deletedMonthlyRows, 1);
assert.equal(rollupsAfterRollupPrune?.total, 0);
assert.equal(monthlyAfterRollupPrune?.total, 0);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema adds sample_ms index for telemetry rollup scans', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const indexes = db.prepare("PRAGMA index_list('imm_session_telemetry')").all() as Array<{
name: string;
}>;
const hasSampleMsIndex = indexes.some((row) => row.name === 'idx_telemetry_sample_ms');
assert.equal(hasSampleMsIndex, true);
const indexColumns = db.prepare("PRAGMA index_info('idx_telemetry_sample_ms')").all() as Array<{
name: string;
}>;
assert.deepEqual(
indexColumns.map((column) => column.name),
['sample_ms'],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('runOptimizeMaintenance executes PRAGMA optimize', () => {
const executedSql: string[] = [];
const db = {
exec(source: string) {
executedSql.push(source);
return this;
},
} as unknown as Parameters<typeof runOptimizeMaintenance>[0];
runOptimizeMaintenance(db);
assert.deepEqual(executedSql, ['PRAGMA optimize']);
});

View File

@@ -18,11 +18,9 @@ interface RollupTelemetryResult {
maxSampleMs: number | null;
}
interface RetentionResult {
interface RawRetentionResult {
deletedSessionEvents: number;
deletedTelemetryRows: number;
deletedDailyRows: number;
deletedMonthlyRows: number;
deletedEndedSessions: number;
}
@@ -31,20 +29,18 @@ export function toMonthKey(timestampMs: number): number {
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
}
export function pruneRetention(
export function pruneRawRetention(
db: DatabaseSync,
nowMs: number,
policy: {
eventsRetentionMs: number;
telemetryRetentionMs: number;
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
sessionsRetentionMs: number;
},
): RetentionResult {
): RawRetentionResult {
const eventCutoff = nowMs - policy.eventsRetentionMs;
const telemetryCutoff = nowMs - policy.telemetryRetentionMs;
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
const sessionsCutoff = nowMs - policy.sessionsRetentionMs;
const deletedSessionEvents = (
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
@@ -56,28 +52,49 @@ export function pruneRetention(
changes: number;
}
).changes;
const deletedDailyRows = (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
).changes;
const deletedMonthlyRows = (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(monthCutoff)) as { changes: number }
).changes;
const deletedEndedSessions = (
db
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
.run(telemetryCutoff) as { changes: number }
.run(sessionsCutoff) as { changes: number }
).changes;
return {
deletedSessionEvents,
deletedTelemetryRows,
deletedEndedSessions,
};
}
export function pruneRollupRetention(
db: DatabaseSync,
nowMs: number,
policy: {
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
},
): { deletedDailyRows: number; deletedMonthlyRows: number } {
const deletedDailyRows = Number.isFinite(policy.dailyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor((nowMs - policy.dailyRollupRetentionMs) / DAILY_MS)) as {
changes: number;
}
).changes
: 0;
const deletedMonthlyRows = Number.isFinite(policy.monthlyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(nowMs - policy.monthlyRollupRetentionMs)) as {
changes: number;
}
).changes
: 0;
return {
deletedDailyRows,
deletedMonthlyRows,
deletedEndedSessions,
};
}
@@ -108,49 +125,57 @@ function upsertDailyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, cards_per_hour,
tokens_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN COALESCE(SUM(sm.max_tokens), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
END AS tokens_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
ELSE NULL
END AS lookup_hit_rate,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
cards_per_hour = excluded.cards_per_hour,
words_per_min = excluded.words_per_min,
tokens_per_min = excluded.tokens_per_min,
lookup_hit_rate = excluded.lookup_hit_rate,
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
@@ -173,29 +198,35 @@ function upsertMonthlyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
@@ -216,8 +247,8 @@ function getAffectedRollupGroups(
.prepare(
`
SELECT DISTINCT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id
FROM imm_session_telemetry t
JOIN imm_sessions s
@@ -292,3 +323,7 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
throw error;
}
}
export function runOptimizeMaintenance(db: DatabaseSync): void {
db.exec('PRAGMA optimize');
}

View File

@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
import test from 'node:test';
import type { spawn as spawnFn } from 'node:child_process';
import { SOURCE_TYPE_LOCAL } from './types';
import { getLocalVideoMetadata, runFfprobe } from './metadata';
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
type Spawn = typeof spawnFn;
@@ -146,3 +146,83 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
assert.equal(hashFallbackMetadata.hashSha256, null);
});
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
const seenTargets: string[] = [];
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
runGuessit: async (target) => {
seenTargets.push(target);
return JSON.stringify({
title: 'Little Witch Academia',
season: 2,
episode: 5,
});
},
},
);
assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: 'Little Witch Academia S02E05.mkv',
source: 'guessit',
}),
});
});
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
confidence: 'high',
filename: 'Little Witch Academia S02E05.mkv',
rawTitle: 'Little Witch Academia S02E05',
source: 'fallback',
}),
});
});
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
const parsed = await guessAnimeVideoMetadata('/tmp/[SubsPlease] Frieren - 03 (1080p).mkv', null, {
runGuessit: async () => JSON.stringify({ episode: 3 }),
});
assert.deepEqual(parsed, {
parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
parsedTitle: 'Frieren - 03 (1080p)',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: JSON.stringify({
confidence: 'low',
filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
rawTitle: 'Frieren - 03 (1080p)',
source: 'fallback',
}),
});
});

View File

@@ -1,6 +1,13 @@
import crypto from 'node:crypto';
import { spawn as nodeSpawn } from 'node:child_process';
import * as fs from 'node:fs';
import path from 'node:path';
import { parseMediaInfo } from '../../../jimaku/utils';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from '../anilist/anilist-updater';
import {
deriveCanonicalTitle,
emptyMetadata,
@@ -8,7 +15,12 @@ import {
parseFps,
toNullableInt,
} from './reducer';
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
import {
SOURCE_TYPE_LOCAL,
type ParsedAnimeVideoGuess,
type ProbeMetadata,
type VideoMetadata,
} from './types';
type SpawnFn = typeof nodeSpawn;
@@ -24,6 +36,21 @@ interface MetadataDeps {
fs?: FsDeps;
}
interface GuessAnimeVideoMetadataDeps {
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
switch (confidence) {
case 'high':
return 1;
case 'medium':
return 0.6;
default:
return 0.2;
}
}
export async function computeSha256(
mediaPath: string,
deps: MetadataDeps = {},
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
metadataJson: null,
};
}
export async function guessAnimeVideoMetadata(
mediaPath: string | null,
mediaTitle: string | null,
deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
const parsed = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
runGuessit: deps.runGuessit ?? runGuessit,
});
if (!parsed) {
return null;
}
const parsedBasename = mediaPath ? path.basename(mediaPath) : null;
if (parsed.source === 'guessit') {
return {
parsedBasename,
parsedTitle: parsed.title,
parsedSeason: parsed.season,
parsedEpisode: parsed.episode,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: parsedBasename,
source: 'guessit',
}),
};
}
const fallbackInfo = parseMediaInfo(mediaPath ?? mediaTitle);
return {
parsedBasename: parsedBasename ?? fallbackInfo.filename ?? null,
parsedTitle: parsed.title,
parsedSeason: parsed.season,
parsedEpisode: parsed.episode,
parserSource: 'fallback',
parserConfidence: mapParserConfidenceToScore(fallbackInfo.confidence),
parseMetadataJson: JSON.stringify({
confidence: fallbackInfo.confidence,
filename: fallbackInfo.filename,
rawTitle: fallbackInfo.rawTitle,
source: 'fallback',
}),
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -15,11 +15,11 @@ export function createInitialSessionState(
totalWatchedMs: 0,
activeWatchedMs: 0,
linesSeen: 0,
wordsSeen: 0,
tokensSeen: 0,
cardsMined: 0,
lookupCount: 0,
lookupHits: 0,
yomitanLookupCount: 0,
pauseCount: 0,
pauseMs: 0,
seekForwardCount: 0,
@@ -30,6 +30,7 @@ export function createInitialSessionState(
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: true,
markedWatched: false,
};
}
@@ -50,16 +51,6 @@ export function sanitizePayload(payload: Record<string, unknown>, maxPayloadByte
return json.length <= maxPayloadBytes ? json : JSON.stringify({ truncated: true });
}
export function calculateTextMetrics(value: string): {
words: number;
tokens: number;
} {
const words = value.split(/\s+/).filter(Boolean).length;
const cjkCount = value.match(/[\u3040-\u30ff\u4e00-\u9fff]/g)?.length ?? 0;
const tokens = Math.max(words, cjkCount);
return { words, tokens };
}
export function secToMs(seconds: number): number {
const coerced = Number(seconds);
if (!Number.isFinite(coerced)) return 0;

View File

@@ -39,8 +39,41 @@ export function finalizeSessionRecord(
SET
ended_at_ms = ?,
status = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
tokens_seen = ?,
cards_mined = ?,
lookup_count = ?,
lookup_hits = ?,
yomitan_lookup_count = ?,
pause_count = ?,
pause_ms = ?,
seek_forward_count = ?,
seek_backward_count = ?,
media_buffer_events = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
).run(
endedAtMs,
SESSION_STATUS_ENDED,
sessionState.lastMediaMs,
sessionState.totalWatchedMs,
sessionState.activeWatchedMs,
sessionState.linesSeen,
sessionState.tokensSeen,
sessionState.cardsMined,
sessionState.lookupCount,
sessionState.lookupHits,
sessionState.yomitanLookupCount,
sessionState.pauseCount,
sessionState.pauseMs,
sessionState.seekForwardCount,
sessionState.seekBackwardCount,
sessionState.mediaBufferEvents,
Date.now(),
sessionState.sessionId,
);
}

View File

@@ -6,10 +6,15 @@ import test from 'node:test';
import { Database } from './sqlite';
import { finalizeSessionRecord, startSessionRecord } from './session';
import {
applyPragmas,
createTrackerPreparedStatements,
ensureSchema,
executeQueuedWrite,
normalizeCoverBlobBytes,
parseCoverBlobReference,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
@@ -46,6 +51,34 @@ function cleanupDbPath(dbPath: string): void {
// libsql keeps Windows file handles alive after close when prepared statements were used.
}
test('applyPragmas sets the SQLite tuning defaults used by immersion tracking', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
applyPragmas(db);
const journalModeRow = db.prepare('PRAGMA journal_mode').get() as {
journal_mode: string;
};
const synchronousRow = db.prepare('PRAGMA synchronous').get() as { synchronous: number };
const foreignKeysRow = db.prepare('PRAGMA foreign_keys').get() as { foreign_keys: number };
const busyTimeoutRow = db.prepare('PRAGMA busy_timeout').get() as { timeout: number };
const journalSizeLimitRow = db.prepare('PRAGMA journal_size_limit').get() as {
journal_size_limit: number;
};
assert.equal(journalModeRow.journal_mode, 'wal');
assert.equal(synchronousRow.synchronous, 1);
assert.equal(foreignKeysRow.foreign_keys, 1);
assert.equal(busyTimeoutRow.timeout, 2500);
assert.equal(journalSizeLimitRow.journal_size_limit, 67_108_864);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema creates immersion core tables', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -60,6 +93,7 @@ test('ensureSchema creates immersion core tables', () => {
const tableNames = new Set(rows.map((row) => row.name));
assert.ok(tableNames.has('imm_videos'));
assert.ok(tableNames.has('imm_anime'));
assert.ok(tableNames.has('imm_sessions'));
assert.ok(tableNames.has('imm_session_telemetry'));
assert.ok(tableNames.has('imm_session_events'));
@@ -67,7 +101,37 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_monthly_rollups'));
assert.ok(tableNames.has('imm_words'));
assert.ok(tableNames.has('imm_kanji'));
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
assert.ok(tableNames.has('imm_cover_art_blobs'));
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const mediaArtColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_media_art)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(mediaArtColumns.has('cover_blob_hash'));
const rollupStateRow = db
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
@@ -82,6 +146,566 @@ test('ensureSchema creates immersion core tables', () => {
}
});
test('ensureSchema creates large-history performance indexes', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const indexNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'index' AND name LIKE 'idx_%'`)
.all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(indexNames.has('idx_telemetry_sample_ms'));
assert.ok(indexNames.has('idx_sessions_started_at'));
assert.ok(indexNames.has('idx_sessions_ended_at'));
assert.ok(indexNames.has('idx_words_frequency'));
assert.ok(indexNames.has('idx_kanji_frequency'));
assert.ok(indexNames.has('idx_media_art_anilist_id'));
assert.ok(indexNames.has('idx_media_art_cover_url'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
const insertLegacyVideo = db.prepare(`
INSERT INTO imm_videos (
video_key, canonical_title, source_type, source_path, source_url,
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E05.mkv',
'Episode 5',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E05.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E06.mkv',
'Episode 6',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E06.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
insertLegacyVideo.run(
'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
'Episode 3',
SOURCE_TYPE_LOCAL,
'/library/[SubsPlease] Frieren - 03 - Departure.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
ensureSchema(db);
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const animeRows = db
.prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
.all() as Array<{ canonical_title: string }>;
assert.deepEqual(
animeRows.map((row) => row.canonical_title),
['Frieren', 'Little Witch Academia'],
);
const littleWitchRows = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_basename,
v.parsed_season,
v.parsed_episode,
v.parser_source,
v.parser_confidence
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
ORDER BY v.video_key
`,
)
.all() as Array<{
anime_title: string;
parsed_title: string | null;
parsed_basename: string | null;
parsed_season: number | null;
parsed_episode: number | null;
parser_source: string | null;
parser_confidence: number | null;
}>;
assert.equal(littleWitchRows.length, 2);
assert.deepEqual(
littleWitchRows.map((row) => ({
animeTitle: row.anime_title,
parsedTitle: row.parsed_title,
parsedBasename: row.parsed_basename,
parsedSeason: row.parsed_season,
parsedEpisode: row.parsed_episode,
parserSource: row.parser_source,
})),
[
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
},
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
},
],
);
assert.ok(
littleWitchRows.every(
(row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
),
);
const frierenRow = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_episode,
v.parser_source
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key = ?
`,
)
.get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
anime_title: string;
parsed_title: string | null;
parsed_episode: number | null;
parser_source: string | null;
} | null;
assert.ok(frierenRow);
assert.equal(frierenRow?.anime_title, 'Frieren');
assert.equal(frierenRow?.parsed_title, 'Frieren');
assert.equal(frierenRow?.parsed_episode, 3);
assert.equal(frierenRow?.parser_source, 'fallback');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_sessions(
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_uuid TEXT NOT NULL UNIQUE,
video_id INTEGER NOT NULL,
started_at_ms INTEGER NOT NULL,
ended_at_ms INTEGER,
status INTEGER NOT NULL,
locale_id INTEGER,
target_lang_id INTEGER,
difficulty_tier INTEGER,
subtitle_mode INTEGER,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_session_events(
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
ts_ms INTEGER NOT NULL,
event_type INTEGER NOT NULL,
line_index INTEGER,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
words_delta INTEGER NOT NULL DEFAULT 0,
cards_delta INTEGER NOT NULL DEFAULT 0,
payload_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_words(
id INTEGER PRIMARY KEY AUTOINCREMENT,
headword TEXT,
word TEXT,
reading TEXT,
part_of_speech TEXT,
pos1 TEXT,
pos2 TEXT,
pos3 TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(headword, word, reading)
);
CREATE TABLE imm_kanji(
id INTEGER PRIMARY KEY AUTOINCREMENT,
kanji TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(kanji)
);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
`);
ensureSchema(db);
const tableNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`)
.all() as Array<{ name: string }>
).map((row) => row.name),
);
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema migrates legacy cover art blobs into the shared blob store', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
db.prepare('UPDATE imm_schema_version SET schema_version = 12').run();
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/legacy-cover-art.mkv', {
canonicalTitle: 'Legacy Cover Art',
sourcePath: '/tmp/legacy-cover-art.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const legacyBlob = Uint8Array.from([0xde, 0xad, 0xbe, 0xef]);
db.prepare(
`
INSERT INTO imm_media_art (
video_id,
anilist_id,
cover_url,
cover_blob,
cover_blob_hash,
title_romaji,
title_english,
episodes_total,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(videoId, null, null, legacyBlob, null, null, null, null, 1, 1, 1);
assert.doesNotThrow(() => ensureSchema(db));
const mediaArtRow = db
.prepare(
'SELECT cover_blob AS coverBlob, cover_blob_hash AS coverBlobHash FROM imm_media_art',
)
.get() as {
coverBlob: ArrayBuffer | Uint8Array | Buffer | null;
coverBlobHash: string | null;
} | null;
assert.ok(mediaArtRow);
assert.ok(mediaArtRow?.coverBlobHash);
assert.equal(
parseCoverBlobReference(normalizeCoverBlobBytes(mediaArtRow?.coverBlob)),
mediaArtRow?.coverBlobHash,
);
const sharedBlobRow = db
.prepare('SELECT cover_blob AS coverBlob FROM imm_cover_art_blobs WHERE blob_hash = ?')
.get(mediaArtRow?.coverBlobHash) as {
coverBlob: ArrayBuffer | Uint8Array | Buffer;
} | null;
assert.ok(sharedBlobRow);
assert.equal(normalizeCoverBlobBytes(sharedBlobRow?.coverBlob)?.toString('hex'), 'deadbeef');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
canonicalTitle: 'Episode 5',
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
canonicalTitle: 'Episode 6',
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const provisionalAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, firstVideoId, {
animeId: provisionalAnimeId,
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":5}',
});
const reusedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: ' little witch academia ',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, secondVideoId, {
animeId: reusedAnimeId,
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":6}',
});
assert.equal(reusedAnimeId, provisionalAnimeId);
const upgradedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia TV',
anilistId: 33_435,
titleRomaji: 'Little Witch Academia',
titleEnglish: 'Little Witch Academia',
titleNative: 'リトルウィッチアカデミア',
metadataJson: '{"source":"anilist"}',
});
assert.equal(upgradedAnimeId, provisionalAnimeId);
const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
anime_id: number;
normalized_title_key: string;
canonical_title: string;
anilist_id: number | null;
title_romaji: string | null;
title_english: string | null;
title_native: string | null;
metadata_json: string | null;
}>;
assert.equal(animeRows.length, 1);
assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
assert.equal(animeRows[0]?.anilist_id, 33_435);
assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
const linkedVideos = db
.prepare(
`
SELECT anime_id, parsed_title, parsed_season, parsed_episode
FROM imm_videos
WHERE video_id IN (?, ?)
ORDER BY video_id
`,
)
.all(firstVideoId, secondVideoId) as Array<{
anime_id: number | null;
parsed_title: string | null;
parsed_season: number | null;
parsed_episode: number | null;
}>;
assert.deepEqual(linkedVideos, [
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 5,
},
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 6,
},
]);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -116,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
}
});
test('finalize session persists ended media position', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
canonicalTitle: 'Slice A Ended Media',
sourcePath: '/tmp/slice-a-ended-media.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 1_234_567_000;
const endedAtMs = startedAtMs + 8_500;
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
state.lastMediaMs = 91_000;
finalizeSessionRecord(db, state, endedAtMs);
const row = db
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
.get(sessionId) as {
ended_media_ms: number | null;
} | null;
assert.ok(row);
assert.equal(row?.ended_media_ms, 91_000);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('executeQueuedWrite inserts event and telemetry rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -139,11 +796,11 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
totalWatchedMs: 1_000,
activeWatchedMs: 900,
linesSeen: 3,
wordsSeen: 6,
tokensSeen: 6,
cardsMined: 1,
lookupCount: 2,
lookupHits: 1,
yomitanLookupCount: 0,
pauseCount: 1,
pauseMs: 50,
seekForwardCount: 0,
@@ -161,7 +818,7 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
lineIndex: 1,
segmentStartMs: 0,
segmentEndMs: 800,
wordsDelta: 2,
tokensDelta: 2,
cardsDelta: 0,
payloadJson: '{"event":"subtitle-line"}',
},
@@ -191,18 +848,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
const wordRow = db
.prepare(
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
`SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
FROM imm_words WHERE headword = ?`,
)
.get('猫') as {
headword: string;
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
first_seen: number;
last_seen: number;
} | null;
@@ -218,6 +879,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
assert.ok(wordRow);
assert.ok(kanjiRow);
assert.equal(wordRow?.frequency, 2);
assert.equal(wordRow?.part_of_speech, 'noun');
assert.equal(wordRow?.pos1, '名詞');
assert.equal(wordRow?.pos2, '一般');
assert.equal(kanjiRow?.frequency, 2);
assert.equal(wordRow?.first_seen, 5);
assert.equal(wordRow?.last_seen, 15);
@@ -228,3 +892,54 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
cleanupDbPath(dbPath);
}
});
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'other',
'動詞',
'自立',
'',
10,
10,
);
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'verb',
'動詞',
'自立',
'',
11,
12,
);
const row = db
.prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
.get('知っている') as {
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
} | null;
assert.ok(row);
assert.equal(row?.frequency, 2);
assert.equal(row?.part_of_speech, 'verb');
assert.equal(row?.pos1, '動詞');
assert.equal(row?.pos2, '自立');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 3;
export const SCHEMA_VERSION = 15;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -7,6 +7,7 @@ const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
export const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
export const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
export const DEFAULT_TELEMETRY_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_SESSIONS_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MAX_PAYLOAD_BYTES = 256;
@@ -25,10 +26,14 @@ export const EVENT_SEEK_FORWARD = 5;
export const EVENT_SEEK_BACKWARD = 6;
export const EVENT_PAUSE_START = 7;
export const EVENT_PAUSE_END = 8;
export const EVENT_YOMITAN_LOOKUP = 9;
export interface ImmersionTrackerOptions {
dbPath: string;
policy?: ImmersionTrackerPolicy;
resolveLegacyVocabularyPos?: (
row: LegacyVocabularyPosRow,
) => Promise<LegacyVocabularyPosResolution | null>;
}
export interface ImmersionTrackerPolicy {
@@ -40,6 +45,7 @@ export interface ImmersionTrackerPolicy {
retention?: {
eventsDays?: number;
telemetryDays?: number;
sessionsDays?: number;
dailyRollupsDays?: number;
monthlyRollupsDays?: number;
vacuumIntervalDays?: number;
@@ -50,11 +56,11 @@ export interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
@@ -72,20 +78,22 @@ export interface SessionState extends TelemetryAccumulator {
lastPauseStartMs: number | null;
isPaused: boolean;
pendingTelemetry: boolean;
markedWatched: boolean;
}
interface QueuedTelemetryWrite {
kind: 'telemetry';
sessionId: number;
sampleMs?: number;
lastMediaMs?: number | null;
totalWatchedMs?: number;
activeWatchedMs?: number;
linesSeen?: number;
wordsSeen?: number;
tokensSeen?: number;
cardsMined?: number;
lookupCount?: number;
lookupHits?: number;
yomitanLookupCount?: number;
pauseCount?: number;
pauseMs?: number;
seekForwardCount?: number;
@@ -95,7 +103,7 @@ interface QueuedTelemetryWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -108,7 +116,7 @@ interface QueuedEventWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -118,8 +126,13 @@ interface QueuedWordWrite {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
firstSeen: number;
lastSeen: number;
frequencyRank: number | null;
}
interface QueuedKanjiWrite {
@@ -129,11 +142,44 @@ interface QueuedKanjiWrite {
lastSeen: number;
}
export interface CountedWordOccurrence {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
occurrenceCount: number;
frequencyRank: number | null;
}
export interface CountedKanjiOccurrence {
kanji: string;
occurrenceCount: number;
}
interface QueuedSubtitleLineWrite {
kind: 'subtitleLine';
sessionId: number;
videoId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
secondaryText?: string | null;
wordOccurrences: CountedWordOccurrence[];
kanjiOccurrences: CountedKanjiOccurrence[];
firstSeen: number;
lastSeen: number;
}
export type QueuedWrite =
| QueuedTelemetryWrite
| QueuedEventWrite
| QueuedWordWrite
| QueuedKanjiWrite;
| QueuedKanjiWrite
| QueuedSubtitleLineWrite;
export interface VideoMetadata {
sourceType: number;
@@ -152,18 +198,173 @@ export interface VideoMetadata {
metadataJson: string | null;
}
export interface ParsedAnimeVideoMetadata {
animeId: number | null;
parsedBasename: string | null;
parsedTitle: string | null;
parsedSeason: number | null;
parsedEpisode: number | null;
parserSource: string | null;
parserConfidence: number | null;
parseMetadataJson: string | null;
}
export interface ParsedAnimeVideoGuess {
parsedBasename: string | null;
parsedTitle: string;
parsedSeason: number | null;
parsedEpisode: number | null;
parserSource: 'guessit' | 'fallback';
parserConfidence: number;
parseMetadataJson: string;
}
export interface SessionSummaryQueryRow {
sessionId: number;
videoId: number | null;
canonicalTitle: string | null;
animeId: number | null;
animeTitle: string | null;
startedAtMs: number;
endedAtMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
knownWordsSeen?: number;
knownWordRate?: number;
}
export interface LifetimeGlobalRow {
totalSessions: number;
totalActiveMs: number;
totalCards: number;
activeDays: number;
episodesStarted: number;
episodesCompleted: number;
animeCompleted: number;
lastRebuiltMs: number | null;
}
export interface LifetimeAnimeRow {
animeId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
episodesStarted: number;
episodesCompleted: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
export interface LifetimeMediaRow {
videoId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
completed: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
export interface AppliedSessionRow {
sessionId: number;
appliedAtMs: number;
}
export interface LifetimeRebuildSummary {
appliedSessions: number;
rebuiltAtMs: number;
}
export interface VocabularyStatsRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
frequencyRank: number | null;
animeCount: number;
firstSeen: number;
lastSeen: number;
}
export interface VocabularyCleanupSummary {
scanned: number;
kept: number;
deleted: number;
repaired: number;
}
export interface LegacyVocabularyPosRow {
headword: string;
word: string;
reading: string | null;
}
export interface LegacyVocabularyPosResolution {
headword: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
}
export interface KanjiStatsRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface WordOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
export interface KanjiOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
export interface SessionEventRow {
eventType: number;
tsMs: number;
payload: string | null;
}
export interface SessionTimelineRow {
@@ -171,7 +372,6 @@ export interface SessionTimelineRow {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
}
@@ -182,11 +382,10 @@ export interface ImmersionSessionRollupRow {
totalSessions: number;
totalActiveMin: number;
totalLinesSeen: number;
totalWordsSeen: number;
totalTokensSeen: number;
totalCards: number;
cardsPerHour: number | null;
wordsPerMin: number | null;
tokensPerMin: number | null;
lookupHitRate: number | null;
}
@@ -200,3 +399,186 @@ export interface ProbeMetadata {
bitrateKbps: number | null;
audioCodecId: number | null;
}
export interface MediaArtRow {
videoId: number;
anilistId: number | null;
coverUrl: string | null;
coverBlob: Buffer | null;
titleRomaji: string | null;
titleEnglish: string | null;
episodesTotal: number | null;
fetchedAtMs: number;
}
export interface MediaLibraryRow {
videoId: number;
canonicalTitle: string;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
lastWatchedMs: number;
hasCoverArt: number;
}
export interface MediaDetailRow {
videoId: number;
canonicalTitle: string;
animeId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
}
export interface AnimeLibraryRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
episodeCount: number;
episodesTotal: number | null;
lastWatchedMs: number;
}
export interface AnimeDetailRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
titleRomaji: string | null;
titleEnglish: string | null;
titleNative: string | null;
description: string | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
episodeCount: number;
lastWatchedMs: number;
}
export interface AnimeAnilistEntryRow {
anilistId: number;
titleRomaji: string | null;
titleEnglish: string | null;
season: number | null;
}
export interface AnimeEpisodeRow {
animeId: number;
videoId: number;
canonicalTitle: string;
parsedTitle: string | null;
season: number | null;
episode: number | null;
durationMs: number;
endedMediaMs: number | null;
watched: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalYomitanLookupCount: number;
lastWatchedMs: number;
}
export interface StreakCalendarRow {
epochDay: number;
totalActiveMin: number;
}
export interface AnimeWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
frequency: number;
}
export interface EpisodesPerDayRow {
epochDay: number;
episodeCount: number;
}
export interface NewAnimePerDayRow {
epochDay: number;
newAnimeCount: number;
}
export interface WatchTimePerAnimeRow {
epochDay: number;
animeId: number;
animeTitle: string;
totalActiveMin: number;
}
export interface WordDetailRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface WordAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
export interface SimilarWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
export interface KanjiDetailRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface KanjiAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
export interface KanjiWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
export interface EpisodeCardEventRow {
eventId: number;
sessionId: number;
tsMs: number;
cardsDelta: number;
noteIds: number[];
}

View File

@@ -29,7 +29,10 @@ export {
} from './startup';
export { openYomitanSettingsWindow } from './yomitan-settings';
export { createTokenizerDepsRuntime, tokenizeSubtitle } from './tokenizer';
export { clearYomitanParserCachesForWindow } from './tokenizer/yomitan-parser-runtime';
export {
addYomitanNoteViaSearch,
clearYomitanParserCachesForWindow,
} from './tokenizer/yomitan-parser-runtime';
export {
deleteYomitanDictionaryByTitle,
getYomitanDictionaryInfo,

View File

@@ -1,7 +1,7 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { createIpcDepsRuntime, registerIpcHandlers } from './ipc';
import { createIpcDepsRuntime, registerIpcHandlers, type IpcServiceDeps } from './ipc';
import { IPC_CHANNELS } from '../../shared/ipc/contracts';
interface FakeIpcRegistrar {
@@ -77,6 +77,90 @@ function createControllerConfigFixture() {
};
}
function createRegisterIpcDeps(overrides: Partial<IpcServiceDeps> = {}): IpcServiceDeps {
return {
onOverlayModalClosed: () => {},
openYomitanSettings: () => {},
quitApp: () => {},
toggleDevTools: () => {},
getVisibleOverlayVisibility: () => false,
toggleVisibleOverlay: () => {},
tokenizeCurrentSubtitle: async () => null,
getCurrentSubtitleRaw: () => '',
getCurrentSubtitleAss: () => '',
getPlaybackPaused: () => false,
getSubtitlePosition: () => null,
getSubtitleStyle: () => null,
saveSubtitlePosition: () => {},
getMecabStatus: () => ({ available: false, enabled: false, path: null }),
setMecabEnabled: () => {},
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async () => {},
saveControllerPreference: async () => {},
getSecondarySubMode: () => 'hover',
getCurrentSecondarySub: () => '',
focusMainWindow: () => {},
runSubsyncManual: async () => ({ ok: true, message: 'ok' }),
getAnkiConnectStatus: () => false,
getRuntimeOptions: () => [],
setRuntimeOption: () => ({ ok: true }),
cycleRuntimeOption: () => ({ ok: true }),
reportOverlayContentBounds: () => {},
getAnilistStatus: () => ({}),
clearAnilistToken: () => {},
openAnilistSetup: () => {},
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
immersionTracker: null,
...overrides,
};
}
function createFakeImmersionTracker(
overrides: Partial<NonNullable<IpcServiceDeps['immersionTracker']>> = {},
): NonNullable<IpcServiceDeps['immersionTracker']> {
return {
recordYomitanLookup: () => {},
getSessionSummaries: async () => [],
getDailyRollups: async () => [],
getMonthlyRollups: async () => [],
getQueryHints: async () => ({
totalSessions: 0,
activeSessions: 0,
episodesToday: 0,
activeAnimeCount: 0,
totalActiveMin: 0,
totalCards: 0,
activeDays: 0,
totalEpisodesWatched: 0,
totalAnimeCompleted: 0,
totalTokensSeen: 0,
totalLookupCount: 0,
totalLookupHits: 0,
totalYomitanLookupCount: 0,
newWordsToday: 0,
newWordsThisWeek: 0,
}),
getSessionTimeline: async () => [],
getSessionEvents: async () => [],
getVocabularyStats: async () => [],
getKanjiStats: async () => [],
getMediaLibrary: async () => [],
getMediaDetail: async () => null,
getMediaSessions: async () => [],
getMediaDailyRollups: async () => [],
getCoverArt: async () => null,
markActiveVideoWatched: async () => false,
...overrides,
};
}
test('createIpcDepsRuntime wires AniList handlers', async () => {
const calls: string[] = [];
const deps = createIpcDepsRuntime({
@@ -97,6 +181,8 @@ test('createIpcDepsRuntime wires AniList handlers', async () => {
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: () => {},
saveControllerPreference: () => {},
@@ -164,6 +250,8 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: () => {},
saveControllerPreference: () => {},
@@ -232,6 +320,194 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
);
});
test('registerIpcHandlers forwards yomitan lookup tracking commands to immersion tracker', () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const calls: string[] = [];
registerIpcHandlers(
createRegisterIpcDeps({
immersionTracker: createFakeImmersionTracker({
recordYomitanLookup: () => {
calls.push('lookup');
},
}),
}),
registrar,
);
const handler = handlers.on.get(IPC_CHANNELS.command.recordYomitanLookup);
assert.equal(typeof handler, 'function');
handler?.({}, null);
assert.deepEqual(calls, ['lookup']);
});
test('registerIpcHandlers returns empty stats overview shape without a tracker', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
registerIpcHandlers(createRegisterIpcDeps(), registrar);
const overviewHandler = handlers.handle.get(IPC_CHANNELS.request.statsGetOverview);
assert.ok(overviewHandler);
assert.deepEqual(await overviewHandler!({}), {
sessions: [],
rollups: [],
hints: {
totalSessions: 0,
activeSessions: 0,
episodesToday: 0,
activeAnimeCount: 0,
totalCards: 0,
totalActiveMin: 0,
activeDays: 0,
totalEpisodesWatched: 0,
totalAnimeCompleted: 0,
totalTokensSeen: 0,
totalLookupCount: 0,
totalLookupHits: 0,
totalYomitanLookupCount: 0,
newWordsToday: 0,
newWordsThisWeek: 0,
},
});
});
test('registerIpcHandlers validates and clamps stats request limits', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const calls: Array<[string, number, number?]> = [];
registerIpcHandlers(
createRegisterIpcDeps({
immersionTracker: {
recordYomitanLookup: () => {},
getSessionSummaries: async (limit = 0) => {
calls.push(['sessions', limit]);
return [];
},
getDailyRollups: async (limit = 0) => {
calls.push(['daily', limit]);
return [];
},
getMonthlyRollups: async (limit = 0) => {
calls.push(['monthly', limit]);
return [];
},
getQueryHints: async () => ({
totalSessions: 0,
activeSessions: 0,
episodesToday: 0,
activeAnimeCount: 0,
totalCards: 0,
totalActiveMin: 0,
activeDays: 0,
totalEpisodesWatched: 0,
totalAnimeCompleted: 0,
totalTokensSeen: 0,
totalLookupCount: 0,
totalLookupHits: 0,
totalYomitanLookupCount: 0,
newWordsToday: 0,
newWordsThisWeek: 0,
}),
getSessionTimeline: async (sessionId: number, limit = 0) => {
calls.push(['timeline', limit, sessionId]);
return [];
},
getSessionEvents: async (sessionId: number, limit = 0) => {
calls.push(['events', limit, sessionId]);
return [];
},
getVocabularyStats: async (limit = 0) => {
calls.push(['vocabulary', limit]);
return [];
},
getKanjiStats: async (limit = 0) => {
calls.push(['kanji', limit]);
return [];
},
getMediaLibrary: async () => [],
getMediaDetail: async () => null,
getMediaSessions: async () => [],
getMediaDailyRollups: async () => [],
getCoverArt: async () => null,
markActiveVideoWatched: async () => false,
},
}),
registrar,
);
await handlers.handle.get(IPC_CHANNELS.request.statsGetDailyRollups)!({}, -1);
await handlers.handle.get(IPC_CHANNELS.request.statsGetMonthlyRollups)!(
{},
Number.POSITIVE_INFINITY,
);
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessions)!({}, 9999);
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, 12.5);
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionEvents)!({}, 7, 0);
await handlers.handle.get(IPC_CHANNELS.request.statsGetVocabulary)!({}, 1000);
await handlers.handle.get(IPC_CHANNELS.request.statsGetKanji)!({}, NaN);
assert.deepEqual(calls, [
['daily', 60],
['monthly', 24],
['sessions', 500],
['timeline', 200, 7],
['events', 500, 7],
['vocabulary', 500],
['kanji', 100],
]);
});
test('registerIpcHandlers requests the full timeline when no limit is provided', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const calls: Array<[string, number | undefined, number]> = [];
registerIpcHandlers(
createRegisterIpcDeps({
immersionTracker: {
recordYomitanLookup: () => {},
getSessionSummaries: async () => [],
getDailyRollups: async () => [],
getMonthlyRollups: async () => [],
getQueryHints: async () => ({
totalSessions: 0,
activeSessions: 0,
episodesToday: 0,
activeAnimeCount: 0,
totalCards: 0,
totalActiveMin: 0,
activeDays: 0,
totalEpisodesWatched: 0,
totalAnimeCompleted: 0,
totalTokensSeen: 0,
totalLookupCount: 0,
totalLookupHits: 0,
totalYomitanLookupCount: 0,
newWordsToday: 0,
newWordsThisWeek: 0,
}),
getSessionTimeline: async (sessionId: number, limit?: number) => {
calls.push(['timeline', limit, sessionId]);
return [];
},
getSessionEvents: async () => [],
getVocabularyStats: async () => [],
getKanjiStats: async () => [],
getMediaLibrary: async () => [],
getMediaDetail: async () => null,
getMediaSessions: async () => [],
getMediaDailyRollups: async () => [],
getCoverArt: async () => null,
markActiveVideoWatched: async () => false,
},
}),
registrar,
);
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, undefined);
assert.deepEqual(calls, [['timeline', undefined, 7]]);
});
test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const saves: unknown[] = [];
@@ -265,10 +541,10 @@ test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: (update) => {
controllerSaves.push(update);
},
saveControllerConfig: () => {},
saveControllerPreference: (update) => {
controllerSaves.push(update);
},
@@ -329,6 +605,8 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async () => {},
saveControllerPreference: async (update) => {
@@ -376,85 +654,6 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
]);
});
test('registerIpcHandlers awaits saveControllerConfig through request-response IPC', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const controllerConfigSaves: unknown[] = [];
registerIpcHandlers(
{
onOverlayModalClosed: () => {},
openYomitanSettings: () => {},
quitApp: () => {},
toggleDevTools: () => {},
getVisibleOverlayVisibility: () => false,
toggleVisibleOverlay: () => {},
tokenizeCurrentSubtitle: async () => null,
getCurrentSubtitleRaw: () => '',
getCurrentSubtitleAss: () => '',
getPlaybackPaused: () => false,
getSubtitlePosition: () => null,
getSubtitleStyle: () => null,
saveSubtitlePosition: () => {},
getMecabStatus: () => ({ available: false, enabled: false, path: null }),
setMecabEnabled: () => {},
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async (update) => {
await Promise.resolve();
controllerConfigSaves.push(update);
},
saveControllerPreference: async () => {},
getSecondarySubMode: () => 'hover',
getCurrentSecondarySub: () => '',
focusMainWindow: () => {},
runSubsyncManual: async () => ({ ok: true, message: 'ok' }),
getAnkiConnectStatus: () => false,
getRuntimeOptions: () => [],
setRuntimeOption: () => ({ ok: true }),
cycleRuntimeOption: () => ({ ok: true }),
reportOverlayContentBounds: () => {},
getAnilistStatus: () => ({}),
clearAnilistToken: () => {},
openAnilistSetup: () => {},
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
},
registrar,
);
const saveHandler = handlers.handle.get(IPC_CHANNELS.command.saveControllerConfig);
assert.ok(saveHandler);
await assert.rejects(
async () => {
await saveHandler!({}, { bindings: { toggleLookup: { kind: 'button', buttonIndex: -1 } } });
},
/Invalid controller config payload/,
);
await saveHandler!({}, {
preferredGamepadId: 'pad-2',
bindings: {
toggleLookup: { kind: 'button', buttonIndex: 11 },
closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
},
});
assert.deepEqual(controllerConfigSaves, [
{
preferredGamepadId: 'pad-2',
bindings: {
toggleLookup: { kind: 'button', buttonIndex: 11 },
closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
},
},
]);
});
test('registerIpcHandlers rejects malformed controller preference payloads', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
registerIpcHandlers(
@@ -477,6 +676,8 @@ test('registerIpcHandlers rejects malformed controller preference payloads', asy
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async () => {},
saveControllerPreference: async () => {},

View File

@@ -50,6 +50,8 @@ export interface IpcServiceDeps {
handleMpvCommand: (command: Array<string | number>) => void;
getKeybindings: () => unknown;
getConfiguredShortcuts: () => unknown;
getStatsToggleKey: () => string;
getMarkWatchedKey: () => string;
getControllerConfig: () => ResolvedControllerConfig;
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
@@ -68,6 +70,39 @@ export interface IpcServiceDeps {
getAnilistQueueStatus: () => unknown;
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
immersionTracker?: {
recordYomitanLookup: () => void;
getSessionSummaries: (limit?: number) => Promise<unknown>;
getDailyRollups: (limit?: number) => Promise<unknown>;
getMonthlyRollups: (limit?: number) => Promise<unknown>;
getQueryHints: () => Promise<{
totalSessions: number;
activeSessions: number;
episodesToday: number;
activeAnimeCount: number;
totalActiveMin: number;
totalCards: number;
activeDays: number;
totalEpisodesWatched: number;
totalAnimeCompleted: number;
totalTokensSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
newWordsToday: number;
newWordsThisWeek: number;
}>;
getSessionTimeline: (sessionId: number, limit?: number) => Promise<unknown>;
getSessionEvents: (sessionId: number, limit?: number) => Promise<unknown>;
getVocabularyStats: (limit?: number) => Promise<unknown>;
getKanjiStats: (limit?: number) => Promise<unknown>;
getMediaLibrary: () => Promise<unknown>;
getMediaDetail: (videoId: number) => Promise<unknown>;
getMediaSessions: (videoId: number, limit?: number) => Promise<unknown>;
getMediaDailyRollups: (videoId: number, limit?: number) => Promise<unknown>;
getCoverArt: (videoId: number) => Promise<unknown>;
markActiveVideoWatched: () => Promise<boolean>;
} | null;
}
interface WindowLike {
@@ -116,6 +151,8 @@ export interface IpcDepsRuntimeOptions {
handleMpvCommand: (command: Array<string | number>) => void;
getKeybindings: () => unknown;
getConfiguredShortcuts: () => unknown;
getStatsToggleKey: () => string;
getMarkWatchedKey: () => string;
getControllerConfig: () => ResolvedControllerConfig;
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
@@ -134,6 +171,7 @@ export interface IpcDepsRuntimeOptions {
getAnilistQueueStatus: () => unknown;
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
getImmersionTracker?: () => IpcServiceDeps['immersionTracker'];
}
export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcServiceDeps {
@@ -170,6 +208,8 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
handleMpvCommand: options.handleMpvCommand,
getKeybindings: options.getKeybindings,
getConfiguredShortcuts: options.getConfiguredShortcuts,
getStatsToggleKey: options.getStatsToggleKey,
getMarkWatchedKey: options.getMarkWatchedKey,
getControllerConfig: options.getControllerConfig,
saveControllerConfig: options.saveControllerConfig,
saveControllerPreference: options.saveControllerPreference,
@@ -192,10 +232,31 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
getAnilistQueueStatus: options.getAnilistQueueStatus,
retryAnilistQueueNow: options.retryAnilistQueueNow,
appendClipboardVideoToQueue: options.appendClipboardVideoToQueue,
get immersionTracker() {
return options.getImmersionTracker?.() ?? null;
},
};
}
export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar = ipcMain): void {
const parsePositiveIntLimit = (
value: unknown,
defaultValue: number,
maxValue: number,
): number => {
if (!Number.isInteger(value) || (value as number) < 1) {
return defaultValue;
}
return Math.min(value as number, maxValue);
};
const parsePositiveInteger = (value: unknown): number | null => {
if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) {
return null;
}
return value;
};
ipc.on(
IPC_CHANNELS.command.setIgnoreMouseEvents,
(event: unknown, ignore: unknown, options: unknown = {}) => {
@@ -224,6 +285,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
deps.openYomitanSettings();
});
ipc.on(IPC_CHANNELS.command.recordYomitanLookup, () => {
deps.immersionTracker?.recordYomitanLookup();
});
ipc.handle(IPC_CHANNELS.command.markActiveVideoWatched, async () => {
return (await deps.immersionTracker?.markActiveVideoWatched()) ?? false;
});
ipc.on(IPC_CHANNELS.command.quitApp, () => {
deps.quitApp();
});
@@ -312,6 +381,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
return deps.getConfiguredShortcuts();
});
ipc.handle(IPC_CHANNELS.request.getStatsToggleKey, () => {
return deps.getStatsToggleKey();
});
ipc.handle(IPC_CHANNELS.request.getMarkWatchedKey, () => {
return deps.getMarkWatchedKey();
});
ipc.handle(IPC_CHANNELS.request.getControllerConfig, () => {
return deps.getControllerConfig();
});
@@ -397,4 +474,115 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
ipc.handle(IPC_CHANNELS.request.appendClipboardVideoToQueue, () => {
return deps.appendClipboardVideoToQueue();
});
// Stats request handlers
ipc.handle(IPC_CHANNELS.request.statsGetOverview, async () => {
const tracker = deps.immersionTracker;
if (!tracker) {
return {
sessions: [],
rollups: [],
hints: {
totalSessions: 0,
activeSessions: 0,
episodesToday: 0,
activeAnimeCount: 0,
totalActiveMin: 0,
totalCards: 0,
activeDays: 0,
totalEpisodesWatched: 0,
totalAnimeCompleted: 0,
totalTokensSeen: 0,
totalLookupCount: 0,
totalLookupHits: 0,
totalYomitanLookupCount: 0,
newWordsToday: 0,
newWordsThisWeek: 0,
},
};
}
const [sessions, rollups, hints] = await Promise.all([
tracker.getSessionSummaries(5),
tracker.getDailyRollups(14),
tracker.getQueryHints(),
]);
return { sessions, rollups, hints };
});
ipc.handle(IPC_CHANNELS.request.statsGetDailyRollups, async (_event, limit: unknown) => {
const parsedLimit = parsePositiveIntLimit(limit, 60, 500);
return deps.immersionTracker?.getDailyRollups(parsedLimit) ?? [];
});
ipc.handle(IPC_CHANNELS.request.statsGetMonthlyRollups, async (_event, limit: unknown) => {
const parsedLimit = parsePositiveIntLimit(limit, 24, 120);
return deps.immersionTracker?.getMonthlyRollups(parsedLimit) ?? [];
});
ipc.handle(IPC_CHANNELS.request.statsGetSessions, async (_event, limit: unknown) => {
const parsedLimit = parsePositiveIntLimit(limit, 50, 500);
return deps.immersionTracker?.getSessionSummaries(parsedLimit) ?? [];
});
ipc.handle(
IPC_CHANNELS.request.statsGetSessionTimeline,
async (_event, sessionId: unknown, limit: unknown) => {
const parsedSessionId = parsePositiveInteger(sessionId);
if (parsedSessionId === null) return [];
const parsedLimit = limit === undefined ? undefined : parsePositiveIntLimit(limit, 200, 1000);
return deps.immersionTracker?.getSessionTimeline(parsedSessionId, parsedLimit) ?? [];
},
);
ipc.handle(
IPC_CHANNELS.request.statsGetSessionEvents,
async (_event, sessionId: unknown, limit: unknown) => {
const parsedSessionId = parsePositiveInteger(sessionId);
if (parsedSessionId === null) return [];
const parsedLimit = parsePositiveIntLimit(limit, 500, 1000);
return deps.immersionTracker?.getSessionEvents(parsedSessionId, parsedLimit) ?? [];
},
);
ipc.handle(IPC_CHANNELS.request.statsGetVocabulary, async (_event, limit: unknown) => {
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
return deps.immersionTracker?.getVocabularyStats(parsedLimit) ?? [];
});
ipc.handle(IPC_CHANNELS.request.statsGetKanji, async (_event, limit: unknown) => {
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
return deps.immersionTracker?.getKanjiStats(parsedLimit) ?? [];
});
ipc.handle(IPC_CHANNELS.request.statsGetMediaLibrary, async () => {
return deps.immersionTracker?.getMediaLibrary() ?? [];
});
ipc.handle(IPC_CHANNELS.request.statsGetMediaDetail, async (_event, videoId: unknown) => {
if (typeof videoId !== 'number') return null;
return deps.immersionTracker?.getMediaDetail(videoId) ?? null;
});
ipc.handle(
IPC_CHANNELS.request.statsGetMediaSessions,
async (_event, videoId: unknown, limit: unknown) => {
if (typeof videoId !== 'number') return [];
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
return deps.immersionTracker?.getMediaSessions(videoId, parsedLimit) ?? [];
},
);
ipc.handle(
IPC_CHANNELS.request.statsGetMediaDailyRollups,
async (_event, videoId: unknown, limit: unknown) => {
if (typeof videoId !== 'number') return [];
const parsedLimit = parsePositiveIntLimit(limit, 90, 500);
return deps.immersionTracker?.getMediaDailyRollups(videoId, parsedLimit) ?? [];
},
);
ipc.handle(IPC_CHANNELS.request.statsGetMediaCover, async (_event, videoId: unknown) => {
if (typeof videoId !== 'number') return null;
return deps.immersionTracker?.getCoverArt(videoId) ?? null;
});
}

View File

@@ -59,9 +59,12 @@ const MPV_SUBTITLE_PROPERTY_OBSERVATIONS: string[] = [
'sub-ass-override',
'sub-use-margins',
'pause',
'duration',
'media-title',
'secondary-sub-visibility',
'sub-visibility',
'sid',
'track-list',
];
const MPV_INITIAL_PROPERTY_REQUESTS: Array<MpvProtocolCommand> = [

View File

@@ -60,6 +60,8 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
emitSubtitleAssChange: (payload) => state.events.push(payload),
emitSubtitleTiming: (payload) => state.events.push(payload),
emitSecondarySubtitleChange: (payload) => state.events.push(payload),
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
getCurrentSubText: () => state.subText,
setCurrentSubText: (text) => {
state.subText = text;
@@ -87,6 +89,7 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
getPauseAtTime: () => null,
setPauseAtTime: () => {},
emitTimePosChange: () => {},
emitDurationChange: () => {},
emitPauseChange: () => {},
autoLoadSecondarySubTrack: () => {},
setCurrentVideoPath: () => {},
@@ -119,6 +122,21 @@ test('dispatchMpvProtocolMessage emits subtitle text on property change', async
assert.deepEqual(state.events, [{ text: '字幕', isOverlayVisible: false }]);
});
test('dispatchMpvProtocolMessage emits subtitle track changes', async () => {
const { deps, state } = createDeps({
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
});
await dispatchMpvProtocolMessage({ event: 'property-change', name: 'sid', data: '3' }, deps);
await dispatchMpvProtocolMessage(
{ event: 'property-change', name: 'track-list', data: [{ type: 'sub', id: 3 }] },
deps,
);
assert.deepEqual(state.events, [{ sid: 3 }, { trackList: [{ type: 'sub', id: 3 }] }]);
});
test('dispatchMpvProtocolMessage enforces sub-visibility hidden when overlay suppression is enabled', async () => {
const { deps, state } = createDeps({
isVisibleOverlayVisible: () => true,

View File

@@ -52,6 +52,8 @@ export interface MpvProtocolHandleMessageDeps {
emitSubtitleAssChange: (payload: { text: string }) => void;
emitSubtitleTiming: (payload: { text: string; start: number; end: number }) => void;
emitSecondarySubtitleChange: (payload: { text: string }) => void;
emitSubtitleTrackChange: (payload: { sid: number | null }) => void;
emitSubtitleTrackListChange: (payload: { trackList: unknown[] | null }) => void;
getCurrentSubText: () => string;
setCurrentSubText: (text: string) => void;
setCurrentSubStart: (value: number) => void;
@@ -61,6 +63,7 @@ export interface MpvProtocolHandleMessageDeps {
emitMediaPathChange: (payload: { path: string }) => void;
emitMediaTitleChange: (payload: { title: string | null }) => void;
emitTimePosChange: (payload: { time: number }) => void;
emitDurationChange: (payload: { duration: number }) => void;
emitPauseChange: (payload: { paused: boolean }) => void;
emitSubtitleMetricsChange: (payload: Partial<MpvSubtitleRenderMetrics>) => void;
setCurrentSecondarySubText: (text: string) => void;
@@ -159,6 +162,18 @@ export async function dispatchMpvProtocolMessage(
const nextSubText = (msg.data as string) || '';
deps.setCurrentSecondarySubText(nextSubText);
deps.emitSecondarySubtitleChange({ text: nextSubText });
} else if (msg.name === 'sid') {
const sid =
typeof msg.data === 'number'
? msg.data
: typeof msg.data === 'string'
? Number(msg.data)
: null;
deps.emitSubtitleTrackChange({ sid: sid !== null && Number.isFinite(sid) ? sid : null });
} else if (msg.name === 'track-list') {
deps.emitSubtitleTrackListChange({
trackList: Array.isArray(msg.data) ? (msg.data as unknown[]) : null,
});
} else if (msg.name === 'aid') {
deps.setCurrentAudioTrackId(typeof msg.data === 'number' ? (msg.data as number) : null);
deps.syncCurrentAudioStreamIndex();
@@ -172,6 +187,11 @@ export async function dispatchMpvProtocolMessage(
deps.setPauseAtTime(null);
deps.sendCommand({ command: ['set_property', 'pause', true] });
}
} else if (msg.name === 'duration') {
const duration = typeof msg.data === 'number' ? msg.data : 0;
if (duration > 0) {
deps.emitDurationChange({ duration });
}
} else if (msg.name === 'pause') {
deps.emitPauseChange({ paused: asBoolean(msg.data, false) });
} else if (msg.name === 'media-title') {

View File

@@ -115,8 +115,11 @@ export interface MpvIpcClientEventMap {
'subtitle-ass-change': { text: string };
'subtitle-timing': { text: string; start: number; end: number };
'time-pos-change': { time: number };
'duration-change': { duration: number };
'pause-change': { paused: boolean };
'secondary-subtitle-change': { text: string };
'subtitle-track-change': { sid: number | null };
'subtitle-track-list-change': { trackList: unknown[] | null };
'media-path-change': { path: string };
'media-title-change': { title: string | null };
'subtitle-metrics-change': { patch: Partial<MpvSubtitleRenderMetrics> };
@@ -314,6 +317,9 @@ export class MpvIpcClient implements MpvClient {
emitTimePosChange: (payload) => {
this.emit('time-pos-change', payload);
},
emitDurationChange: (payload) => {
this.emit('duration-change', payload);
},
emitPauseChange: (payload) => {
this.playbackPaused = payload.paused;
this.emit('pause-change', payload);
@@ -321,6 +327,12 @@ export class MpvIpcClient implements MpvClient {
emitSecondarySubtitleChange: (payload) => {
this.emit('secondary-subtitle-change', payload);
},
emitSubtitleTrackChange: (payload) => {
this.emit('subtitle-track-change', payload);
},
emitSubtitleTrackListChange: (payload) => {
this.emit('subtitle-track-list-change', payload);
},
getCurrentSubText: () => this.currentSubText,
setCurrentSubText: (text: string) => {
this.currentSubText = text;

View File

@@ -109,6 +109,60 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
assert.equal(setIntegrationCalls, 1);
});
test('initializeOverlayRuntime can skip starting Anki integration transport', () => {
let createdIntegrations = 0;
let startedIntegrations = 0;
let setIntegrationCalls = 0;
initializeOverlayRuntime({
backendOverride: null,
createMainWindow: () => {},
registerGlobalShortcuts: () => {},
updateVisibleOverlayBounds: () => {},
isVisibleOverlayVisible: () => false,
updateVisibleOverlayVisibility: () => {},
getOverlayWindows: () => [],
syncOverlayShortcuts: () => {},
setWindowTracker: () => {},
getMpvSocketPath: () => '/tmp/mpv.sock',
createWindowTracker: () => null,
getResolvedConfig: () => ({
ankiConnect: { enabled: true } as never,
}),
getSubtitleTimingTracker: () => ({}),
getMpvClient: () => ({
send: () => {},
}),
getRuntimeOptionsManager: () => ({
getEffectiveAnkiConnectConfig: (config) => config as never,
}),
createAnkiIntegration: () => {
createdIntegrations += 1;
return {
start: () => {
startedIntegrations += 1;
},
};
},
setAnkiIntegration: () => {
setIntegrationCalls += 1;
},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => ({
keepNoteId: 7,
deleteNoteId: 8,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
shouldStartAnkiIntegration: () => false,
});
assert.equal(createdIntegrations, 1);
assert.equal(startedIntegrations, 0);
assert.equal(setIntegrationCalls, 1);
});
test('initializeOverlayRuntime merges shared ai config with Anki overrides', () => {
initializeOverlayRuntime({
backendOverride: null,
@@ -213,3 +267,49 @@ test('initializeOverlayRuntime re-syncs overlay shortcuts when tracker focus cha
tracker.onWindowFocusChange?.(true);
assert.equal(syncCalls, 1);
});
test('initializeOverlayRuntime refreshes visible overlay when tracker focus changes while overlay is shown', () => {
let visibilityRefreshCalls = 0;
const tracker = {
onGeometryChange: null as ((...args: unknown[]) => void) | null,
onWindowFound: null as ((...args: unknown[]) => void) | null,
onWindowLost: null as (() => void) | null,
onWindowFocusChange: null as ((focused: boolean) => void) | null,
start: () => {},
};
initializeOverlayRuntime({
backendOverride: null,
createMainWindow: () => {},
registerGlobalShortcuts: () => {},
updateVisibleOverlayBounds: () => {},
isVisibleOverlayVisible: () => true,
updateVisibleOverlayVisibility: () => {
visibilityRefreshCalls += 1;
},
getOverlayWindows: () => [],
syncOverlayShortcuts: () => {},
setWindowTracker: () => {},
getMpvSocketPath: () => '/tmp/mpv.sock',
createWindowTracker: () => tracker as never,
getResolvedConfig: () => ({
ankiConnect: { enabled: false } as never,
}),
getSubtitleTimingTracker: () => null,
getMpvClient: () => null,
getRuntimeOptionsManager: () => null,
setAnkiIntegration: () => {},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => ({
keepNoteId: 1,
deleteNoteId: 2,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
});
tracker.onWindowFocusChange?.(true);
assert.equal(visibilityRefreshCalls, 2);
});

View File

@@ -75,6 +75,7 @@ export function initializeOverlayRuntime(options: {
data: KikuFieldGroupingRequestData,
) => Promise<KikuFieldGroupingChoice>;
getKnownWordCacheStatePath: () => string;
shouldStartAnkiIntegration?: () => boolean;
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
}): void {
options.createMainWindow();
@@ -90,9 +91,6 @@ export function initializeOverlayRuntime(options: {
windowTracker.onGeometryChange = (geometry: WindowGeometry) => {
options.updateVisibleOverlayBounds(geometry);
};
windowTracker.onTargetWindowFocusChange = () => {
options.syncOverlayShortcuts();
};
windowTracker.onWindowFound = (geometry: WindowGeometry) => {
options.updateVisibleOverlayBounds(geometry);
if (options.isVisibleOverlayVisible()) {
@@ -106,6 +104,9 @@ export function initializeOverlayRuntime(options: {
options.syncOverlayShortcuts();
};
windowTracker.onWindowFocusChange = () => {
if (options.isVisibleOverlayVisible()) {
options.updateVisibleOverlayVisibility();
}
options.syncOverlayShortcuts();
};
windowTracker.start();
@@ -135,7 +136,9 @@ export function initializeOverlayRuntime(options: {
createFieldGroupingCallback: options.createFieldGroupingCallback,
knownWordCacheStatePath: options.getKnownWordCacheStatePath(),
});
integration.start();
if (options.shouldStartAnkiIntegration?.() !== false) {
integration.start();
}
options.setAnkiIntegration(integration);
}

View File

@@ -200,6 +200,81 @@ test('Windows visible overlay stays click-through and does not steal focus while
assert.ok(!calls.includes('focus'));
});
test('macOS tracked visible overlay stays visible without passively stealing focus', () => {
const { window, calls } = createMainWindowRecorder();
const tracker: WindowTrackerStub = {
isTracking: () => true,
getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
};
updateVisibleOverlayVisibility({
visibleOverlayVisible: true,
mainWindow: window as never,
windowTracker: tracker as never,
trackerNotReadyWarningShown: false,
setTrackerNotReadyWarningShown: () => {},
updateVisibleOverlayBounds: () => {
calls.push('update-bounds');
},
ensureOverlayWindowLevel: () => {
calls.push('ensure-level');
},
syncPrimaryOverlayWindowLayer: () => {
calls.push('sync-layer');
},
enforceOverlayLayerOrder: () => {
calls.push('enforce-order');
},
syncOverlayShortcuts: () => {
calls.push('sync-shortcuts');
},
isMacOSPlatform: true,
isWindowsPlatform: false,
} as never);
assert.ok(calls.includes('mouse-ignore:false:plain'));
assert.ok(calls.includes('show'));
assert.ok(!calls.includes('focus'));
});
test('forced mouse passthrough keeps macOS tracked overlay passive while visible', () => {
const { window, calls } = createMainWindowRecorder();
const tracker: WindowTrackerStub = {
isTracking: () => true,
getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
};
updateVisibleOverlayVisibility({
visibleOverlayVisible: true,
mainWindow: window as never,
windowTracker: tracker as never,
trackerNotReadyWarningShown: false,
setTrackerNotReadyWarningShown: () => {},
updateVisibleOverlayBounds: () => {
calls.push('update-bounds');
},
ensureOverlayWindowLevel: () => {
calls.push('ensure-level');
},
syncPrimaryOverlayWindowLayer: () => {
calls.push('sync-layer');
},
enforceOverlayLayerOrder: () => {
calls.push('enforce-order');
},
syncOverlayShortcuts: () => {
calls.push('sync-shortcuts');
},
isMacOSPlatform: true,
isWindowsPlatform: false,
forceMousePassthrough: true,
} as never);
assert.ok(calls.includes('mouse-ignore:true:forward'));
assert.ok(calls.includes('show'));
assert.ok(!calls.includes('focus'));
});
test('Windows keeps visible overlay hidden while tracker is not ready', () => {
const { window, calls } = createMainWindowRecorder();
let trackerWarning = false;
@@ -283,6 +358,59 @@ test('macOS keeps visible overlay hidden while tracker is not initialized yet',
assert.ok(!calls.includes('update-bounds'));
});
test('macOS suppresses immediate repeat loading OSD after tracker recovery until cooldown expires', () => {
const { window } = createMainWindowRecorder();
const osdMessages: string[] = [];
let trackerWarning = false;
let lastLoadingOsdAtMs: number | null = null;
let nowMs = 1_000;
const hiddenTracker: WindowTrackerStub = {
isTracking: () => false,
getGeometry: () => null,
};
const trackedTracker: WindowTrackerStub = {
isTracking: () => true,
getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
};
const run = (windowTracker: WindowTrackerStub) =>
updateVisibleOverlayVisibility({
visibleOverlayVisible: true,
mainWindow: window as never,
windowTracker: windowTracker as never,
trackerNotReadyWarningShown: trackerWarning,
setTrackerNotReadyWarningShown: (shown: boolean) => {
trackerWarning = shown;
},
updateVisibleOverlayBounds: () => {},
ensureOverlayWindowLevel: () => {},
syncPrimaryOverlayWindowLayer: () => {},
enforceOverlayLayerOrder: () => {},
syncOverlayShortcuts: () => {},
isMacOSPlatform: true,
showOverlayLoadingOsd: (message: string) => {
osdMessages.push(message);
},
shouldShowOverlayLoadingOsd: () =>
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
markOverlayLoadingOsdShown: () => {
lastLoadingOsdAtMs = nowMs;
},
} as never);
run(hiddenTracker);
run(trackedTracker);
nowMs = 2_000;
run(hiddenTracker);
run(trackedTracker);
nowMs = 6_500;
run(hiddenTracker);
assert.deepEqual(osdMessages, ['Overlay loading...', 'Overlay loading...']);
});
test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly', () => {
const calls: string[] = [];
setVisibleOverlayVisible({
@@ -298,10 +426,12 @@ test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly'
assert.deepEqual(calls, ['state:true', 'update']);
});
test('macOS loading OSD can show again after overlay is hidden and retried', () => {
test('macOS explicit hide resets loading OSD suppression before retry', () => {
const { window, calls } = createMainWindowRecorder();
const osdMessages: string[] = [];
let trackerWarning = false;
let lastLoadingOsdAtMs: number | null = null;
let nowMs = 1_000;
updateVisibleOverlayVisibility({
visibleOverlayVisible: true,
@@ -331,8 +461,17 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
showOverlayLoadingOsd: (message: string) => {
osdMessages.push(message);
},
shouldShowOverlayLoadingOsd: () =>
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
markOverlayLoadingOsdShown: () => {
lastLoadingOsdAtMs = nowMs;
},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
nowMs = 1_500;
updateVisibleOverlayVisibility({
visibleOverlayVisible: false,
mainWindow: window as never,
@@ -349,6 +488,9 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
syncOverlayShortcuts: () => {},
isMacOSPlatform: true,
showOverlayLoadingOsd: () => {},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
updateVisibleOverlayVisibility({
@@ -379,6 +521,14 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
showOverlayLoadingOsd: (message: string) => {
osdMessages.push(message);
},
shouldShowOverlayLoadingOsd: () =>
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
markOverlayLoadingOsdShown: () => {
lastLoadingOsdAtMs = nowMs;
},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
assert.deepEqual(osdMessages, ['Overlay loading...', 'Overlay loading...']);

View File

@@ -4,6 +4,7 @@ import { WindowGeometry } from '../../types';
export function updateVisibleOverlayVisibility(args: {
visibleOverlayVisible: boolean;
forceMousePassthrough?: boolean;
mainWindow: BrowserWindow | null;
windowTracker: BaseWindowTracker | null;
trackerNotReadyWarningShown: boolean;
@@ -16,6 +17,9 @@ export function updateVisibleOverlayVisibility(args: {
isMacOSPlatform?: boolean;
isWindowsPlatform?: boolean;
showOverlayLoadingOsd?: (message: string) => void;
shouldShowOverlayLoadingOsd?: () => boolean;
markOverlayLoadingOsdShown?: () => void;
resetOverlayLoadingOsdSuppression?: () => void;
resolveFallbackBounds?: () => WindowGeometry;
}): void {
if (!args.mainWindow || args.mainWindow.isDestroyed()) {
@@ -25,20 +29,33 @@ export function updateVisibleOverlayVisibility(args: {
const mainWindow = args.mainWindow;
const showPassiveVisibleOverlay = (): void => {
if (args.isWindowsPlatform) {
const forceMousePassthrough = args.forceMousePassthrough === true;
if (args.isWindowsPlatform || forceMousePassthrough) {
mainWindow.setIgnoreMouseEvents(true, { forward: true });
} else {
mainWindow.setIgnoreMouseEvents(false);
}
args.ensureOverlayWindowLevel(mainWindow);
mainWindow.show();
if (!args.isWindowsPlatform) {
if (!args.isWindowsPlatform && !args.isMacOSPlatform && !forceMousePassthrough) {
mainWindow.focus();
}
};
const maybeShowOverlayLoadingOsd = (): void => {
if (!args.isMacOSPlatform || !args.showOverlayLoadingOsd) {
return;
}
if (args.shouldShowOverlayLoadingOsd && !args.shouldShowOverlayLoadingOsd()) {
return;
}
args.showOverlayLoadingOsd('Overlay loading...');
args.markOverlayLoadingOsdShown?.();
};
if (!args.visibleOverlayVisible) {
args.setTrackerNotReadyWarningShown(false);
args.resetOverlayLoadingOsdSuppression?.();
mainWindow.hide();
args.syncOverlayShortcuts();
return;
@@ -61,9 +78,7 @@ export function updateVisibleOverlayVisibility(args: {
if (args.isMacOSPlatform || args.isWindowsPlatform) {
if (!args.trackerNotReadyWarningShown) {
args.setTrackerNotReadyWarningShown(true);
if (args.isMacOSPlatform) {
args.showOverlayLoadingOsd?.('Overlay loading...');
}
maybeShowOverlayLoadingOsd();
}
mainWindow.hide();
args.syncOverlayShortcuts();
@@ -79,9 +94,7 @@ export function updateVisibleOverlayVisibility(args: {
if (!args.trackerNotReadyWarningShown) {
args.setTrackerNotReadyWarningShown(true);
if (args.isMacOSPlatform) {
args.showOverlayLoadingOsd?.('Overlay loading...');
}
maybeShowOverlayLoadingOsd();
}
mainWindow.hide();

View File

@@ -46,6 +46,7 @@ export function ensureOverlayWindowLevel(window: BrowserWindow): void {
window.setAlwaysOnTop(true, 'screen-saver', 1);
window.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
window.setFullScreenable(false);
window.moveTop();
return;
}
if (process.platform === 'win32') {

View File

@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,

View File

@@ -0,0 +1,196 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { runAppReadyRuntime } from './startup';
test('runAppReadyRuntime minimal startup skips Yomitan and first-run setup while still handling CLI args', async () => {
const calls: string[] = [];
await runAppReadyRuntime({
ensureDefaultConfigBootstrap: () => {
calls.push('bootstrap');
},
loadSubtitlePosition: () => {
calls.push('load-subtitle-position');
},
resolveKeybindings: () => {
calls.push('resolve-keybindings');
},
createMpvClient: () => {
calls.push('create-mpv');
},
reloadConfig: () => {
calls.push('reload-config');
},
getResolvedConfig: () => ({}),
getConfigWarnings: () => [],
logConfigWarning: () => {
calls.push('config-warning');
},
setLogLevel: () => {
calls.push('set-log-level');
},
initRuntimeOptionsManager: () => {
calls.push('init-runtime-options');
},
setSecondarySubMode: () => {
calls.push('set-secondary-sub-mode');
},
defaultSecondarySubMode: 'hover',
defaultWebsocketPort: 0,
defaultAnnotationWebsocketPort: 0,
defaultTexthookerPort: 0,
hasMpvWebsocketPlugin: () => false,
startSubtitleWebsocket: () => {
calls.push('subtitle-ws');
},
startAnnotationWebsocket: () => {
calls.push('annotation-ws');
},
startTexthooker: () => {
calls.push('texthooker');
},
log: () => {
calls.push('log');
},
createMecabTokenizerAndCheck: async () => {
calls.push('mecab');
},
createSubtitleTimingTracker: () => {
calls.push('subtitle-timing');
},
createImmersionTracker: () => {
calls.push('immersion');
},
startJellyfinRemoteSession: async () => {
calls.push('jellyfin');
},
loadYomitanExtension: async () => {
calls.push('load-yomitan');
},
handleFirstRunSetup: async () => {
calls.push('first-run');
},
prewarmSubtitleDictionaries: async () => {
calls.push('prewarm');
},
startBackgroundWarmups: () => {
calls.push('warmups');
},
texthookerOnlyMode: false,
shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
setVisibleOverlayVisible: () => {
calls.push('visible-overlay');
},
initializeOverlayRuntime: () => {
calls.push('init-overlay');
},
handleInitialArgs: () => {
calls.push('handle-initial-args');
},
shouldUseMinimalStartup: () => true,
shouldSkipHeavyStartup: () => false,
});
assert.deepEqual(calls, ['bootstrap', 'reload-config', 'handle-initial-args']);
});
test('runAppReadyRuntime headless refresh bootstraps Anki runtime without UI startup', async () => {
const calls: string[] = [];
await runAppReadyRuntime({
ensureDefaultConfigBootstrap: () => {
calls.push('bootstrap');
},
loadSubtitlePosition: () => {
calls.push('load-subtitle-position');
},
resolveKeybindings: () => {
calls.push('resolve-keybindings');
},
createMpvClient: () => {
calls.push('create-mpv');
},
reloadConfig: () => {
calls.push('reload-config');
},
getResolvedConfig: () => ({}),
getConfigWarnings: () => [],
logConfigWarning: () => {
calls.push('config-warning');
},
setLogLevel: () => {
calls.push('set-log-level');
},
initRuntimeOptionsManager: () => {
calls.push('init-runtime-options');
},
setSecondarySubMode: () => {
calls.push('set-secondary-sub-mode');
},
defaultSecondarySubMode: 'hover',
defaultWebsocketPort: 0,
defaultAnnotationWebsocketPort: 0,
defaultTexthookerPort: 0,
hasMpvWebsocketPlugin: () => false,
startSubtitleWebsocket: () => {
calls.push('subtitle-ws');
},
startAnnotationWebsocket: () => {
calls.push('annotation-ws');
},
startTexthooker: () => {
calls.push('texthooker');
},
log: () => {
calls.push('log');
},
createMecabTokenizerAndCheck: async () => {
calls.push('mecab');
},
createSubtitleTimingTracker: () => {
calls.push('subtitle-timing');
},
createImmersionTracker: () => {
calls.push('immersion');
},
startJellyfinRemoteSession: async () => {
calls.push('jellyfin');
},
loadYomitanExtension: async () => {
calls.push('load-yomitan');
},
handleFirstRunSetup: async () => {
calls.push('first-run');
},
prewarmSubtitleDictionaries: async () => {
calls.push('prewarm');
},
startBackgroundWarmups: () => {
calls.push('warmups');
},
texthookerOnlyMode: false,
shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
setVisibleOverlayVisible: () => {
calls.push('visible-overlay');
},
initializeOverlayRuntime: () => {
calls.push('init-overlay');
},
runHeadlessInitialCommand: async () => {
calls.push('run-headless-command');
},
handleInitialArgs: () => {
calls.push('handle-initial-args');
},
shouldRunHeadlessInitialCommand: () => true,
shouldUseMinimalStartup: () => false,
shouldSkipHeavyStartup: () => false,
});
assert.deepEqual(calls, [
'bootstrap',
'reload-config',
'init-runtime-options',
'run-headless-command',
]);
});

View File

@@ -131,10 +131,13 @@ export interface AppReadyRuntimeDeps {
shouldAutoInitializeOverlayRuntimeFromConfig: () => boolean;
setVisibleOverlayVisible: (visible: boolean) => void;
initializeOverlayRuntime: () => void;
runHeadlessInitialCommand?: () => Promise<void>;
handleInitialArgs: () => void;
logDebug?: (message: string) => void;
onCriticalConfigErrors?: (errors: string[]) => void;
now?: () => number;
shouldRunHeadlessInitialCommand?: () => boolean;
shouldUseMinimalStartup?: () => boolean;
shouldSkipHeavyStartup?: () => boolean;
}
@@ -183,6 +186,32 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
const now = deps.now ?? (() => Date.now());
const startupStartedAtMs = now();
deps.ensureDefaultConfigBootstrap();
if (deps.shouldRunHeadlessInitialCommand?.()) {
deps.reloadConfig();
deps.initRuntimeOptionsManager();
if (deps.runHeadlessInitialCommand) {
await deps.runHeadlessInitialCommand();
} else {
deps.createMpvClient();
deps.createSubtitleTimingTracker();
deps.initializeOverlayRuntime();
deps.handleInitialArgs();
}
return;
}
if (deps.texthookerOnlyMode) {
deps.reloadConfig();
deps.handleInitialArgs();
return;
}
if (deps.shouldUseMinimalStartup?.()) {
deps.reloadConfig();
deps.handleInitialArgs();
return;
}
if (deps.shouldSkipHeavyStartup?.()) {
await deps.loadYomitanExtension();
deps.reloadConfig();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,88 @@
import type { BrowserWindow, BrowserWindowConstructorOptions } from 'electron';
import type { WindowGeometry } from '../../types';
const DEFAULT_STATS_WINDOW_WIDTH = 900;
const DEFAULT_STATS_WINDOW_HEIGHT = 700;
type StatsWindowLevelController = Pick<BrowserWindow, 'setAlwaysOnTop' | 'moveTop'> &
Partial<Pick<BrowserWindow, 'setVisibleOnAllWorkspaces' | 'setFullScreenable'>>;
function isBareToggleKeyInput(input: Electron.Input, toggleKey: string): boolean {
return (
input.type === 'keyDown' &&
input.code === toggleKey &&
!input.control &&
!input.alt &&
!input.meta &&
!input.shift &&
!input.isAutoRepeat
);
}
export function shouldHideStatsWindowForInput(input: Electron.Input, toggleKey: string): boolean {
return (
(input.type === 'keyDown' && input.key === 'Escape') || isBareToggleKeyInput(input, toggleKey)
);
}
export function buildStatsWindowOptions(options: {
preloadPath: string;
bounds?: WindowGeometry | null;
}): BrowserWindowConstructorOptions {
return {
x: options.bounds?.x,
y: options.bounds?.y,
width: options.bounds?.width ?? DEFAULT_STATS_WINDOW_WIDTH,
height: options.bounds?.height ?? DEFAULT_STATS_WINDOW_HEIGHT,
frame: false,
transparent: true,
alwaysOnTop: true,
resizable: false,
skipTaskbar: true,
hasShadow: false,
focusable: true,
acceptFirstMouse: true,
fullscreenable: false,
backgroundColor: '#1e1e2e',
show: false,
webPreferences: {
nodeIntegration: false,
contextIsolation: true,
preload: options.preloadPath,
sandbox: true,
},
};
}
export function promoteStatsWindowLevel(
window: StatsWindowLevelController,
platform: NodeJS.Platform = process.platform,
): void {
if (platform === 'darwin') {
window.setAlwaysOnTop(true, 'screen-saver', 2);
window.setVisibleOnAllWorkspaces?.(true, { visibleOnFullScreen: true });
window.setFullScreenable?.(false);
window.moveTop();
return;
}
if (platform === 'win32') {
window.setAlwaysOnTop(true, 'screen-saver', 2);
window.moveTop();
return;
}
window.setAlwaysOnTop(true);
window.moveTop();
}
export function buildStatsWindowLoadFileOptions(apiBaseUrl?: string): {
query: Record<string, string>;
} {
return {
query: {
overlay: '1',
...(apiBaseUrl ? { apiBase: apiBaseUrl } : {}),
},
};
}

View File

@@ -0,0 +1,202 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
buildStatsWindowLoadFileOptions,
buildStatsWindowOptions,
promoteStatsWindowLevel,
shouldHideStatsWindowForInput,
} from './stats-window-runtime';
test('buildStatsWindowOptions uses tracked overlay bounds and preload-friendly web preferences', () => {
const options = buildStatsWindowOptions({
preloadPath: '/tmp/preload-stats.js',
bounds: {
x: 120,
y: 80,
width: 1440,
height: 900,
},
});
assert.equal(options.x, 120);
assert.equal(options.y, 80);
assert.equal(options.width, 1440);
assert.equal(options.height, 900);
assert.equal(options.frame, false);
assert.equal(options.transparent, true);
assert.equal(options.resizable, false);
assert.equal(options.webPreferences?.preload, '/tmp/preload-stats.js');
assert.equal(options.webPreferences?.contextIsolation, true);
assert.equal(options.webPreferences?.nodeIntegration, false);
assert.equal(options.webPreferences?.sandbox, true);
});
test('shouldHideStatsWindowForInput matches Escape and configured bare toggle key', () => {
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: 'Escape',
code: 'Escape',
} as Electron.Input,
'Backquote',
),
true,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
} as Electron.Input,
'Backquote',
),
true,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
control: true,
} as Electron.Input,
'Backquote',
),
false,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
alt: true,
} as Electron.Input,
'Backquote',
),
false,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
meta: true,
} as Electron.Input,
'Backquote',
),
false,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
isAutoRepeat: true,
} as Electron.Input,
'Backquote',
),
false,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyDown',
key: '`',
code: 'Backquote',
shift: true,
} as Electron.Input,
'Backquote',
),
false,
);
assert.equal(
shouldHideStatsWindowForInput(
{
type: 'keyUp',
key: '`',
code: 'Backquote',
} as Electron.Input,
'Backquote',
),
false,
);
});
test('buildStatsWindowLoadFileOptions enables overlay rendering mode', () => {
assert.deepEqual(buildStatsWindowLoadFileOptions(), {
query: {
overlay: '1',
},
});
});
test('buildStatsWindowLoadFileOptions includes provided stats API base URL', () => {
assert.deepEqual(buildStatsWindowLoadFileOptions('http://127.0.0.1:6123'), {
query: {
overlay: '1',
apiBase: 'http://127.0.0.1:6123',
},
});
});
test('promoteStatsWindowLevel raises stats above overlay level on macOS', () => {
const calls: string[] = [];
promoteStatsWindowLevel(
{
setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
},
setVisibleOnAllWorkspaces: (
visible: boolean,
options?: { visibleOnFullScreen?: boolean },
) => {
calls.push(
`all-workspaces:${visible}:${options?.visibleOnFullScreen === true ? 'fullscreen' : 'plain'}`,
);
},
setFullScreenable: (fullscreenable: boolean) => {
calls.push(`fullscreenable:${fullscreenable}`);
},
moveTop: () => {
calls.push('move-top');
},
} as never,
'darwin',
);
assert.deepEqual(calls, [
'always-on-top:true:screen-saver:2',
'all-workspaces:true:fullscreen',
'fullscreenable:false',
'move-top',
]);
});
test('promoteStatsWindowLevel raises stats above overlay level on Windows', () => {
const calls: string[] = [];
promoteStatsWindowLevel(
{
setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
},
moveTop: () => {
calls.push('move-top');
},
} as never,
'win32',
);
assert.deepEqual(calls, ['always-on-top:true:screen-saver:2', 'move-top']);
});

View File

@@ -0,0 +1,118 @@
import { BrowserWindow, ipcMain } from 'electron';
import * as path from 'path';
import type { WindowGeometry } from '../../types.js';
import { IPC_CHANNELS } from '../../shared/ipc/contracts.js';
import {
buildStatsWindowLoadFileOptions,
buildStatsWindowOptions,
promoteStatsWindowLevel,
shouldHideStatsWindowForInput,
} from './stats-window-runtime.js';
let statsWindow: BrowserWindow | null = null;
let toggleRegistered = false;
export interface StatsWindowOptions {
/** Absolute path to stats/dist/ directory */
staticDir: string;
/** Absolute path to the compiled preload-stats.js */
preloadPath: string;
/** Resolve the active stats API base URL */
getApiBaseUrl?: () => string;
/** Resolve the active stats toggle key from config */
getToggleKey: () => string;
/** Resolve the tracked overlay/mpv bounds */
resolveBounds: () => WindowGeometry | null;
/** Notify the main process when the stats overlay becomes visible/hidden */
onVisibilityChanged?: (visible: boolean) => void;
}
function syncStatsWindowBounds(window: BrowserWindow, bounds: WindowGeometry | null): void {
if (!bounds || window.isDestroyed()) return;
window.setBounds({
x: bounds.x,
y: bounds.y,
width: bounds.width,
height: bounds.height,
});
}
function showStatsWindow(window: BrowserWindow, options: StatsWindowOptions): void {
syncStatsWindowBounds(window, options.resolveBounds());
promoteStatsWindowLevel(window);
window.show();
window.focus();
options.onVisibilityChanged?.(true);
promoteStatsWindowLevel(window);
}
/**
* Toggle the stats overlay window: create on first call, then show/hide.
* The React app stays mounted across toggles — state is preserved.
*/
export function toggleStatsOverlay(options: StatsWindowOptions): void {
if (!statsWindow) {
statsWindow = new BrowserWindow(
buildStatsWindowOptions({
preloadPath: options.preloadPath,
bounds: options.resolveBounds(),
}),
);
const indexPath = path.join(options.staticDir, 'index.html');
statsWindow.loadFile(indexPath, buildStatsWindowLoadFileOptions(options.getApiBaseUrl?.()));
statsWindow.on('closed', () => {
options.onVisibilityChanged?.(false);
statsWindow = null;
});
statsWindow.webContents.on('before-input-event', (event, input) => {
if (shouldHideStatsWindowForInput(input, options.getToggleKey())) {
event.preventDefault();
statsWindow?.hide();
options.onVisibilityChanged?.(false);
}
});
statsWindow.once('ready-to-show', () => {
if (!statsWindow) return;
showStatsWindow(statsWindow, options);
});
statsWindow.on('blur', () => {
if (!statsWindow || statsWindow.isDestroyed() || !statsWindow.isVisible()) {
return;
}
promoteStatsWindowLevel(statsWindow);
});
} else if (statsWindow.isVisible()) {
statsWindow.hide();
options.onVisibilityChanged?.(false);
} else {
showStatsWindow(statsWindow, options);
}
}
/**
* Register the IPC command handler for toggling the overlay.
* Call this once during app initialization.
*/
export function registerStatsOverlayToggle(options: StatsWindowOptions): void {
if (toggleRegistered) return;
toggleRegistered = true;
ipcMain.on(IPC_CHANNELS.command.toggleStatsOverlay, () => {
toggleStatsOverlay(options);
});
}
/**
* Clean up — destroy the stats window if it exists.
* Call during app quit.
*/
export function destroyStatsWindow(): void {
if (statsWindow && !statsWindow.isDestroyed()) {
statsWindow.destroy();
statsWindow = null;
}
}

View File

@@ -0,0 +1,245 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { parseSrtCues, parseAssCues, parseSubtitleCues } from './subtitle-cue-parser';
import type { SubtitleCue } from './subtitle-cue-parser';
test('parseSrtCues parses basic SRT content', () => {
const content = [
'1',
'00:00:01,000 --> 00:00:04,000',
'こんにちは',
'',
'2',
'00:00:05,000 --> 00:00:08,500',
'元気ですか',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
test('parseSrtCues handles multi-line subtitle text', () => {
const content = ['1', '00:01:00,000 --> 00:01:05,000', 'これは', 'テストです', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは\nテストです');
});
test('parseSrtCues handles hours in timestamps', () => {
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
test('parseSrtCues handles VTT-style dot separator', () => {
const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTスタイル', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
});
test('parseSrtCues returns empty array for empty content', () => {
assert.deepEqual(parseSrtCues(''), []);
assert.deepEqual(parseSrtCues(' \n\n '), []);
});
test('parseSrtCues skips malformed timing lines gracefully', () => {
const content = [
'1',
'NOT A TIMING LINE',
'テスト',
'',
'2',
'00:00:01,000 --> 00:00:02,000',
'有効',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, '有効');
});
test('parseAssCues parses basic ASS dialogue lines', () => {
const content = [
'[Script Info]',
'Title: Test',
'',
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,こんにちは',
'Dialogue: 0,0:00:05.00,0:00:08.50,Default,,0,0,0,,元気ですか',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
test('parseAssCues strips override tags from text', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\b1}太字{\\b0}テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '太字テスト');
});
test('parseAssCues handles text containing commas', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,はい、そうです、ね',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, 'はい、そうです、ね');
});
test('parseAssCues handles \\N line breaks', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,一行目\\N二行目',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '一行目\\N二行目');
});
test('parseAssCues returns empty for content without Events section', () => {
const content = ['[Script Info]', 'Title: Test'].join('\n');
assert.deepEqual(parseAssCues(content), []);
});
test('parseAssCues skips Comment lines', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Comment: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,これはコメント',
'Dialogue: 0,0:00:05.00,0:00:08.00,Default,,0,0,0,,これは字幕',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは字幕');
});
test('parseAssCues handles hour timestamps', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,1:30:00.00,1:30:05.00,Default,,0,0,0,,テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
test('parseAssCues respects dynamic field ordering from the Format row', () => {
const content = [
'[Events]',
'Format: Layer, Style, Start, End, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,Default,0:00:01.00,0:00:04.00,,0,0,0,,順番が違う',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, '順番が違う');
});
test('parseSubtitleCues auto-detects SRT format', () => {
const content = ['1', '00:00:01,000 --> 00:00:04,000', 'SRTテスト', ''].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'SRTテスト');
});
test('parseSubtitleCues auto-detects ASS format', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,ASSテスト',
].join('\n');
const cues = parseSubtitleCues(content, 'test.ass');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'ASSテスト');
});
test('parseSubtitleCues auto-detects VTT format', () => {
const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTテスト', ''].join('\n');
const cues = parseSubtitleCues(content, 'test.vtt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'VTTテスト');
});
test('parseSubtitleCues returns empty for unknown format', () => {
assert.deepEqual(parseSubtitleCues('random content', 'test.xyz'), []);
});
test('parseSubtitleCues returns cues sorted by start time', () => {
const content = [
'1',
'00:00:10,000 --> 00:00:14,000',
'二番目',
'',
'2',
'00:00:01,000 --> 00:00:04,000',
'一番目',
'',
].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues[0]!.text, '一番目');
assert.equal(cues[1]!.text, '二番目');
});
test('parseSubtitleCues detects subtitle formats from remote URLs', () => {
const assContent = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,URLテスト',
].join('\n');
const cues = parseSubtitleCues(assContent, 'https://host/subs.ass?lang=ja#track');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'URLテスト');
});

View File

@@ -0,0 +1,191 @@
export interface SubtitleCue {
startTime: number;
endTime: number;
text: string;
}
const SRT_TIMING_PATTERN =
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
function parseTimestamp(
hours: string | undefined,
minutes: string,
seconds: string,
millis: string,
): number {
return (
Number(hours || 0) * 3600 +
Number(minutes) * 60 +
Number(seconds) +
Number(millis.padEnd(3, '0')) / 1000
);
}
export function parseSrtCues(content: string): SubtitleCue[] {
const cues: SubtitleCue[] = [];
const lines = content.split(/\r?\n/);
let i = 0;
while (i < lines.length) {
const line = lines[i]!;
const timingMatch = SRT_TIMING_PATTERN.exec(line);
if (!timingMatch) {
i += 1;
continue;
}
const startTime = parseTimestamp(
timingMatch[1],
timingMatch[2]!,
timingMatch[3]!,
timingMatch[4]!,
);
const endTime = parseTimestamp(
timingMatch[5],
timingMatch[6]!,
timingMatch[7]!,
timingMatch[8]!,
);
i += 1;
const textLines: string[] = [];
while (i < lines.length && lines[i]!.trim() !== '') {
textLines.push(lines[i]!);
i += 1;
}
const text = textLines.join('\n').trim();
if (text) {
cues.push({ startTime, endTime, text });
}
}
return cues;
}
const ASS_OVERRIDE_TAG_PATTERN = /\{[^}]*\}/g;
const ASS_TIMING_PATTERN = /^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/;
const ASS_FORMAT_PREFIX = 'Format:';
const ASS_DIALOGUE_PREFIX = 'Dialogue:';
function parseAssTimestamp(raw: string): number | null {
const match = ASS_TIMING_PATTERN.exec(raw.trim());
if (!match) {
return null;
}
const hours = Number(match[1]);
const minutes = Number(match[2]);
const seconds = Number(match[3]);
const centiseconds = Number(match[4]!.padEnd(2, '0'));
return hours * 3600 + minutes * 60 + seconds + centiseconds / 100;
}
export function parseAssCues(content: string): SubtitleCue[] {
const cues: SubtitleCue[] = [];
const lines = content.split(/\r?\n/);
let inEventsSection = false;
let startFieldIndex = -1;
let endFieldIndex = -1;
let textFieldIndex = -1;
for (const line of lines) {
const trimmed = line.trim();
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
inEventsSection = trimmed.toLowerCase() === '[events]';
if (!inEventsSection) {
startFieldIndex = -1;
endFieldIndex = -1;
textFieldIndex = -1;
}
continue;
}
if (!inEventsSection) {
continue;
}
if (trimmed.startsWith(ASS_FORMAT_PREFIX)) {
const formatFields = trimmed
.slice(ASS_FORMAT_PREFIX.length)
.split(',')
.map((field) => field.trim().toLowerCase());
startFieldIndex = formatFields.indexOf('start');
endFieldIndex = formatFields.indexOf('end');
textFieldIndex = formatFields.indexOf('text');
continue;
}
if (!trimmed.startsWith(ASS_DIALOGUE_PREFIX)) {
continue;
}
if (startFieldIndex < 0 || endFieldIndex < 0 || textFieldIndex < 0) {
continue;
}
const fields = trimmed.slice(ASS_DIALOGUE_PREFIX.length).split(',');
if (
startFieldIndex >= fields.length ||
endFieldIndex >= fields.length ||
textFieldIndex >= fields.length
) {
continue;
}
const startTime = parseAssTimestamp(fields[startFieldIndex]!);
const endTime = parseAssTimestamp(fields[endFieldIndex]!);
if (startTime === null || endTime === null) {
continue;
}
const rawText = fields
.slice(textFieldIndex)
.join(',')
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
.trim();
if (rawText) {
cues.push({ startTime, endTime, text: rawText });
}
}
return cues;
}
function detectSubtitleFormat(source: string): 'srt' | 'vtt' | 'ass' | 'ssa' | null {
const [normalizedSource = source] =
(() => {
try {
return /^[a-z]+:\/\//i.test(source) ? new URL(source).pathname : source;
} catch {
return source;
}
})().split(/[?#]/, 1)[0] ?? '';
const ext = normalizedSource.split('.').pop()?.toLowerCase() ?? '';
if (ext === 'srt') return 'srt';
if (ext === 'vtt') return 'vtt';
if (ext === 'ass' || ext === 'ssa') return 'ass';
return null;
}
export function parseSubtitleCues(content: string, filename: string): SubtitleCue[] {
const format = detectSubtitleFormat(filename);
let cues: SubtitleCue[];
switch (format) {
case 'srt':
case 'vtt':
cues = parseSrtCues(content);
break;
case 'ass':
case 'ssa':
cues = parseAssCues(content);
break;
default:
return [];
}
cues.sort((a, b) => a.startTime - b.startTime);
return cues;
}

View File

@@ -0,0 +1,244 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { computePriorityWindow, createSubtitlePrefetchService } from './subtitle-prefetch';
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
function makeCues(count: number, startOffset = 0): SubtitleCue[] {
return Array.from({ length: count }, (_, i) => ({
startTime: startOffset + i * 5,
endTime: startOffset + i * 5 + 4,
text: `line-${i}`,
}));
}
test('computePriorityWindow returns next N cues from current position', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 12.0, 5);
assert.equal(window.length, 5);
// Position 12.0 falls during cue 2, so the active cue should be warmed first.
assert.equal(window[0]!.text, 'line-2');
assert.equal(window[4]!.text, 'line-6');
});
test('computePriorityWindow clamps to remaining cues at end of file', () => {
const cues = makeCues(5);
const window = computePriorityWindow(cues, 18.0, 10);
// Position 18.0 is during cue 3 (start=15), so cue 3 and cue 4 remain.
assert.equal(window.length, 2);
assert.equal(window[0]!.text, 'line-3');
assert.equal(window[1]!.text, 'line-4');
});
test('computePriorityWindow returns empty when past all cues', () => {
const cues = makeCues(3);
const window = computePriorityWindow(cues, 999.0, 10);
assert.equal(window.length, 0);
});
test('computePriorityWindow at position 0 returns first N cues', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 0, 5);
assert.equal(window.length, 5);
assert.equal(window[0]!.text, 'line-0');
});
test('computePriorityWindow includes the active cue when current position is mid-line', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 18.0, 3);
assert.equal(window.length, 3);
assert.equal(window[0]!.text, 'line-3');
assert.equal(window[1]!.text, 'line-4');
assert.equal(window[2]!.text, 'line-5');
});
function flushMicrotasks(): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, 0));
}
test('prefetch service tokenizes priority window cues and caches them', async () => {
const cues = makeCues(20);
const cached: Map<string, SubtitleData> = new Map();
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: (text, data) => {
cached.set(text, data);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Allow all async tokenization to complete
for (let i = 0; i < 25; i += 1) {
await flushMicrotasks();
}
service.stop();
// Priority window (first 3) should be cached
assert.ok(cached.has('line-0'));
assert.ok(cached.has('line-1'));
assert.ok(cached.has('line-2'));
});
test('prefetch service stops when cache is full', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
let cacheSize = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {
cacheSize += 1;
},
isCacheFull: () => cacheSize >= 5,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// Should have stopped at 5 (cache full), not tokenized all 20
assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
});
test('prefetch service can be stopped mid-flight', async () => {
const cues = makeCues(100);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.stop();
const callsAtStop = tokenizeCalls;
// Wait more to confirm no further calls
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
});
test('prefetch service onSeek re-prioritizes from new position', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Let a few cues process
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Seek to near the end
service.onSeek(80.0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// After seek to 80.0, cues starting after 80.0 (line-17, line-18, line-19) should appear in cached
const hasPostSeekCue = cachedTexts.some(
(t) => t === 'line-17' || t === 'line-18' || t === 'line-19',
);
assert.ok(hasPostSeekCue, 'Should have cached cues after seek position');
});
test('prefetch service still warms the priority window when cache is full', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => true,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.deepEqual(cachedTexts.slice(0, 3), ['line-0', 'line-1', 'line-2']);
});
test('prefetch service pause/resume halts and continues tokenization', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.pause();
const callsWhenPaused = tokenizeCalls;
// Wait while paused
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Should not have advanced much (may have 1 in-flight)
assert.ok(tokenizeCalls <= callsWhenPaused + 1, 'Should not tokenize much while paused');
service.resume();
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.ok(tokenizeCalls > callsWhenPaused + 1, 'Should resume tokenizing after unpause');
});

View File

@@ -0,0 +1,153 @@
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
export interface SubtitlePrefetchServiceDeps {
cues: SubtitleCue[];
tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
preCacheTokenization: (text: string, data: SubtitleData) => void;
isCacheFull: () => boolean;
priorityWindowSize?: number;
}
export interface SubtitlePrefetchService {
start: (currentTimeSeconds: number) => void;
stop: () => void;
onSeek: (newTimeSeconds: number) => void;
pause: () => void;
resume: () => void;
}
const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
export function computePriorityWindow(
cues: SubtitleCue[],
currentTimeSeconds: number,
windowSize: number,
): SubtitleCue[] {
if (cues.length === 0) {
return [];
}
// Find the first cue whose end time is after the current position.
// This includes the currently active cue when playback starts or seeks
// mid-line, while still skipping cues that have already finished.
let startIndex = -1;
for (let i = 0; i < cues.length; i += 1) {
if (cues[i]!.endTime > currentTimeSeconds) {
startIndex = i;
break;
}
}
if (startIndex < 0) {
// All cues are before current time
return [];
}
return cues.slice(startIndex, startIndex + windowSize);
}
export function createSubtitlePrefetchService(
deps: SubtitlePrefetchServiceDeps,
): SubtitlePrefetchService {
const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
let stopped = true;
let paused = false;
let currentRunId = 0;
async function tokenizeCueList(
cuesToProcess: SubtitleCue[],
runId: number,
options: { allowWhenCacheFull?: boolean } = {},
): Promise<void> {
for (const cue of cuesToProcess) {
if (stopped || runId !== currentRunId) {
return;
}
// Wait while paused
while (paused && !stopped && runId === currentRunId) {
await new Promise((resolve) => setTimeout(resolve, 10));
}
if (stopped || runId !== currentRunId) {
return;
}
if (!options.allowWhenCacheFull && deps.isCacheFull()) {
return;
}
try {
const result = await deps.tokenizeSubtitle(cue.text);
if (result && !stopped && runId === currentRunId) {
deps.preCacheTokenization(cue.text, result);
}
} catch {
// Skip failed cues, continue prefetching
}
// Yield to allow live processing to take priority
await new Promise((resolve) => setTimeout(resolve, 0));
}
}
async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
const cues = deps.cues;
// Phase 1: Priority window
const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
await tokenizeCueList(priorityCues, runId, { allowWhenCacheFull: true });
if (stopped || runId !== currentRunId) {
return;
}
// Phase 2: Background - remaining cues forward from current position
const priorityTexts = new Set(priorityCues.map((c) => c.text));
const remainingCues = cues.filter(
(cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
);
await tokenizeCueList(remainingCues, runId);
if (stopped || runId !== currentRunId) {
return;
}
// Phase 3: Background - earlier cues (for rewind support)
const earlierCues = cues.filter(
(cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
);
await tokenizeCueList(earlierCues, runId);
}
return {
start(currentTimeSeconds: number) {
stopped = false;
paused = false;
currentRunId += 1;
const runId = currentRunId;
void startPrefetching(currentTimeSeconds, runId);
},
stop() {
stopped = true;
currentRunId += 1;
},
onSeek(newTimeSeconds: number) {
// Cancel current run and restart from new position
currentRunId += 1;
const runId = currentRunId;
void startPrefetching(newTimeSeconds, runId);
},
pause() {
paused = true;
},
resume() {
paused = false;
},
};
}

View File

@@ -170,3 +170,87 @@ test('subtitle processing cache invalidation only affects future subtitle events
assert.equal(callsByText.get('same'), 2);
});
test('preCacheTokenization stores entry that is returned on next subtitle change', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('予め', { text: '予め', tokens: [] });
controller.onSubtitleChange('予め');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'should not call tokenize when pre-cached');
assert.deepEqual(emitted, [{ text: '予め', tokens: [] }]);
});
test('preCacheTokenization reuses normalized subtitle text across ASS linebreak variants', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('一行目\\N二行目', { text: '一行目\n二行目', tokens: [] });
controller.onSubtitleChange('一行目\n二行目');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'should not call tokenize when normalized text matches');
assert.deepEqual(emitted, [{ text: '一行目\n二行目', tokens: [] }]);
});
test('consumeCachedSubtitle returns prefetched payload and prevents reprocessing same line', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('猫\\Nです', { text: '猫\nです', tokens: [] });
const immediate = controller.consumeCachedSubtitle('猫\nです');
assert.deepEqual(immediate, { text: '猫\nです', tokens: [] });
controller.onSubtitleChange('猫\nです');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'same cached subtitle should not reprocess after immediate consume');
assert.deepEqual(emitted, []);
});
test('isCacheFull returns false when cache is below limit', () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: null }),
emitSubtitle: () => {},
});
assert.equal(controller.isCacheFull(), false);
});
test('isCacheFull returns true when cache reaches limit', async () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
emitSubtitle: () => {},
});
// Fill cache to the 256 limit
for (let i = 0; i < 256; i += 1) {
controller.preCacheTokenization(`line-${i}`, { text: `line-${i}`, tokens: [] });
}
assert.equal(controller.isCacheFull(), true);
});

View File

@@ -11,6 +11,13 @@ export interface SubtitleProcessingController {
onSubtitleChange: (text: string) => void;
refreshCurrentSubtitle: (textOverride?: string) => void;
invalidateTokenizationCache: () => void;
preCacheTokenization: (text: string, data: SubtitleData) => void;
consumeCachedSubtitle: (text: string) => SubtitleData | null;
isCacheFull: () => boolean;
}
function normalizeSubtitleCacheKey(text: string): string {
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
}
export function createSubtitleProcessingController(
@@ -26,18 +33,19 @@ export function createSubtitleProcessingController(
const now = deps.now ?? (() => Date.now());
const getCachedTokenization = (text: string): SubtitleData | null => {
const cached = tokenizationCache.get(text);
const cacheKey = normalizeSubtitleCacheKey(text);
const cached = tokenizationCache.get(cacheKey);
if (!cached) {
return null;
}
tokenizationCache.delete(text);
tokenizationCache.set(text, cached);
tokenizationCache.delete(cacheKey);
tokenizationCache.set(cacheKey, cached);
return cached;
};
const setCachedTokenization = (text: string, payload: SubtitleData): void => {
tokenizationCache.set(text, payload);
tokenizationCache.set(normalizeSubtitleCacheKey(text), payload);
while (tokenizationCache.size > SUBTITLE_TOKENIZATION_CACHE_LIMIT) {
const firstKey = tokenizationCache.keys().next().value;
if (firstKey !== undefined) {
@@ -130,5 +138,22 @@ export function createSubtitleProcessingController(
invalidateTokenizationCache: () => {
tokenizationCache.clear();
},
preCacheTokenization: (text: string, data: SubtitleData) => {
setCachedTokenization(text, data);
},
consumeCachedSubtitle: (text: string) => {
const cached = getCachedTokenization(text);
if (!cached) {
return null;
}
latestText = text;
lastEmittedText = text;
refreshRequested = false;
return cached;
},
isCacheFull: () => {
return tokenizationCache.size >= SUBTITLE_TOKENIZATION_CACHE_LIMIT;
},
};
}

View File

@@ -108,8 +108,9 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
partOfSpeech: PartOfSpeech.other,
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
isNPlusOneTarget: true,
isNameMatch: true,
jlptLevel: 'N5',
frequencyRank: 12,
},
],
@@ -122,9 +123,35 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
);
assert.match(
markup,
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア" data-frequency-rank="12">アレクシア<\/span>/,
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア">アレクシア<\/span>/,
);
assert.doesNotMatch(markup, /word-name-match word-known|word-known word-name-match/);
assert.doesNotMatch(markup, /word-name-match word-n-plus-one|word-n-plus-one word-name-match/);
assert.doesNotMatch(markup, /data-frequency-rank="12"|data-jlpt-level="N5"|word-jlpt-n5/);
});
test('serializeSubtitleMarkup keeps filtered tokens hoverable without annotation attrs', () => {
const payload: SubtitleData = {
text: 'は',
tokens: [
{
surface: 'は',
reading: 'は',
headword: 'は',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: false,
},
],
};
const markup = serializeSubtitleMarkup(payload, frequencyOptions);
assert.equal(markup, '<span class="word" data-reading="は" data-headword="は">は</span>');
});
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {

View File

@@ -47,10 +47,15 @@ function escapeHtml(text: string): string {
.replaceAll("'", '&#39;');
}
function hasPrioritizedNameMatch(token: MergedToken): boolean {
return token.isNameMatch === true;
}
function computeFrequencyClass(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): string | null {
if (hasPrioritizedNameMatch(token)) return null;
if (!options.enabled) return null;
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
@@ -70,6 +75,7 @@ function getFrequencyRankLabel(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): string | null {
if (hasPrioritizedNameMatch(token)) return null;
if (!options.enabled) return null;
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
@@ -79,21 +85,25 @@ function getFrequencyRankLabel(
}
function getJlptLevelLabel(token: MergedToken): string | null {
if (hasPrioritizedNameMatch(token)) {
return null;
}
return token.jlptLevel ?? null;
}
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
const classes = ['word'];
if (token.isNPlusOneTarget) {
classes.push('word-n-plus-one');
} else if (token.isNameMatch) {
if (hasPrioritizedNameMatch(token)) {
classes.push('word-name-match');
} else if (token.isNPlusOneTarget) {
classes.push('word-n-plus-one');
} else if (token.isKnown) {
classes.push('word-known');
}
if (token.jlptLevel) {
if (!hasPrioritizedNameMatch(token) && token.jlptLevel) {
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
}
@@ -137,6 +147,8 @@ function serializeSubtitleToken(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): SerializedSubtitleToken {
const prioritizedNameMatch = hasPrioritizedNameMatch(token);
return {
surface: token.surface,
reading: token.reading,
@@ -146,10 +158,10 @@ function serializeSubtitleToken(
partOfSpeech: token.partOfSpeech,
isMerged: token.isMerged,
isKnown: token.isKnown,
isNPlusOneTarget: token.isNPlusOneTarget,
isNPlusOneTarget: prioritizedNameMatch ? false : token.isNPlusOneTarget,
isNameMatch: token.isNameMatch ?? false,
jlptLevel: token.jlptLevel,
frequencyRank: token.frequencyRank,
jlptLevel: prioritizedNameMatch ? undefined : token.jlptLevel,
frequencyRank: prioritizedNameMatch ? undefined : token.frequencyRank,
className: computeWordClass(token, options),
frequencyRankLabel: getFrequencyRankLabel(token, options),
jlptLevelLabel: getJlptLevelLabel(token),

View File

@@ -1,23 +1,72 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { injectTexthookerBootstrapHtml } from './texthooker';
import { injectTexthookerBootstrapHtml, type TexthookerBootstrapSettings } from './texthooker';
test('injectTexthookerBootstrapHtml injects websocket bootstrap before head close', () => {
const html = '<html><head><title>Texthooker</title></head><body></body></html>';
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678');
const settings: TexthookerBootstrapSettings = {
enableKnownWordColoring: true,
enableNPlusOneColoring: true,
enableNameMatchColoring: true,
enableFrequencyColoring: true,
enableJlptColoring: true,
characterDictionaryEnabled: true,
knownWordColor: '#a6da95',
nPlusOneColor: '#c6a0f6',
nameMatchColor: '#f5bde6',
hoverTokenColor: '#f4dbd6',
hoverTokenBackgroundColor: 'rgba(54, 58, 79, 0.84)',
jlptColors: {
N1: '#ed8796',
N2: '#f5a97f',
N3: '#f9e2af',
N4: '#a6e3a1',
N5: '#8aadf4',
},
frequencyDictionary: {
singleColor: '#f5a97f',
bandedColors: ['#ed8796', '#f5a97f', '#f9e2af', '#8bd5ca', '#8aadf4'],
},
};
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678', settings);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-websocketUrl', "ws:\/\/127\.0\.0\.1:6678"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableKnownWordColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableNPlusOneColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableNameMatchColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableFrequencyColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableJlptColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-characterDictionaryEnabled', "1"\)/,
);
assert.match(actual, /--subminer-known-word-color:\s*#a6da95;/);
assert.match(actual, /--subminer-n-plus-one-color:\s*#c6a0f6;/);
assert.match(actual, /--subminer-name-match-color:\s*#f5bde6;/);
assert.match(actual, /--subminer-jlpt-n1-color:\s*#ed8796;/);
assert.match(actual, /--subminer-frequency-band-4-color:\s*#8bd5ca;/);
assert.match(actual, /--sm-token-hover-bg:\s*rgba\(54, 58, 79, 0\.84\);/);
assert.doesNotMatch(actual, /p \.word\.word-known\s*\{/);
assert.ok(actual.indexOf('</script></head>') !== -1);
assert.ok(actual.includes('bannou-texthooker-websocketUrl'));
assert.ok(!actual.includes('bannou-texthooker-enableKnownWordColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableNPlusOneColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableNameMatchColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableFrequencyColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableJlptColoring'));
});
test('injectTexthookerBootstrapHtml leaves html unchanged without websocketUrl', () => {

View File

@@ -5,23 +5,92 @@ import { createLogger } from '../../logger';
const logger = createLogger('main:texthooker');
export function injectTexthookerBootstrapHtml(html: string, websocketUrl?: string): string {
if (!websocketUrl) {
export type TexthookerBootstrapSettings = {
enableKnownWordColoring: boolean;
enableNPlusOneColoring: boolean;
enableNameMatchColoring: boolean;
enableFrequencyColoring: boolean;
enableJlptColoring: boolean;
characterDictionaryEnabled: boolean;
knownWordColor: string;
nPlusOneColor: string;
nameMatchColor: string;
hoverTokenColor: string;
hoverTokenBackgroundColor: string;
jlptColors: {
N1: string;
N2: string;
N3: string;
N4: string;
N5: string;
};
frequencyDictionary: {
singleColor: string;
bandedColors: readonly [string, string, string, string, string];
};
};
function buildTexthookerBootstrapScript(
websocketUrl?: string,
settings?: TexthookerBootstrapSettings,
): string {
const statements: string[] = [];
if (websocketUrl) {
statements.push(
`window.localStorage.setItem('bannou-texthooker-websocketUrl', ${JSON.stringify(websocketUrl)});`,
);
}
if (settings) {
const booleanStorageValue = (enabled: boolean): '"1"' | '"0"' => (enabled ? '"1"' : '"0"');
statements.push(
`window.localStorage.setItem('bannou-texthooker-enableKnownWordColoring', ${booleanStorageValue(settings.enableKnownWordColoring)});`,
`window.localStorage.setItem('bannou-texthooker-enableNPlusOneColoring', ${booleanStorageValue(settings.enableNPlusOneColoring)});`,
`window.localStorage.setItem('bannou-texthooker-enableNameMatchColoring', ${booleanStorageValue(settings.enableNameMatchColoring)});`,
`window.localStorage.setItem('bannou-texthooker-enableFrequencyColoring', ${booleanStorageValue(settings.enableFrequencyColoring)});`,
`window.localStorage.setItem('bannou-texthooker-enableJlptColoring', ${booleanStorageValue(settings.enableJlptColoring)});`,
`window.localStorage.setItem('bannou-texthooker-characterDictionaryEnabled', ${booleanStorageValue(settings.characterDictionaryEnabled)});`,
);
}
return statements.length > 0 ? `<script>${statements.join('')}</script>` : '';
}
function buildTexthookerBootstrapStyle(settings?: TexthookerBootstrapSettings): string {
if (!settings) {
return '';
}
const [band1, band2, band3, band4, band5] = settings.frequencyDictionary.bandedColors;
return `<style id="subminer-texthooker-bootstrap-style">:root{--subminer-known-word-color:${settings.knownWordColor};--subminer-n-plus-one-color:${settings.nPlusOneColor};--subminer-name-match-color:${settings.nameMatchColor};--subminer-jlpt-n1-color:${settings.jlptColors.N1};--subminer-jlpt-n2-color:${settings.jlptColors.N2};--subminer-jlpt-n3-color:${settings.jlptColors.N3};--subminer-jlpt-n4-color:${settings.jlptColors.N4};--subminer-jlpt-n5-color:${settings.jlptColors.N5};--subminer-frequency-single-color:${settings.frequencyDictionary.singleColor};--subminer-frequency-band-1-color:${band1};--subminer-frequency-band-2-color:${band2};--subminer-frequency-band-3-color:${band3};--subminer-frequency-band-4-color:${band4};--subminer-frequency-band-5-color:${band5};--sm-token-hover-bg:${settings.hoverTokenBackgroundColor};--sm-token-hover-text:${settings.hoverTokenColor};}</style>`;
}
export function injectTexthookerBootstrapHtml(
html: string,
websocketUrl?: string,
settings?: TexthookerBootstrapSettings,
): string {
const bootstrapStyle = buildTexthookerBootstrapStyle(settings);
const bootstrapScript = buildTexthookerBootstrapScript(websocketUrl, settings);
if (!bootstrapStyle && !bootstrapScript) {
return html;
}
const bootstrapScript = `<script>window.localStorage.setItem('bannou-texthooker-websocketUrl', ${JSON.stringify(
websocketUrl,
)});</script>`;
if (html.includes('</head>')) {
return html.replace('</head>', `${bootstrapScript}</head>`);
return html.replace('</head>', `${bootstrapStyle}${bootstrapScript}</head>`);
}
return `${bootstrapScript}${html}`;
return `${bootstrapStyle}${bootstrapScript}${html}`;
}
export class Texthooker {
constructor(
private readonly getBootstrapSettings?: () => TexthookerBootstrapSettings | undefined,
) {}
private server: http.Server | null = null;
public isRunning(): boolean {
@@ -62,9 +131,16 @@ export class Texthooker {
res.end('Not found');
return;
}
const bootstrapSettings = this.getBootstrapSettings?.();
const responseData =
urlPath === '/' || urlPath === '/index.html'
? Buffer.from(injectTexthookerBootstrapHtml(data.toString('utf-8'), websocketUrl))
? Buffer.from(
injectTexthookerBootstrapHtml(
data.toString('utf-8'),
websocketUrl,
bootstrapSettings,
),
)
: data;
res.writeHead(200, { 'Content-Type': mimeTypes[ext] || 'text/plain' });
res.end(responseData);

File diff suppressed because it is too large Load Diff

View File

@@ -23,6 +23,7 @@ import {
requestYomitanScanTokens,
requestYomitanTermFrequencies,
} from './tokenizer/yomitan-parser-runtime';
import type { YomitanTermFrequency } from './tokenizer/yomitan-parser-runtime';
const logger = createLogger('main:tokenizer');
@@ -177,6 +178,19 @@ async function applyAnnotationStage(
);
}
async function stripSubtitleAnnotationMetadata(tokens: MergedToken[]): Promise<MergedToken[]> {
if (tokens.length === 0) {
return tokens;
}
if (!annotationStageModulePromise) {
annotationStageModulePromise = import('./tokenizer/annotation-stage');
}
const annotationStage = await annotationStageModulePromise;
return tokens.map((token) => annotationStage.stripSubtitleAnnotationMetadata(token));
}
export function createTokenizerDepsRuntime(
options: TokenizerDepsRuntimeOptions,
): TokenizerServiceDeps {
@@ -225,7 +239,13 @@ export function createTokenizerDepsRuntime(
return null;
}
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode(), false);
return mergeTokens(
rawTokens,
options.isKnownWord,
options.getKnownWordMatchMode(),
false,
text,
);
},
enrichTokensWithMecab: async (tokens, mecabTokens) =>
enrichTokensWithMecabAsync(tokens, mecabTokens),
@@ -336,56 +356,162 @@ function resolveFrequencyLookupText(
return token.surface;
}
function resolveYomitanFrequencyLookupTexts(
token: MergedToken,
matchMode: FrequencyDictionaryMatchMode,
): string[] {
const primaryLookupText = resolveFrequencyLookupText(token, matchMode).trim();
if (!primaryLookupText) {
return [];
}
if (matchMode !== 'headword') {
return [primaryLookupText];
}
const normalizedHeadword = token.headword.trim();
const normalizedSurface = token.surface.trim();
if (
!normalizedHeadword ||
!normalizedSurface ||
normalizedSurface === normalizedHeadword ||
normalizedSurface === primaryLookupText
) {
return [primaryLookupText];
}
return [primaryLookupText, normalizedSurface];
}
function buildYomitanFrequencyTermReadingList(
tokens: MergedToken[],
matchMode: FrequencyDictionaryMatchMode,
): Array<{ term: string; reading: string | null }> {
const termReadingList: Array<{ term: string; reading: string | null }> = [];
for (const token of tokens) {
const term = resolveFrequencyLookupText(token, matchMode).trim();
if (!term) {
continue;
}
const readingRaw =
token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null;
termReadingList.push({ term, reading: readingRaw });
for (const term of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
termReadingList.push({ term, reading: readingRaw });
}
}
return termReadingList;
}
function buildYomitanFrequencyRankMap(
frequencies: ReadonlyArray<{ term: string; frequency: number; dictionaryPriority?: number }>,
): Map<string, number> {
const rankByTerm = new Map<string, { rank: number; dictionaryPriority: number }>();
function makeYomitanFrequencyPairKey(term: string, reading: string | null): string {
return `${term}\u0000${reading ?? ''}`;
}
interface NormalizedYomitanTermFrequency extends YomitanTermFrequency {
reading: string | null;
frequency: number;
}
interface YomitanFrequencyIndex {
byPair: Map<string, NormalizedYomitanTermFrequency[]>;
byTerm: Map<string, NormalizedYomitanTermFrequency[]>;
}
function appendYomitanFrequencyEntry(
map: Map<string, NormalizedYomitanTermFrequency[]>,
key: string,
entry: NormalizedYomitanTermFrequency,
): void {
const existing = map.get(key);
if (existing) {
existing.push(entry);
return;
}
map.set(key, [entry]);
}
function buildYomitanFrequencyIndex(
frequencies: ReadonlyArray<YomitanTermFrequency>,
): YomitanFrequencyIndex {
const byPair = new Map<string, NormalizedYomitanTermFrequency[]>();
const byTerm = new Map<string, NormalizedYomitanTermFrequency[]>();
for (const frequency of frequencies) {
const normalizedTerm = frequency.term.trim();
const term = frequency.term.trim();
const rank = normalizePositiveFrequencyRank(frequency.frequency);
if (!normalizedTerm || rank === null) {
if (!term || rank === null) {
continue;
}
const dictionaryPriority =
typeof frequency.dictionaryPriority === 'number' &&
Number.isFinite(frequency.dictionaryPriority)
? Math.max(0, Math.floor(frequency.dictionaryPriority))
: Number.MAX_SAFE_INTEGER;
const current = rankByTerm.get(normalizedTerm);
const reading =
typeof frequency.reading === 'string' && frequency.reading.trim().length > 0
? frequency.reading.trim()
: null;
const normalizedEntry: NormalizedYomitanTermFrequency = {
...frequency,
term,
reading,
frequency: rank,
};
appendYomitanFrequencyEntry(
byPair,
makeYomitanFrequencyPairKey(term, reading),
normalizedEntry,
);
appendYomitanFrequencyEntry(byTerm, term, normalizedEntry);
}
return { byPair, byTerm };
}
function selectBestYomitanFrequencyRank(
entries: ReadonlyArray<NormalizedYomitanTermFrequency>,
): number | null {
let bestEntry: NormalizedYomitanTermFrequency | null = null;
for (const entry of entries) {
if (
current === undefined ||
dictionaryPriority < current.dictionaryPriority ||
(dictionaryPriority === current.dictionaryPriority && rank < current.rank)
bestEntry === null ||
entry.dictionaryPriority < bestEntry.dictionaryPriority ||
(entry.dictionaryPriority === bestEntry.dictionaryPriority &&
entry.frequency < bestEntry.frequency)
) {
rankByTerm.set(normalizedTerm, { rank, dictionaryPriority });
bestEntry = entry;
}
}
const collapsedRankByTerm = new Map<string, number>();
for (const [term, entry] of rankByTerm.entries()) {
collapsedRankByTerm.set(term, entry.rank);
return bestEntry?.frequency ?? null;
}
function getYomitanFrequencyRank(
token: MergedToken,
candidateText: string,
matchMode: FrequencyDictionaryMatchMode,
frequencyIndex: YomitanFrequencyIndex,
): number | null {
const normalizedCandidateText = candidateText.trim();
if (!normalizedCandidateText) {
return null;
}
return collapsedRankByTerm;
const reading =
typeof token.reading === 'string' && token.reading.trim().length > 0
? token.reading.trim()
: null;
const pairEntries =
frequencyIndex.byPair.get(makeYomitanFrequencyPairKey(normalizedCandidateText, reading)) ?? [];
const candidateEntries =
pairEntries.length > 0
? pairEntries
: (frequencyIndex.byTerm.get(normalizedCandidateText) ?? []);
if (candidateEntries.length === 0) {
return null;
}
const normalizedHeadword = token.headword.trim();
const normalizedSurface = token.surface.trim();
const isInflectedHeadwordFallback =
matchMode === 'headword' &&
normalizedCandidateText === normalizedHeadword &&
normalizedSurface.length > 0 &&
normalizedSurface !== normalizedHeadword;
return selectBestYomitanFrequencyRank(candidateEntries);
}
function getLocalFrequencyRank(
@@ -416,7 +542,7 @@ function getLocalFrequencyRank(
function applyFrequencyRanks(
tokens: MergedToken[],
matchMode: FrequencyDictionaryMatchMode,
yomitanRankByTerm: Map<string, number>,
yomitanFrequencyIndex: YomitanFrequencyIndex,
getFrequencyRank: FrequencyDictionaryLookup | undefined,
): MergedToken[] {
if (tokens.length === 0) {
@@ -441,12 +567,19 @@ function applyFrequencyRanks(
};
}
const yomitanRank = yomitanRankByTerm.get(lookupText);
if (yomitanRank !== undefined) {
return {
...token,
frequencyRank: yomitanRank,
};
for (const candidateText of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
const yomitanRank = getYomitanFrequencyRank(
token,
candidateText,
matchMode,
yomitanFrequencyIndex,
);
if (yomitanRank !== null) {
return {
...token,
frequencyRank: yomitanRank,
};
}
}
if (!getFrequencyRank) {
@@ -501,6 +634,7 @@ async function parseWithYomitanInternalParser(
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: token.isNameMatch ?? false,
frequencyRank: token.frequencyRank,
}),
),
);
@@ -510,7 +644,7 @@ async function parseWithYomitanInternalParser(
}
deps.onTokenizationReady?.(text);
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
const frequencyRankPromise: Promise<YomitanFrequencyIndex> = options.frequencyEnabled
? (async () => {
const frequencyMatchMode = options.frequencyMatchMode;
const termReadingList = buildYomitanFrequencyTermReadingList(
@@ -522,9 +656,9 @@ async function parseWithYomitanInternalParser(
deps,
logger,
);
return buildYomitanFrequencyRankMap(yomitanFrequencies);
return buildYomitanFrequencyIndex(yomitanFrequencies);
})()
: Promise.resolve(new Map<string, number>());
: Promise.resolve({ byPair: new Map(), byTerm: new Map() });
const mecabEnrichmentPromise: Promise<MergedToken[]> = needsMecabPosEnrichment(options)
? (async () => {
@@ -545,7 +679,7 @@ async function parseWithYomitanInternalParser(
})()
: Promise.resolve(normalizedSelectedTokens);
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
const [yomitanFrequencyIndex, enrichedTokens] = await Promise.all([
frequencyRankPromise,
mecabEnrichmentPromise,
]);
@@ -554,7 +688,7 @@ async function parseWithYomitanInternalParser(
return applyFrequencyRanks(
enrichedTokens,
options.frequencyMatchMode,
yomitanRankByTerm,
yomitanFrequencyIndex,
deps.getFrequencyRank,
);
}
@@ -585,9 +719,12 @@ export async function tokenizeSubtitle(
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
if (yomitanTokens && yomitanTokens.length > 0) {
const annotatedTokens = await stripSubtitleAnnotationMetadata(
await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
);
return {
text: displayText,
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
};
}

View File

@@ -1,7 +1,12 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { MergedToken, PartOfSpeech } from '../../../types';
import { annotateTokens, AnnotationStageDeps } from './annotation-stage';
import {
annotateTokens,
AnnotationStageDeps,
shouldExcludeTokenFromSubtitleAnnotations,
stripSubtitleAnnotationMetadata,
} from './annotation-stage';
function makeToken(overrides: Partial<MergedToken> = {}): MergedToken {
return {
@@ -50,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
assert.equal(surfaceResult[0]?.isKnown, false);
});
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
const tokens = [
makeToken({
surface: '大体',
headword: '大体',
reading: 'だいたい',
frequencyRank: 1895,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
isKnownWord: (text) => text === 'だいたい',
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
}),
);
assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.jlptLevel, 'N4');
assert.equal(result[0]?.frequencyRank, 1895);
});
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
const tokens = [
makeToken({
@@ -150,6 +178,278 @@ test('annotateTokens handles JLPT disabled and eligibility exclusion paths', ()
assert.equal(excludedLookupCalls, 0);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory ending variants', () => {
const tokens = [
makeToken({
surface: 'んです',
headword: 'ん',
reading: 'ンデス',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'のだ',
headword: 'の',
reading: 'ノダ',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'んだ',
headword: 'ん',
reading: 'ンダ',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'のです',
headword: 'の',
reading: 'ノデス',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'なんです',
headword: 'だ',
reading: 'ナンデス',
pos1: '助動詞|名詞|助動詞',
pos2: '|非自立',
}),
makeToken({
surface: 'んでした',
headword: 'ん',
reading: 'ンデシタ',
pos1: '助動詞|助動詞|助動詞',
}),
makeToken({
surface: 'のでは',
headword: 'の',
reading: 'ノデハ',
pos1: '助詞|接続詞',
}),
];
for (const token of tokens) {
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
}
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory pondering endings', () => {
const token = makeToken({
surface: 'のかな',
headword: 'の',
reading: 'ノカナ',
pos1: '名詞|助動詞',
pos2: '非自立',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes auxiliary-stem そうだ grammar tails', () => {
const token = makeToken({
surface: 'そうだ',
headword: 'そうだ',
reading: 'ソウダ',
pos1: '名詞|助動詞',
pos2: '特殊',
pos3: '助動詞語幹',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside explanatory ending family', () => {
const token = makeToken({
surface: '問題',
headword: '問題',
reading: 'モンダイ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '一般',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
const tokens = [
makeToken({
surface: 'は',
headword: 'は',
reading: 'ハ',
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
}),
makeToken({
surface: 'です',
headword: 'です',
reading: 'デス',
partOfSpeech: PartOfSpeech.bound_auxiliary,
pos1: '助動詞',
}),
makeToken({
surface: 'この',
headword: 'この',
reading: 'コノ',
partOfSpeech: PartOfSpeech.other,
pos1: '連体詞',
}),
];
for (const token of tokens) {
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
}
});
test('shouldExcludeTokenFromSubtitleAnnotations keeps mixed content tokens with trailing helpers', () => {
const token = makeToken({
surface: '行きます',
headword: '行く',
reading: 'イキマス',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞|助動詞',
pos2: '自立',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes merged lexical tokens with trailing quote particles', () => {
const token = makeToken({
surface: 'どうしてもって',
headword: 'どうしても',
reading: 'ドウシテモッテ',
partOfSpeech: PartOfSpeech.other,
pos1: '副詞|助詞',
pos2: '一般|格助詞',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes kana-only demonstrative helper merges', () => {
const token = makeToken({
surface: 'これで',
headword: 'これ',
reading: 'コレデ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞|助詞',
pos2: '代名詞|格助詞',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
const token = makeToken({
surface: 'は',
headword: 'は',
reading: 'ハ',
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
isKnown: true,
isNPlusOneTarget: true,
isNameMatch: true,
jlptLevel: 'N5',
frequencyRank: 12,
});
assert.deepEqual(stripSubtitleAnnotationMetadata(token), {
...token,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: false,
jlptLevel: undefined,
frequencyRank: undefined,
});
});
test('stripSubtitleAnnotationMetadata leaves content tokens unchanged', () => {
const token = makeToken({
surface: '猫',
headword: '猫',
reading: 'ネコ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
isKnown: true,
jlptLevel: 'N5',
frequencyRank: 42,
});
assert.strictEqual(stripSubtitleAnnotationMetadata(token), token);
});
test('annotateTokens prioritizes name matches over n+1, frequency, and JLPT when enabled', () => {
let jlptLookupCalls = 0;
const tokens = [
makeToken({
surface: 'オリヴィア',
reading: 'オリヴィア',
headword: 'オリヴィア',
isNameMatch: true,
frequencyRank: 42,
startPos: 0,
endPos: 5,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
getJlptLevel: () => {
jlptLookupCalls += 1;
return 'N2';
},
}),
{
nameMatchEnabled: true,
minSentenceWordsForNPlusOne: 1,
},
);
assert.equal(result[0]?.isNameMatch, true);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
assert.equal(jlptLookupCalls, 0);
});
test('annotateTokens keeps other annotations for name matches when name highlighting is disabled', () => {
let jlptLookupCalls = 0;
const tokens = [
makeToken({
surface: 'オリヴィア',
reading: 'オリヴィア',
headword: 'オリヴィア',
isNameMatch: true,
frequencyRank: 42,
startPos: 0,
endPos: 5,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
getJlptLevel: () => {
jlptLookupCalls += 1;
return 'N2';
},
}),
{
nameMatchEnabled: false,
minSentenceWordsForNPlusOne: 1,
},
);
assert.equal(result[0]?.isNameMatch, true);
assert.equal(result[0]?.isNPlusOneTarget, true);
assert.equal(result[0]?.frequencyRank, 42);
assert.equal(result[0]?.jlptLevel, 'N2');
assert.equal(jlptLookupCalls, 1);
});
test('annotateTokens N+1 handoff marks expected target when threshold is satisfied', () => {
const tokens = [
makeToken({ surface: '私', headword: '私', startPos: 0, endPos: 1 }),
@@ -206,8 +506,8 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
);
assert.equal(result[0]?.isKnown, false);
assert.equal(result[1]?.isKnown, true);
assert.equal(result[2]?.isKnown, true);
assert.equal(result[1]?.isKnown, false);
assert.equal(result[2]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
});
@@ -293,6 +593,32 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+
assert.equal(result[0]?.isNPlusOneTarget, false);
});
test('annotateTokens clears all annotations for non-independent kanji noun tokens under unified gate', () => {
const tokens = [
makeToken({
surface: '者',
reading: 'もの',
headword: '者',
partOfSpeech: PartOfSpeech.other,
pos1: '名詞',
pos2: '非自立',
pos3: '一般',
startPos: 0,
endPos: 1,
frequencyRank: 475,
}),
];
const result = annotateTokens(tokens, makeDeps(), {
minSentenceWordsForNPlusOne: 1,
});
assert.equal(result[0]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
});
test('annotateTokens excludes likely kana SFX tokens from frequency when POS tags are missing', () => {
const tokens = [
makeToken({
@@ -444,3 +770,33 @@ test('annotateTokens excludes composite tokens when all component pos tags are e
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.isNPlusOneTarget, false);
});
test('annotateTokens applies one shared exclusion gate across known N+1 frequency and JLPT', () => {
const tokens = [
makeToken({
surface: 'これで',
headword: 'これ',
reading: 'コレデ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞|助詞',
pos2: '代名詞|格助詞',
startPos: 0,
endPos: 3,
frequencyRank: 9,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
isKnownWord: (text) => text === 'これ',
getJlptLevel: (text) => (text === 'これ' ? 'N5' : null),
}),
{ minSentenceWordsForNPlusOne: 1 },
);
assert.equal(result[0]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
});

View File

@@ -9,11 +9,65 @@ import {
} from '../../../token-pos2-exclusions';
import { JlptLevel, MergedToken, NPlusOneMatchMode, PartOfSpeech } from '../../../types';
import { shouldIgnoreJlptByTerm, shouldIgnoreJlptForMecabPos1 } from '../jlpt-token-filter';
import {
shouldExcludeTokenFromSubtitleAnnotations as sharedShouldExcludeTokenFromSubtitleAnnotations,
stripSubtitleAnnotationMetadata as sharedStripSubtitleAnnotationMetadata,
} from './subtitle-annotation-filter';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'ああ',
'ええ',
'うう',
'おお',
'はあ',
'はは',
'へえ',
'ふう',
'ほう',
]);
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
'だ',
'です',
'でした',
'だった',
'では',
'じゃ',
'でしょう',
'だろう',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
'',
'か',
'ね',
'よ',
'な',
'よね',
'かな',
'かね',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
(particle) => `${prefix}${core}${particle}`,
),
),
),
);
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
'って',
'ってよ',
'ってね',
'ってな',
'ってさ',
'ってか',
'ってば',
]);
const jlptLevelLookupCaches = new WeakMap<
(text: string) => JlptLevel | null,
@@ -28,6 +82,7 @@ export interface AnnotationStageDeps {
export interface AnnotationStageOptions {
nPlusOneEnabled?: boolean;
nameMatchEnabled?: boolean;
jlptEnabled?: boolean;
frequencyEnabled?: boolean;
minSentenceWordsForNPlusOne?: number;
@@ -43,33 +98,27 @@ function resolveKnownWordText(
return matchMode === 'surface' ? surface : headword;
}
function applyKnownWordMarking(
tokens: MergedToken[],
isKnownWord: (text: string) => boolean,
knownWordMatchMode: NPlusOneMatchMode,
): MergedToken[] {
return tokens.map((token) => {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
return {
...token,
isKnown: token.isKnown || (matchText ? isKnownWord(matchText) : false),
};
});
}
function normalizePos1Tag(pos1: string | undefined): string {
return typeof pos1 === 'string' ? pos1.trim() : '';
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const SUBTITLE_ANNOTATION_EXCLUDED_POS1 = new Set(['感動詞']);
const SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1 = new Set(['助詞', '助動詞', '連体詞']);
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
function splitNormalizedTagParts(normalizedTag: string): string[] {
if (!normalizedTag) {
return false;
return [];
}
const parts = normalizedTag
return normalizedTag
.split('|')
.map((part) => part.trim())
.filter((part) => part.length > 0);
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const parts = splitNormalizedTagParts(normalizedTag);
if (parts.length === 0) {
return false;
}
@@ -78,6 +127,50 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
return parts.some((part) => exclusions.has(part));
}
function isExcludedFromSubtitleAnnotationsByPos1(normalizedPos1: string): boolean {
const parts = splitNormalizedTagParts(normalizedPos1);
if (parts.some((part) => SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part))) {
return true;
}
return parts.length > 0 && parts.every((part) => SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(part));
}
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
const normalizedSurface = normalizeJlptTextForExclusion(token.surface);
const normalizedHeadword = normalizeJlptTextForExclusion(token.headword);
if (!normalizedSurface || !normalizedHeadword || !normalizedSurface.startsWith(normalizedHeadword)) {
return false;
}
const suffix = normalizedSurface.slice(normalizedHeadword.length);
if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(suffix)) {
return false;
}
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
if (pos1Parts.length < 2) {
return false;
}
const [leadingPos1, ...trailingPos1] = pos1Parts;
if (!leadingPos1 || SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(leadingPos1)) {
return false;
}
return trailingPos1.length > 0 && trailingPos1.every((part) => part === '助詞');
}
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
if (pos1Parts.length === 0 || !pos1Parts.every((part) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(part))) {
return false;
}
const pos3Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos3));
return pos3Parts.includes('助動詞語幹');
}
function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
if (options.pos1Exclusions) {
return options.pos1Exclusions;
@@ -98,6 +191,61 @@ function normalizePos2Tag(pos2: string | undefined): string {
return typeof pos2 === 'string' ? pos2.trim() : '';
}
function hasKanjiChar(text: string): boolean {
for (const char of text) {
const code = char.codePointAt(0);
if (code === undefined) {
continue;
}
if (
(code >= 0x3400 && code <= 0x4dbf) ||
(code >= 0x4e00 && code <= 0x9fff) ||
(code >= 0xf900 && code <= 0xfaff)
) {
return true;
}
}
return false;
}
function isExcludedComponent(
pos1: string | undefined,
pos2: string | undefined,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): boolean {
return (
(typeof pos1 === 'string' && pos1Exclusions.has(pos1)) ||
(typeof pos2 === 'string' && pos2Exclusions.has(pos2))
);
}
function shouldAllowContentLedMergedTokenFrequency(
normalizedPos1: string,
normalizedPos2: string,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): boolean {
const pos1Parts = splitNormalizedTagParts(normalizedPos1);
if (pos1Parts.length < 2) {
return false;
}
const pos2Parts = splitNormalizedTagParts(normalizedPos2);
if (isExcludedComponent(pos1Parts[0], pos2Parts[0], pos1Exclusions, pos2Exclusions)) {
return false;
}
const componentCount = Math.max(pos1Parts.length, pos2Parts.length);
for (let index = 1; index < componentCount; index += 1) {
if (!isExcludedComponent(pos1Parts[index], pos2Parts[index], pos1Exclusions, pos2Exclusions)) {
return false;
}
}
return true;
}
function isFrequencyExcludedByPos(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
@@ -109,13 +257,20 @@ function isFrequencyExcludedByPos(
const normalizedPos1 = normalizePos1Tag(token.pos1);
const hasPos1 = normalizedPos1.length > 0;
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
const allowContentLedMergedToken = shouldAllowContentLedMergedTokenFrequency(
normalizedPos1,
normalizedPos2,
pos1Exclusions,
pos2Exclusions,
);
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions) && !allowContentLedMergedToken) {
return true;
}
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions) && !allowContentLedMergedToken) {
return true;
}
@@ -133,26 +288,43 @@ function isFrequencyExcludedByPos(
);
}
function applyFrequencyMarking(
tokens: MergedToken[],
function shouldKeepFrequencyForNonIndependentKanjiNoun(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): MergedToken[] {
return tokens.map((token) => {
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
return { ...token, frequencyRank: undefined };
}
): boolean {
if (pos1Exclusions.has('名詞')) {
return false;
}
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
const rank = Math.max(1, Math.floor(token.frequencyRank));
return { ...token, frequencyRank: rank };
}
const rank =
typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)
? Math.max(1, Math.floor(token.frequencyRank))
: null;
if (rank === null) {
return false;
}
return {
...token,
frequencyRank: undefined,
};
});
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
const pos2Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos2));
if (pos1Parts.length !== 1 || pos2Parts.length !== 1) {
return false;
}
if (pos1Parts[0] !== '名詞' || pos2Parts[0] !== '非自立') {
return false;
}
return hasKanjiChar(token.surface) || hasKanjiChar(token.headword);
}
export function shouldExcludeTokenFromVocabularyPersistence(
token: MergedToken,
options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
return isFrequencyExcludedByPos(
token,
resolvePos1Exclusions(options),
resolvePos2Exclusions(options),
);
}
function getCachedJlptLevel(
@@ -312,6 +484,23 @@ function isReduplicatedKanaSfx(text: string): boolean {
return chars.slice(0, half).join('') === chars.slice(half).join('');
}
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
const normalized = normalizeJlptTextForExclusion(text);
if (!normalized) {
return false;
}
if (isReduplicatedKanaSfx(normalized)) {
return true;
}
if (normalized.length <= 1 || !normalized.endsWith('と')) {
return false;
}
return isReduplicatedKanaSfx(normalized.slice(0, -1));
}
function hasAdjacentKanaRepeat(text: string): boolean {
const normalized = normalizeJlptTextForExclusion(text);
if (!normalized) {
@@ -386,12 +575,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return false;
}
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.reading,
token.headword,
].filter(
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -414,24 +598,110 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return true;
}
function applyJlptMarking(
tokens: MergedToken[],
getJlptLevel: (text: string) => JlptLevel | null,
): MergedToken[] {
return tokens.map((token) => {
if (!isJlptEligibleToken(token)) {
return { ...token, jlptLevel: undefined };
function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
const candidates = [token.surface, token.reading, resolveJlptLookupText(token)].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
for (const candidate of candidates) {
const trimmedCandidate = candidate.trim();
if (!trimmedCandidate) {
continue;
}
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
const fallbackLevel =
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
const normalizedCandidate = normalizeJlptTextForExclusion(trimmedCandidate);
if (!normalizedCandidate) {
continue;
}
return {
...token,
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
};
});
if (
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalizedCandidate)
) {
return true;
}
if (
isTrailingSmallTsuKanaSfx(trimmedCandidate) ||
isTrailingSmallTsuKanaSfx(normalizedCandidate) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(trimmedCandidate) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(normalizedCandidate)
) {
return true;
}
}
return false;
}
export function shouldExcludeTokenFromSubtitleAnnotations(token: MergedToken): boolean {
return sharedShouldExcludeTokenFromSubtitleAnnotations(token);
}
export function stripSubtitleAnnotationMetadata(token: MergedToken): MergedToken {
return sharedStripSubtitleAnnotationMetadata(token);
}
function computeTokenKnownStatus(
token: MergedToken,
isKnownWord: (text: string) => boolean,
knownWordMatchMode: NPlusOneMatchMode,
): boolean {
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
return true;
}
const normalizedReading = token.reading.trim();
if (!normalizedReading) {
return false;
}
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
}
function filterTokenFrequencyRank(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): number | undefined {
if (
isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions) &&
!shouldKeepFrequencyForNonIndependentKanjiNoun(token, pos1Exclusions)
) {
return undefined;
}
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
return Math.max(1, Math.floor(token.frequencyRank));
}
return undefined;
}
function computeTokenJlptLevel(
token: MergedToken,
getJlptLevel: (text: string) => JlptLevel | null,
): JlptLevel | undefined {
if (!isJlptEligibleToken(token)) {
return undefined;
}
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
const fallbackLevel =
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
const level = primaryLevel ?? fallbackLevel ?? token.jlptLevel;
return level ?? undefined;
}
function hasPrioritizedNameMatch(
token: MergedToken,
options: Pick<AnnotationStageOptions, 'nameMatchEnabled'>,
): boolean {
return options.nameMatchEnabled !== false && token.isNameMatch === true;
}
export function annotateTokens(
@@ -442,36 +712,50 @@ export function annotateTokens(
const pos1Exclusions = resolvePos1Exclusions(options);
const pos2Exclusions = resolvePos2Exclusions(options);
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
const knownMarkedTokens = nPlusOneEnabled
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
: tokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
const nameMatchEnabled = options.nameMatchEnabled !== false;
const frequencyEnabled = options.frequencyEnabled !== false;
const frequencyMarkedTokens = frequencyEnabled
? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
: knownMarkedTokens.map((token) => ({
...token,
frequencyRank: undefined,
}));
const jlptEnabled = options.jlptEnabled !== false;
const jlptMarkedTokens = jlptEnabled
? applyJlptMarking(frequencyMarkedTokens, deps.getJlptLevel)
: frequencyMarkedTokens.map((token) => ({
...token,
jlptLevel: undefined,
}));
// Single pass: compute known word status, frequency filtering, and JLPT level together
const annotated = tokens.map((token) => {
if (
sharedShouldExcludeTokenFromSubtitleAnnotations(token, {
pos1Exclusions,
pos2Exclusions,
})
) {
return sharedStripSubtitleAnnotationMetadata(token, {
pos1Exclusions,
pos2Exclusions,
});
}
const prioritizedNameMatch = nameMatchEnabled && token.isNameMatch === true;
const isKnown = nPlusOneEnabled
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
: false;
const frequencyRank =
frequencyEnabled && !prioritizedNameMatch
? filterTokenFrequencyRank(token, pos1Exclusions, pos2Exclusions)
: undefined;
const jlptLevel =
jlptEnabled && !prioritizedNameMatch
? computeTokenJlptLevel(token, deps.getJlptLevel)
: undefined;
return {
...token,
isKnown,
isNPlusOneTarget: nPlusOneEnabled && !prioritizedNameMatch ? token.isNPlusOneTarget : false,
frequencyRank,
jlptLevel,
};
});
if (!nPlusOneEnabled) {
return jlptMarkedTokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
return annotated;
}
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
@@ -482,10 +766,25 @@ export function annotateTokens(
? minSentenceWordsForNPlusOne
: 3;
return markNPlusOneTargets(
jlptMarkedTokens,
const nPlusOneMarked = markNPlusOneTargets(
annotated,
sanitizedMinSentenceWordsForNPlusOne,
pos1Exclusions,
pos2Exclusions,
);
if (!nameMatchEnabled) {
return nPlusOneMarked;
}
return nPlusOneMarked.map((token) =>
hasPrioritizedNameMatch(token, options)
? {
...token,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
}
: token,
);
}

View File

@@ -212,3 +212,57 @@ test('merges trailing katakana continuation without headword into previous token
],
);
});
// Regression: merged content+function token candidate must not beat a multi-token split
// candidate that preserves the content token as a standalone frequency-eligible unit.
// Background: Yomitan scanning can produce a single-token candidate where a content word
// is merged with trailing function particles (e.g. かかってこいよ → headword かかってくる).
// When a competing multi-token candidate splits content and function separately, the
// multi-token candidate should win so the content token remains frequency-highlightable.
test('multi-token candidate beats single merged content+function token candidate (frequency regression)', () => {
// Candidate A: single merged token — content verb fused with trailing sentence-final particle
// This is the "bad" candidate: downstream annotation would exclude frequency for the whole
// token because the merged pos1 would contain a function-word component.
const mergedCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
]);
// Candidate B: two tokens — content verb surface + particle separately.
// The content token is frequency-eligible on its own.
const splitCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
]);
// When merged candidate comes first in the array, multi-token split still wins.
const tokens = selectYomitanParseTokens(
[mergedCandidate, splitCandidate],
() => false,
'headword',
);
assert.equal(tokens?.length, 2);
assert.equal(tokens?.[0]?.surface, 'かかってこい');
assert.equal(tokens?.[0]?.headword, 'かかってくる');
assert.equal(tokens?.[1]?.surface, 'よ');
});
test('multi-token candidate beats single merged content+function token regardless of input order', () => {
const mergedCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
]);
const splitCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
]);
// Split candidate comes first — should still win over merged.
const tokens = selectYomitanParseTokens(
[splitCandidate, mergedCandidate],
() => false,
'headword',
);
assert.equal(tokens?.length, 2);
assert.equal(tokens?.[0]?.surface, 'かかってこい');
assert.equal(tokens?.[1]?.surface, 'よ');
});

View File

@@ -0,0 +1,56 @@
import { PartOfSpeech } from '../../../types';
function normalizePosTag(value: string | null | undefined): string {
return typeof value === 'string' ? value.trim() : '';
}
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
return typeof value === 'string' && Object.values(PartOfSpeech).includes(value as PartOfSpeech);
}
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
switch (normalizePosTag(pos1)) {
case '名詞':
return PartOfSpeech.noun;
case '動詞':
return PartOfSpeech.verb;
case '形容詞':
return PartOfSpeech.i_adjective;
case '形状詞':
case '形容動詞':
return PartOfSpeech.na_adjective;
case '助詞':
return PartOfSpeech.particle;
case '助動詞':
return PartOfSpeech.bound_auxiliary;
case '記号':
case '補助記号':
return PartOfSpeech.symbol;
default:
return PartOfSpeech.other;
}
}
export function deriveStoredPartOfSpeech(input: {
partOfSpeech?: string | null;
pos1?: string | null;
}): PartOfSpeech {
const pos1Parts = normalizePosTag(input.pos1)
.split('|')
.map((part) => part.trim())
.filter((part) => part.length > 0);
if (pos1Parts.length > 0) {
const derivedParts = [...new Set(pos1Parts.map((part) => mapMecabPos1ToPartOfSpeech(part)))];
if (derivedParts.length === 1) {
return derivedParts[0]!;
}
return PartOfSpeech.other;
}
if (isPartOfSpeechValue(input.partOfSpeech)) {
return input.partOfSpeech;
}
return PartOfSpeech.other;
}

View File

@@ -0,0 +1,352 @@
import {
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
resolveAnnotationPos1ExclusionSet,
} from '../../../token-pos1-exclusions';
import {
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
resolveAnnotationPos2ExclusionSet,
} from '../../../token-pos2-exclusions';
import { MergedToken, PartOfSpeech } from '../../../types';
import { shouldIgnoreJlptByTerm } from '../jlpt-token-filter';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'ああ',
'ええ',
'うう',
'おお',
'はあ',
'はは',
'へえ',
'ふう',
'ほう',
]);
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
'だ',
'です',
'でした',
'だった',
'では',
'じゃ',
'でしょう',
'だろう',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
'',
'か',
'ね',
'よ',
'な',
'よね',
'かな',
'かね',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES = ['か', 'かな', 'かね'] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
(particle) => `${prefix}${core}${particle}`,
),
),
),
);
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
'って',
'ってよ',
'ってね',
'ってな',
'ってさ',
'ってか',
'ってば',
]);
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
export interface SubtitleAnnotationFilterOptions {
pos1Exclusions?: ReadonlySet<string>;
pos2Exclusions?: ReadonlySet<string>;
}
function normalizePosTag(pos: string | undefined): string {
return typeof pos === 'string' ? pos.trim() : '';
}
function splitNormalizedTagParts(normalizedTag: string): string[] {
if (!normalizedTag) {
return [];
}
return normalizedTag
.split('|')
.map((part) => part.trim())
.filter((part) => part.length > 0);
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const parts = splitNormalizedTagParts(normalizedTag);
if (parts.length === 0) {
return false;
}
return parts.every((part) => exclusions.has(part));
}
function resolvePos1Exclusions(
options: SubtitleAnnotationFilterOptions = {},
): ReadonlySet<string> {
if (options.pos1Exclusions) {
return options.pos1Exclusions;
}
return resolveAnnotationPos1ExclusionSet(DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG);
}
function resolvePos2Exclusions(
options: SubtitleAnnotationFilterOptions = {},
): ReadonlySet<string> {
if (options.pos2Exclusions) {
return options.pos2Exclusions;
}
return resolveAnnotationPos2ExclusionSet(DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG);
}
function normalizeKana(text: string): string {
const raw = text.trim();
if (!raw) {
return '';
}
let normalized = '';
for (const char of raw) {
const code = char.codePointAt(0);
if (code === undefined) {
continue;
}
if (code >= KATAKANA_CODEPOINT_START && code <= KATAKANA_CODEPOINT_END) {
normalized += String.fromCodePoint(code - KATAKANA_TO_HIRAGANA_OFFSET);
continue;
}
normalized += char;
}
return normalized;
}
function isKanaChar(char: string): boolean {
const code = char.codePointAt(0);
if (code === undefined) {
return false;
}
return (
(code >= 0x3041 && code <= 0x3096) ||
(code >= 0x309b && code <= 0x309f) ||
code === 0x30fc ||
(code >= 0x30a0 && code <= 0x30fa) ||
(code >= 0x30fd && code <= 0x30ff)
);
}
function isTrailingSmallTsuKanaSfx(text: string): boolean {
const normalized = normalizeKana(text);
if (!normalized) {
return false;
}
const chars = [...normalized];
if (chars.length < 2 || chars.length > 4) {
return false;
}
if (!chars.every(isKanaChar)) {
return false;
}
return chars[chars.length - 1] === 'っ';
}
function isReduplicatedKanaSfx(text: string): boolean {
const normalized = normalizeKana(text);
if (!normalized) {
return false;
}
const chars = [...normalized];
if (chars.length < 4 || chars.length % 2 !== 0) {
return false;
}
if (!chars.every(isKanaChar)) {
return false;
}
const half = chars.length / 2;
return chars.slice(0, half).join('') === chars.slice(half).join('');
}
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
const normalized = normalizeKana(text);
if (!normalized) {
return false;
}
if (isReduplicatedKanaSfx(normalized)) {
return true;
}
if (normalized.length <= 1 || !normalized.endsWith('と')) {
return false;
}
return isReduplicatedKanaSfx(normalized.slice(0, -1));
}
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
const normalizedSurface = normalizeKana(token.surface);
const normalizedHeadword = normalizeKana(token.headword);
if (!normalizedSurface || !normalizedHeadword || !normalizedSurface.startsWith(normalizedHeadword)) {
return false;
}
const suffix = normalizedSurface.slice(normalizedHeadword.length);
if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(suffix)) {
return false;
}
const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
if (pos1Parts.length < 2) {
return false;
}
const [leadingPos1, ...trailingPos1] = pos1Parts;
if (!leadingPos1 || resolvePos1Exclusions().has(leadingPos1)) {
return false;
}
return trailingPos1.length > 0 && trailingPos1.every((part) => part === '助詞');
}
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
if (pos1Parts.length === 0 || !pos1Parts.every((part) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(part))) {
return false;
}
const pos3Parts = splitNormalizedTagParts(normalizePosTag(token.pos3));
return pos3Parts.includes('助動詞語幹');
}
function isExcludedByTerm(token: MergedToken): boolean {
const candidates = [token.surface, token.reading, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
for (const candidate of candidates) {
const trimmed = candidate.trim();
if (!trimmed) {
continue;
}
const normalized = normalizeKana(trimmed);
if (!normalized) {
continue;
}
if (
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.some((prefix) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES.some(
(suffix) => normalized === `${prefix}${suffix}`,
),
)
) {
return true;
}
if (
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmed) ||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalized) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmed) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalized) ||
shouldIgnoreJlptByTerm(trimmed) ||
shouldIgnoreJlptByTerm(normalized)
) {
return true;
}
if (
isTrailingSmallTsuKanaSfx(trimmed) ||
isTrailingSmallTsuKanaSfx(normalized) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(trimmed) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(normalized)
) {
return true;
}
}
return false;
}
export function shouldExcludeTokenFromSubtitleAnnotations(
token: MergedToken,
options: SubtitleAnnotationFilterOptions = {},
): boolean {
const pos1Exclusions = resolvePos1Exclusions(options);
const pos2Exclusions = resolvePos2Exclusions(options);
const normalizedPos1 = normalizePosTag(token.pos1);
const normalizedPos2 = normalizePosTag(token.pos2);
const hasPos1 = normalizedPos1.length > 0;
const hasPos2 = normalizedPos2.length > 0;
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
return true;
}
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
return true;
}
if (
!hasPos1 &&
!hasPos2 &&
(token.partOfSpeech === PartOfSpeech.particle ||
token.partOfSpeech === PartOfSpeech.bound_auxiliary ||
token.partOfSpeech === PartOfSpeech.symbol)
) {
return true;
}
if (isAuxiliaryStemGrammarTailToken(token)) {
return true;
}
if (isExcludedTrailingParticleMergedToken(token)) {
return true;
}
return isExcludedByTerm(token);
}
export function stripSubtitleAnnotationMetadata(
token: MergedToken,
options: SubtitleAnnotationFilterOptions = {},
): MergedToken {
if (!shouldExcludeTokenFromSubtitleAnnotations(token, options)) {
return token;
}
return {
...token,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: false,
jlptLevel: undefined,
frequencyRank: undefined,
};
}

View File

@@ -188,6 +188,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
{
term: '猫',
reading: 'ねこ',
hasReading: true,
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 77,
@@ -197,6 +198,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
{
term: '鍛える',
reading: 'きたえる',
hasReading: false,
dictionary: 'freq-dict',
dictionaryPriority: 1,
frequency: 46961,
@@ -217,9 +219,11 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
assert.equal(result.length, 2);
assert.equal(result[0]?.term, '猫');
assert.equal(result[0]?.hasReading, true);
assert.equal(result[0]?.frequency, 77);
assert.equal(result[0]?.dictionaryPriority, 0);
assert.equal(result[1]?.term, '鍛える');
assert.equal(result[1]?.hasReading, false);
assert.equal(result[1]?.frequency, 2847);
assert.match(scriptValue, /getTermFrequencies/);
assert.match(scriptValue, /optionsGetFull/);
@@ -247,6 +251,96 @@ test('requestYomitanTermFrequencies prefers primary rank from displayValue array
assert.equal(result[0]?.frequency, 7141);
});
test('requestYomitanTermFrequencies prefers primary rank from displayValue string pair when raw frequency matches trailing count', async () => {
const deps = createDeps(async () => [
{
term: '潜む',
reading: 'ひそむ',
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 121,
displayValue: '118,121',
displayValueParsed: false,
},
]);
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
error: () => undefined,
});
assert.equal(result.length, 1);
assert.equal(result[0]?.term, '潜む');
assert.equal(result[0]?.frequency, 118);
});
test('requestYomitanTermFrequencies uses leading display digits for displayValue strings', async () => {
const deps = createDeps(async () => [
{
term: '例',
reading: 'れい',
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 1234,
displayValue: '1,234',
displayValueParsed: false,
},
]);
const result = await requestYomitanTermFrequencies([{ term: '例', reading: 'れい' }], deps, {
error: () => undefined,
});
assert.equal(result.length, 1);
assert.equal(result[0]?.term, '例');
assert.equal(result[0]?.frequency, 1);
});
test('requestYomitanTermFrequencies ignores occurrence-based dictionaries for rank tagging', async () => {
let metadataScript = '';
const deps = createDeps(async (script) => {
if (script.includes('getTermFrequencies')) {
return [
{
term: '潜む',
reading: 'ひそむ',
dictionary: 'CC100',
frequency: 118121,
displayValue: null,
displayValueParsed: false,
},
];
}
if (script.includes('optionsGetFull')) {
metadataScript = script;
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['CC100'],
dictionaryPriorityByName: { CC100: 0 },
dictionaryFrequencyModeByName: { CC100: 'occurrence-based' },
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [{ name: 'CC100', enabled: true, id: 0 }],
},
},
],
};
}
return [];
});
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
error: () => undefined,
});
assert.deepEqual(result, []);
assert.match(metadataScript, /getDictionaryInfo/);
});
test('requestYomitanTermFrequencies requests term-only fallback only after reading miss', async () => {
const frequencyScripts: string[] = [];
const deps = createDeps(async (script) => {
@@ -485,6 +579,317 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of
assert.match(scannerScript ?? '', /deinflect:\s*true/);
});
test('requestYomitanScanTokens extracts best frequency rank from selected termsFind entry', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('潜み', deps, {
error: () => undefined,
});
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('潜み')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 2,
dictionaryEntries: [
{
headwords: [
{
term: '潜む',
reading: 'ひそむ',
sources: [{ originalText: '潜み', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 20181,
displayValue: '4073,20181句',
},
{
headwordIndex: 0,
dictionary: 'Jiten',
frequency: 28594,
displayValue: '4592,28594句',
},
{
headwordIndex: 0,
dictionary: 'CC100',
frequency: 118121,
displayValue: null,
},
],
},
],
};
});
assert.deepEqual(result, [
{
surface: '潜み',
reading: 'ひそ',
headword: '潜む',
startPos: 0,
endPos: 2,
isNameMatch: false,
frequencyRank: 4073,
},
]);
});
test('requestYomitanScanTokens uses frequency from later exact-match entry when first exact entry has none', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('者', deps, {
error: () => undefined,
});
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('者')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 1,
dictionaryEntries: [
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [],
},
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 79601,
displayValue: '475,79601句',
},
{
headwordIndex: 0,
dictionary: 'Jiten',
frequency: 338,
displayValue: '338',
},
],
},
],
};
});
assert.deepEqual(result, [
{
surface: '者',
reading: 'もの',
headword: '者',
startPos: 0,
endPos: 1,
isNameMatch: false,
frequencyRank: 475,
},
]);
});
test('requestYomitanScanTokens can use frequency from later exact secondary-match entry', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('者', deps, {
error: () => undefined,
});
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('者')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 1,
dictionaryEntries: [
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [],
},
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: false, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 79601,
displayValue: '475,79601句',
},
],
},
],
};
});
assert.deepEqual(result, [
{
surface: '者',
reading: 'もの',
headword: '者',
startPos: 0,
endPos: 1,
isNameMatch: false,
frequencyRank: 475,
},
]);
});
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
const deps = createDeps(async (script) => {
if (script.includes('optionsGetFull')) {

View File

@@ -20,19 +20,24 @@ interface YomitanParserRuntimeDeps {
createYomitanExtensionWindow?: (pageName: string) => Promise<BrowserWindow | null>;
}
type YomitanFrequencyMode = 'occurrence-based' | 'rank-based';
export interface YomitanDictionaryInfo {
title: string;
revision?: string | number;
frequencyMode?: YomitanFrequencyMode;
}
export interface YomitanTermFrequency {
term: string;
reading: string | null;
hasReading: boolean;
dictionary: string;
dictionaryPriority: number;
frequency: number;
displayValue: string | null;
displayValueParsed: boolean;
frequencyDerivedFromDisplayValue: boolean;
}
export interface YomitanTermReadingPair {
@@ -47,6 +52,7 @@ export interface YomitanScanToken {
startPos: number;
endPos: number;
isNameMatch?: boolean;
frequencyRank?: number;
}
interface YomitanProfileMetadata {
@@ -54,6 +60,7 @@ interface YomitanProfileMetadata {
scanLength: number;
dictionaries: string[];
dictionaryPriorityByName: Record<string, number>;
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>;
}
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
@@ -78,7 +85,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
typeof entry.headword === 'string' &&
typeof entry.startPos === 'number' &&
typeof entry.endPos === 'number' &&
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'),
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean') &&
(entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number'),
)
);
}
@@ -117,24 +125,22 @@ function parsePositiveFrequencyString(value: string): number | null {
return null;
}
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
if (!numericPrefix) {
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
if (!numericMatch) {
return null;
}
const chunks = numericPrefix.split(',');
const normalizedNumber =
chunks.length <= 1
? (chunks[0] ?? '')
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
? chunks.join('')
: (chunks[0] ?? '');
const parsed = Number.parseInt(normalizedNumber, 10);
const parsed = Number.parseFloat(numericMatch);
if (!Number.isFinite(parsed) || parsed <= 0) {
return null;
}
return parsed;
const normalized = Math.floor(parsed);
if (!Number.isFinite(normalized) || normalized <= 0) {
return null;
}
return normalized;
}
function parsePositiveFrequencyValue(value: unknown): number | null {
@@ -159,6 +165,19 @@ function parsePositiveFrequencyValue(value: unknown): number | null {
return null;
}
function parseDisplayFrequencyValue(value: unknown): number | null {
if (typeof value === 'string') {
const leadingDigits = value.trim().match(/^\d+/)?.[0];
if (!leadingDigits) {
return null;
}
const parsed = Number.parseInt(leadingDigits, 10);
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
return parsePositiveFrequencyValue(value);
}
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
if (!isObject(value)) {
return null;
@@ -170,7 +189,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
const displayValueRaw = value.displayValue;
const parsedDisplayFrequency =
displayValueRaw !== null && displayValueRaw !== undefined
? parsePositiveFrequencyValue(displayValueRaw)
? parseDisplayFrequencyValue(displayValueRaw)
: null;
const frequency = parsedDisplayFrequency ?? rawFrequency;
if (!term || !dictionary || frequency === null) {
@@ -184,17 +203,20 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
const reading =
value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
const hasReading = value.hasReading === false ? false : reading !== null;
const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
const displayValueParsed = value.displayValueParsed === true;
return {
term,
reading,
hasReading,
dictionary,
dictionaryPriority,
frequency,
displayValue,
displayValueParsed,
frequencyDerivedFromDisplayValue: parsedDisplayFrequency !== null,
};
}
@@ -300,17 +322,34 @@ function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null
}
}
const dictionaryFrequencyModeByNameRaw = value.dictionaryFrequencyModeByName;
const dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>> = {};
if (isObject(dictionaryFrequencyModeByNameRaw)) {
for (const [name, frequencyModeRaw] of Object.entries(dictionaryFrequencyModeByNameRaw)) {
const normalizedName = name.trim();
if (!normalizedName) {
continue;
}
if (frequencyModeRaw !== 'occurrence-based' && frequencyModeRaw !== 'rank-based') {
continue;
}
dictionaryFrequencyModeByName[normalizedName] = frequencyModeRaw;
}
}
return {
profileIndex,
scanLength,
dictionaries,
dictionaryPriorityByName,
dictionaryFrequencyModeByName,
};
}
function normalizeFrequencyEntriesWithPriority(
rawResult: unknown[],
dictionaryPriorityByName: Record<string, number>,
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
): YomitanTermFrequency[] {
const normalized: YomitanTermFrequency[] = [];
for (const entry of rawResult) {
@@ -319,6 +358,10 @@ function normalizeFrequencyEntriesWithPriority(
continue;
}
if (dictionaryFrequencyModeByName[frequency.dictionary] === 'occurrence-based') {
continue;
}
const dictionaryPriority = dictionaryPriorityByName[frequency.dictionary];
normalized.push({
...frequency,
@@ -425,8 +468,34 @@ async function requestYomitanProfileMetadata(
acc[entry.name] = index;
return acc;
}, {});
let dictionaryFrequencyModeByName = {};
try {
const dictionaryInfo = await invoke("getDictionaryInfo", undefined);
dictionaryFrequencyModeByName = Array.isArray(dictionaryInfo)
? dictionaryInfo.reduce((acc, entry) => {
if (!entry || typeof entry !== "object" || typeof entry.title !== "string") {
return acc;
}
if (
entry.frequencyMode === "occurrence-based" ||
entry.frequencyMode === "rank-based"
) {
acc[entry.title] = entry.frequencyMode;
}
return acc;
}, {})
: {};
} catch {
dictionaryFrequencyModeByName = {};
}
return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
return {
profileIndex,
scanLength,
dictionaries,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
};
})();
`;
@@ -774,7 +843,133 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
}
return segments;
}
function getPreferredHeadword(dictionaryEntries, token) {
function parsePositiveFrequencyNumber(value) {
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
return Math.max(1, Math.floor(value));
}
if (typeof value === 'string') {
const numericMatch = value.trim().match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
if (!numericMatch) { return null; }
const parsed = Number.parseFloat(numericMatch);
if (!Number.isFinite(parsed) || parsed <= 0) { return null; }
return Math.max(1, Math.floor(parsed));
}
if (Array.isArray(value)) {
for (const item of value) {
const parsed = parsePositiveFrequencyNumber(item);
if (parsed !== null) { return parsed; }
}
}
return null;
}
function parseDisplayFrequencyNumber(value) {
if (typeof value === 'string') {
const leadingDigits = value.trim().match(/^\d+/)?.[0];
if (!leadingDigits) { return null; }
const parsed = Number.parseInt(leadingDigits, 10);
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
}
return parsePositiveFrequencyNumber(value);
}
function getFrequencyDictionaryName(frequency) {
const candidates = [
frequency?.dictionary,
frequency?.dictionaryName,
frequency?.name,
frequency?.title,
frequency?.dictionaryTitle,
frequency?.dictionaryAlias
];
for (const candidate of candidates) {
if (typeof candidate === 'string' && candidate.trim().length > 0) {
return candidate.trim();
}
}
return null;
}
function getBestFrequencyRank(dictionaryEntry, headwordIndex, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
let best = null;
const headwordCount = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords.length : 0;
for (const frequency of dictionaryEntry?.frequencies || []) {
if (!frequency || typeof frequency !== 'object') { continue; }
const frequencyHeadwordIndex = frequency.headwordIndex;
if (typeof frequencyHeadwordIndex === 'number') {
if (frequencyHeadwordIndex !== headwordIndex) { continue; }
} else if (headwordCount > 1) {
continue;
}
const dictionary = getFrequencyDictionaryName(frequency);
if (!dictionary) { continue; }
if (dictionaryFrequencyModeByName[dictionary] === 'occurrence-based') { continue; }
const rank =
parseDisplayFrequencyNumber(frequency.displayValue) ??
parsePositiveFrequencyNumber(frequency.frequency);
if (rank === null) { continue; }
const priorityRaw = dictionaryPriorityByName[dictionary];
const fallbackPriority =
typeof frequency.dictionaryIndex === 'number' && Number.isFinite(frequency.dictionaryIndex)
? Math.max(0, Math.floor(frequency.dictionaryIndex))
: Number.MAX_SAFE_INTEGER;
const priority =
typeof priorityRaw === 'number' && Number.isFinite(priorityRaw)
? Math.max(0, Math.floor(priorityRaw))
: fallbackPriority;
if (best === null || priority < best.priority || (priority === best.priority && rank < best.rank)) {
best = { priority, rank };
}
}
return best?.rank ?? null;
}
function hasExactSource(headword, token, requirePrimary) {
for (const src of headword.sources || []) {
if (src.originalText !== token) { continue; }
if (requirePrimary && !src.isPrimary) { continue; }
if (src.matchType !== 'exact') { continue; }
return true;
}
return false;
}
function collectExactHeadwordMatches(dictionaryEntries, token, requirePrimary) {
const matches = [];
for (const dictionaryEntry of dictionaryEntries || []) {
const headwords = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords : [];
for (let headwordIndex = 0; headwordIndex < headwords.length; headwordIndex += 1) {
const headword = headwords[headwordIndex];
if (!hasExactSource(headword, token, requirePrimary)) { continue; }
matches.push({ dictionaryEntry, headword, headwordIndex });
}
}
return matches;
}
function sameHeadword(match, preferredMatch) {
if (!match || !preferredMatch) {
return false;
}
if (match.headword?.term !== preferredMatch.headword?.term) {
return false;
}
const matchReading = typeof match.headword?.reading === 'string' ? match.headword.reading : '';
const preferredReading =
typeof preferredMatch.headword?.reading === 'string' ? preferredMatch.headword.reading : '';
return matchReading === preferredReading;
}
function getBestFrequencyRankForMatches(matches, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
let best = null;
for (const match of matches) {
const rank = getBestFrequencyRank(
match.dictionaryEntry,
match.headwordIndex,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
);
if (rank === null) { continue; }
if (best === null || rank < best) {
best = rank;
}
}
return best;
}
function getPreferredHeadword(dictionaryEntries, token, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
function appendDictionaryNames(target, value) {
if (!value || typeof value !== 'object') {
return;
@@ -813,36 +1008,33 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
}
return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
}
function hasExactPrimarySource(headword, token) {
for (const src of headword.sources || []) {
if (src.originalText !== token) { continue; }
if (!src.isPrimary) { continue; }
if (src.matchType !== 'exact') { continue; }
return true;
}
return false;
}
const exactPrimaryMatches = collectExactHeadwordMatches(dictionaryEntries, token, true);
let matchedNameDictionary = false;
if (includeNameMatchMetadata) {
for (const dictionaryEntry of dictionaryEntries || []) {
if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
for (const headword of dictionaryEntry.headwords || []) {
if (!hasExactPrimarySource(headword, token)) { continue; }
for (const match of exactPrimaryMatches) {
if (match.dictionaryEntry !== dictionaryEntry) { continue; }
matchedNameDictionary = true;
break;
}
if (matchedNameDictionary) { break; }
}
}
for (const dictionaryEntry of dictionaryEntries || []) {
for (const headword of dictionaryEntry.headwords || []) {
if (!hasExactPrimarySource(headword, token)) { continue; }
return {
term: headword.term,
reading: headword.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
};
}
const preferredMatch = exactPrimaryMatches[0];
if (preferredMatch) {
const exactFrequencyMatches = collectExactHeadwordMatches(dictionaryEntries, token, false)
.filter((match) => sameHeadword(match, preferredMatch));
return {
term: preferredMatch.headword.term,
reading: preferredMatch.headword.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(preferredMatch.dictionaryEntry),
frequencyRank: getBestFrequencyRankForMatches(
exactFrequencyMatches.length > 0 ? exactFrequencyMatches : exactPrimaryMatches,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
)
};
}
return null;
}
@@ -853,6 +1045,8 @@ function buildYomitanScanningScript(
profileIndex: number,
scanLength: number,
includeNameMatchMetadata: boolean,
dictionaryPriorityByName: Record<string, number>,
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
): string {
return `
(async () => {
@@ -876,6 +1070,8 @@ function buildYomitanScanningScript(
});
${YOMITAN_SCANNING_HELPERS}
const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'};
const dictionaryPriorityByName = ${JSON.stringify(dictionaryPriorityByName)};
const dictionaryFrequencyModeByName = ${JSON.stringify(dictionaryFrequencyModeByName)};
const text = ${JSON.stringify(text)};
const details = {matchType: "exact", deinflect: true};
const tokens = [];
@@ -889,7 +1085,12 @@ ${YOMITAN_SCANNING_HELPERS}
const originalTextLength = typeof result?.originalTextLength === "number" ? result.originalTextLength : 0;
if (dictionaryEntries.length > 0 && originalTextLength > 0 && (originalTextLength !== character.length || isCodePointJapanese(codePoint))) {
const source = substring.substring(0, originalTextLength);
const preferredHeadword = getPreferredHeadword(dictionaryEntries, source);
const preferredHeadword = getPreferredHeadword(
dictionaryEntries,
source,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
);
if (preferredHeadword && typeof preferredHeadword.term === "string") {
const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : "";
const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source);
@@ -900,6 +1101,10 @@ ${YOMITAN_SCANNING_HELPERS}
startPos: i,
endPos: i + originalTextLength,
isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
frequencyRank:
typeof preferredHeadword.frequencyRank === "number" && Number.isFinite(preferredHeadword.frequencyRank)
? Math.max(1, Math.floor(preferredHeadword.frequencyRank))
: undefined,
});
i += originalTextLength;
continue;
@@ -1036,6 +1241,8 @@ export async function requestYomitanScanTokens(
profileIndex,
scanLength,
options?.includeNameMatchMetadata === true,
metadata?.dictionaryPriorityByName ?? {},
metadata?.dictionaryFrequencyModeByName ?? {},
),
true,
);
@@ -1099,7 +1306,11 @@ async function fetchYomitanTermFrequencies(
try {
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
return Array.isArray(rawResult)
? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName)
? normalizeFrequencyEntriesWithPriority(
rawResult,
metadata.dictionaryPriorityByName,
metadata.dictionaryFrequencyModeByName,
)
: [];
} catch (err) {
logger.error('Yomitan term frequency request failed:', (err as Error).message);
@@ -1541,10 +1752,15 @@ export async function getYomitanDictionaryInfo(
.map((entry) => {
const title = typeof entry.title === 'string' ? entry.title.trim() : '';
const revision = entry.revision;
const frequencyMode: YomitanFrequencyMode | undefined =
entry.frequencyMode === 'occurrence-based' || entry.frequencyMode === 'rank-based'
? entry.frequencyMode
: undefined;
return {
title,
revision:
typeof revision === 'string' || typeof revision === 'number' ? revision : undefined,
frequencyMode,
};
})
.filter((entry) => entry.title.length > 0);
@@ -1763,3 +1979,34 @@ export async function removeYomitanDictionarySettings(
return await setYomitanSettingsFull(optionsFull, deps, logger);
}
export async function addYomitanNoteViaSearch(
word: string,
deps: YomitanParserRuntimeDeps,
logger: LoggerLike,
): Promise<number | null> {
const isReady = await ensureYomitanParserWindow(deps, logger);
const parserWindow = deps.getYomitanParserWindow();
if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
return null;
}
const escapedWord = JSON.stringify(word);
const script = `
(async () => {
if (typeof window.__subminerAddNote !== 'function') {
throw new Error('Yomitan search page bridge not initialized');
}
return await window.__subminerAddNote(${escapedWord});
})();
`;
try {
const noteId = await parserWindow.webContents.executeJavaScript(script, true);
return typeof noteId === 'number' ? noteId : null;
} catch (err) {
logger.error('Yomitan addNoteFromWord failed:', (err as Error).message);
return null;
}
}