feat(stats): add v1 immersion stats dashboard (#19)

This commit is contained in:
2026-03-20 02:43:28 -07:00
committed by GitHub
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions
File diff suppressed because it is too large Load Diff
@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
});
assert.deepEqual(result, {
title: 'Guessit Title',
season: null,
episode: 7,
source: 'guessit',
});
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
});
assert.deepEqual(result, {
title: 'My Anime',
season: 1,
episode: 3,
source: 'fallback',
});
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
]);
assert.deepEqual(result, {
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
});
assert.deepEqual(result, {
title: 'Rascal Does not Dream of Bunny Girl Senpai',
season: null,
episode: 1,
source: 'guessit',
});
+7 -4
View File
@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
/** Best-effort media identification produced by guessAnilistMediaInfo. */
export interface AnilistMediaGuess {
// Series title reported by guessit or by the fallback parser.
title: string;
// Season number, or null when none was detected.
season: number | null;
// Episode number, or null when none was detected.
episode: number | null;
// Which parser produced this guess.
source: 'guessit' | 'fallback';
}
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
};
}
function runGuessit(target: string): Promise<string> {
export function runGuessit(target: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.execFile(
'guessit',
@@ -73,9 +74,9 @@ function runGuessit(target: string): Promise<string> {
});
}
type GuessAnilistMediaInfoDeps = {
export interface GuessAnilistMediaInfoDeps {
runGuessit: (target: string) => Promise<string>;
};
}
function firstString(value: unknown): string | null {
if (typeof value === 'string') {
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
const parsed = JSON.parse(stdout) as Record<string, unknown>;
const title = readGuessitTitle(parsed.title);
const episode = firstPositiveInteger(parsed.episode);
const season = firstPositiveInteger(parsed.season);
if (title) {
return { title, episode, source: 'guessit' };
return { title, season, episode, source: 'guessit' };
}
} catch {
// Ignore guessit failures and fall back to internal parser.
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
}
return {
title: parsed.title.trim(),
season: parsed.season,
episode: parsed.episode,
source: 'fallback',
};
@@ -0,0 +1,244 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
import { Database } from '../immersion-tracker/sqlite.js';
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
/**
 * Creates a fresh temporary directory for one test and returns the path of
 * the (not yet created) SQLite file inside it.
 */
function makeDbPath(): string {
  const prefix = path.join(os.tmpdir(), 'subminer-cover-art-test-');
  const tempDir = fs.mkdtempSync(prefix);
  return path.join(tempDir, 'immersion.sqlite');
}
/**
 * Removes the directory that contains `dbPath` together with everything in it.
 * Missing directories are ignored (`force: true`).
 */
function cleanupDbPath(dbPath: string): void {
  const parentDir = path.dirname(dbPath);
  fs.rmSync(parentDir, { force: true, recursive: true });
}
// Checks stripFilenameTags against four representative inputs: a bracketed
// source prefix with an SxxEyy marker, an "S2 - 05:" marker, a bare "E03:"
// marker, and a full release file name (year, quality tags, group, extension).
test('stripFilenameTags normalizes common media-title formats', () => {
assert.equal(
stripFilenameTags('[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...'),
'The Eminence in Shadow',
);
assert.equal(
stripFilenameTags(
'[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags('Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure'),
'Kono Subarashii Sekai ni Shukufuku wo!',
);
assert.equal(
stripFilenameTags(
'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
),
'Little Witch Academia',
);
});
// Scenario: a cover-art row already stores a cover URL but no image blob.
// fetchIfMissing is expected to download that URL once and persist the bytes
// while leaving the stored titles untouched.
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
canonicalTitle: 'Cover Fetcher Test',
sourcePath: '/tmp/cover-fetcher-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
// Seed a record that has metadata and a URL, but a null blob.
upsertCoverArt(db, videoId, {
anilistId: 7,
coverUrl: 'https://images.test/cover.jpg',
coverBlob: null,
titleRomaji: 'Test Title',
titleEnglish: 'Test Title',
episodesTotal: 12,
});
const fetchCalls: string[] = [];
const originalFetch = globalThis.fetch;
// Replace global fetch with a stub that records the URL and serves 4 bytes.
globalThis.fetch = (async (input: RequestInfo | URL) => {
const url = String(input);
fetchCalls.push(url);
assert.equal(url, 'https://images.test/cover.jpg');
return new Response(new Uint8Array([1, 2, 3, 4]), {
status: 200,
headers: { 'Content-Type': 'image/jpeg' },
});
}) as typeof fetch;
try {
// No-op rate limiter: acquire resolves immediately, headers are ignored.
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
);
const fetched = await fetcher.fetchIfMissing(
db,
videoId,
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
);
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(fetchCalls.length, 1);
assert.equal(stored?.coverBlob?.length, 4);
assert.equal(stored?.titleEnglish, 'Test Title');
} finally {
// Always restore the real fetch and remove the temporary database.
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
/** Wraps `payload` in a 200 response whose body is its JSON serialization. */
function createJsonResponse(payload: unknown): Response {
  const body = JSON.stringify(payload);
  const init = {
    status: 200,
    headers: { 'content-type': 'application/json' },
  };
  return new Response(body, init);
}
// Scenario: guessit reports title + season 2. The fetcher is expected to
// search "<title> Season 2" first and, when that yields nothing, retry with
// the plain title.
test('fetchIfMissing uses guessit primary title and season when available', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
// The canonical title stored in the DB differs from the title passed to
// fetchIfMissing below ('School Vlog S01E01').
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
canonicalTitle:
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
sourcePath: '/tmp/cover-fetcher-season-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const searchCalls: Array<{ search: string }> = [];
const originalFetch = globalThis.fetch;
// Stub fetch: decode the GraphQL variables from the request body, return an
// empty page for any search containing 'Season 2' and one match otherwise.
// NOTE(review): a call without a JSON body (presumably the cover image
// download) makes JSON.parse('') throw before it is recorded in searchCalls.
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
const search = payload.variables.search;
searchCalls.push({ search });
if (search.includes('Season 2')) {
return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
}
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 19,
episodes: 24,
coverImage: { large: 'https://images.test/cover.jpg', medium: null },
title: {
romaji: 'Little Witch Academia',
english: 'Little Witch Academia',
native: null,
},
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
// Injected guessit replacement returning a fixed JSON document.
runGuessit: async () =>
JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
},
);
const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(searchCalls.length, 2);
assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
assert.equal(stored?.anilistId, 19);
} finally {
// Always restore the real fetch and remove the temporary database.
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
// Scenario: the injected guessit runner throws. The fetcher is expected to
// still find a match by searching for the title 'School Vlog'.
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
canonicalTitle: 'School Vlog S01E01',
sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
let requestCount = 0;
const originalFetch = globalThis.fetch;
// Stub fetch: count every call and require the GraphQL search variable to
// be exactly 'School Vlog'.
// NOTE(review): the expected count of 2 is presumably one search plus one
// cover download attempt; the latter has no JSON body, so JSON.parse('')
// throws after the counter was incremented.
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
requestCount += 1;
const raw = (init?.body as string | undefined) ?? '';
const payload = JSON.parse(raw) as { variables: { search: string } };
assert.equal(payload.variables.search, 'School Vlog');
return Promise.resolve(
createJsonResponse({
data: {
Page: {
media: [
{
id: 21,
episodes: 12,
coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
},
],
},
},
}),
);
}) as typeof fetch;
try {
const fetcher = createCoverArtFetcher(
{
acquire: async () => {},
recordResponse: () => {},
},
console,
{
// Simulate guessit being unavailable.
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
// The title argument is expected to be ignored in favour of the canonical
// title stored in the database.
const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
const stored = getCoverArt(db, videoId);
assert.equal(fetched, true);
assert.equal(requestCount, 2);
assert.equal(stored?.anilistId, 21);
} finally {
// Always restore the real fetch and remove the temporary database.
globalThis.fetch = originalFetch;
db.close();
cleanupDbPath(dbPath);
}
});
@@ -0,0 +1,435 @@
import type { AnilistRateLimiter } from './rate-limiter';
import type { DatabaseSync } from '../immersion-tracker/sqlite';
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from './anilist-updater';
// Endpoint that receives the GraphQL search request.
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
// A cached "no match" record younger than this (5 minutes) suppresses a new search.
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
// GraphQL query: up to 5 anime matching $search, with the fields needed to
// pick a result and store its cover art.
const SEARCH_QUERY = `
query ($search: String!) {
Page(perPage: 5) {
media(search: $search, type: ANIME) {
id
episodes
season
seasonYear
coverImage { large medium }
title { romaji english native }
}
}
}
`;
/** One media entry as selected by SEARCH_QUERY. */
interface AnilistMedia {
id: number;
// Total episode count; may be null in the response.
episodes: number | null;
season: string | null;
seasonYear: number | null;
// Cover image URLs; `large` is preferred over `medium` when storing art.
coverImage: { large: string | null; medium: string | null } | null;
title: { romaji: string | null; english: string | null; native: string | null } | null;
}
/** Shape of the JSON body returned for SEARCH_QUERY; every level is optional. */
interface AnilistSearchResponse {
data?: {
Page?: {
media?: AnilistMedia[];
};
};
// GraphQL error entries, if any.
errors?: Array<{ message?: string }>;
}
/** Public surface of the object returned by createCoverArtFetcher. */
export interface CoverArtFetcher {
// Ensures cover art is cached for `videoId`. Resolves to true when a blob
// already exists, was backfilled, or a match was stored; false when no match
// was found, the search was rate-limited, or the search failed.
fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
}
/** Minimal logging surface used by the fetcher; `console` satisfies it. */
interface Logger {
info(msg: string, ...args: unknown[]): void;
warn(msg: string, ...args: unknown[]): void;
error(msg: string, ...args: unknown[]): void;
}
/** Parsed media information used to build search queries and score results. */
interface CoverArtCandidate {
title: string;
// Parser that produced the values; season-qualified queries are only built
// for 'guessit' results.
source: 'guessit' | 'fallback';
season: number | null;
episode: number | null;
}
/** Optional dependency overrides for createCoverArtFetcher. */
interface CoverArtFetcherOptions {
// Replacement for the guessit runner; the imported runGuessit is used when omitted.
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
/**
 * Reduces a media file name or player title to a bare series title.
 *
 * The input is passed through an ordered list of regex rewrites (the order
 * matters: later rules assume earlier ones already ran), then trimmed and
 * whitespace-collapsed.
 *
 * @param raw File name or title, e.g. "[Group] Show S01E05 [1080p].mkv".
 * @returns The cleaned title; may be empty when nothing remains.
 */
export function stripFilenameTags(raw: string): string {
  const rewriteRules: ReadonlyArray<readonly [RegExp, string]> = [
    // File extension.
    [/\.[A-Za-z0-9]{2,4}$/, ''],
    // Leading bracketed tags such as "[Group]".
    [/^(?:\s*\[[^\]]*\]\s*)+/, ''],
    // Dots and underscores used as word separators.
    [/[._]+/g, ' '],
    // Everything from " - S##E##" or " - ###" onward (season/episode markers).
    [/\s+-\s+S\d+E\d+.*$/i, ''],
    [/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, ''],
    [/\s+S\d+E\d+.*$/i, ''],
    [/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, ''],
    [/\s+E\d+[: -].*$/i, ''],
    [/^S\d+E\d+\s*[- ]\s*/i, ''],
    // Bracketed/parenthesized tags: [WEBDL-1080p], (2022), etc.
    [/\s*\[[^\]]*\]\s*/g, ' '],
    [/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' '],
    // Common codec/source tags that may appear without brackets.
    [
      /\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
      '',
    ],
    // Trailing dashes and group tags like "-Retr0".
    [/\s*-\s*[\w]+$/, ''],
  ];

  const stripped = rewriteRules.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    raw,
  );
  return stripped.trim().replace(/\s{2,}/g, ' ');
}
/** Drops "season N" phrases (case-insensitive) and tidies the leftover whitespace. */
function removeSeasonHint(title: string): string {
  const withoutSeason = title.replace(/\bseason\s*\d+\b/gi, '');
  const collapsed = withoutSeason.replace(/\s{2,}/g, ' ');
  return collapsed.trim();
}
/** Canonical comparison form: trimmed, lower-cased, whitespace runs collapsed to one space. */
function normalizeTitle(text: string): string {
  const lowered = text.trim().toLowerCase();
  return lowered.split(/\s+/).join(' ');
}
/**
 * Collects every season number hinted at in `text`, either as "season N" or
 * as an "sN" token (N being one or two digits). Matching runs on the
 * normalized (lower-cased, whitespace-collapsed) text.
 */
function extractCandidateSeasonHints(text: string): Set<number> {
  const haystack = normalizeTitle(text);
  const patterns = [/\bseason\s*(\d{1,2})\b/gi, /\bs(\d{1,2})(?:\b|\D)/gi];
  const seasons = new Set<number>();
  for (const pattern of patterns) {
    for (const hit of haystack.matchAll(pattern)) {
      const parsed = Number.parseInt(hit[1]!, 10);
      if (Number.isInteger(parsed)) {
        seasons.add(parsed);
      }
    }
  }
  return seasons;
}
/**
 * Reports whether any of `titles` carries a season hint equal to `season`.
 * A null (or otherwise falsy) season never matches.
 */
function isSeasonMentioned(titles: string[], season: number | null): boolean {
  if (!season) {
    return false;
  }
  return titles.some((candidate) => extractCandidateSeasonHints(candidate).has(season));
}
/**
 * Chooses the search result that best matches the parsed title.
 *
 * @param title Parsed title used as the match target (also tried with any
 *   "season N" phrase removed).
 * @param episode Parsed episode number, or null when unknown.
 * @param season Parsed season number, or null when unknown.
 * @param media Search results to choose from.
 * @returns The id and a display title of the highest-scoring entry, or null
 *   when `media` is empty.
 */
function pickBestSearchResult(
title: string,
episode: number | null,
season: number | null,
media: AnilistMedia[],
): { id: number; title: string } | null {
const cleanedTitle = removeSeasonHint(title);
// Normalized, non-empty, de-duplicated match targets.
const targets = [title, cleanedTitle]
.map(normalizeTitle)
.map((value) => value.trim())
.filter((value, index, all) => value.length > 0 && all.indexOf(value) === index);
// Drop entries known to have fewer episodes than the one being watched;
// entries with an unknown episode count are kept.
const filtered =
episode === null
? media
: media.filter((item) => {
const total = item.episodes;
return total === null || total >= episode;
});
// If the episode filter removed everything, fall back to the full list.
const candidates = filtered.length > 0 ? filtered : media;
if (candidates.length === 0) {
return null;
}
const scored = candidates.map((item) => {
const candidateTitles = [item.title?.romaji, item.title?.english, item.title?.native]
.filter((value): value is string => typeof value === 'string')
.map((value) => normalizeTitle(value));
let score = 0;
for (const target of targets) {
// Exact title match: +120, and the partial-match bonuses are skipped.
if (candidateTitles.includes(target)) {
score += 120;
continue;
}
// Result title contains the target: +30.
if (candidateTitles.some((itemTitle) => itemTitle.includes(target))) {
score += 30;
}
// Target contains the result title: +10.
if (candidateTitles.some((itemTitle) => target.includes(itemTitle))) {
score += 10;
}
}
// Total episode count equal to the parsed episode number: +20.
if (episode !== null && item.episodes === episode) {
score += 20;
}
// Result title mentions the parsed season: +15.
if (season !== null && isSeasonMentioned(candidateTitles, season)) {
score += 15;
}
return { item, score };
});
// Highest score first; ties are broken in favour of the larger id.
scored.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
return b.item.id - a.item.id;
});
const selected = scored[0]!;
// Display title preference: english, romaji, native, then the input title.
const selectedTitle =
selected.item.title?.english ??
selected.item.title?.romaji ??
selected.item.title?.native ??
title;
return { id: selected.item.id, title: selectedTitle };
}
/**
 * Builds the ordered list of search strings for a parsed title.
 *
 * For guessit results with a season greater than 1, "<title> Season <n>" is
 * tried first; the plain title always follows. Entries are trimmed, and empty
 * or repeated entries are dropped.
 */
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
  const queries: string[] = [];
  const hasLaterSeason =
    parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1;
  if (hasLaterSeason) {
    queries.push(`${parsed.title} Season ${parsed.season}`);
  }
  queries.push(parsed.title);

  const unique: string[] = [];
  for (const query of queries) {
    const trimmed = query.trim();
    if (trimmed.length > 0 && !unique.includes(trimmed)) {
      unique.push(trimmed);
    }
  }
  return unique;
}
/**
 * Runs one title search against the AniList GraphQL endpoint.
 *
 * Waits for the rate limiter before sending and reports the response headers
 * back to it afterwards.
 *
 * @returns The matching media (possibly empty). `rateLimited` is true when
 *   the server answered 429, in which case `media` is empty.
 * @throws Error for any other non-OK HTTP status.
 */
async function searchAnilist(
  rateLimiter: AnilistRateLimiter,
  title: string,
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
  await rateLimiter.acquire();

  const requestBody = JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } });
  const response = await fetch(ANILIST_GRAPHQL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
    body: requestBody,
  });
  rateLimiter.recordResponse(response.headers);

  if (response.status === 429) {
    return { media: [], rateLimited: true };
  }
  if (!response.ok) {
    throw new Error(`Anilist search failed: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as AnilistSearchResponse;
  const found = payload.data?.Page?.media;
  return { media: found && found.length > 0 ? found : [], rateLimited: false };
}
/**
 * Downloads `url` and returns the body as a Buffer.
 * Any failure (network error, non-OK status, body read error) yields null
 * instead of throwing.
 */
async function downloadImage(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url);
    return response.ok ? Buffer.from(await response.arrayBuffer()) : null;
  } catch {
    return null;
  }
}
/**
 * Creates a cover-art fetcher bound to a rate limiter and logger.
 *
 * @param rateLimiter Limiter consulted before every AniList search.
 * @param logger Destination for progress, warning and error messages.
 * @param options Optional overrides; `runGuessit` replaces the default
 *   guessit runner (used by tests).
 * @returns An object exposing `fetchIfMissing`.
 */
export function createCoverArtFetcher(
  rateLimiter: AnilistRateLimiter,
  logger: Logger,
  options: CoverArtFetcherOptions = {},
): CoverArtFetcher {
  // Prefer the canonical title stored for the video; use the caller's title
  // only when the row is missing or its title is empty.
  const resolveCanonicalTitle = (
    db: DatabaseSync,
    videoId: number,
    fallbackTitle: string,
  ): string => {
    const row = db
      .prepare(
        `
          SELECT canonical_title AS canonicalTitle
          FROM imm_videos
          WHERE video_id = ?
          LIMIT 1
        `,
      )
      .get(videoId) as { canonicalTitle: string | null } | undefined;
    return row?.canonicalTitle?.trim() || fallbackTitle;
  };

  // Parse title/season/episode out of the effective title via guessit (or the
  // injected replacement), as implemented by guessAnilistMediaInfo.
  const resolveMediaInfo = async (
    db: DatabaseSync,
    videoId: number,
    canonicalTitle: string,
  ): Promise<CoverArtCandidate | null> => {
    const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
    const parsed = await guessAnilistMediaInfo(null, effectiveTitle, {
      runGuessit: options.runGuessit ?? runGuessit,
    });
    if (!parsed) {
      return null;
    }
    return {
      title: parsed.title,
      season: parsed.season,
      episode: parsed.episode,
      source: parsed.source,
    };
  };

  return {
    /**
     * Ensures cover art is cached for `videoId`.
     *
     * Order of attempts: existing blob, backfill from a stored cover URL,
     * then a fresh AniList search. A search that finds nothing stores an
     * all-null record so the lookup is not repeated for NO_MATCH_RETRY_MS.
     *
     * @returns true when a blob already exists, was backfilled, or a match
     *   was stored; false when no match was found, the search was
     *   rate-limited, or the search failed.
     */
    async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
      const existing = getCoverArt(db, videoId);
      if (existing?.coverBlob) {
        return true;
      }

      // A URL is known but the image bytes are missing: try to backfill.
      if (existing?.coverUrl) {
        const coverBlob = await downloadImage(existing.coverUrl);
        if (coverBlob) {
          upsertCoverArt(db, videoId, {
            anilistId: existing.anilistId,
            coverUrl: existing.coverUrl,
            coverBlob,
            titleRomaji: existing.titleRomaji,
            titleEnglish: existing.titleEnglish,
            episodesTotal: existing.episodesTotal,
          });
          return true;
        }
      }

      // A recent no-match record: do not search again yet.
      if (
        existing &&
        existing.coverUrl === null &&
        existing.anilistId === null &&
        Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
      ) {
        return false;
      }

      const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
      const cleaned = stripFilenameTags(effectiveTitle);
      if (!cleaned) {
        logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
        upsertCoverArt(db, videoId, {
          anilistId: null,
          coverUrl: null,
          coverBlob: null,
          titleRomaji: null,
          titleEnglish: null,
          episodesTotal: null,
        });
        return false;
      }

      const parsedInfo = await resolveMediaInfo(db, videoId, canonicalTitle);
      const searchBase = parsedInfo?.title ?? cleaned;
      const searchCandidates = parsedInfo ? buildSearchCandidates(parsedInfo) : [cleaned];
      // The regex-cleaned title is always tried last as a safety net.
      const effectiveCandidates = searchCandidates.includes(cleaned)
        ? searchCandidates
        : [...searchCandidates, cleaned];

      let selected: AnilistMedia | null = null;
      let rateLimited = false;
      for (const candidate of effectiveCandidates) {
        logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);
        try {
          const result = await searchAnilist(rateLimiter, candidate);
          // Sticky: once any candidate was rate-limited the search is
          // incomplete, so an empty outcome must not be cached as "no match".
          rateLimited = rateLimited || result.rateLimited;
          if (result.media.length === 0) {
            continue;
          }
          const picked = pickBestSearchResult(
            searchBase,
            parsedInfo?.episode ?? null,
            parsedInfo?.season ?? null,
            result.media,
          );
          if (picked) {
            const match = result.media.find((media) => media.id === picked.id);
            if (match) {
              selected = match;
              break;
            }
          }
        } catch (err) {
          logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
          return false;
        }
      }

      // Nothing selected and at least one query was rejected with 429:
      // report failure without writing a no-match record, so the next call
      // searches again.
      if (!selected && rateLimited) {
        logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
        return false;
      }

      if (!selected) {
        logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
        upsertCoverArt(db, videoId, {
          anilistId: null,
          coverUrl: null,
          coverBlob: null,
          titleRomaji: null,
          titleEnglish: null,
          episodesTotal: null,
        });
        return false;
      }

      const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
      let coverBlob: Buffer | null = null;
      if (coverUrl) {
        // A failed download leaves the blob null; the URL is still stored so
        // a later call can backfill it.
        coverBlob = await downloadImage(coverUrl);
      }

      upsertCoverArt(db, videoId, {
        anilistId: selected.id,
        coverUrl,
        coverBlob,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        episodesTotal: selected.episodes ?? null,
      });
      updateAnimeAnilistInfo(db, videoId, {
        anilistId: selected.id,
        titleRomaji: selected.title?.romaji ?? null,
        titleEnglish: selected.title?.english ?? null,
        titleNative: selected.title?.native ?? null,
        episodesTotal: selected.episodes ?? null,
      });

      logger.info(
        'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
        videoId,
        selected.id,
        selected.title?.romaji ?? searchBase,
      );
      return true;
    },
  };
}
+72
View File
@@ -0,0 +1,72 @@
// Default number of requests allowed per sliding window.
const DEFAULT_MAX_PER_MINUTE = 20;
// Length of the sliding window in milliseconds (one minute).
const WINDOW_MS = 60_000;
// When the server reports fewer remaining requests than this, requests are paused.
const SAFETY_REMAINING_THRESHOLD = 5;
/** Client-side rate limiter used before and after each AniList request. */
export interface AnilistRateLimiter {
// Resolves once a request may be sent.
acquire(): Promise<void>;
// Feeds the response headers back so server-side limits can be honoured.
recordResponse(headers: Headers): void;
}
/**
 * Creates a sliding-window rate limiter for AniList requests.
 *
 * The limiter allows at most `maxPerMinute` acquisitions per WINDOW_MS and
 * additionally pauses when response headers indicate the server-side budget
 * is nearly exhausted.
 *
 * @param maxPerMinute Requests allowed per window. Values below 1 are treated
 *   as 1; NaN falls back to DEFAULT_MAX_PER_MINUTE.
 * @returns The limiter; see AnilistRateLimiter.
 */
export function createAnilistRateLimiter(
  maxPerMinute = DEFAULT_MAX_PER_MINUTE,
): AnilistRateLimiter {
  // Largest delay setTimeout honours; larger values fire almost immediately.
  const MAX_TIMER_MS = 2_147_483_647;
  // Guarantees the wait loop below always has an oldest timestamp to wait on.
  const limit = Number.isNaN(maxPerMinute) ? DEFAULT_MAX_PER_MINUTE : Math.max(1, maxPerMinute);

  // Acquisition times inside the current window, oldest first.
  const timestamps: number[] = [];
  // Epoch milliseconds until which no request may be sent.
  let pauseUntilMs = 0;

  function pruneOld(now: number): void {
    const cutoff = now - WINDOW_MS;
    while (timestamps.length > 0 && timestamps[0]! < cutoff) {
      timestamps.shift();
    }
  }

  function sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, Math.min(ms, MAX_TIMER_MS)));
  }

  return {
    async acquire(): Promise<void> {
      // Re-check both conditions after every sleep: the pause may have been
      // extended in the meantime, and another waiter that woke at the same
      // time may already have taken the slot that was freed.
      for (;;) {
        const now = Date.now();
        if (now < pauseUntilMs) {
          await sleep(pauseUntilMs - now);
          continue;
        }

        pruneOld(now);
        if (timestamps.length < limit) {
          timestamps.push(now);
          return;
        }

        // Window is full: wait until the oldest entry leaves it (+100 ms slack).
        await sleep(timestamps[0]! + WINDOW_MS - now + 100);
      }
    },

    recordResponse(headers: Headers): void {
      const remaining = headers.get('x-ratelimit-remaining');
      if (remaining !== null) {
        const n = Number.parseInt(remaining, 10);
        if (Number.isFinite(n) && n < SAFETY_REMAINING_THRESHOLD) {
          const reset = headers.get('x-ratelimit-reset');
          if (reset) {
            // The reset header is interpreted as epoch seconds.
            const resetMs = Number.parseInt(reset, 10) * 1000;
            if (Number.isFinite(resetMs)) {
              pauseUntilMs = Math.max(pauseUntilMs, resetMs);
            }
          } else {
            // No reset time supplied: pause for one full window.
            pauseUntilMs = Math.max(pauseUntilMs, Date.now() + WINDOW_MS);
          }
        }
      }

      // Retry-After is interpreted as a number of seconds from now.
      const retryAfter = headers.get('retry-after');
      if (retryAfter) {
        const seconds = Number.parseInt(retryAfter, 10);
        if (Number.isFinite(seconds) && seconds > 0) {
          pauseUntilMs = Math.max(pauseUntilMs, Date.now() + seconds * 1000);
        }
      }
    },
  };
}
+1
View File
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,
+16
View File
@@ -176,6 +176,22 @@ test('runAppReadyRuntime skips heavy startup when shouldSkipHeavyStartup returns
assert.ok(calls.indexOf('handleFirstRunSetup') < calls.indexOf('handleInitialArgs'));
});
test('runAppReadyRuntime uses minimal startup for texthooker-only mode', async () => {
const { deps, calls } = makeDeps({
texthookerOnlyMode: true,
reloadConfig: () => calls.push('reloadConfig'),
handleInitialArgs: () => calls.push('handleInitialArgs'),
});
await runAppReadyRuntime(deps);
assert.deepEqual(calls, [
'ensureDefaultConfigBootstrap',
'reloadConfig',
'handleInitialArgs',
]);
});
test('runAppReadyRuntime skips Jellyfin remote startup when dependency is not wired', async () => {
const { deps, calls } = makeDeps({
startJellyfinRemoteSession: undefined,
+33
View File
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,
@@ -177,6 +178,9 @@ function createDeps(overrides: Partial<CliCommandServiceDeps> = {}) {
mediaTitle: 'Test',
entryCount: 10,
}),
runStatsCommand: async () => {
calls.push('runStatsCommand');
},
runJellyfinCommand: async () => {
calls.push('runJellyfinCommand');
},
@@ -249,6 +253,21 @@ test('handleCliCommand opens first-run setup window for --setup', () => {
assert.equal(calls.includes('openYomitanSettingsDelayed:1000'), false);
});
test('handleCliCommand dispatches stats command without overlay startup', async () => {
const { deps, calls } = createDeps({
runStatsCommand: async () => {
calls.push('runStatsCommand');
},
});
handleCliCommand(makeArgs({ stats: true }), 'initial', deps);
await Promise.resolve();
assert.ok(calls.includes('runStatsCommand'));
assert.equal(calls.includes('initializeOverlayRuntime'), false);
assert.equal(calls.includes('connectMpvClient'), false);
});
test('handleCliCommand applies cli log level for second-instance commands', () => {
const { deps, calls } = createDeps({
setLogLevel: (level) => {
@@ -520,8 +539,21 @@ test('handleCliCommand runs refresh-known-words command', () => {
assert.ok(calls.includes('refreshKnownWords'));
});
test('handleCliCommand stops app after headless initial refresh-known-words completes', async () => {
const { deps, calls } = createDeps({
hasMainWindow: () => false,
});
handleCliCommand(makeArgs({ refreshKnownWords: true }), 'initial', deps);
await new Promise((resolve) => setImmediate(resolve));
assert.ok(calls.includes('refreshKnownWords'));
assert.ok(calls.includes('stopApp'));
});
test('handleCliCommand reports async refresh-known-words errors to OSD', async () => {
const { deps, calls, osd } = createDeps({
hasMainWindow: () => false,
refreshKnownWords: async () => {
throw new Error('refresh boom');
},
@@ -532,4 +564,5 @@ test('handleCliCommand reports async refresh-known-words errors to OSD', async (
assert.ok(calls.some((value) => value.startsWith('error:refreshKnownWords failed:')));
assert.ok(osd.some((value) => value.includes('Refresh known words failed: refresh boom')));
assert.ok(calls.includes('stopApp'));
});
+17 -6
View File
@@ -61,6 +61,7 @@ export interface CliCommandServiceDeps {
mediaTitle: string;
entryCount: number;
}>;
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
runJellyfinCommand: (args: CliArgs) => Promise<void>;
printHelp: () => void;
hasMainWindow: () => boolean;
@@ -154,6 +155,7 @@ export interface CliCommandDepsRuntimeOptions {
};
jellyfin: {
openSetup: () => void;
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
runCommand: (args: CliArgs) => Promise<void>;
};
ui: UiCliRuntime;
@@ -222,6 +224,7 @@ export function createCliCommandDepsRuntime(
getAnilistQueueStatus: options.anilist.getQueueStatus,
retryAnilistQueue: options.anilist.retryQueueNow,
generateCharacterDictionary: options.dictionary.generate,
runStatsCommand: options.jellyfin.runStatsCommand,
runJellyfinCommand: options.jellyfin.runCommand,
printHelp: options.ui.printHelp,
hasMainWindow: options.app.hasMainWindow,
@@ -331,12 +334,18 @@ export function handleCliCommand(
'Update failed',
);
} else if (args.refreshKnownWords) {
runAsyncWithOsd(
() => deps.refreshKnownWords(),
deps,
'refreshKnownWords',
'Refresh known words failed',
);
const shouldStopAfterRun = source === 'initial' && !deps.hasMainWindow();
deps
.refreshKnownWords()
.catch((err) => {
deps.error('refreshKnownWords failed:', err);
deps.showMpvOsd(`Refresh known words failed: ${(err as Error).message}`);
})
.finally(() => {
if (shouldStopAfterRun) {
deps.stopApp();
}
});
} else if (args.toggleSecondarySub) {
deps.cycleSecondarySubMode();
} else if (args.triggerFieldGrouping) {
@@ -410,6 +419,8 @@ export function handleCliCommand(
deps.stopApp();
}
});
} else if (args.stats) {
void deps.runStatsCommand(args, source);
} else if (args.anilistRetryQueue) {
const queueStatus = deps.getAnilistQueueStatus();
deps.log(
@@ -130,6 +130,56 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r
assert.equal(lookup('高み'), 9933);
});
test('createFrequencyDictionaryLookup uses leading display digits for displayValue strings', async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
fs.writeFileSync(
bankPath,
JSON.stringify([
['潜む', 1, { frequency: { value: 121, displayValue: '118,121' } }],
['例', 2, { frequency: { value: 1234, displayValue: '1,234' } }],
]),
);
const lookup = await createFrequencyDictionaryLookup({
searchPaths: [tempDir],
log: () => undefined,
});
assert.equal(lookup('潜む'), 118);
assert.equal(lookup('例'), 1);
});
test('createFrequencyDictionaryLookup ignores occurrence-based Yomitan dictionaries', async () => {
const logs: string[] = [];
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
fs.writeFileSync(
path.join(tempDir, 'index.json'),
JSON.stringify({
title: 'CC100',
revision: '1',
frequencyMode: 'occurrence-based',
}),
);
fs.writeFileSync(
path.join(tempDir, 'term_meta_bank_1.json'),
JSON.stringify([['潜む', 1, { frequency: { value: 118121 } }]]),
);
const lookup = await createFrequencyDictionaryLookup({
searchPaths: [tempDir],
log: (message) => {
logs.push(message);
},
});
assert.equal(lookup('潜む'), null);
assert.equal(
logs.some((entry) => entry.includes('occurrence-based') && entry.includes('CC100')),
true,
);
});
test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
+76 -14
View File
@@ -6,6 +6,8 @@ export interface FrequencyDictionaryLookupOptions {
log: (message: string) => void;
}
type FrequencyDictionaryMode = 'occurrence-based' | 'rank-based';
interface FrequencyDictionaryEntry {
rank: number;
term: string;
@@ -29,30 +31,67 @@ function normalizeFrequencyTerm(value: string): string {
return value.trim().toLowerCase();
}
/**
 * Reads `index.json` from a dictionary directory and extracts its title and
 * frequency mode.
 *
 * Never throws: a missing file is silently treated as "no metadata", while
 * other read errors and invalid JSON are reported through `log`. In all of
 * those cases both fields are null.
 *
 * @param dictionaryPath Directory expected to contain `index.json`.
 * @param log Receives a message for read or parse failures.
 */
async function readDictionaryMetadata(
  dictionaryPath: string,
  log: (message: string) => void,
): Promise<{ title: string | null; frequencyMode: FrequencyDictionaryMode | null }> {
  const indexPath = path.join(dictionaryPath, 'index.json');

  let rawText: string;
  try {
    rawText = await fs.readFile(indexPath, 'utf-8');
  } catch (error) {
    // A missing index.json is normal; anything else is worth a log line.
    if (!isErrorCode(error, 'ENOENT')) {
      log(`Failed to read frequency dictionary index ${indexPath}: ${String(error)}`);
    }
    return { title: null, frequencyMode: null };
  }

  let rawIndex: unknown;
  try {
    rawIndex = JSON.parse(rawText) as unknown;
  } catch {
    log(`Failed to parse frequency dictionary index as JSON: ${indexPath}`);
    return { title: null, frequencyMode: null };
  }

  if (!rawIndex || typeof rawIndex !== 'object') {
    return { title: null, frequencyMode: null };
  }

  const { title, frequencyMode } = rawIndex as { title?: unknown; frequencyMode?: unknown };
  const trimmedTitle = typeof title === 'string' ? title.trim() : '';
  const isKnownMode = frequencyMode === 'occurrence-based' || frequencyMode === 'rank-based';
  return {
    title: trimmedTitle.length > 0 ? trimmedTitle : null,
    frequencyMode: isKnownMode ? frequencyMode : null,
  };
}
function parsePositiveFrequencyString(value: string): number | null {
const trimmed = value.trim();
if (!trimmed) {
return null;
}
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
if (!numericPrefix) {
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
if (!numericMatch) {
return null;
}
const chunks = numericPrefix.split(',');
const normalizedNumber =
chunks.length <= 1
? (chunks[0] ?? '')
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
? chunks.join('')
: (chunks[0] ?? '');
const parsed = Number.parseInt(normalizedNumber, 10);
const parsed = Number.parseFloat(numericMatch);
if (!Number.isFinite(parsed) || parsed <= 0) {
return null;
}
return parsed;
const normalized = Math.floor(parsed);
if (!Number.isFinite(normalized) || normalized <= 0) {
return null;
}
return normalized;
}
function parsePositiveFrequencyNumber(value: unknown): number | null {
@@ -68,18 +107,32 @@ function parsePositiveFrequencyNumber(value: unknown): number | null {
return null;
}
/**
 * Parses a frequency `displayValue`.
 *
 * Strings contribute only their leading run of digits (so "118,121" yields
 * 118); a string without leading digits, or whose digits parse to zero,
 * yields null. Non-string values are delegated to
 * parsePositiveFrequencyNumber.
 */
function parseDisplayFrequencyNumber(value: unknown): number | null {
  if (typeof value !== 'string') {
    return parsePositiveFrequencyNumber(value);
  }
  const digits = /^\d+/.exec(value.trim());
  if (digits === null) {
    return null;
  }
  const rank = Number.parseInt(digits[0], 10);
  if (Number.isFinite(rank) && rank > 0) {
    return rank;
  }
  return null;
}
function extractFrequencyDisplayValue(meta: unknown): number | null {
if (!meta || typeof meta !== 'object') return null;
const frequency = (meta as { frequency?: unknown }).frequency;
if (!frequency || typeof frequency !== 'object') return null;
const rawValue = (frequency as { value?: unknown }).value;
const parsedRawValue = parsePositiveFrequencyNumber(rawValue);
const displayValue = (frequency as { displayValue?: unknown }).displayValue;
const parsedDisplayValue = parsePositiveFrequencyNumber(displayValue);
const parsedDisplayValue = parseDisplayFrequencyNumber(displayValue);
if (parsedDisplayValue !== null) {
return parsedDisplayValue;
}
const rawValue = (frequency as { value?: unknown }).value;
return parsePositiveFrequencyNumber(rawValue);
return parsedRawValue;
}
function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null {
@@ -141,6 +194,15 @@ async function collectDictionaryFromPath(
log: (message: string) => void,
): Promise<Map<string, number>> {
const terms = new Map<string, number>();
const metadata = await readDictionaryMetadata(dictionaryPath, log);
if (metadata.frequencyMode === 'occurrence-based') {
log(
`Skipping occurrence-based frequency dictionary ${
metadata.title ?? dictionaryPath
}; SubMiner frequency tags require rank-based values.`,
);
return terms;
}
let fileNames: string[];
try {
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,71 @@
import type { Token } from '../../../types';
import type { LegacyVocabularyPosResolution } from './types';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
/**
 * Normalizes optional lookup text for comparison.
 *
 * @param value - Text that may be `null` or `undefined`.
 * @returns The trimmed string, or an empty string for any non-string input.
 */
function normalizeLookupText(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Converts katakana characters in `text` to hiragana.
 *
 * The string is walked by code point. Any code point within
 * [KATAKANA_CODEPOINT_START, KATAKANA_CODEPOINT_END] is shifted down by
 * KATAKANA_TO_HIRAGANA_OFFSET; every other character is copied unchanged.
 *
 * @param text - Input text.
 * @returns The text with in-range katakana replaced.
 */
function katakanaToHiragana(text: string): string {
  const converted = Array.from(text, (char) => {
    const codePoint = char.codePointAt(0);
    if (codePoint === undefined) {
      return '';
    }
    const isKatakana =
      codePoint >= KATAKANA_CODEPOINT_START && codePoint <= KATAKANA_CODEPOINT_END;
    return isKatakana ? String.fromCodePoint(codePoint - KATAKANA_TO_HIRAGANA_OFFSET) : char;
  });
  return converted.join('');
}
/**
 * Builds the stored vocabulary resolution for a single token.
 *
 * The headword falls back to the token's surface form when the trimmed
 * headword is empty, the katakana reading is trimmed and converted with
 * `katakanaToHiragana`, and the part of speech is derived by
 * `deriveStoredPartOfSpeech` from the token's `partOfSpeech` and `pos1`.
 *
 * @param token - Token to convert.
 * @returns The resolution record with trimmed `pos1`..`pos3` fields.
 */
function toResolution(token: Token): LegacyVocabularyPosResolution {
  const trimmedHeadword = normalizeLookupText(token.headword);
  const headword = trimmedHeadword !== '' ? trimmedHeadword : normalizeLookupText(token.word);
  const reading = katakanaToHiragana(normalizeLookupText(token.katakanaReading));
  const partOfSpeech = deriveStoredPartOfSpeech({
    partOfSpeech: token.partOfSpeech,
    pos1: token.pos1,
  });
  const pos1 = normalizeLookupText(token.pos1);
  const pos2 = normalizeLookupText(token.pos2);
  const pos3 = normalizeLookupText(token.pos3);
  return { headword, reading, partOfSpeech, pos1, pos2, pos3 };
}
/**
 * Picks the token that corresponds to `lookupText` and converts it to a
 * stored part-of-speech resolution.
 *
 * Resolution order:
 *  1. exactly one token whose trimmed surface form equals the lookup text;
 *  2. exactly one token whose trimmed headword equals the lookup text;
 *  3. the only token, when the list has a single entry.
 *
 * @param lookupText - Text being resolved; trimmed before comparison.
 * @param tokens - Candidate tokens, or `null` when none are available.
 * @returns The resolution, or `null` when the lookup text is blank, there are
 *   no tokens, or no rule selects a single token.
 */
export function resolveLegacyVocabularyPosFromTokens(
  lookupText: string,
  tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
  const target = normalizeLookupText(lookupText);
  if (!target || !tokens || tokens.length === 0) {
    return null;
  }
  const candidates: Token[] = tokens;

  // Returns the single token whose selected field matches, if exactly one does.
  const uniqueMatch = (
    pick: (token: Token) => string | null | undefined,
  ): Token | undefined => {
    const hits = candidates.filter((token) => normalizeLookupText(pick(token)) === target);
    return hits.length === 1 ? hits[0] : undefined;
  };

  const bySurface = uniqueMatch((token) => token.word);
  if (bySurface) {
    return toResolution(bySurface);
  }

  const byHeadword = uniqueMatch((token) => token.headword);
  if (byHeadword) {
    return toResolution(byHeadword);
  }

  return candidates.length === 1 ? toResolution(candidates[0]!) : null;
}
@@ -0,0 +1,569 @@
import type { DatabaseSync } from './sqlite';
import { finalizeSessionRecord } from './session';
import type { LifetimeRebuildSummary, SessionState } from './types';
/**
 * Counter columns read from the latest `imm_session_telemetry` sample of a
 * session; each may be NULL in the row.
 */
interface TelemetryRow {
active_watched_ms: number | null;
cards_mined: number | null;
lines_seen: number | null;
tokens_seen: number | null;
}
/** Columns read from `imm_videos` for the session's video (`anime_id`, `watched`). */
interface VideoRow {
anime_id: number | null;
// Treated as "watched" by callers in this file when greater than zero.
watched: number;
}
/** `episodes_total` column read from `imm_anime`; NULL when the total is not recorded. */
interface AnimeRow {
episodes_total: number | null;
}
/**
 * Coerces a nullable counter into a non-negative integer.
 *
 * @param value - Candidate value; may be `null` or non-finite.
 * @param fallback - Returned unchanged when `value` is `null`, `NaN` or infinite.
 * @returns `value` rounded down and clamped to a minimum of 0, or `fallback`.
 */
function asPositiveNumber(value: number | null, fallback: number): number {
  if (value !== null && Number.isFinite(value)) {
    const floored = Math.floor(value);
    return floored > 0 ? floored : 0;
  }
  return fallback;
}
/** Result row of the `SELECT COUNT(*) AS count` queries in this file. */
interface ExistenceRow {
count: number;
}
/** `completed` column read from an existing `imm_lifetime_media` row. */
interface LifetimeMediaStateRow {
completed: number;
}
/** `episodes_completed` column read from an existing `imm_lifetime_anime` row. */
interface LifetimeAnimeStateRow {
episodes_completed: number;
}
/**
 * Camel-cased session row produced by the `imm_sessions` queries in this file
 * and converted to a `SessionState` by `toRebuildSessionState`.
 */
interface RetainedSessionRow {
sessionId: number;
videoId: number;
startedAtMs: number;
endedAtMs: number;
// Nullable, unlike the counters below.
lastMediaMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
seekBackwardCount: number;
mediaBufferEvents: number;
}
/**
 * Reports whether `imm_sessions` still holds an earlier session for a video.
 *
 * "Earlier" means a smaller `started_at_ms`, or the same start time with a
 * smaller `session_id`.
 *
 * @param db - Database handle.
 * @param videoId - Video whose sessions are inspected.
 * @param startedAtMs - Start time of the current session.
 * @param currentSessionId - Id of the current session (tie-breaker).
 * @returns `true` when at least one earlier session row exists.
 */
function hasRetainedPriorSession(
  db: DatabaseSync,
  videoId: number,
  startedAtMs: number,
  currentSessionId: number,
): boolean {
  const statement = db.prepare(
    `
SELECT COUNT(*) AS count
FROM imm_sessions
WHERE video_id = ?
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
  );
  const row = statement.get(
    videoId,
    startedAtMs,
    startedAtMs,
    currentSessionId,
  ) as ExistenceRow | null;
  const priorSessions = Number(row?.count ?? 0);
  return priorSessions > 0;
}
/**
 * Reports whether the given session is the earliest retained session on its
 * local calendar day.
 *
 * The query counts `imm_sessions` rows that fall on the same local day
 * (SQLite `localtime` modifier, seconds divided by 86400) and that started
 * before this session, or at the same instant with a smaller `session_id`.
 *
 * The count is coerced with `Number(... ?? 0)`, matching
 * `hasRetainedPriorSession`, so a bigint count or a missing row is handled
 * the same way as a plain numeric zero.
 *
 * @param db - Database handle.
 * @param currentSessionId - Id of the session being checked (tie-breaker).
 * @param startedAtMs - Start time of the session in epoch milliseconds.
 * @returns `true` when no earlier session exists on the same local day.
 */
function isFirstSessionForLocalDay(
  db: DatabaseSync,
  currentSessionId: number,
  startedAtMs: number,
): boolean {
  const row = db
    .prepare(
      `
SELECT COUNT(*) AS count
FROM imm_sessions
WHERE CAST(strftime('%s', started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
= CAST(strftime('%s', ? / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
    )
    .get(startedAtMs, startedAtMs, startedAtMs, currentSessionId) as
    | ExistenceRow
    | null
    | undefined;
  return Number(row?.count ?? 0) === 0;
}
/**
 * Clears the derived lifetime tables and zeroes the counters on the global
 * lifetime row.
 *
 * @param db - Database handle the statements run against.
 * @param nowMs - Timestamp written to both `last_rebuilt_ms` and
 *   `LAST_UPDATE_DATE` of the global row.
 */
function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
// Remove per-anime and per-media aggregates plus the applied-session markers.
db.exec(`
DELETE FROM imm_lifetime_anime;
DELETE FROM imm_lifetime_media;
DELETE FROM imm_lifetime_applied_sessions;
`);
// The global row (global_id = 1) is kept and reset in place rather than deleted.
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = 0,
total_active_ms = 0,
total_cards = 0,
active_days = 0,
episodes_started = 0,
episodes_completed = 0,
anime_completed = 0,
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(nowMs, nowMs);
}
/**
 * Converts a retained session row into a `SessionState` that can be passed to
 * the session finalization and lifetime-summary routines.
 *
 * Live-tracking fields are given neutral values (line index 0, not paused, no
 * pending telemetry, not marked watched), `lastWallClockMs` is taken from the
 * row's end time, and every counter is clamped to a minimum of 0.
 *
 * `lastMediaMs` is normalized to `null` when the row does not carry it, so the
 * returned state never exposes `undefined` for that field.
 *
 * @param row - Session row read from the database.
 * @returns The reconstructed session state.
 */
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
  return {
    sessionId: row.sessionId,
    videoId: row.videoId,
    startedAtMs: row.startedAtMs,
    currentLineIndex: 0,
    lastWallClockMs: row.endedAtMs,
    // Queries that omit the column yield `undefined` here; store `null` instead.
    lastMediaMs: row.lastMediaMs ?? null,
    lastPauseStartMs: null,
    isPaused: false,
    pendingTelemetry: false,
    markedWatched: false,
    totalWatchedMs: Math.max(0, row.totalWatchedMs),
    activeWatchedMs: Math.max(0, row.activeWatchedMs),
    linesSeen: Math.max(0, row.linesSeen),
    tokensSeen: Math.max(0, row.tokensSeen),
    cardsMined: Math.max(0, row.cardsMined),
    lookupCount: Math.max(0, row.lookupCount),
    lookupHits: Math.max(0, row.lookupHits),
    yomitanLookupCount: Math.max(0, row.yomitanLookupCount),
    pauseCount: Math.max(0, row.pauseCount),
    pauseMs: Math.max(0, row.pauseMs),
    seekForwardCount: Math.max(0, row.seekForwardCount),
    seekBackwardCount: Math.max(0, row.seekBackwardCount),
    mediaBufferEvents: Math.max(0, row.mediaBufferEvents),
  };
}
/**
 * Loads every session that has no end time (`ended_at_ms IS NULL`), joined to
 * its most recent telemetry sample (highest `sample_ms`, then highest
 * `telemetry_id`).
 *
 * Each counter prefers the telemetry value, then the value stored on the
 * session row, then 0. `endedAtMs` is the latest sample time, falling back to
 * the session's `LAST_UPDATE_DATE` and finally to its start time.
 *
 * @param db - Database handle.
 * @returns The sessions ordered by start time and then session id.
 */
function getRetainedStaleActiveSessions(db: DatabaseSync): RetainedSessionRow[] {
return db
.prepare(
`
SELECT
s.session_id AS sessionId,
s.video_id AS videoId,
s.started_at_ms AS startedAtMs,
COALESCE(t.sample_ms, s.LAST_UPDATE_DATE, s.started_at_ms) AS endedAtMs,
s.ended_media_ms AS lastMediaMs,
COALESCE(t.total_watched_ms, s.total_watched_ms, 0) AS totalWatchedMs,
COALESCE(t.active_watched_ms, s.active_watched_ms, 0) AS activeWatchedMs,
COALESCE(t.lines_seen, s.lines_seen, 0) AS linesSeen,
COALESCE(t.tokens_seen, s.tokens_seen, 0) AS tokensSeen,
COALESCE(t.cards_mined, s.cards_mined, 0) AS cardsMined,
COALESCE(t.lookup_count, s.lookup_count, 0) AS lookupCount,
COALESCE(t.lookup_hits, s.lookup_hits, 0) AS lookupHits,
COALESCE(t.yomitan_lookup_count, s.yomitan_lookup_count, 0) AS yomitanLookupCount,
COALESCE(t.pause_count, s.pause_count, 0) AS pauseCount,
COALESCE(t.pause_ms, s.pause_ms, 0) AS pauseMs,
COALESCE(t.seek_forward_count, s.seek_forward_count, 0) AS seekForwardCount,
COALESCE(t.seek_backward_count, s.seek_backward_count, 0) AS seekBackwardCount,
COALESCE(t.media_buffer_events, s.media_buffer_events, 0) AS mediaBufferEvents
FROM imm_sessions s
LEFT JOIN imm_session_telemetry t
ON t.telemetry_id = (
SELECT telemetry_id
FROM imm_session_telemetry
WHERE session_id = s.session_id
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
)
WHERE s.ended_at_ms IS NULL
ORDER BY s.started_at_ms ASC, s.session_id ASC
`,
)
.all() as RetainedSessionRow[];
}
/**
 * Adds one session's totals to the `imm_lifetime_media` row of a video.
 *
 * A missing row is inserted with `total_sessions = 1`. On conflict the row is
 * updated in place: `total_sessions` is incremented, the totals are added,
 * `completed` becomes the maximum of the old and new flag, and the first/last
 * watched timestamps are widened to include this session.
 *
 * @param db - Database handle.
 * @param videoId - Key of the lifetime row.
 * @param nowMs - Written to `CREATED_DATE` on insert and to `LAST_UPDATE_DATE` always.
 * @param activeMs - Active watch time to add.
 * @param cardsMined - Cards to add.
 * @param linesSeen - Lines to add.
 * @param tokensSeen - Tokens to add.
 * @param completed - Completion flag for this session.
 * @param startedAtMs - Candidate for `first_watched_ms`.
 * @param endedAtMs - Candidate for `last_watched_ms`.
 */
function upsertLifetimeMedia(
db: DatabaseSync,
videoId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
completed: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_media(
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(video_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
completed = MAX(completed, excluded.completed),
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
// Bind order follows the placeholders above; total_sessions is the literal 1.
).run(
videoId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
completed,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
/**
 * Adds one session's totals to the `imm_lifetime_anime` row of an anime.
 *
 * A missing row is inserted with `total_sessions = 1`. On conflict the row is
 * updated in place: `total_sessions` is incremented, the totals and the
 * episode deltas are added, and the first/last watched timestamps are widened
 * to include this session.
 *
 * @param db - Database handle.
 * @param animeId - Key of the lifetime row.
 * @param nowMs - Written to `CREATED_DATE` on insert and to `LAST_UPDATE_DATE` always.
 * @param activeMs - Active watch time to add.
 * @param cardsMined - Cards to add.
 * @param linesSeen - Lines to add.
 * @param tokensSeen - Tokens to add.
 * @param episodesStartedDelta - Amount added to `episodes_started`.
 * @param episodesCompletedDelta - Amount added to `episodes_completed`.
 * @param startedAtMs - Candidate for `first_watched_ms`.
 * @param endedAtMs - Candidate for `last_watched_ms`.
 */
function upsertLifetimeAnime(
db: DatabaseSync,
animeId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
episodesStartedDelta: number,
episodesCompletedDelta: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_anime(
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(anime_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
episodes_started = episodes_started + excluded.episodes_started,
episodes_completed = episodes_completed + excluded.episodes_completed,
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
// Bind order follows the placeholders above; total_sessions is the literal 1.
).run(
animeId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
episodesStartedDelta,
episodesCompletedDelta,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
/**
 * Folds one finished session into the lifetime summary tables.
 *
 * The session is first recorded in `imm_lifetime_applied_sessions`; if a row
 * for the session id already exists the function returns without touching
 * any totals, so applying the same session twice is a no-op.
 *
 * Totals come from the session's latest telemetry sample when one exists
 * (falling back per-field to the in-memory session values), and are added to
 * the global row, the per-video row and, when the video is linked to an
 * anime, the per-anime row.
 *
 * A single timestamp is sampled up front and used for every audit column
 * written by this call, so the rows it touches agree with each other.
 *
 * @param db - Database handle.
 * @param session - Session being applied.
 * @param endedAtMs - End time of the session; stored as `applied_at_ms` and
 *   used as the candidate `last_watched_ms`.
 */
export function applySessionLifetimeSummary(
  db: DatabaseSync,
  session: SessionState,
  endedAtMs: number,
): void {
  const nowMs = Date.now();

  // Claim the session; a conflict means it has already been applied.
  const applyResult = db
    .prepare(
      `
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?
)
ON CONFLICT(session_id) DO NOTHING
`,
    )
    .run(session.sessionId, endedAtMs, nowMs, nowMs);
  if ((applyResult.changes ?? 0) <= 0) {
    return;
  }

  // Latest telemetry sample for the session, if any.
  const telemetry = db
    .prepare(
      `
SELECT
active_watched_ms,
cards_mined,
lines_seen,
tokens_seen
FROM imm_session_telemetry
WHERE session_id = ?
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
`,
    )
    .get(session.sessionId) as TelemetryRow | null;
  const video = db
    .prepare('SELECT anime_id, watched FROM imm_videos WHERE video_id = ?')
    .get(session.videoId) as VideoRow | null;
  // Lifetime state *before* this session is added; drives the "first" flags below.
  const mediaLifetime =
    (db
      .prepare('SELECT completed FROM imm_lifetime_media WHERE video_id = ?')
      .get(session.videoId) as LifetimeMediaStateRow | null | undefined) ?? null;
  const animeLifetime = video?.anime_id
    ? ((db
        .prepare('SELECT episodes_completed FROM imm_lifetime_anime WHERE anime_id = ?')
        .get(video.anime_id) as LifetimeAnimeStateRow | null | undefined) ?? null)
    : null;
  const anime = video?.anime_id
    ? ((db
        .prepare('SELECT episodes_total FROM imm_anime WHERE anime_id = ?')
        .get(video.anime_id) as AnimeRow | null | undefined) ?? null)
    : null;

  const activeMs = telemetry
    ? asPositiveNumber(telemetry.active_watched_ms, session.activeWatchedMs)
    : session.activeWatchedMs;
  const cardsMined = telemetry
    ? asPositiveNumber(telemetry.cards_mined, session.cardsMined)
    : session.cardsMined;
  const linesSeen = telemetry
    ? asPositiveNumber(telemetry.lines_seen, session.linesSeen)
    : session.linesSeen;
  const tokensSeen = telemetry
    ? asPositiveNumber(telemetry.tokens_seen, session.tokensSeen)
    : session.tokensSeen;

  const watched = video?.watched ?? 0;
  // First session: no lifetime row yet and no earlier retained session for the video.
  const isFirstSessionForVideoRun =
    mediaLifetime === null &&
    !hasRetainedPriorSession(db, session.videoId, session.startedAtMs, session.sessionId);
  // First completion: the video is watched and was not yet marked completed.
  const isFirstCompletedSessionForVideoRun =
    watched > 0 && Number(mediaLifetime?.completed ?? 0) <= 0;
  const isFirstSessionForDay = isFirstSessionForLocalDay(
    db,
    session.sessionId,
    session.startedAtMs,
  );

  const episodesCompletedBefore = Number(animeLifetime?.episodes_completed ?? 0);
  const animeEpisodesTotal = anime?.episodes_total ?? null;
  // The anime counts as completed when this episode is the one that reaches the
  // known episode total (and the total had not been reached before).
  const animeCompletedDelta =
    isFirstCompletedSessionForVideoRun &&
    animeEpisodesTotal !== null &&
    animeEpisodesTotal > 0 &&
    episodesCompletedBefore < animeEpisodesTotal &&
    episodesCompletedBefore + 1 >= animeEpisodesTotal
      ? 1
      : 0;

  db.prepare(
    `
UPDATE imm_lifetime_global
SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + ?,
total_cards = total_cards + ?,
active_days = active_days + ?,
episodes_started = episodes_started + ?,
episodes_completed = episodes_completed + ?,
anime_completed = anime_completed + ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
  ).run(
    activeMs,
    cardsMined,
    isFirstSessionForDay ? 1 : 0,
    isFirstSessionForVideoRun ? 1 : 0,
    isFirstCompletedSessionForVideoRun ? 1 : 0,
    animeCompletedDelta,
    nowMs,
  );

  upsertLifetimeMedia(
    db,
    session.videoId,
    nowMs,
    activeMs,
    cardsMined,
    linesSeen,
    tokensSeen,
    watched > 0 ? 1 : 0,
    session.startedAtMs,
    endedAtMs,
  );

  if (video?.anime_id) {
    upsertLifetimeAnime(
      db,
      video.anime_id,
      nowMs,
      activeMs,
      cardsMined,
      linesSeen,
      tokensSeen,
      isFirstSessionForVideoRun ? 1 : 0,
      isFirstCompletedSessionForVideoRun ? 1 : 0,
      session.startedAtMs,
      endedAtMs,
    );
  }
}
/**
 * Rebuilds every lifetime summary table from the ended sessions that are
 * still present in `imm_sessions`.
 *
 * The ended sessions are read in chronological order, then, inside one
 * transaction, the summaries are reset and each session is re-applied. Any
 * failure rolls the transaction back and rethrows.
 *
 * The query selects `ended_media_ms AS lastMediaMs` so that the rows really
 * have the `RetainedSessionRow` shape they are cast to.
 *
 * @param db - Database handle.
 * @returns The number of sessions re-applied and the rebuild timestamp.
 * @throws Rethrows any error raised while resetting or applying sessions.
 */
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
  const rebuiltAtMs = Date.now();
  const sessions = db
    .prepare(
      `
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
ended_media_ms AS lastMediaMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
    )
    .all() as RetainedSessionRow[];

  db.exec('BEGIN');
  try {
    resetLifetimeSummaries(db, rebuiltAtMs);
    // Chronological order matters: "first session" flags depend on what was applied before.
    for (const session of sessions) {
      applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
    }
    db.exec('COMMIT');
  } catch (error) {
    db.exec('ROLLBACK');
    throw error;
  }

  return {
    appliedSessions: sessions.length,
    rebuiltAtMs,
  };
}
/**
 * Closes out sessions that were left without an end time.
 *
 * Every such session is finalized and then applied to the lifetime summaries
 * inside a single transaction, using the end time computed by
 * `getRetainedStaleActiveSessions`. On any error the transaction is rolled
 * back and the error is rethrown.
 *
 * @param db - Database handle.
 * @returns The number of sessions reconciled (0 when there were none).
 */
export function reconcileStaleActiveSessions(db: DatabaseSync): number {
  const staleSessions = getRetainedStaleActiveSessions(db);
  const staleCount = staleSessions.length;
  if (staleCount > 0) {
    db.exec('BEGIN');
    try {
      staleSessions.forEach((row) => {
        const rebuilt = toRebuildSessionState(row);
        finalizeSessionRecord(db, rebuilt, row.endedAtMs);
        applySessionLifetimeSummary(db, rebuilt, row.endedAtMs);
      });
      db.exec('COMMIT');
    } catch (error) {
      db.exec('ROLLBACK');
      throw error;
    }
  }
  return staleCount;
}
/**
 * Decides whether the lifetime summaries need to be rebuilt from retained
 * sessions.
 *
 * @param db - Database handle.
 * @returns `true` when at least one ended session exists and either no
 *   session has been recorded as applied or the global session total is zero.
 */
export function shouldBackfillLifetimeSummaries(db: DatabaseSync): boolean {
  // Runs a single-row query and reads one numeric column, treating a missing row/value as 0.
  const readCount = (sql: string, column: 'totalSessions' | 'count'): number => {
    const row = db.prepare(sql).get() as Record<string, number> | null | undefined;
    return Number(row?.[column] ?? 0);
  };

  const totalSessions = readCount(
    'SELECT total_sessions AS totalSessions FROM imm_lifetime_global WHERE global_id = 1',
    'totalSessions',
  );
  const appliedSessions = readCount(
    'SELECT COUNT(*) AS count FROM imm_lifetime_applied_sessions',
    'count',
  );
  const retainedEndedSessions = readCount(
    'SELECT COUNT(*) AS count FROM imm_sessions WHERE ended_at_ms IS NOT NULL',
    'count',
  );

  if (!(retainedEndedSessions > 0)) {
    return false;
  }
  return appliedSessions === 0 || totalSessions === 0;
}
@@ -0,0 +1,200 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { Database } from './sqlite';
import {
pruneRawRetention,
pruneRollupRetention,
runOptimizeMaintenance,
toMonthKey,
} from './maintenance';
import { ensureSchema } from './storage';
/**
 * Creates a fresh temporary directory for a test database.
 *
 * @returns The path of a `tracker.db` file inside the new directory; the file
 *   itself is not created here.
 */
function makeDbPath(): string {
  const prefix = path.join(os.tmpdir(), 'subminer-maintenance-test-');
  const tempDir = fs.mkdtempSync(prefix);
  return path.join(tempDir, 'tracker.db');
}
/**
 * Removes the directory that contains `dbPath`, including everything in it.
 * Failures are ignored so cleanup never fails a test.
 *
 * @param dbPath - Database file path previously returned by `makeDbPath`.
 */
function cleanupDbPath(dbPath: string): void {
  try {
    const parentDir = path.dirname(dbPath);
    fs.rmSync(parentDir, { force: true, recursive: true });
  } catch {
    // best effort
  }
}
// Verifies that ended sessions are pruned by `sessionsRetentionMs` (30 days)
// while telemetry rows are pruned by the shorter `telemetryRetentionMs` (1 day).
test('pruneRawRetention uses session retention separately from telemetry retention', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// Fixed clock: session 1 ended 40 days ago, session 2 ended 5 days ago.
const nowMs = 90 * 86_400_000;
const staleEndedAtMs = nowMs - 40 * 86_400_000;
const keptEndedAtMs = nowMs - 5 * 86_400_000;
// Telemetry samples: session 1 is two days old, session 2 is twelve hours old.
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, 'session-1', 1, ${staleEndedAtMs - 1_000}, ${staleEndedAtMs}, 2, ${staleEndedAtMs}, ${staleEndedAtMs}),
(2, 'session-2', 1, ${keptEndedAtMs - 1_000}, ${keptEndedAtMs}, 2, ${keptEndedAtMs}, ${keptEndedAtMs});
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, ${nowMs - 2 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}),
(2, ${nowMs - 12 * 60 * 60 * 1000}, 0, 0, ${nowMs}, ${nowMs});
`);
const result = pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 1 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const remainingSessions = db
.prepare('SELECT session_id FROM imm_sessions ORDER BY session_id')
.all() as Array<{ session_id: number }>;
const remainingTelemetry = db
.prepare('SELECT session_id FROM imm_session_telemetry ORDER BY session_id')
.all() as Array<{ session_id: number }>;
// Only session 1 and its telemetry sample fall outside their windows.
assert.equal(result.deletedTelemetryRows, 1);
assert.equal(result.deletedEndedSessions, 1);
assert.deepEqual(
remainingSessions.map((row) => row.session_id),
[2],
);
assert.deepEqual(
remainingTelemetry.map((row) => row.session_id),
[2],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Verifies that `pruneRawRetention` leaves daily/monthly rollups untouched and
// that `pruneRollupRetention` removes them according to its own policy.
test('raw retention keeps rollups and rollup retention prunes them separately', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const nowMs = Date.UTC(2026, 2, 16, 12, 0, 0, 0);
// Rollup keys placed 90 days (daily) and 400 days (monthly) before `nowMs`.
const oldDay = Math.floor((nowMs - 90 * 86_400_000) / 86_400_000);
const oldMonth = toMonthKey(nowMs - 400 * 86_400_000);
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'session-1', 1, ${nowMs - 90 * 86_400_000}, ${nowMs - 90 * 86_400_000 + 1_000}, 2, ${nowMs}, ${nowMs}
);
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, ${nowMs - 90 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards
) VALUES (
${oldDay}, 1, 1, 10, 1, 1, 1
);
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
${oldMonth}, 1, 1, 10, 1, 1, 1, ${nowMs}, ${nowMs}
);
`);
// Step 1: raw pruning must not delete any rollup rows.
pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 30 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const rollupsAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupsAfterRawPrune?.total, 1);
assert.equal(monthlyAfterRawPrune?.total, 1);
// Step 2: rollup pruning with 30-day / 365-day windows removes both rows.
const rollupPrune = pruneRollupRetention(db, nowMs, {
dailyRollupRetentionMs: 30 * 86_400_000,
monthlyRollupRetentionMs: 365 * 86_400_000,
});
const rollupsAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupPrune.deletedDailyRows, 1);
assert.equal(rollupPrune.deletedMonthlyRows, 1);
assert.equal(rollupsAfterRollupPrune?.total, 0);
assert.equal(monthlyAfterRollupPrune?.total, 0);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Verifies that `ensureSchema` creates `idx_telemetry_sample_ms` on
// `imm_session_telemetry` and that the index covers exactly `sample_ms`.
test('ensureSchema adds sample_ms index for telemetry rollup scans', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// The index must be listed for the table...
const indexes = db.prepare("PRAGMA index_list('imm_session_telemetry')").all() as Array<{
name: string;
}>;
const hasSampleMsIndex = indexes.some((row) => row.name === 'idx_telemetry_sample_ms');
assert.equal(hasSampleMsIndex, true);
// ...and consist of the single column `sample_ms`.
const indexColumns = db.prepare("PRAGMA index_info('idx_telemetry_sample_ms')").all() as Array<{
name: string;
}>;
assert.deepEqual(
indexColumns.map((column) => column.name),
['sample_ms'],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Verifies that `runOptimizeMaintenance` issues exactly one `PRAGMA optimize`
// statement, using a stub that only records what is passed to `exec`.
test('runOptimizeMaintenance executes PRAGMA optimize', () => {
const executedSql: string[] = [];
// Minimal stand-in for the database: only `exec` is implemented.
const db = {
exec(source: string) {
executedSql.push(source);
return this;
},
} as unknown as Parameters<typeof runOptimizeMaintenance>[0];
runOptimizeMaintenance(db);
assert.deepEqual(executedSql, ['PRAGMA optimize']);
});
@@ -18,11 +18,9 @@ interface RollupTelemetryResult {
maxSampleMs: number | null;
}
interface RetentionResult {
interface RawRetentionResult {
deletedSessionEvents: number;
deletedTelemetryRows: number;
deletedDailyRows: number;
deletedMonthlyRows: number;
deletedEndedSessions: number;
}
@@ -31,20 +29,18 @@ export function toMonthKey(timestampMs: number): number {
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
}
export function pruneRetention(
export function pruneRawRetention(
db: DatabaseSync,
nowMs: number,
policy: {
eventsRetentionMs: number;
telemetryRetentionMs: number;
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
sessionsRetentionMs: number;
},
): RetentionResult {
): RawRetentionResult {
const eventCutoff = nowMs - policy.eventsRetentionMs;
const telemetryCutoff = nowMs - policy.telemetryRetentionMs;
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
const sessionsCutoff = nowMs - policy.sessionsRetentionMs;
const deletedSessionEvents = (
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
@@ -56,28 +52,49 @@ export function pruneRetention(
changes: number;
}
).changes;
const deletedDailyRows = (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
).changes;
const deletedMonthlyRows = (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(monthCutoff)) as { changes: number }
).changes;
const deletedEndedSessions = (
db
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
.run(telemetryCutoff) as { changes: number }
.run(sessionsCutoff) as { changes: number }
).changes;
return {
deletedSessionEvents,
deletedTelemetryRows,
deletedEndedSessions,
};
}
export function pruneRollupRetention(
db: DatabaseSync,
nowMs: number,
policy: {
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
},
): { deletedDailyRows: number; deletedMonthlyRows: number } {
const deletedDailyRows = Number.isFinite(policy.dailyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor((nowMs - policy.dailyRollupRetentionMs) / DAILY_MS)) as {
changes: number;
}
).changes
: 0;
const deletedMonthlyRows = Number.isFinite(policy.monthlyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(nowMs - policy.monthlyRollupRetentionMs)) as {
changes: number;
}
).changes
: 0;
return {
deletedDailyRows,
deletedMonthlyRows,
deletedEndedSessions,
};
}
@@ -108,49 +125,57 @@ function upsertDailyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, cards_per_hour,
tokens_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN COALESCE(SUM(sm.max_tokens), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
END AS tokens_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
ELSE NULL
END AS lookup_hit_rate,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
cards_per_hour = excluded.cards_per_hour,
words_per_min = excluded.words_per_min,
tokens_per_min = excluded.tokens_per_min,
lookup_hit_rate = excluded.lookup_hit_rate,
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
@@ -173,29 +198,35 @@ function upsertMonthlyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
@@ -216,8 +247,8 @@ function getAffectedRollupGroups(
.prepare(
`
SELECT DISTINCT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id
FROM imm_session_telemetry t
JOIN imm_sessions s
@@ -292,3 +323,7 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
throw error;
}
}
/**
 * Issues SQLite's `PRAGMA optimize` against the supplied connection.
 *
 * @param db - Open database handle on which the pragma statement is executed.
 */
export function runOptimizeMaintenance(db: DatabaseSync): void {
  const optimizeStatement = 'PRAGMA optimize';
  db.exec(optimizeStatement);
}
@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
import test from 'node:test';
import type { spawn as spawnFn } from 'node:child_process';
import { SOURCE_TYPE_LOCAL } from './types';
import { getLocalVideoMetadata, runFfprobe } from './metadata';
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
type Spawn = typeof spawnFn;
@@ -146,3 +146,83 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
assert.equal(hashFallbackMetadata.hashSha256, null);
});
// Checks the guessit path: the injected runGuessit must be called with the file's
// basename only, and its title/season/episode must be reported with source 'guessit'.
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
// Records every target string handed to the stubbed guessit runner.
const seenTargets: string[] = [];
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
runGuessit: async (target) => {
seenTargets.push(target);
return JSON.stringify({
title: 'Little Witch Academia',
season: 2,
episode: 5,
});
},
},
);
// The directory part of the path must not reach guessit.
assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: 'Little Witch Academia S02E05.mkv',
source: 'guessit',
}),
});
});
// Checks the fallback path: when the injected runGuessit rejects, the result must come
// from the fallback parser and carry its confidence label mapped to a numeric score.
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
// Simulates guessit being unavailable.
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
// 'high' confidence is expected to map to a score of 1.
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
confidence: 'high',
filename: 'Little Witch Academia S02E05.mkv',
rawTitle: 'Little Witch Academia S02E05',
source: 'fallback',
}),
});
});
// Checks that guessit output without a title (only an episode here) is not used:
// the expected result is a low-confidence fallback parse with no season or episode.
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
const parsed = await guessAnimeVideoMetadata('/tmp/[SubsPlease] Frieren - 03 (1080p).mkv', null, {
runGuessit: async () => JSON.stringify({ episode: 3 }),
});
assert.deepEqual(parsed, {
parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
parsedTitle: 'Frieren - 03 (1080p)',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
// 'low' confidence is expected to map to a score of 0.2.
parserConfidence: 0.2,
parseMetadataJson: JSON.stringify({
confidence: 'low',
filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
rawTitle: 'Frieren - 03 (1080p)',
source: 'fallback',
}),
});
});
@@ -1,6 +1,13 @@
import crypto from 'node:crypto';
import { spawn as nodeSpawn } from 'node:child_process';
import * as fs from 'node:fs';
import path from 'node:path';
import { parseMediaInfo } from '../../../jimaku/utils';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from '../anilist/anilist-updater';
import {
deriveCanonicalTitle,
emptyMetadata,
@@ -8,7 +15,12 @@ import {
parseFps,
toNullableInt,
} from './reducer';
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
import {
SOURCE_TYPE_LOCAL,
type ParsedAnimeVideoGuess,
type ProbeMetadata,
type VideoMetadata,
} from './types';
type SpawnFn = typeof nodeSpawn;
@@ -24,6 +36,21 @@ interface MetadataDeps {
fs?: FsDeps;
}
/**
 * Optional dependency overrides accepted by guessAnimeVideoMetadata.
 */
interface GuessAnimeVideoMetadataDeps {
/** Replacement guessit runner; guessAnimeVideoMetadata uses the imported runGuessit when omitted. */
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
/**
 * Translates a parser confidence label into a numeric score.
 *
 * @param confidence - Confidence label reported by the fallback parser.
 * @returns 1 for 'high', 0.6 for 'medium', and 0.2 for every other value.
 */
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  if (confidence === 'high') {
    return 1;
  }
  if (confidence === 'medium') {
    return 0.6;
  }
  return 0.2;
}
export async function computeSha256(
mediaPath: string,
deps: MetadataDeps = {},
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
metadataJson: null,
};
}
/**
 * Guesses anime title/season/episode information for a media item.
 *
 * Delegates the guess to guessAnilistMediaInfo and reshapes its result into a
 * ParsedAnimeVideoGuess. A guess sourced from guessit is reported with
 * confidence 1; any other guess is reported as a fallback parse whose
 * confidence and raw details come from parseMediaInfo.
 *
 * @param mediaPath - Path of the media file, or null when unknown.
 * @param mediaTitle - Title of the media, or null when unknown.
 * @param deps - Optional overrides; `runGuessit` defaults to the imported runGuessit.
 * @returns The reshaped guess, or null when guessAnilistMediaInfo yields nothing.
 */
export async function guessAnimeVideoMetadata(
  mediaPath: string | null,
  mediaTitle: string | null,
  deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
  const guessitRunner = deps.runGuessit ?? runGuessit;
  const guess = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
    runGuessit: guessitRunner,
  });
  if (!guess) {
    return null;
  }

  // Only the file name (not the directory) is recorded alongside the guess.
  let basename: string | null = null;
  if (mediaPath) {
    basename = path.basename(mediaPath);
  }

  if (guess.source !== 'guessit') {
    // Re-run the fallback parser to recover its confidence label and raw title.
    const fallbackInfo = parseMediaInfo(mediaPath ?? mediaTitle);
    const fallbackDetails = JSON.stringify({
      confidence: fallbackInfo.confidence,
      filename: fallbackInfo.filename,
      rawTitle: fallbackInfo.rawTitle,
      source: 'fallback',
    });
    return {
      parsedBasename: basename ?? fallbackInfo.filename ?? null,
      parsedTitle: guess.title,
      parsedSeason: guess.season,
      parsedEpisode: guess.episode,
      parserSource: 'fallback',
      parserConfidence: mapParserConfidenceToScore(fallbackInfo.confidence),
      parseMetadataJson: fallbackDetails,
    };
  }

  const guessitDetails = JSON.stringify({
    filename: basename,
    source: 'guessit',
  });
  return {
    parsedBasename: basename,
    parsedTitle: guess.title,
    parsedSeason: guess.season,
    parsedEpisode: guess.episode,
    parserSource: 'guessit',
    parserConfidence: 1,
    parseMetadataJson: guessitDetails,
  };
}
File diff suppressed because it is too large Load Diff
+2 -11
View File
@@ -15,11 +15,11 @@ export function createInitialSessionState(
totalWatchedMs: 0,
activeWatchedMs: 0,
linesSeen: 0,
wordsSeen: 0,
tokensSeen: 0,
cardsMined: 0,
lookupCount: 0,
lookupHits: 0,
yomitanLookupCount: 0,
pauseCount: 0,
pauseMs: 0,
seekForwardCount: 0,
@@ -30,6 +30,7 @@ export function createInitialSessionState(
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: true,
markedWatched: false,
};
}
@@ -50,16 +51,6 @@ export function sanitizePayload(payload: Record<string, unknown>, maxPayloadByte
return json.length <= maxPayloadBytes ? json : JSON.stringify({ truncated: true });
}
/**
 * Derives rough word and token counts for a piece of text.
 *
 * `words` is the number of whitespace-separated, non-empty chunks. `tokens` is
 * the larger of that word count and the number of characters in the
 * U+3040–U+30FF or U+4E00–U+9FFF ranges, so unspaced text made of those
 * characters is not counted as a single token.
 *
 * @param value - Text to measure.
 * @returns The computed `words` and `tokens` counts.
 */
export function calculateTextMetrics(value: string): {
  words: number;
  tokens: number;
} {
  // Every maximal run of non-whitespace characters counts as one word.
  const wordRuns = value.match(/\S+/g);
  const words = wordRuns === null ? 0 : wordRuns.length;

  // Strip everything outside the counted ranges; what remains is the character count.
  const countedChars = value.replace(/[^\u3040-\u30ff\u4e00-\u9fff]/g, '');
  const tokens = countedChars.length > words ? countedChars.length : words;

  return { words, tokens };
}
export function secToMs(seconds: number): number {
const coerced = Number(seconds);
if (!Number.isFinite(coerced)) return 0;
+34 -1
View File
@@ -39,8 +39,41 @@ export function finalizeSessionRecord(
SET
ended_at_ms = ?,
status = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
tokens_seen = ?,
cards_mined = ?,
lookup_count = ?,
lookup_hits = ?,
yomitan_lookup_count = ?,
pause_count = ?,
pause_ms = ?,
seek_forward_count = ?,
seek_backward_count = ?,
media_buffer_events = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
).run(
endedAtMs,
SESSION_STATUS_ENDED,
sessionState.lastMediaMs,
sessionState.totalWatchedMs,
sessionState.activeWatchedMs,
sessionState.linesSeen,
sessionState.tokensSeen,
sessionState.cardsMined,
sessionState.lookupCount,
sessionState.lookupHits,
sessionState.yomitanLookupCount,
sessionState.pauseCount,
sessionState.pauseMs,
sessionState.seekForwardCount,
sessionState.seekBackwardCount,
sessionState.mediaBufferEvents,
Date.now(),
sessionState.sessionId,
);
}
@@ -6,10 +6,15 @@ import test from 'node:test';
import { Database } from './sqlite';
import { finalizeSessionRecord, startSessionRecord } from './session';
import {
applyPragmas,
createTrackerPreparedStatements,
ensureSchema,
executeQueuedWrite,
normalizeCoverBlobBytes,
parseCoverBlobReference,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
@@ -46,6 +51,34 @@ function cleanupDbPath(dbPath: string): void {
// libsql keeps Windows file handles alive after close when prepared statements were used.
}
// Applies the tracker's pragmas to a fresh database and reads each setting back
// through its PRAGMA query to confirm the expected value took effect.
test('applyPragmas sets the SQLite tuning defaults used by immersion tracking', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
applyPragmas(db);
const journalModeRow = db.prepare('PRAGMA journal_mode').get() as {
journal_mode: string;
};
const synchronousRow = db.prepare('PRAGMA synchronous').get() as { synchronous: number };
const foreignKeysRow = db.prepare('PRAGMA foreign_keys').get() as { foreign_keys: number };
// Note: the busy_timeout pragma is read from a column named `timeout`.
const busyTimeoutRow = db.prepare('PRAGMA busy_timeout').get() as { timeout: number };
const journalSizeLimitRow = db.prepare('PRAGMA journal_size_limit').get() as {
journal_size_limit: number;
};
assert.equal(journalModeRow.journal_mode, 'wal');
assert.equal(synchronousRow.synchronous, 1);
assert.equal(foreignKeysRow.foreign_keys, 1);
assert.equal(busyTimeoutRow.timeout, 2500);
// 67_108_864 bytes = 64 MiB.
assert.equal(journalSizeLimitRow.journal_size_limit, 67_108_864);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema creates immersion core tables', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -60,6 +93,7 @@ test('ensureSchema creates immersion core tables', () => {
const tableNames = new Set(rows.map((row) => row.name));
assert.ok(tableNames.has('imm_videos'));
assert.ok(tableNames.has('imm_anime'));
assert.ok(tableNames.has('imm_sessions'));
assert.ok(tableNames.has('imm_session_telemetry'));
assert.ok(tableNames.has('imm_session_events'));
@@ -67,7 +101,37 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_monthly_rollups'));
assert.ok(tableNames.has('imm_words'));
assert.ok(tableNames.has('imm_kanji'));
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
assert.ok(tableNames.has('imm_cover_art_blobs'));
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const mediaArtColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_media_art)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(mediaArtColumns.has('cover_blob_hash'));
const rollupStateRow = db
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
@@ -82,6 +146,566 @@ test('ensureSchema creates immersion core tables', () => {
}
});
// Confirms that ensureSchema creates the named `idx_*` indexes by listing them
// from sqlite_master after schema creation.
test('ensureSchema creates large-history performance indexes', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// Collect every index whose name starts with `idx_`.
const indexNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'index' AND name LIKE 'idx_%'`)
.all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(indexNames.has('idx_telemetry_sample_ms'));
assert.ok(indexNames.has('idx_sessions_started_at'));
assert.ok(indexNames.has('idx_sessions_ended_at'));
assert.ok(indexNames.has('idx_words_frequency'));
assert.ok(indexNames.has('idx_kanji_frequency'));
assert.ok(indexNames.has('idx_media_art_anilist_id'));
assert.ok(indexNames.has('idx_media_art_cover_url'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Migration test: starts from a schema-version-4 database whose imm_videos table has no
// anime/parser columns, runs ensureSchema, and checks that the new columns exist and that
// anime rows plus parsed metadata were backfilled from the stored file paths.
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
// Hand-build the legacy layout and mark it as schema version 4.
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
const insertLegacyVideo = db.prepare(`
INSERT INTO imm_videos (
video_key, canonical_title, source_type, source_path, source_url,
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
// Two episodes of the same show (SxxEyy naming) ...
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E05.mkv',
'Episode 5',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E05.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E06.mkv',
'Episode 6',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E06.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
// ... and one release-group style file name for a different show.
insertLegacyVideo.run(
'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
'Episode 3',
SOURCE_TYPE_LOCAL,
'/library/[SubsPlease] Frieren - 03 - Departure.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
// Run the migration under test.
ensureSchema(db);
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
// Three videos must collapse into exactly two anime rows.
const animeRows = db
.prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
.all() as Array<{ canonical_title: string }>;
assert.deepEqual(
animeRows.map((row) => row.canonical_title),
['Frieren', 'Little Witch Academia'],
);
const littleWitchRows = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_basename,
v.parsed_season,
v.parsed_episode,
v.parser_source,
v.parser_confidence
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
ORDER BY v.video_key
`,
)
.all() as Array<{
anime_title: string;
parsed_title: string | null;
parsed_basename: string | null;
parsed_season: number | null;
parsed_episode: number | null;
parser_source: string | null;
parser_confidence: number | null;
}>;
assert.equal(littleWitchRows.length, 2);
// parser_confidence is left out of the exact comparison and checked separately below.
assert.deepEqual(
littleWitchRows.map((row) => ({
animeTitle: row.anime_title,
parsedTitle: row.parsed_title,
parsedBasename: row.parsed_basename,
parsedSeason: row.parsed_season,
parsedEpisode: row.parsed_episode,
parserSource: row.parser_source,
})),
[
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
},
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
},
],
);
assert.ok(
littleWitchRows.every(
(row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
),
);
const frierenRow = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_episode,
v.parser_source
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key = ?
`,
)
.get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
anime_title: string;
parsed_title: string | null;
parsed_episode: number | null;
parser_source: string | null;
} | null;
assert.ok(frierenRow);
assert.equal(frierenRow?.anime_title, 'Frieren');
assert.equal(frierenRow?.parsed_title, 'Frieren');
assert.equal(frierenRow?.parsed_episode, 3);
assert.equal(frierenRow?.parser_source, 'fallback');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Migration test: builds a database marked as schema version 6 that has no subtitle-line
// or occurrence tables, runs ensureSchema, and checks that those three tables now exist.
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
// Hand-built version-6 layout; imm_subtitle_lines and the *_line_occurrences tables are absent.
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_sessions(
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_uuid TEXT NOT NULL UNIQUE,
video_id INTEGER NOT NULL,
started_at_ms INTEGER NOT NULL,
ended_at_ms INTEGER,
status INTEGER NOT NULL,
locale_id INTEGER,
target_lang_id INTEGER,
difficulty_tier INTEGER,
subtitle_mode INTEGER,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_session_events(
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
ts_ms INTEGER NOT NULL,
event_type INTEGER NOT NULL,
line_index INTEGER,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
words_delta INTEGER NOT NULL DEFAULT 0,
cards_delta INTEGER NOT NULL DEFAULT 0,
payload_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_words(
id INTEGER PRIMARY KEY AUTOINCREMENT,
headword TEXT,
word TEXT,
reading TEXT,
part_of_speech TEXT,
pos1 TEXT,
pos2 TEXT,
pos3 TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(headword, word, reading)
);
CREATE TABLE imm_kanji(
id INTEGER PRIMARY KEY AUTOINCREMENT,
kanji TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(kanji)
);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
`);
// Run the migration under test.
ensureSchema(db);
const tableNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`)
.all() as Array<{ name: string }>
).map((row) => row.name),
);
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Migration test: creates the current schema, rewinds the recorded schema version to 12,
// stores a cover image inline in imm_media_art with no hash, then re-runs ensureSchema and
// checks that the bytes moved to imm_cover_art_blobs and the art row now references them.
test('ensureSchema migrates legacy cover art blobs into the shared blob store', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// Pretend the database is still on schema version 12 so the next ensureSchema migrates it.
db.prepare('UPDATE imm_schema_version SET schema_version = 12').run();
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/legacy-cover-art.mkv', {
canonicalTitle: 'Legacy Cover Art',
sourcePath: '/tmp/legacy-cover-art.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
// Legacy row: raw image bytes in cover_blob and a null cover_blob_hash.
const legacyBlob = Uint8Array.from([0xde, 0xad, 0xbe, 0xef]);
db.prepare(
`
INSERT INTO imm_media_art (
video_id,
anilist_id,
cover_url,
cover_blob,
cover_blob_hash,
title_romaji,
title_english,
episodes_total,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(videoId, null, null, legacyBlob, null, null, null, null, 1, 1, 1);
assert.doesNotThrow(() => ensureSchema(db));
const mediaArtRow = db
.prepare(
'SELECT cover_blob AS coverBlob, cover_blob_hash AS coverBlobHash FROM imm_media_art',
)
.get() as {
coverBlob: ArrayBuffer | Uint8Array | Buffer | null;
coverBlobHash: string | null;
} | null;
assert.ok(mediaArtRow);
assert.ok(mediaArtRow?.coverBlobHash);
// After migration the art row's cover_blob must parse as a reference equal to its hash.
assert.equal(
parseCoverBlobReference(normalizeCoverBlobBytes(mediaArtRow?.coverBlob)),
mediaArtRow?.coverBlobHash,
);
const sharedBlobRow = db
.prepare('SELECT cover_blob AS coverBlob FROM imm_cover_art_blobs WHERE blob_hash = ?')
.get(mediaArtRow?.coverBlobHash) as {
coverBlob: ArrayBuffer | Uint8Array | Buffer;
} | null;
assert.ok(sharedBlobRow);
// The shared store must hold the original four bytes unchanged.
assert.equal(normalizeCoverBlobBytes(sharedBlobRow?.coverBlob)?.toString('hex'), 'deadbeef');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
// Checks getOrCreateAnimeRecord / linkVideoToAnimeRecord together: the same anime row must be
// returned for a differently cased/padded title, a later call carrying AniList data must
// update that row in place, and both videos must end up linked to it.
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
canonicalTitle: 'Episode 5',
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
canonicalTitle: 'Episode 6',
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
// First sighting: no AniList data yet, only the parsed title.
const provisionalAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, firstVideoId, {
animeId: provisionalAnimeId,
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":5}',
});
// Same title with different casing and surrounding whitespace must resolve to the same row.
const reusedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: ' little witch academia ',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, secondVideoId, {
animeId: reusedAnimeId,
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":6}',
});
assert.equal(reusedAnimeId, provisionalAnimeId);
// Third call supplies AniList metadata; it must update the existing row, not create a new one.
const upgradedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia TV',
anilistId: 33_435,
titleRomaji: 'Little Witch Academia',
titleEnglish: 'Little Witch Academia',
titleNative: 'リトルウィッチアカデミア',
metadataJson: '{"source":"anilist"}',
});
assert.equal(upgradedAnimeId, provisionalAnimeId);
const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
anime_id: number;
normalized_title_key: string;
canonical_title: string;
anilist_id: number | null;
title_romaji: string | null;
title_english: string | null;
title_native: string | null;
metadata_json: string | null;
}>;
assert.equal(animeRows.length, 1);
assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
assert.equal(animeRows[0]?.anilist_id, 33_435);
assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
// Both videos must still point at the single anime row with their own parsed metadata.
const linkedVideos = db
.prepare(
`
SELECT anime_id, parsed_title, parsed_season, parsed_episode
FROM imm_videos
WHERE video_id IN (?, ?)
ORDER BY video_id
`,
)
.all(firstVideoId, secondVideoId) as Array<{
anime_id: number | null;
parsed_title: string | null;
parsed_season: number | null;
parsed_episode: number | null;
}>;
assert.deepEqual(linkedVideos, [
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 5,
},
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 6,
},
]);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -116,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
}
});
// Checks that finalizeSessionRecord writes the session state's lastMediaMs into the
// ended_media_ms column of imm_sessions.
test('finalize session persists ended media position', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
canonicalTitle: 'Slice A Ended Media',
sourcePath: '/tmp/slice-a-ended-media.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 1_234_567_000;
const endedAtMs = startedAtMs + 8_500;
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
// Set the playback position directly on the in-memory state before finalizing.
state.lastMediaMs = 91_000;
finalizeSessionRecord(db, state, endedAtMs);
const row = db
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
.get(sessionId) as {
ended_media_ms: number | null;
} | null;
assert.ok(row);
assert.equal(row?.ended_media_ms, 91_000);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('executeQueuedWrite inserts event and telemetry rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -139,11 +796,11 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
totalWatchedMs: 1_000,
activeWatchedMs: 900,
linesSeen: 3,
wordsSeen: 6,
tokensSeen: 6,
cardsMined: 1,
lookupCount: 2,
lookupHits: 1,
yomitanLookupCount: 0,
pauseCount: 1,
pauseMs: 50,
seekForwardCount: 0,
@@ -161,7 +818,7 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
lineIndex: 1,
segmentStartMs: 0,
segmentEndMs: 800,
wordsDelta: 2,
tokensDelta: 2,
cardsDelta: 0,
payloadJson: '{"event":"subtitle-line"}',
},
@@ -191,18 +848,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
const wordRow = db
.prepare(
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
`SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
FROM imm_words WHERE headword = ?`,
)
.get('猫') as {
headword: string;
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
first_seen: number;
last_seen: number;
} | null;
@@ -218,6 +879,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
assert.ok(wordRow);
assert.ok(kanjiRow);
assert.equal(wordRow?.frequency, 2);
assert.equal(wordRow?.part_of_speech, 'noun');
assert.equal(wordRow?.pos1, '名詞');
assert.equal(wordRow?.pos2, '一般');
assert.equal(kanjiRow?.frequency, 2);
assert.equal(wordRow?.first_seen, 5);
assert.equal(wordRow?.last_seen, 15);
@@ -228,3 +892,54 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
cleanupDbPath(dbPath);
}
});
// Upserts the same headword/word/reading twice — first with part_of_speech 'other', then
// with 'verb' — and checks that the stored row counts two occurrences and keeps 'verb'.
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
// First write: generic 'other' part of speech.
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'other',
'動詞',
'自立',
'',
10,
10,
);
// Second write for the same word: a specific 'verb' part of speech.
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'verb',
'動詞',
'自立',
'',
11,
12,
);
const row = db
.prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
.get('知っている') as {
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
} | null;
assert.ok(row);
assert.equal(row?.frequency, 2);
assert.equal(row?.part_of_speech, 'verb');
assert.equal(row?.pos1, '動詞');
assert.equal(row?.pos2, '自立');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
File diff suppressed because it is too large Load Diff
+392 -10
View File
@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 3;
export const SCHEMA_VERSION = 15;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -7,6 +7,7 @@ const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
export const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
export const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
export const DEFAULT_TELEMETRY_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_SESSIONS_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MAX_PAYLOAD_BYTES = 256;
@@ -25,10 +26,14 @@ export const EVENT_SEEK_FORWARD = 5;
export const EVENT_SEEK_BACKWARD = 6;
export const EVENT_PAUSE_START = 7;
export const EVENT_PAUSE_END = 8;
export const EVENT_YOMITAN_LOOKUP = 9;
export interface ImmersionTrackerOptions {
dbPath: string;
policy?: ImmersionTrackerPolicy;
resolveLegacyVocabularyPos?: (
row: LegacyVocabularyPosRow,
) => Promise<LegacyVocabularyPosResolution | null>;
}
export interface ImmersionTrackerPolicy {
@@ -40,6 +45,7 @@ export interface ImmersionTrackerPolicy {
retention?: {
eventsDays?: number;
telemetryDays?: number;
sessionsDays?: number;
dailyRollupsDays?: number;
monthlyRollupsDays?: number;
vacuumIntervalDays?: number;
@@ -50,11 +56,11 @@ export interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
@@ -72,20 +78,22 @@ export interface SessionState extends TelemetryAccumulator {
lastPauseStartMs: number | null;
isPaused: boolean;
pendingTelemetry: boolean;
markedWatched: boolean;
}
interface QueuedTelemetryWrite {
kind: 'telemetry';
sessionId: number;
sampleMs?: number;
lastMediaMs?: number | null;
totalWatchedMs?: number;
activeWatchedMs?: number;
linesSeen?: number;
wordsSeen?: number;
tokensSeen?: number;
cardsMined?: number;
lookupCount?: number;
lookupHits?: number;
yomitanLookupCount?: number;
pauseCount?: number;
pauseMs?: number;
seekForwardCount?: number;
@@ -95,7 +103,7 @@ interface QueuedTelemetryWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -108,7 +116,7 @@ interface QueuedEventWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -118,8 +126,13 @@ interface QueuedWordWrite {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
firstSeen: number;
lastSeen: number;
frequencyRank: number | null;
}
interface QueuedKanjiWrite {
@@ -129,11 +142,44 @@ interface QueuedKanjiWrite {
lastSeen: number;
}
/**
 * A word together with how many times it occurred.
 * Carried in the `wordOccurrences` list of a queued subtitle-line write.
 */
export interface CountedWordOccurrence {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
// NOTE(review): pos1–pos3 look like tokenizer part-of-speech sub-categories — confirm.
pos1: string;
pos2: string;
pos3: string;
/** Number of occurrences being reported for this word. */
occurrenceCount: number;
/** Frequency rank for the word, or null when none is available. */
frequencyRank: number | null;
}
/**
 * A kanji together with how many times it occurred.
 * Carried in the `kanjiOccurrences` list of a queued subtitle-line write.
 */
export interface CountedKanjiOccurrence {
kanji: string;
/** Number of occurrences being reported for this kanji. */
occurrenceCount: number;
}
/**
 * Queued write describing one subtitle line and the words/kanji counted in it.
 * Distinguished from other queued writes by `kind: 'subtitleLine'`.
 */
interface QueuedSubtitleLineWrite {
/** Discriminant tag for this write variant. */
kind: 'subtitleLine';
sessionId: number;
videoId: number;
lineIndex: number;
// Segment bounds are nullable.
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
secondaryText?: string | null;
wordOccurrences: CountedWordOccurrence[];
kanjiOccurrences: CountedKanjiOccurrence[];
// NOTE(review): firstSeen/lastSeen units are not visible here — confirm against the writer.
firstSeen: number;
lastSeen: number;
}
export type QueuedWrite =
| QueuedTelemetryWrite
| QueuedEventWrite
| QueuedWordWrite
| QueuedKanjiWrite;
| QueuedKanjiWrite
| QueuedSubtitleLineWrite;
export interface VideoMetadata {
sourceType: number;
@@ -152,18 +198,173 @@ export interface VideoMetadata {
metadataJson: string | null;
}
/**
 * Parsed anime metadata attached to a video; every field may be null.
 * Shares the `parsed*` / `parser*` fields with `ParsedAnimeVideoGuess`, where
 * several of them are required, and adds a nullable `animeId`.
 */
export interface ParsedAnimeVideoMetadata {
animeId: number | null;
parsedBasename: string | null;
parsedTitle: string | null;
parsedSeason: number | null;
parsedEpisode: number | null;
parserSource: string | null;
parserConfidence: number | null;
parseMetadataJson: string | null;
}
/**
 * A parser's guess for a video. Title, source, confidence and the metadata
 * JSON are always present; basename, season and episode may be null.
 */
export interface ParsedAnimeVideoGuess {
parsedBasename: string | null;
parsedTitle: string;
parsedSeason: number | null;
parsedEpisode: number | null;
// Which parser produced the guess.
parserSource: 'guessit' | 'fallback';
// NOTE(review): value range is not visible here — confirm (0..1?).
parserConfidence: number;
parseMetadataJson: string;
}
/**
 * Summary row for one session: identifiers and titles, start/end timestamps,
 * and the session's counters. Video and anime fields may be null, and
 * `endedAtMs` may be null.
 */
export interface SessionSummaryQueryRow {
sessionId: number;
videoId: number | null;
canonicalTitle: string | null;
animeId: number | null;
animeTitle: string | null;
startedAtMs: number;
endedAtMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
// The two known-word fields are optional and may be absent from a row.
knownWordsSeen?: number;
knownWordRate?: number;
}
/**
 * Lifetime totals that are not scoped to a single anime or video.
 * `lastRebuiltMs` may be null.
 */
export interface LifetimeGlobalRow {
totalSessions: number;
totalActiveMs: number;
totalCards: number;
activeDays: number;
episodesStarted: number;
episodesCompleted: number;
animeCompleted: number;
lastRebuiltMs: number | null;
}
/**
 * Lifetime totals keyed by `animeId`. The first/last watched timestamps may be null.
 */
export interface LifetimeAnimeRow {
animeId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
episodesStarted: number;
episodesCompleted: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
/**
 * Lifetime totals keyed by `videoId`. The first/last watched timestamps may be null.
 */
export interface LifetimeMediaRow {
videoId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
// Typed as a number, not a boolean. NOTE(review): presumably a 0/1 flag — confirm.
completed: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
/** Pairs a session id with the timestamp (ms) at which it was applied. */
export interface AppliedSessionRow {
sessionId: number;
appliedAtMs: number;
}
/** Result of a lifetime rebuild: number of sessions applied and the rebuild timestamp (ms). */
export interface LifetimeRebuildSummary {
appliedSessions: number;
rebuiltAtMs: number;
}
/**
 * Vocabulary statistics for one word. Unlike `CountedWordOccurrence`, the
 * part-of-speech fields here may be null.
 */
export interface VocabularyStatsRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
frequencyRank: number | null;
animeCount: number;
firstSeen: number;
lastSeen: number;
}
/** Counters reported by a vocabulary cleanup: rows scanned, kept, deleted and repaired. */
export interface VocabularyCleanupSummary {
scanned: number;
kept: number;
deleted: number;
repaired: number;
}
/**
 * Identifying fields of a vocabulary row without part-of-speech data; `reading` may be null.
 * NOTE(review): presumably the input that `LegacyVocabularyPosResolution` is computed for — confirm.
 */
export interface LegacyVocabularyPosRow {
headword: string;
word: string;
reading: string | null;
}
/** Resolved headword, reading and part-of-speech fields for a vocabulary entry; all fields are required strings. */
export interface LegacyVocabularyPosResolution {
headword: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
}
/** Statistics for one kanji: id, character, frequency and first/last seen values. */
export interface KanjiStatsRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
/**
 * One subtitle line in which a word occurred, with its anime/video context and
 * the occurrence count. Has the same field list as `KanjiOccurrenceRow`.
 */
export interface WordOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
/**
 * One subtitle line in which a kanji occurred, with its anime/video context and
 * the occurrence count. Has the same field list as `WordOccurrenceRow`.
 */
export interface KanjiOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
/** A session event: numeric event type, timestamp (ms) and a payload string that may be null. */
export interface SessionEventRow {
eventType: number;
tsMs: number;
payload: string | null;
}
export interface SessionTimelineRow {
@@ -171,7 +372,6 @@ export interface SessionTimelineRow {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
}
@@ -182,11 +382,10 @@ export interface ImmersionSessionRollupRow {
totalSessions: number;
totalActiveMin: number;
totalLinesSeen: number;
totalWordsSeen: number;
totalTokensSeen: number;
totalCards: number;
cardsPerHour: number | null;
wordsPerMin: number | null;
tokensPerMin: number | null;
lookupHitRate: number | null;
}
@@ -200,3 +399,186 @@ export interface ProbeMetadata {
bitrateKbps: number | null;
audioCodecId: number | null;
}
/**
 * Cover art and title metadata stored for a video. Everything except
 * `videoId` and `fetchedAtMs` may be null.
 */
export interface MediaArtRow {
videoId: number;
anilistId: number | null;
coverUrl: string | null;
// Cover image bytes as a Buffer, or null.
coverBlob: Buffer | null;
titleRomaji: string | null;
titleEnglish: string | null;
episodesTotal: number | null;
fetchedAtMs: number;
}
/** Library listing entry for one video with its aggregate totals. */
export interface MediaLibraryRow {
videoId: number;
canonicalTitle: string;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
lastWatchedMs: number;
// Typed as a number, not a boolean. NOTE(review): presumably a 0/1 flag — confirm.
hasCoverArt: number;
}
/** Detail view totals for one video; `animeId` may be null. */
export interface MediaDetailRow {
videoId: number;
canonicalTitle: string;
animeId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
}
/**
 * Library listing entry for one anime with its aggregate totals.
 * `anilistId` and `episodesTotal` may be null; `episodeCount` is always present.
 */
export interface AnimeLibraryRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
episodeCount: number;
episodesTotal: number | null;
lastWatchedMs: number;
}
/**
 * Detail view for one anime: nullable AniList id, titles and description,
 * followed by aggregate totals.
 */
export interface AnimeDetailRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
titleRomaji: string | null;
titleEnglish: string | null;
titleNative: string | null;
description: string | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
episodeCount: number;
lastWatchedMs: number;
}
/** An AniList entry: required `anilistId` plus titles and season, each of which may be null. */
export interface AnimeAnilistEntryRow {
anilistId: number;
titleRomaji: string | null;
titleEnglish: string | null;
season: number | null;
}
/**
 * One episode (video) of an anime with its parsed season/episode numbers and
 * aggregate totals. Parsed title, season, episode and `endedMediaMs` may be null.
 */
export interface AnimeEpisodeRow {
animeId: number;
videoId: number;
canonicalTitle: string;
parsedTitle: string | null;
season: number | null;
episode: number | null;
durationMs: number;
endedMediaMs: number | null;
// Typed as a number, not a boolean. NOTE(review): presumably a 0/1 flag — confirm.
watched: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalYomitanLookupCount: number;
lastWatchedMs: number;
}
/**
 * Active minutes for a single day.
 * NOTE(review): `epochDay` is presumably days since the Unix epoch — confirm the timezone convention.
 */
export interface StreakCalendarRow {
epochDay: number;
totalActiveMin: number;
}
/** A word with its frequency, as listed for an anime; `partOfSpeech` may be null. */
export interface AnimeWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
frequency: number;
}
/** Episode count for a single `epochDay`. */
export interface EpisodesPerDayRow {
epochDay: number;
episodeCount: number;
}
/** Count of new anime for a single `epochDay`. */
export interface NewAnimePerDayRow {
epochDay: number;
newAnimeCount: number;
}
/** Active minutes for one anime on a single `epochDay`. */
export interface WatchTimePerAnimeRow {
epochDay: number;
animeId: number;
animeTitle: string;
totalActiveMin: number;
}
/**
 * Detail view for one word. Has the fields of `VocabularyStatsRow` except
 * `frequencyRank` and `animeCount`; the part-of-speech fields may be null.
 */
export interface WordDetailRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
firstSeen: number;
lastSeen: number;
}
/** Occurrence count of a word within one anime. Same field list as `KanjiAnimeAppearanceRow`. */
export interface WordAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
/**
 * A word listed as similar to another word, with its frequency.
 * NOTE(review): the similarity criterion is not visible in this file — see the producing query.
 */
export interface SimilarWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
/** Detail view for one kanji. Has the same field list as `KanjiStatsRow`. */
export interface KanjiDetailRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
/** Occurrence count of a kanji within one anime. Same field list as `WordAnimeAppearanceRow`. */
export interface KanjiAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
/**
 * A word listed for a kanji, with its frequency. Same field list as `SimilarWordRow`.
 * NOTE(review): presumably words containing the kanji — confirm against the producing query.
 */
export interface KanjiWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
/**
 * A card event within a session: event id, session id, timestamp (ms), the
 * card delta and the list of note ids attached to the event.
 */
export interface EpisodeCardEventRow {
eventId: number;
sessionId: number;
tsMs: number;
cardsDelta: number;
noteIds: number[];
}
+4 -1
View File
@@ -29,7 +29,10 @@ export {
} from './startup';
export { openYomitanSettingsWindow } from './yomitan-settings';
export { createTokenizerDepsRuntime, tokenizeSubtitle } from './tokenizer';
export { clearYomitanParserCachesForWindow } from './tokenizer/yomitan-parser-runtime';
export {
addYomitanNoteViaSearch,
clearYomitanParserCachesForWindow,
} from './tokenizer/yomitan-parser-runtime';
export {
deleteYomitanDictionaryByTitle,
getYomitanDictionaryInfo,
+284 -83
View File
@@ -1,7 +1,7 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { createIpcDepsRuntime, registerIpcHandlers } from './ipc';
import { createIpcDepsRuntime, registerIpcHandlers, type IpcServiceDeps } from './ipc';
import { IPC_CHANNELS } from '../../shared/ipc/contracts';
interface FakeIpcRegistrar {
@@ -77,6 +77,90 @@ function createControllerConfigFixture() {
};
}
/**
 * Builds a complete `IpcServiceDeps` stub for `registerIpcHandlers` tests.
 * Every dependency is an inert default (no immersion tracker); any entry in
 * `overrides` replaces the default of the same name.
 */
function createRegisterIpcDeps(overrides: Partial<IpcServiceDeps> = {}): IpcServiceDeps {
  const inertDeps: IpcServiceDeps = {
    // Window / app lifecycle
    onOverlayModalClosed: () => {},
    openYomitanSettings: () => {},
    quitApp: () => {},
    toggleDevTools: () => {},
    getVisibleOverlayVisibility: () => false,
    toggleVisibleOverlay: () => {},
    // Subtitles and playback
    tokenizeCurrentSubtitle: async () => null,
    getCurrentSubtitleRaw: () => '',
    getCurrentSubtitleAss: () => '',
    getPlaybackPaused: () => false,
    getSubtitlePosition: () => null,
    getSubtitleStyle: () => null,
    saveSubtitlePosition: () => {},
    getMecabStatus: () => ({ available: false, enabled: false, path: null }),
    setMecabEnabled: () => {},
    handleMpvCommand: () => {},
    // Shortcuts and controller configuration
    getKeybindings: () => [],
    getConfiguredShortcuts: () => ({}),
    getStatsToggleKey: () => 'Backquote',
    getMarkWatchedKey: () => 'KeyW',
    getControllerConfig: () => createControllerConfigFixture(),
    saveControllerConfig: async () => {},
    saveControllerPreference: async () => {},
    // Secondary subtitles, subsync, Anki, runtime options
    getSecondarySubMode: () => 'hover',
    getCurrentSecondarySub: () => '',
    focusMainWindow: () => {},
    runSubsyncManual: async () => ({ ok: true, message: 'ok' }),
    getAnkiConnectStatus: () => false,
    getRuntimeOptions: () => [],
    setRuntimeOption: () => ({ ok: true }),
    cycleRuntimeOption: () => ({ ok: true }),
    reportOverlayContentBounds: () => {},
    // AniList
    getAnilistStatus: () => ({}),
    clearAnilistToken: () => {},
    openAnilistSetup: () => {},
    getAnilistQueueStatus: () => ({}),
    retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
    appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
    // Stats
    immersionTracker: null,
  };
  return { ...inertDeps, ...overrides };
}
/**
 * Builds a fake immersion tracker for IPC tests. Every query resolves to an
 * empty list, `null`, `false`, or all-zero hints; any entry in `overrides`
 * replaces the default of the same name.
 */
function createFakeImmersionTracker(
  overrides: Partial<NonNullable<IpcServiceDeps['immersionTracker']>> = {},
): NonNullable<IpcServiceDeps['immersionTracker']> {
  const resolveEmptyList = async () => [];
  const resolveNull = async () => null;
  const emptyTracker: NonNullable<IpcServiceDeps['immersionTracker']> = {
    recordYomitanLookup: () => {},
    getSessionSummaries: resolveEmptyList,
    getDailyRollups: resolveEmptyList,
    getMonthlyRollups: resolveEmptyList,
    // A fresh hints object is produced on every call.
    getQueryHints: async () => ({
      totalSessions: 0,
      activeSessions: 0,
      episodesToday: 0,
      activeAnimeCount: 0,
      totalActiveMin: 0,
      totalCards: 0,
      activeDays: 0,
      totalEpisodesWatched: 0,
      totalAnimeCompleted: 0,
      totalTokensSeen: 0,
      totalLookupCount: 0,
      totalLookupHits: 0,
      totalYomitanLookupCount: 0,
      newWordsToday: 0,
      newWordsThisWeek: 0,
    }),
    getSessionTimeline: resolveEmptyList,
    getSessionEvents: resolveEmptyList,
    getVocabularyStats: resolveEmptyList,
    getKanjiStats: resolveEmptyList,
    getMediaLibrary: resolveEmptyList,
    getMediaDetail: resolveNull,
    getMediaSessions: resolveEmptyList,
    getMediaDailyRollups: resolveEmptyList,
    getCoverArt: resolveNull,
    markActiveVideoWatched: async () => false,
  };
  return { ...emptyTracker, ...overrides };
}
test('createIpcDepsRuntime wires AniList handlers', async () => {
const calls: string[] = [];
const deps = createIpcDepsRuntime({
@@ -97,6 +181,8 @@ test('createIpcDepsRuntime wires AniList handlers', async () => {
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: () => {},
saveControllerPreference: () => {},
@@ -164,6 +250,8 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: () => {},
saveControllerPreference: () => {},
@@ -232,6 +320,194 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
);
});
// The fire-and-forget `recordYomitanLookup` command must reach the tracker exactly once.
test('registerIpcHandlers forwards yomitan lookup tracking commands to immersion tracker', () => {
  const calls: string[] = [];
  const { registrar, handlers } = createFakeIpcRegistrar();
  const immersionTracker = createFakeImmersionTracker({
    recordYomitanLookup: () => {
      calls.push('lookup');
    },
  });
  registerIpcHandlers(createRegisterIpcDeps({ immersionTracker }), registrar);

  const lookupHandler = handlers.on.get(IPC_CHANNELS.command.recordYomitanLookup);
  assert.equal(typeof lookupHandler, 'function');

  lookupHandler?.({}, null);
  assert.deepEqual(calls, ['lookup']);
});
// With the default deps (no immersion tracker) the overview still has its full shape:
// empty lists plus zeroed hints.
test('registerIpcHandlers returns empty stats overview shape without a tracker', async () => {
  const zeroedHints = {
    totalSessions: 0,
    activeSessions: 0,
    episodesToday: 0,
    activeAnimeCount: 0,
    totalCards: 0,
    totalActiveMin: 0,
    activeDays: 0,
    totalEpisodesWatched: 0,
    totalAnimeCompleted: 0,
    totalTokensSeen: 0,
    totalLookupCount: 0,
    totalLookupHits: 0,
    totalYomitanLookupCount: 0,
    newWordsToday: 0,
    newWordsThisWeek: 0,
  };
  const { registrar, handlers } = createFakeIpcRegistrar();
  registerIpcHandlers(createRegisterIpcDeps(), registrar);

  const overviewHandler = handlers.handle.get(IPC_CHANNELS.request.statsGetOverview);
  assert.ok(overviewHandler);

  const overview = await overviewHandler!({});
  assert.deepEqual(overview, { sessions: [], rollups: [], hints: zeroedHints });
});
// Each stats channel must replace an invalid limit (negative, zero, fractional, NaN,
// Infinity) with that channel's default and cap an oversized limit at its maximum.
// Only the tracker methods under observation are overridden; the rest come from
// createFakeImmersionTracker so the stub cannot drift from the tracker contract.
test('registerIpcHandlers validates and clamps stats request limits', async () => {
  const { registrar, handlers } = createFakeIpcRegistrar();
  const calls: Array<[string, number, number?]> = [];
  registerIpcHandlers(
    createRegisterIpcDeps({
      immersionTracker: createFakeImmersionTracker({
        getSessionSummaries: async (limit = 0) => {
          calls.push(['sessions', limit]);
          return [];
        },
        getDailyRollups: async (limit = 0) => {
          calls.push(['daily', limit]);
          return [];
        },
        getMonthlyRollups: async (limit = 0) => {
          calls.push(['monthly', limit]);
          return [];
        },
        getSessionTimeline: async (sessionId: number, limit = 0) => {
          calls.push(['timeline', limit, sessionId]);
          return [];
        },
        getSessionEvents: async (sessionId: number, limit = 0) => {
          calls.push(['events', limit, sessionId]);
          return [];
        },
        getVocabularyStats: async (limit = 0) => {
          calls.push(['vocabulary', limit]);
          return [];
        },
        getKanjiStats: async (limit = 0) => {
          calls.push(['kanji', limit]);
          return [];
        },
      }),
    }),
    registrar,
  );

  await handlers.handle.get(IPC_CHANNELS.request.statsGetDailyRollups)!({}, -1);
  await handlers.handle.get(IPC_CHANNELS.request.statsGetMonthlyRollups)!(
    {},
    Number.POSITIVE_INFINITY,
  );
  await handlers.handle.get(IPC_CHANNELS.request.statsGetSessions)!({}, 9999);
  await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, 12.5);
  await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionEvents)!({}, 7, 0);
  await handlers.handle.get(IPC_CHANNELS.request.statsGetVocabulary)!({}, 1000);
  await handlers.handle.get(IPC_CHANNELS.request.statsGetKanji)!({}, NaN);

  assert.deepEqual(calls, [
    ['daily', 60],
    ['monthly', 24],
    ['sessions', 500],
    ['timeline', 200, 7],
    ['events', 500, 7],
    ['vocabulary', 500],
    ['kanji', 100],
  ]);
});
// An omitted (undefined) timeline limit must be forwarded as `undefined` rather than
// replaced by the default, so the tracker can return the full timeline.
// Only getSessionTimeline is overridden; the rest of the stub comes from
// createFakeImmersionTracker.
test('registerIpcHandlers requests the full timeline when no limit is provided', async () => {
  const { registrar, handlers } = createFakeIpcRegistrar();
  const calls: Array<[string, number | undefined, number]> = [];
  registerIpcHandlers(
    createRegisterIpcDeps({
      immersionTracker: createFakeImmersionTracker({
        getSessionTimeline: async (sessionId: number, limit?: number) => {
          calls.push(['timeline', limit, sessionId]);
          return [];
        },
      }),
    }),
    registrar,
  );

  await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, undefined);

  assert.deepEqual(calls, [['timeline', undefined, 7]]);
});
test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
const { registrar, handlers } = createFakeIpcRegistrar();
const saves: unknown[] = [];
@@ -265,10 +541,10 @@ test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: (update) => {
controllerSaves.push(update);
},
saveControllerConfig: () => {},
saveControllerPreference: (update) => {
controllerSaves.push(update);
},
@@ -329,6 +605,8 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async () => {},
saveControllerPreference: async (update) => {
@@ -376,85 +654,6 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
]);
});
// The request-response `saveControllerConfig` channel must reject a malformed payload
// with "Invalid controller config payload" and must await the save of a valid payload,
// forwarding it unchanged.
// Deps come from createRegisterIpcDeps so that every member required by IpcServiceDeps
// (including getStatsToggleKey / getMarkWatchedKey) is present; only the save hook
// under test is overridden.
test('registerIpcHandlers awaits saveControllerConfig through request-response IPC', async () => {
  const { registrar, handlers } = createFakeIpcRegistrar();
  const controllerConfigSaves: unknown[] = [];
  registerIpcHandlers(
    createRegisterIpcDeps({
      saveControllerConfig: async (update) => {
        // Yield once so the assertion below only passes if the handler awaits the save.
        await Promise.resolve();
        controllerConfigSaves.push(update);
      },
    }),
    registrar,
  );

  const saveHandler = handlers.handle.get(IPC_CHANNELS.command.saveControllerConfig);
  assert.ok(saveHandler);

  await assert.rejects(
    async () => {
      await saveHandler!({}, { bindings: { toggleLookup: { kind: 'button', buttonIndex: -1 } } });
    },
    /Invalid controller config payload/,
  );

  await saveHandler!({}, {
    preferredGamepadId: 'pad-2',
    bindings: {
      toggleLookup: { kind: 'button', buttonIndex: 11 },
      closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
      leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
    },
  });

  assert.deepEqual(controllerConfigSaves, [
    {
      preferredGamepadId: 'pad-2',
      bindings: {
        toggleLookup: { kind: 'button', buttonIndex: 11 },
        closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
        leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
      },
    },
  ]);
});
test('registerIpcHandlers rejects malformed controller preference payloads', async () => {
const { registrar, handlers } = createFakeIpcRegistrar();
registerIpcHandlers(
@@ -477,6 +676,8 @@ test('registerIpcHandlers rejects malformed controller preference payloads', asy
handleMpvCommand: () => {},
getKeybindings: () => [],
getConfiguredShortcuts: () => ({}),
getStatsToggleKey: () => 'Backquote',
getMarkWatchedKey: () => 'KeyW',
getControllerConfig: () => createControllerConfigFixture(),
saveControllerConfig: async () => {},
saveControllerPreference: async () => {},
+188
View File
@@ -50,6 +50,8 @@ export interface IpcServiceDeps {
handleMpvCommand: (command: Array<string | number>) => void;
getKeybindings: () => unknown;
getConfiguredShortcuts: () => unknown;
getStatsToggleKey: () => string;
getMarkWatchedKey: () => string;
getControllerConfig: () => ResolvedControllerConfig;
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
@@ -68,6 +70,39 @@ export interface IpcServiceDeps {
getAnilistQueueStatus: () => unknown;
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
immersionTracker?: {
recordYomitanLookup: () => void;
getSessionSummaries: (limit?: number) => Promise<unknown>;
getDailyRollups: (limit?: number) => Promise<unknown>;
getMonthlyRollups: (limit?: number) => Promise<unknown>;
getQueryHints: () => Promise<{
totalSessions: number;
activeSessions: number;
episodesToday: number;
activeAnimeCount: number;
totalActiveMin: number;
totalCards: number;
activeDays: number;
totalEpisodesWatched: number;
totalAnimeCompleted: number;
totalTokensSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
newWordsToday: number;
newWordsThisWeek: number;
}>;
getSessionTimeline: (sessionId: number, limit?: number) => Promise<unknown>;
getSessionEvents: (sessionId: number, limit?: number) => Promise<unknown>;
getVocabularyStats: (limit?: number) => Promise<unknown>;
getKanjiStats: (limit?: number) => Promise<unknown>;
getMediaLibrary: () => Promise<unknown>;
getMediaDetail: (videoId: number) => Promise<unknown>;
getMediaSessions: (videoId: number, limit?: number) => Promise<unknown>;
getMediaDailyRollups: (videoId: number, limit?: number) => Promise<unknown>;
getCoverArt: (videoId: number) => Promise<unknown>;
markActiveVideoWatched: () => Promise<boolean>;
} | null;
}
interface WindowLike {
@@ -116,6 +151,8 @@ export interface IpcDepsRuntimeOptions {
handleMpvCommand: (command: Array<string | number>) => void;
getKeybindings: () => unknown;
getConfiguredShortcuts: () => unknown;
getStatsToggleKey: () => string;
getMarkWatchedKey: () => string;
getControllerConfig: () => ResolvedControllerConfig;
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
@@ -134,6 +171,7 @@ export interface IpcDepsRuntimeOptions {
getAnilistQueueStatus: () => unknown;
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
getImmersionTracker?: () => IpcServiceDeps['immersionTracker'];
}
export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcServiceDeps {
@@ -170,6 +208,8 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
handleMpvCommand: options.handleMpvCommand,
getKeybindings: options.getKeybindings,
getConfiguredShortcuts: options.getConfiguredShortcuts,
getStatsToggleKey: options.getStatsToggleKey,
getMarkWatchedKey: options.getMarkWatchedKey,
getControllerConfig: options.getControllerConfig,
saveControllerConfig: options.saveControllerConfig,
saveControllerPreference: options.saveControllerPreference,
@@ -192,10 +232,31 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
getAnilistQueueStatus: options.getAnilistQueueStatus,
retryAnilistQueueNow: options.retryAnilistQueueNow,
appendClipboardVideoToQueue: options.appendClipboardVideoToQueue,
get immersionTracker() {
return options.getImmersionTracker?.() ?? null;
},
};
}
export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar = ipcMain): void {
/**
 * Normalises a caller-supplied row limit.
 *
 * @param value - Untrusted limit received over IPC.
 * @param defaultValue - Returned when `value` is not an integer >= 1.
 * @param maxValue - Upper bound applied to an otherwise valid limit.
 * @returns `value` capped at `maxValue`, or `defaultValue` for anything invalid
 *   (non-numbers, NaN, Infinity, fractions, zero, negatives).
 */
const parsePositiveIntLimit = (
  value: unknown,
  defaultValue: number,
  maxValue: number,
): number => {
  if (typeof value === 'number' && Number.isInteger(value) && value >= 1) {
    return Math.min(value, maxValue);
  }
  return defaultValue;
};
/**
 * Validates an untrusted id received over IPC.
 *
 * @returns `value` itself when it is an integer greater than zero, otherwise `null`.
 */
const parsePositiveInteger = (value: unknown): number | null =>
  typeof value === 'number' && Number.isInteger(value) && value > 0 ? value : null;
ipc.on(
IPC_CHANNELS.command.setIgnoreMouseEvents,
(event: unknown, ignore: unknown, options: unknown = {}) => {
@@ -224,6 +285,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
deps.openYomitanSettings();
});
// Fire-and-forget: count a Yomitan lookup; a no-op when no tracker is available.
ipc.on(IPC_CHANNELS.command.recordYomitanLookup, () => {
  const tracker = deps.immersionTracker;
  if (tracker) {
    tracker.recordYomitanLookup();
  }
});

// Request-response: resolves to the tracker's result, or false when no tracker is available.
ipc.handle(IPC_CHANNELS.command.markActiveVideoWatched, async () => {
  const marked = await deps.immersionTracker?.markActiveVideoWatched();
  return marked ?? false;
});
ipc.on(IPC_CHANNELS.command.quitApp, () => {
deps.quitApp();
});
@@ -312,6 +381,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
return deps.getConfiguredShortcuts();
});
// Key lookups: each handler returns the value of the matching dependency.
ipc.handle(IPC_CHANNELS.request.getStatsToggleKey, () => deps.getStatsToggleKey());

ipc.handle(IPC_CHANNELS.request.getMarkWatchedKey, () => deps.getMarkWatchedKey());
ipc.handle(IPC_CHANNELS.request.getControllerConfig, () => {
return deps.getControllerConfig();
});
@@ -397,4 +474,115 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
ipc.handle(IPC_CHANNELS.request.appendClipboardVideoToQueue, () => {
return deps.appendClipboardVideoToQueue();
});
// Stats request handlers

// Overview: session summaries (limit 5), daily rollups (limit 14) and query hints,
// fetched concurrently. Without a tracker the same shape is returned with empty
// lists and zeroed hints.
ipc.handle(IPC_CHANNELS.request.statsGetOverview, async () => {
  const tracker = deps.immersionTracker;
  if (tracker) {
    const [sessions, rollups, hints] = await Promise.all([
      tracker.getSessionSummaries(5),
      tracker.getDailyRollups(14),
      tracker.getQueryHints(),
    ]);
    return { sessions, rollups, hints };
  }

  return {
    sessions: [],
    rollups: [],
    hints: {
      totalSessions: 0,
      activeSessions: 0,
      episodesToday: 0,
      activeAnimeCount: 0,
      totalActiveMin: 0,
      totalCards: 0,
      activeDays: 0,
      totalEpisodesWatched: 0,
      totalAnimeCompleted: 0,
      totalTokensSeen: 0,
      totalLookupCount: 0,
      totalLookupHits: 0,
      totalYomitanLookupCount: 0,
      newWordsToday: 0,
      newWordsThisWeek: 0,
    },
  };
});
// Limit-based stats queries. Each limit goes through parsePositiveIntLimit(limit, default, max);
// every handler resolves to [] when no tracker is available.

// Daily rollups: default 60, max 500.
ipc.handle(IPC_CHANNELS.request.statsGetDailyRollups, async (_event, limit: unknown) => {
  return deps.immersionTracker?.getDailyRollups(parsePositiveIntLimit(limit, 60, 500)) ?? [];
});

// Monthly rollups: default 24, max 120.
ipc.handle(IPC_CHANNELS.request.statsGetMonthlyRollups, async (_event, limit: unknown) => {
  return deps.immersionTracker?.getMonthlyRollups(parsePositiveIntLimit(limit, 24, 120)) ?? [];
});

// Session summaries: default 50, max 500.
ipc.handle(IPC_CHANNELS.request.statsGetSessions, async (_event, limit: unknown) => {
  return deps.immersionTracker?.getSessionSummaries(parsePositiveIntLimit(limit, 50, 500)) ?? [];
});

// Session timeline: an undefined limit is passed through as undefined;
// any other value is normalised (default 200, max 1000).
ipc.handle(
  IPC_CHANNELS.request.statsGetSessionTimeline,
  async (_event, sessionId: unknown, limit: unknown) => {
    const validSessionId = parsePositiveInteger(sessionId);
    if (validSessionId === null) {
      return [];
    }
    const timelineLimit =
      limit === undefined ? undefined : parsePositiveIntLimit(limit, 200, 1000);
    return deps.immersionTracker?.getSessionTimeline(validSessionId, timelineLimit) ?? [];
  },
);

// Session events: default 500, max 1000.
ipc.handle(
  IPC_CHANNELS.request.statsGetSessionEvents,
  async (_event, sessionId: unknown, limit: unknown) => {
    const validSessionId = parsePositiveInteger(sessionId);
    if (validSessionId === null) {
      return [];
    }
    const eventLimit = parsePositiveIntLimit(limit, 500, 1000);
    return deps.immersionTracker?.getSessionEvents(validSessionId, eventLimit) ?? [];
  },
);

// Vocabulary stats: default 100, max 500.
ipc.handle(IPC_CHANNELS.request.statsGetVocabulary, async (_event, limit: unknown) => {
  return deps.immersionTracker?.getVocabularyStats(parsePositiveIntLimit(limit, 100, 500)) ?? [];
});

// Kanji stats: default 100, max 500.
ipc.handle(IPC_CHANNELS.request.statsGetKanji, async (_event, limit: unknown) => {
  return deps.immersionTracker?.getKanjiStats(parsePositiveIntLimit(limit, 100, 500)) ?? [];
});
// Media stats queries. Every handler resolves to an empty result ([] or null) when no
// tracker is available.
ipc.handle(IPC_CHANNELS.request.statsGetMediaLibrary, async () => {
  return deps.immersionTracker?.getMediaLibrary() ?? [];
});

// Video ids are validated with parsePositiveInteger, the same way session ids are:
// a bare `typeof === 'number'` check would let NaN, Infinity, fractions and negative
// values through to the tracker. An invalid id yields the handler's empty result.
ipc.handle(IPC_CHANNELS.request.statsGetMediaDetail, async (_event, videoId: unknown) => {
  const parsedVideoId = parsePositiveInteger(videoId);
  if (parsedVideoId === null) return null;
  return deps.immersionTracker?.getMediaDetail(parsedVideoId) ?? null;
});

// Media sessions: limit default 100, max 500.
ipc.handle(
  IPC_CHANNELS.request.statsGetMediaSessions,
  async (_event, videoId: unknown, limit: unknown) => {
    const parsedVideoId = parsePositiveInteger(videoId);
    if (parsedVideoId === null) return [];
    const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
    return deps.immersionTracker?.getMediaSessions(parsedVideoId, parsedLimit) ?? [];
  },
);

// Media daily rollups: limit default 90, max 500.
ipc.handle(
  IPC_CHANNELS.request.statsGetMediaDailyRollups,
  async (_event, videoId: unknown, limit: unknown) => {
    const parsedVideoId = parsePositiveInteger(videoId);
    if (parsedVideoId === null) return [];
    const parsedLimit = parsePositiveIntLimit(limit, 90, 500);
    return deps.immersionTracker?.getMediaDailyRollups(parsedVideoId, parsedLimit) ?? [];
  },
);

ipc.handle(IPC_CHANNELS.request.statsGetMediaCover, async (_event, videoId: unknown) => {
  const parsedVideoId = parsePositiveInteger(videoId);
  if (parsedVideoId === null) return null;
  return deps.immersionTracker?.getCoverArt(parsedVideoId) ?? null;
});
}
+3
View File
@@ -59,9 +59,12 @@ const MPV_SUBTITLE_PROPERTY_OBSERVATIONS: string[] = [
'sub-ass-override',
'sub-use-margins',
'pause',
'duration',
'media-title',
'secondary-sub-visibility',
'sub-visibility',
'sid',
'track-list',
];
const MPV_INITIAL_PROPERTY_REQUESTS: Array<MpvProtocolCommand> = [
+18
View File
@@ -60,6 +60,8 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
emitSubtitleAssChange: (payload) => state.events.push(payload),
emitSubtitleTiming: (payload) => state.events.push(payload),
emitSecondarySubtitleChange: (payload) => state.events.push(payload),
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
getCurrentSubText: () => state.subText,
setCurrentSubText: (text) => {
state.subText = text;
@@ -87,6 +89,7 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
getPauseAtTime: () => null,
setPauseAtTime: () => {},
emitTimePosChange: () => {},
emitDurationChange: () => {},
emitPauseChange: () => {},
autoLoadSecondarySubTrack: () => {},
setCurrentVideoPath: () => {},
@@ -119,6 +122,21 @@ test('dispatchMpvProtocolMessage emits subtitle text on property change', async
assert.deepEqual(state.events, [{ text: '字幕', isOverlayVisible: false }]);
});
// `sid` and `track-list` property changes must each produce one emitted payload.
// The `sid` value is sent as the string '3' and is expected to arrive as the number 3.
test('dispatchMpvProtocolMessage emits subtitle track changes', async () => {
// The overrides close over `state`, which is only assigned once createDeps returns;
// that is safe because the callbacks run later, during dispatch.
const { deps, state } = createDeps({
emitSubtitleTrackChange: (payload) => state.events.push(payload),
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
});
await dispatchMpvProtocolMessage({ event: 'property-change', name: 'sid', data: '3' }, deps);
await dispatchMpvProtocolMessage(
{ event: 'property-change', name: 'track-list', data: [{ type: 'sub', id: 3 }] },
deps,
);
// Events are recorded in dispatch order: the sid change first, then the track list.
assert.deepEqual(state.events, [{ sid: 3 }, { trackList: [{ type: 'sub', id: 3 }] }]);
});
test('dispatchMpvProtocolMessage enforces sub-visibility hidden when overlay suppression is enabled', async () => {
const { deps, state } = createDeps({
isVisibleOverlayVisible: () => true,
+20
View File
@@ -52,6 +52,8 @@ export interface MpvProtocolHandleMessageDeps {
emitSubtitleAssChange: (payload: { text: string }) => void;
emitSubtitleTiming: (payload: { text: string; start: number; end: number }) => void;
emitSecondarySubtitleChange: (payload: { text: string }) => void;
emitSubtitleTrackChange: (payload: { sid: number | null }) => void;
emitSubtitleTrackListChange: (payload: { trackList: unknown[] | null }) => void;
getCurrentSubText: () => string;
setCurrentSubText: (text: string) => void;
setCurrentSubStart: (value: number) => void;
@@ -61,6 +63,7 @@ export interface MpvProtocolHandleMessageDeps {
emitMediaPathChange: (payload: { path: string }) => void;
emitMediaTitleChange: (payload: { title: string | null }) => void;
emitTimePosChange: (payload: { time: number }) => void;
emitDurationChange: (payload: { duration: number }) => void;
emitPauseChange: (payload: { paused: boolean }) => void;
emitSubtitleMetricsChange: (payload: Partial<MpvSubtitleRenderMetrics>) => void;
setCurrentSecondarySubText: (text: string) => void;
@@ -159,6 +162,18 @@ export async function dispatchMpvProtocolMessage(
const nextSubText = (msg.data as string) || '';
deps.setCurrentSecondarySubText(nextSubText);
deps.emitSecondarySubtitleChange({ text: nextSubText });
} else if (msg.name === 'sid') {
const sid =
typeof msg.data === 'number'
? msg.data
: typeof msg.data === 'string'
? Number(msg.data)
: null;
deps.emitSubtitleTrackChange({ sid: sid !== null && Number.isFinite(sid) ? sid : null });
} else if (msg.name === 'track-list') {
deps.emitSubtitleTrackListChange({
trackList: Array.isArray(msg.data) ? (msg.data as unknown[]) : null,
});
} else if (msg.name === 'aid') {
deps.setCurrentAudioTrackId(typeof msg.data === 'number' ? (msg.data as number) : null);
deps.syncCurrentAudioStreamIndex();
@@ -172,6 +187,11 @@ export async function dispatchMpvProtocolMessage(
deps.setPauseAtTime(null);
deps.sendCommand({ command: ['set_property', 'pause', true] });
}
} else if (msg.name === 'duration') {
const duration = typeof msg.data === 'number' ? msg.data : 0;
if (duration > 0) {
deps.emitDurationChange({ duration });
}
} else if (msg.name === 'pause') {
deps.emitPauseChange({ paused: asBoolean(msg.data, false) });
} else if (msg.name === 'media-title') {
+12
View File
@@ -115,8 +115,11 @@ export interface MpvIpcClientEventMap {
'subtitle-ass-change': { text: string };
'subtitle-timing': { text: string; start: number; end: number };
'time-pos-change': { time: number };
'duration-change': { duration: number };
'pause-change': { paused: boolean };
'secondary-subtitle-change': { text: string };
'subtitle-track-change': { sid: number | null };
'subtitle-track-list-change': { trackList: unknown[] | null };
'media-path-change': { path: string };
'media-title-change': { title: string | null };
'subtitle-metrics-change': { patch: Partial<MpvSubtitleRenderMetrics> };
@@ -314,6 +317,9 @@ export class MpvIpcClient implements MpvClient {
emitTimePosChange: (payload) => {
this.emit('time-pos-change', payload);
},
emitDurationChange: (payload) => {
this.emit('duration-change', payload);
},
emitPauseChange: (payload) => {
this.playbackPaused = payload.paused;
this.emit('pause-change', payload);
@@ -321,6 +327,12 @@ export class MpvIpcClient implements MpvClient {
emitSecondarySubtitleChange: (payload) => {
this.emit('secondary-subtitle-change', payload);
},
emitSubtitleTrackChange: (payload) => {
this.emit('subtitle-track-change', payload);
},
emitSubtitleTrackListChange: (payload) => {
this.emit('subtitle-track-list-change', payload);
},
getCurrentSubText: () => this.currentSubText,
setCurrentSubText: (text: string) => {
this.currentSubText = text;
@@ -109,6 +109,60 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
assert.equal(setIntegrationCalls, 1);
});
// When `shouldStartAnkiIntegration` returns false the integration must still be
// created and registered, but its `start()` must never be invoked.
test('initializeOverlayRuntime can skip starting Anki integration transport', () => {
  const noop = (): void => {};
  const tally = { created: 0, started: 0, registered: 0 };

  initializeOverlayRuntime({
    backendOverride: null,
    createMainWindow: noop,
    registerGlobalShortcuts: noop,
    updateVisibleOverlayBounds: noop,
    isVisibleOverlayVisible: () => false,
    updateVisibleOverlayVisibility: noop,
    getOverlayWindows: () => [],
    syncOverlayShortcuts: noop,
    setWindowTracker: noop,
    getMpvSocketPath: () => '/tmp/mpv.sock',
    createWindowTracker: () => null,
    getResolvedConfig: () => ({
      ankiConnect: { enabled: true } as never,
    }),
    getSubtitleTimingTracker: () => ({}),
    getMpvClient: () => ({
      send: () => {},
    }),
    getRuntimeOptionsManager: () => ({
      getEffectiveAnkiConnectConfig: (config) => config as never,
    }),
    createAnkiIntegration: () => {
      tally.created += 1;
      return {
        start: () => {
          tally.started += 1;
        },
      };
    },
    setAnkiIntegration: () => {
      tally.registered += 1;
    },
    showDesktopNotification: noop,
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 7,
      deleteNoteId: 8,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
    shouldStartAnkiIntegration: () => false,
  });

  assert.equal(tally.created, 1);
  assert.equal(tally.started, 0);
  assert.equal(tally.registered, 1);
});
test('initializeOverlayRuntime merges shared ai config with Anki overrides', () => {
initializeOverlayRuntime({
backendOverride: null,
@@ -213,3 +267,49 @@ test('initializeOverlayRuntime re-syncs overlay shortcuts when tracker focus cha
tracker.onWindowFocusChange?.(true);
assert.equal(syncCalls, 1);
});
// A tracker focus change while the overlay is reported visible must trigger a
// visibility refresh; the test expects two refreshes in total after one event.
test('initializeOverlayRuntime refreshes visible overlay when tracker focus changes while overlay is shown', () => {
  const noop = (): void => {};
  let refreshCount = 0;

  // Minimal tracker stub: the runtime assigns its callbacks onto these slots.
  const trackerStub = {
    onGeometryChange: null as ((...args: unknown[]) => void) | null,
    onWindowFound: null as ((...args: unknown[]) => void) | null,
    onWindowLost: null as (() => void) | null,
    onWindowFocusChange: null as ((focused: boolean) => void) | null,
    start: () => {},
  };

  initializeOverlayRuntime({
    backendOverride: null,
    createMainWindow: noop,
    registerGlobalShortcuts: noop,
    updateVisibleOverlayBounds: noop,
    isVisibleOverlayVisible: () => true,
    updateVisibleOverlayVisibility: () => {
      refreshCount += 1;
    },
    getOverlayWindows: () => [],
    syncOverlayShortcuts: noop,
    setWindowTracker: noop,
    getMpvSocketPath: () => '/tmp/mpv.sock',
    createWindowTracker: () => trackerStub as never,
    getResolvedConfig: () => ({
      ankiConnect: { enabled: false } as never,
    }),
    getSubtitleTimingTracker: () => null,
    getMpvClient: () => null,
    getRuntimeOptionsManager: () => null,
    setAnkiIntegration: noop,
    showDesktopNotification: noop,
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 1,
      deleteNoteId: 2,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
  });

  trackerStub.onWindowFocusChange?.(true);

  assert.equal(refreshCount, 2);
});
+7 -4
View File
@@ -75,6 +75,7 @@ export function initializeOverlayRuntime(options: {
data: KikuFieldGroupingRequestData,
) => Promise<KikuFieldGroupingChoice>;
getKnownWordCacheStatePath: () => string;
shouldStartAnkiIntegration?: () => boolean;
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
}): void {
options.createMainWindow();
@@ -90,9 +91,6 @@ export function initializeOverlayRuntime(options: {
windowTracker.onGeometryChange = (geometry: WindowGeometry) => {
options.updateVisibleOverlayBounds(geometry);
};
windowTracker.onTargetWindowFocusChange = () => {
options.syncOverlayShortcuts();
};
windowTracker.onWindowFound = (geometry: WindowGeometry) => {
options.updateVisibleOverlayBounds(geometry);
if (options.isVisibleOverlayVisible()) {
@@ -106,6 +104,9 @@ export function initializeOverlayRuntime(options: {
options.syncOverlayShortcuts();
};
windowTracker.onWindowFocusChange = () => {
if (options.isVisibleOverlayVisible()) {
options.updateVisibleOverlayVisibility();
}
options.syncOverlayShortcuts();
};
windowTracker.start();
@@ -135,7 +136,9 @@ export function initializeOverlayRuntime(options: {
createFieldGroupingCallback: options.createFieldGroupingCallback,
knownWordCacheStatePath: options.getKnownWordCacheStatePath(),
});
integration.start();
if (options.shouldStartAnkiIntegration?.() !== false) {
integration.start();
}
options.setAnkiIntegration(integration);
}
+151 -1
View File
@@ -200,6 +200,81 @@ test('Windows visible overlay stays click-through and does not steal focus while
assert.ok(!calls.includes('focus'));
});
// On macOS with an active tracker, showing the overlay must accept mouse input
// and call show(), but must not call focus() on the window.
test('macOS tracked visible overlay stays visible without passively stealing focus', () => {
  const { window, calls } = createMainWindowRecorder();
  // Produces a callback that appends `label` to the shared call log.
  const logCall = (label: string) => (): void => {
    calls.push(label);
  };
  const tracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  updateVisibleOverlayVisibility({
    visibleOverlayVisible: true,
    mainWindow: window as never,
    windowTracker: tracker as never,
    trackerNotReadyWarningShown: false,
    setTrackerNotReadyWarningShown: () => {},
    updateVisibleOverlayBounds: logCall('update-bounds'),
    ensureOverlayWindowLevel: logCall('ensure-level'),
    syncPrimaryOverlayWindowLayer: logCall('sync-layer'),
    enforceOverlayLayerOrder: logCall('enforce-order'),
    syncOverlayShortcuts: logCall('sync-shortcuts'),
    isMacOSPlatform: true,
    isWindowsPlatform: false,
  } as never);

  assert.ok(calls.includes('mouse-ignore:false:plain'));
  assert.ok(calls.includes('show'));
  assert.ok(!calls.includes('focus'));
});
// With `forceMousePassthrough` set, the macOS overlay must ignore mouse events
// (forwarding them) while still being shown and never focused.
test('forced mouse passthrough keeps macOS tracked overlay passive while visible', () => {
  const { window, calls } = createMainWindowRecorder();
  // Produces a callback that appends `label` to the shared call log.
  const logCall = (label: string) => (): void => {
    calls.push(label);
  };
  const tracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  updateVisibleOverlayVisibility({
    visibleOverlayVisible: true,
    mainWindow: window as never,
    windowTracker: tracker as never,
    trackerNotReadyWarningShown: false,
    setTrackerNotReadyWarningShown: () => {},
    updateVisibleOverlayBounds: logCall('update-bounds'),
    ensureOverlayWindowLevel: logCall('ensure-level'),
    syncPrimaryOverlayWindowLayer: logCall('sync-layer'),
    enforceOverlayLayerOrder: logCall('enforce-order'),
    syncOverlayShortcuts: logCall('sync-shortcuts'),
    isMacOSPlatform: true,
    isWindowsPlatform: false,
    forceMousePassthrough: true,
  } as never);

  assert.ok(calls.includes('mouse-ignore:true:forward'));
  assert.ok(calls.includes('show'));
  assert.ok(!calls.includes('focus'));
});
test('Windows keeps visible overlay hidden while tracker is not ready', () => {
const { window, calls } = createMainWindowRecorder();
let trackerWarning = false;
@@ -283,6 +358,59 @@ test('macOS keeps visible overlay hidden while tracker is not initialized yet',
assert.ok(!calls.includes('update-bounds'));
});
// Simulates tracker loss/recovery cycles against a fake clock: the loading OSD
// shows on the first loss, is suppressed on a loss 1s later, and shows again
// once the 5s cooldown implemented by the test callbacks has elapsed.
test('macOS suppresses immediate repeat loading OSD after tracker recovery until cooldown expires', () => {
  const { window } = createMainWindowRecorder();
  const shownMessages: string[] = [];
  let warningShown = false;
  let lastOsdAtMs: number | null = null;
  let clockMs = 1_000;

  const lostTracker: WindowTrackerStub = {
    isTracking: () => false,
    getGeometry: () => null,
  };
  const readyTracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  // Runs one visibility update against the given tracker, carrying the
  // warning/cooldown state across invocations.
  const runWith = (windowTracker: WindowTrackerStub) =>
    updateVisibleOverlayVisibility({
      visibleOverlayVisible: true,
      mainWindow: window as never,
      windowTracker: windowTracker as never,
      trackerNotReadyWarningShown: warningShown,
      setTrackerNotReadyWarningShown: (shown: boolean) => {
        warningShown = shown;
      },
      updateVisibleOverlayBounds: () => {},
      ensureOverlayWindowLevel: () => {},
      syncPrimaryOverlayWindowLayer: () => {},
      enforceOverlayLayerOrder: () => {},
      syncOverlayShortcuts: () => {},
      isMacOSPlatform: true,
      showOverlayLoadingOsd: (message: string) => {
        shownMessages.push(message);
      },
      shouldShowOverlayLoadingOsd: () => lastOsdAtMs === null || clockMs - lastOsdAtMs >= 5_000,
      markOverlayLoadingOsdShown: () => {
        lastOsdAtMs = clockMs;
      },
    } as never);

  // t = 1000ms: first loss, then recovery.
  runWith(lostTracker);
  runWith(readyTracker);

  // t = 2000ms: second loss, still inside the cooldown window.
  clockMs = 2_000;
  runWith(lostTracker);
  runWith(readyTracker);

  // t = 6500ms: third loss, past the cooldown.
  clockMs = 6_500;
  runWith(lostTracker);

  assert.deepEqual(shownMessages, ['Overlay loading...', 'Overlay loading...']);
});
test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly', () => {
const calls: string[] = [];
setVisibleOverlayVisible({
@@ -298,10 +426,12 @@ test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly'
assert.deepEqual(calls, ['state:true', 'update']);
});
test('macOS loading OSD can show again after overlay is hidden and retried', () => {
test('macOS explicit hide resets loading OSD suppression before retry', () => {
const { window, calls } = createMainWindowRecorder();
const osdMessages: string[] = [];
let trackerWarning = false;
let lastLoadingOsdAtMs: number | null = null;
let nowMs = 1_000;
updateVisibleOverlayVisibility({
visibleOverlayVisible: true,
@@ -331,8 +461,17 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
showOverlayLoadingOsd: (message: string) => {
osdMessages.push(message);
},
shouldShowOverlayLoadingOsd: () =>
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
markOverlayLoadingOsdShown: () => {
lastLoadingOsdAtMs = nowMs;
},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
nowMs = 1_500;
updateVisibleOverlayVisibility({
visibleOverlayVisible: false,
mainWindow: window as never,
@@ -349,6 +488,9 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
syncOverlayShortcuts: () => {},
isMacOSPlatform: true,
showOverlayLoadingOsd: () => {},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
updateVisibleOverlayVisibility({
@@ -379,6 +521,14 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
showOverlayLoadingOsd: (message: string) => {
osdMessages.push(message);
},
shouldShowOverlayLoadingOsd: () =>
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
markOverlayLoadingOsdShown: () => {
lastLoadingOsdAtMs = nowMs;
},
resetOverlayLoadingOsdSuppression: () => {
lastLoadingOsdAtMs = null;
},
} as never);
assert.deepEqual(osdMessages, ['Overlay loading...', 'Overlay loading...']);
+21 -8
View File
@@ -4,6 +4,7 @@ import { WindowGeometry } from '../../types';
export function updateVisibleOverlayVisibility(args: {
visibleOverlayVisible: boolean;
forceMousePassthrough?: boolean;
mainWindow: BrowserWindow | null;
windowTracker: BaseWindowTracker | null;
trackerNotReadyWarningShown: boolean;
@@ -16,6 +17,9 @@ export function updateVisibleOverlayVisibility(args: {
isMacOSPlatform?: boolean;
isWindowsPlatform?: boolean;
showOverlayLoadingOsd?: (message: string) => void;
shouldShowOverlayLoadingOsd?: () => boolean;
markOverlayLoadingOsdShown?: () => void;
resetOverlayLoadingOsdSuppression?: () => void;
resolveFallbackBounds?: () => WindowGeometry;
}): void {
if (!args.mainWindow || args.mainWindow.isDestroyed()) {
@@ -25,20 +29,33 @@ export function updateVisibleOverlayVisibility(args: {
const mainWindow = args.mainWindow;
const showPassiveVisibleOverlay = (): void => {
if (args.isWindowsPlatform) {
const forceMousePassthrough = args.forceMousePassthrough === true;
if (args.isWindowsPlatform || forceMousePassthrough) {
mainWindow.setIgnoreMouseEvents(true, { forward: true });
} else {
mainWindow.setIgnoreMouseEvents(false);
}
args.ensureOverlayWindowLevel(mainWindow);
mainWindow.show();
if (!args.isWindowsPlatform) {
if (!args.isWindowsPlatform && !args.isMacOSPlatform && !forceMousePassthrough) {
mainWindow.focus();
}
};
// Shows the "Overlay loading..." OSD when allowed, then records that it was shown.
// Does nothing unless running on macOS with a `showOverlayLoadingOsd` callback.
const maybeShowOverlayLoadingOsd = (): void => {
if (!args.isMacOSPlatform || !args.showOverlayLoadingOsd) {
return;
}
// Optional gate: when the caller supplies a predicate, it can veto the OSD.
// Without a predicate the OSD is always shown.
if (args.shouldShowOverlayLoadingOsd && !args.shouldShowOverlayLoadingOsd()) {
return;
}
args.showOverlayLoadingOsd('Overlay loading...');
// Let the caller record that the OSD was just shown (optional callback).
args.markOverlayLoadingOsdShown?.();
};
if (!args.visibleOverlayVisible) {
args.setTrackerNotReadyWarningShown(false);
args.resetOverlayLoadingOsdSuppression?.();
mainWindow.hide();
args.syncOverlayShortcuts();
return;
@@ -61,9 +78,7 @@ export function updateVisibleOverlayVisibility(args: {
if (args.isMacOSPlatform || args.isWindowsPlatform) {
if (!args.trackerNotReadyWarningShown) {
args.setTrackerNotReadyWarningShown(true);
if (args.isMacOSPlatform) {
args.showOverlayLoadingOsd?.('Overlay loading...');
}
maybeShowOverlayLoadingOsd();
}
mainWindow.hide();
args.syncOverlayShortcuts();
@@ -79,9 +94,7 @@ export function updateVisibleOverlayVisibility(args: {
if (!args.trackerNotReadyWarningShown) {
args.setTrackerNotReadyWarningShown(true);
if (args.isMacOSPlatform) {
args.showOverlayLoadingOsd?.('Overlay loading...');
}
maybeShowOverlayLoadingOsd();
}
mainWindow.hide();
+1
View File
@@ -46,6 +46,7 @@ export function ensureOverlayWindowLevel(window: BrowserWindow): void {
window.setAlwaysOnTop(true, 'screen-saver', 1);
window.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
window.setFullScreenable(false);
window.moveTop();
return;
}
if (process.platform === 'win32') {
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
anilistSetup: false,
anilistRetryQueue: false,
dictionary: false,
stats: false,
jellyfin: false,
jellyfinLogin: false,
jellyfinLogout: false,
+196
View File
@@ -0,0 +1,196 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { runAppReadyRuntime } from './startup';
// With `shouldUseMinimalStartup` returning true, only config bootstrap, config
// reload and initial-arg handling may run; every other dependency is a
// recording stub so any unexpected call shows up in `calls`.
test('runAppReadyRuntime minimal startup skips Yomitan and first-run setup while still handling CLI args', async () => {
  const calls: string[] = [];
  // Stub factories: each returned function logs `label` when invoked.
  const note = (label: string) => (): void => {
    calls.push(label);
  };
  const noteAsync = (label: string) => async (): Promise<void> => {
    calls.push(label);
  };

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: note('bootstrap'),
    loadSubtitlePosition: note('load-subtitle-position'),
    resolveKeybindings: note('resolve-keybindings'),
    createMpvClient: note('create-mpv'),
    reloadConfig: note('reload-config'),
    getResolvedConfig: () => ({}),
    getConfigWarnings: () => [],
    logConfigWarning: note('config-warning'),
    setLogLevel: note('set-log-level'),
    initRuntimeOptionsManager: note('init-runtime-options'),
    setSecondarySubMode: note('set-secondary-sub-mode'),
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: note('subtitle-ws'),
    startAnnotationWebsocket: note('annotation-ws'),
    startTexthooker: note('texthooker'),
    log: note('log'),
    createMecabTokenizerAndCheck: noteAsync('mecab'),
    createSubtitleTimingTracker: note('subtitle-timing'),
    createImmersionTracker: note('immersion'),
    startJellyfinRemoteSession: noteAsync('jellyfin'),
    loadYomitanExtension: noteAsync('load-yomitan'),
    handleFirstRunSetup: noteAsync('first-run'),
    prewarmSubtitleDictionaries: noteAsync('prewarm'),
    startBackgroundWarmups: note('warmups'),
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
    setVisibleOverlayVisible: note('visible-overlay'),
    initializeOverlayRuntime: note('init-overlay'),
    handleInitialArgs: note('handle-initial-args'),
    shouldUseMinimalStartup: () => true,
    shouldSkipHeavyStartup: () => false,
  });

  assert.deepEqual(calls, ['bootstrap', 'reload-config', 'handle-initial-args']);
});
// With `shouldRunHeadlessInitialCommand` returning true and a headless command
// supplied, startup must bootstrap config, reload it, initialise runtime
// options, run the headless command, and do nothing else.
test('runAppReadyRuntime headless refresh bootstraps Anki runtime without UI startup', async () => {
  const calls: string[] = [];
  // Stub factories: each returned function logs `label` when invoked.
  const note = (label: string) => (): void => {
    calls.push(label);
  };
  const noteAsync = (label: string) => async (): Promise<void> => {
    calls.push(label);
  };

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: note('bootstrap'),
    loadSubtitlePosition: note('load-subtitle-position'),
    resolveKeybindings: note('resolve-keybindings'),
    createMpvClient: note('create-mpv'),
    reloadConfig: note('reload-config'),
    getResolvedConfig: () => ({}),
    getConfigWarnings: () => [],
    logConfigWarning: note('config-warning'),
    setLogLevel: note('set-log-level'),
    initRuntimeOptionsManager: note('init-runtime-options'),
    setSecondarySubMode: note('set-secondary-sub-mode'),
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: note('subtitle-ws'),
    startAnnotationWebsocket: note('annotation-ws'),
    startTexthooker: note('texthooker'),
    log: note('log'),
    createMecabTokenizerAndCheck: noteAsync('mecab'),
    createSubtitleTimingTracker: note('subtitle-timing'),
    createImmersionTracker: note('immersion'),
    startJellyfinRemoteSession: noteAsync('jellyfin'),
    loadYomitanExtension: noteAsync('load-yomitan'),
    handleFirstRunSetup: noteAsync('first-run'),
    prewarmSubtitleDictionaries: noteAsync('prewarm'),
    startBackgroundWarmups: note('warmups'),
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
    setVisibleOverlayVisible: note('visible-overlay'),
    initializeOverlayRuntime: note('init-overlay'),
    runHeadlessInitialCommand: noteAsync('run-headless-command'),
    handleInitialArgs: note('handle-initial-args'),
    shouldRunHeadlessInitialCommand: () => true,
    shouldUseMinimalStartup: () => false,
    shouldSkipHeavyStartup: () => false,
  });

  assert.deepEqual(calls, [
    'bootstrap',
    'reload-config',
    'init-runtime-options',
    'run-headless-command',
  ]);
});
+29
View File
@@ -131,10 +131,13 @@ export interface AppReadyRuntimeDeps {
shouldAutoInitializeOverlayRuntimeFromConfig: () => boolean;
setVisibleOverlayVisible: (visible: boolean) => void;
initializeOverlayRuntime: () => void;
runHeadlessInitialCommand?: () => Promise<void>;
handleInitialArgs: () => void;
logDebug?: (message: string) => void;
onCriticalConfigErrors?: (errors: string[]) => void;
now?: () => number;
shouldRunHeadlessInitialCommand?: () => boolean;
shouldUseMinimalStartup?: () => boolean;
shouldSkipHeavyStartup?: () => boolean;
}
@@ -183,6 +186,32 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
const now = deps.now ?? (() => Date.now());
const startupStartedAtMs = now();
deps.ensureDefaultConfigBootstrap();
if (deps.shouldRunHeadlessInitialCommand?.()) {
deps.reloadConfig();
deps.initRuntimeOptionsManager();
if (deps.runHeadlessInitialCommand) {
await deps.runHeadlessInitialCommand();
} else {
deps.createMpvClient();
deps.createSubtitleTimingTracker();
deps.initializeOverlayRuntime();
deps.handleInitialArgs();
}
return;
}
if (deps.texthookerOnlyMode) {
deps.reloadConfig();
deps.handleInitialArgs();
return;
}
if (deps.shouldUseMinimalStartup?.()) {
deps.reloadConfig();
deps.handleInitialArgs();
return;
}
if (deps.shouldSkipHeavyStartup?.()) {
await deps.loadYomitanExtension();
deps.reloadConfig();
File diff suppressed because it is too large Load Diff
+88
View File
@@ -0,0 +1,88 @@
import type { BrowserWindow, BrowserWindowConstructorOptions } from 'electron';
import type { WindowGeometry } from '../../types';
// Fallback window size, used by buildStatsWindowOptions when no bounds
// (or no width/height) are supplied.
const DEFAULT_STATS_WINDOW_WIDTH = 900;
const DEFAULT_STATS_WINDOW_HEIGHT = 700;
// The slice of BrowserWindow that promoteStatsWindowLevel touches.
// `setAlwaysOnTop` and `moveTop` are required; the workspace/fullscreen setters
// are optional and are invoked with optional-call syntax.
type StatsWindowLevelController = Pick<BrowserWindow, 'setAlwaysOnTop' | 'moveTop'> &
Partial<Pick<BrowserWindow, 'setVisibleOnAllWorkspaces' | 'setFullScreenable'>>;
/**
 * Reports whether `input` is a fresh, unmodified press of the toggle key.
 *
 * @param input - Keyboard input event to inspect.
 * @param toggleKey - Physical key code (compared against `input.code`).
 * @returns `true` only for a non-repeating `keyDown` of `toggleKey` with no
 *   Control/Alt/Meta/Shift modifier held.
 */
function isBareToggleKeyInput(input: Electron.Input, toggleKey: string): boolean {
  if (input.type !== 'keyDown' || input.code !== toggleKey) {
    return false;
  }
  const modifierHeld = input.control || input.alt || input.meta || input.shift;
  // Auto-repeat events are rejected so holding the key does not re-trigger.
  return !modifierHeld && !input.isAutoRepeat;
}
/**
 * Decides whether a keyboard input should dismiss the stats window.
 *
 * @param input - Keyboard input event to inspect.
 * @param toggleKey - Key code of the configured stats toggle key.
 * @returns `true` for an Escape key-down, or for a bare (unmodified,
 *   non-repeating) key-down of the toggle key.
 */
export function shouldHideStatsWindowForInput(input: Electron.Input, toggleKey: string): boolean {
  const isEscapePress = input.type === 'keyDown' && input.key === 'Escape';
  if (isEscapePress) {
    return true;
  }
  return isBareToggleKeyInput(input, toggleKey);
}
/**
 * Builds the BrowserWindow constructor options for the stats overlay window.
 *
 * @param options.preloadPath - Path passed through as the preload script.
 * @param options.bounds - Optional geometry; position is taken from it when
 *   present, and size falls back to the default stats window size otherwise.
 * @returns Options for a frameless, transparent, always-on-top, non-resizable
 *   window that is created hidden, with a sandboxed, context-isolated renderer.
 */
export function buildStatsWindowOptions(options: {
  preloadPath: string;
  bounds?: WindowGeometry | null;
}): BrowserWindowConstructorOptions {
  const bounds = options.bounds;
  const width = bounds?.width ?? DEFAULT_STATS_WINDOW_WIDTH;
  const height = bounds?.height ?? DEFAULT_STATS_WINDOW_HEIGHT;

  const windowOptions: BrowserWindowConstructorOptions = {
    x: bounds?.x,
    y: bounds?.y,
    width,
    height,
    frame: false,
    transparent: true,
    alwaysOnTop: true,
    resizable: false,
    skipTaskbar: true,
    hasShadow: false,
    focusable: true,
    acceptFirstMouse: true,
    fullscreenable: false,
    backgroundColor: '#1e1e2e',
    // Created hidden; the caller decides when to show the window.
    show: false,
    webPreferences: {
      nodeIntegration: false,
      contextIsolation: true,
      preload: options.preloadPath,
      sandbox: true,
    },
  };
  return windowOptions;
}
/**
 * Pins the stats window on top and raises it in the window stack.
 *
 * @param window - Window (or stub) exposing the level-control methods.
 * @param platform - Platform to apply rules for; defaults to the current one.
 *
 * On macOS and Windows the window is pinned at the 'screen-saver' level with a
 * relative level of 2; macOS additionally makes it visible on all workspaces
 * (including fullscreen ones) and non-fullscreenable when those setters exist.
 * Other platforms use a plain always-on-top. `moveTop()` is always called last.
 */
export function promoteStatsWindowLevel(
  window: StatsWindowLevelController,
  platform: NodeJS.Platform = process.platform,
): void {
  switch (platform) {
    case 'darwin':
      window.setAlwaysOnTop(true, 'screen-saver', 2);
      window.setVisibleOnAllWorkspaces?.(true, { visibleOnFullScreen: true });
      window.setFullScreenable?.(false);
      break;
    case 'win32':
      window.setAlwaysOnTop(true, 'screen-saver', 2);
      break;
    default:
      window.setAlwaysOnTop(true);
      break;
  }
  window.moveTop();
}
/**
 * Builds the `loadFile` options for the stats page.
 *
 * @param apiBaseUrl - Optional stats API base URL; omitted from the query when
 *   undefined or empty.
 * @returns An object whose `query` always has `overlay: '1'` and carries
 *   `apiBase` only when a non-empty URL was given.
 */
export function buildStatsWindowLoadFileOptions(apiBaseUrl?: string): {
  query: Record<string, string>;
} {
  const query: Record<string, string> = { overlay: '1' };
  if (apiBaseUrl) {
    query.apiBase = apiBaseUrl;
  }
  return { query };
}
+202
View File
@@ -0,0 +1,202 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
buildStatsWindowLoadFileOptions,
buildStatsWindowOptions,
promoteStatsWindowLevel,
shouldHideStatsWindowForInput,
} from './stats-window-runtime';
// Supplied bounds must be copied into the window options, and the renderer
// must be locked down (sandboxed, context-isolated, no node integration).
test('buildStatsWindowOptions uses tracked overlay bounds and preload-friendly web preferences', () => {
  const preloadPath = '/tmp/preload-stats.js';
  const trackedBounds = { x: 120, y: 80, width: 1440, height: 900 };

  const options = buildStatsWindowOptions({ preloadPath, bounds: trackedBounds });

  // Geometry mirrors the tracked bounds.
  assert.equal(options.x, 120);
  assert.equal(options.y, 80);
  assert.equal(options.width, 1440);
  assert.equal(options.height, 900);

  // Window chrome.
  assert.equal(options.frame, false);
  assert.equal(options.transparent, true);
  assert.equal(options.resizable, false);

  // Renderer security settings.
  const prefs = options.webPreferences;
  assert.equal(prefs?.preload, '/tmp/preload-stats.js');
  assert.equal(prefs?.contextIsolation, true);
  assert.equal(prefs?.nodeIntegration, false);
  assert.equal(prefs?.sandbox, true);
});
// Table-driven check: Escape and a bare toggle-key press hide the window;
// modified, auto-repeated, or key-up toggle events do not.
test('shouldHideStatsWindowForInput matches Escape and configured bare toggle key', () => {
  const toggleKey = 'Backquote';
  const backquote = { key: '`', code: 'Backquote' };

  const scenarios: Array<{ input: Partial<Electron.Input>; expected: boolean }> = [
    { input: { type: 'keyDown', key: 'Escape', code: 'Escape' }, expected: true },
    { input: { type: 'keyDown', ...backquote }, expected: true },
    { input: { type: 'keyDown', ...backquote, control: true }, expected: false },
    { input: { type: 'keyDown', ...backquote, alt: true }, expected: false },
    { input: { type: 'keyDown', ...backquote, meta: true }, expected: false },
    { input: { type: 'keyDown', ...backquote, isAutoRepeat: true }, expected: false },
    { input: { type: 'keyDown', ...backquote, shift: true }, expected: false },
    { input: { type: 'keyUp', ...backquote }, expected: false },
  ];

  for (const { input, expected } of scenarios) {
    assert.equal(shouldHideStatsWindowForInput(input as Electron.Input, toggleKey), expected);
  }
});
// Without an API base URL the query carries only the overlay flag.
test('buildStatsWindowLoadFileOptions enables overlay rendering mode', () => {
  const loadOptions = buildStatsWindowLoadFileOptions();
  const expected = { query: { overlay: '1' } };
  assert.deepEqual(loadOptions, expected);
});
// A supplied API base URL is forwarded to the page as the `apiBase` query value.
test('buildStatsWindowLoadFileOptions includes provided stats API base URL', () => {
  const loadOptions = buildStatsWindowLoadFileOptions('http://127.0.0.1:6123');
  const expected = {
    query: {
      overlay: '1',
      apiBase: 'http://127.0.0.1:6123',
    },
  };
  assert.deepEqual(loadOptions, expected);
});
// On macOS the window is pinned at screen-saver level 2, made visible on all
// workspaces (fullscreen included), made non-fullscreenable, then moved to top.
test('promoteStatsWindowLevel raises stats above overlay level on macOS', () => {
  const calls: string[] = [];

  // Recording stub standing in for the BrowserWindow level-control surface.
  const windowStub = {
    setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
      calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
    },
    setVisibleOnAllWorkspaces: (visible: boolean, options?: { visibleOnFullScreen?: boolean }) => {
      const mode = options?.visibleOnFullScreen === true ? 'fullscreen' : 'plain';
      calls.push(`all-workspaces:${visible}:${mode}`);
    },
    setFullScreenable: (fullscreenable: boolean) => {
      calls.push(`fullscreenable:${fullscreenable}`);
    },
    moveTop: () => {
      calls.push('move-top');
    },
  };

  promoteStatsWindowLevel(windowStub as never, 'darwin');

  assert.deepEqual(calls, [
    'always-on-top:true:screen-saver:2',
    'all-workspaces:true:fullscreen',
    'fullscreenable:false',
    'move-top',
  ]);
});
// On Windows the window is pinned at screen-saver level 2 and moved to top;
// the stub deliberately omits the optional workspace/fullscreen setters.
test('promoteStatsWindowLevel raises stats above overlay level on Windows', () => {
  const calls: string[] = [];

  const windowStub = {
    setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
      calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
    },
    moveTop: () => {
      calls.push('move-top');
    },
  };

  promoteStatsWindowLevel(windowStub as never, 'win32');

  assert.deepEqual(calls, ['always-on-top:true:screen-saver:2', 'move-top']);
});
+118
View File
@@ -0,0 +1,118 @@
import { BrowserWindow, ipcMain } from 'electron';
import * as path from 'path';
import type { WindowGeometry } from '../../types.js';
import { IPC_CHANNELS } from '../../shared/ipc/contracts.js';
import {
buildStatsWindowLoadFileOptions,
buildStatsWindowOptions,
promoteStatsWindowLevel,
shouldHideStatsWindowForInput,
} from './stats-window-runtime.js';
// The single stats overlay window; null until first toggle and again after it closes.
let statsWindow: BrowserWindow | null = null;
// Guards registerStatsOverlayToggle so the IPC listener is attached only once.
let toggleRegistered = false;
/**
 * Dependencies and configuration for the stats overlay window.
 * The getters are invoked at use time (e.g. `getToggleKey` on each input
 * event, `resolveBounds` on each show) rather than being read once up front.
 */
export interface StatsWindowOptions {
/** Absolute path to stats/dist/ directory */
staticDir: string;
/** Absolute path to the compiled preload-stats.js */
preloadPath: string;
/** Resolve the active stats API base URL */
getApiBaseUrl?: () => string;
/** Resolve the active stats toggle key from config */
getToggleKey: () => string;
/** Resolve the tracked overlay/mpv bounds */
resolveBounds: () => WindowGeometry | null;
/** Notify the main process when the stats overlay becomes visible/hidden */
onVisibilityChanged?: (visible: boolean) => void;
}
/**
 * Moves and resizes `window` to match `bounds`.
 * Does nothing when no bounds are available or the window is already destroyed.
 */
function syncStatsWindowBounds(window: BrowserWindow, bounds: WindowGeometry | null): void {
  if (!bounds) {
    return;
  }
  if (window.isDestroyed()) {
    return;
  }
  // Copy only the geometry fields so no other properties on `bounds` are passed on.
  const { x, y, width, height } = bounds;
  window.setBounds({ x, y, width, height });
}
// Shows the stats window: aligns it to the currently resolved bounds, raises
// its window level, shows and focuses it, then reports it as visible.
function showStatsWindow(window: BrowserWindow, options: StatsWindowOptions): void {
syncStatsWindowBounds(window, options.resolveBounds());
promoteStatsWindowLevel(window);
window.show();
window.focus();
options.onVisibilityChanged?.(true);
// NOTE(review): the level is promoted a second time after show/focus —
// presumably because showing or the visibility callback can change stacking; confirm.
promoteStatsWindowLevel(window);
}
/**
 * Toggle the stats overlay window: create it on the first call, then show/hide.
 * The window is hidden rather than destroyed between toggles, so the page it
 * loaded stays alive and its state is preserved.
 *
 * @param options - Paths, getters and callbacks for the stats window. The
 *   options passed on the creating call are the ones captured by the window's
 *   event handlers.
 */
export function toggleStatsOverlay(options: StatsWindowOptions): void {
  if (!statsWindow) {
    statsWindow = new BrowserWindow(
      buildStatsWindowOptions({
        preloadPath: options.preloadPath,
        bounds: options.resolveBounds(),
      }),
    );

    const indexPath = path.join(options.staticDir, 'index.html');
    // loadFile() returns a promise that rejects when the page fails to load.
    // Attach a handler so a missing or broken stats bundle is reported
    // instead of surfacing as an unhandled rejection.
    void statsWindow
      .loadFile(indexPath, buildStatsWindowLoadFileOptions(options.getApiBaseUrl?.()))
      .catch((error: unknown) => {
        console.error(`Failed to load stats overlay page from ${indexPath}:`, error);
      });

    statsWindow.on('closed', () => {
      options.onVisibilityChanged?.(false);
      statsWindow = null;
    });

    // Escape or a bare press of the configured toggle key hides the overlay.
    statsWindow.webContents.on('before-input-event', (event, input) => {
      if (shouldHideStatsWindowForInput(input, options.getToggleKey())) {
        event.preventDefault();
        statsWindow?.hide();
        options.onVisibilityChanged?.(false);
      }
    });

    // First show is deferred until the window signals it is ready to show.
    statsWindow.once('ready-to-show', () => {
      if (!statsWindow) return;
      showStatsWindow(statsWindow, options);
    });

    // Re-assert the window level when a still-visible overlay loses focus.
    statsWindow.on('blur', () => {
      if (!statsWindow || statsWindow.isDestroyed() || !statsWindow.isVisible()) {
        return;
      }
      promoteStatsWindowLevel(statsWindow);
    });
  } else if (statsWindow.isVisible()) {
    statsWindow.hide();
    options.onVisibilityChanged?.(false);
  } else {
    showStatsWindow(statsWindow, options);
  }
}
/**
 * Attach the IPC listener that toggles the stats overlay.
 *
 * Intended to be called once during app initialization. Only the first call
 * registers a listener (bound to that call's options); later calls are no-ops.
 */
export function registerStatsOverlayToggle(options: StatsWindowOptions): void {
  if (!toggleRegistered) {
    toggleRegistered = true;
    const handleToggleCommand = (): void => {
      toggleStatsOverlay(options);
    };
    ipcMain.on(IPC_CHANNELS.command.toggleStatsOverlay, handleToggleCommand);
  }
}
/**
 * Clean up: destroy the stats window if one exists and is still alive,
 * then clear the module-level reference. Call during app quit.
 */
export function destroyStatsWindow(): void {
  if (!statsWindow || statsWindow.isDestroyed()) {
    return;
  }
  statsWindow.destroy();
  statsWindow = null;
}
@@ -0,0 +1,245 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { parseSrtCues, parseAssCues, parseSubtitleCues } from './subtitle-cue-parser';
import type { SubtitleCue } from './subtitle-cue-parser';
// Baseline: two well-formed cues using the comma millisecond separator.
// Start/end times are asserted in seconds.
test('parseSrtCues parses basic SRT content', () => {
const content = [
'1',
'00:00:01,000 --> 00:00:04,000',
'こんにちは',
'',
'2',
'00:00:05,000 --> 00:00:08,500',
'元気ですか',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
// Consecutive text lines of one cue are joined with a newline.
test('parseSrtCues handles multi-line subtitle text', () => {
const content = ['1', '00:01:00,000 --> 00:01:05,000', 'これは', 'テストです', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは\nテストです');
});
// 01:30:00 is 1 * 3600 + 30 * 60 = 5400 seconds.
test('parseSrtCues handles hours in timestamps', () => {
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
// The same parser is expected to accept '.' as the millisecond separator (WebVTT style).
test('parseSrtCues handles VTT-style dot separator', () => {
const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTスタイル', ''].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
});
// Empty and whitespace-only input both produce no cues.
test('parseSrtCues returns empty array for empty content', () => {
assert.deepEqual(parseSrtCues(''), []);
assert.deepEqual(parseSrtCues(' \n\n '), []);
});
// A block without a valid timing line is dropped; parsing continues with the next block.
test('parseSrtCues skips malformed timing lines gracefully', () => {
const content = [
'1',
'NOT A TIMING LINE',
'テスト',
'',
'2',
'00:00:01,000 --> 00:00:02,000',
'有効',
'',
].join('\n');
const cues = parseSrtCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, '有効');
});
// Baseline: two Dialogue lines under [Events] with the standard Format row.
test('parseAssCues parses basic ASS dialogue lines', () => {
const content = [
'[Script Info]',
'Title: Test',
'',
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,こんにちは',
'Dialogue: 0,0:00:05.00,0:00:08.50,Default,,0,0,0,,元気ですか',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 2);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, 'こんにちは');
assert.equal(cues[1]!.startTime, 5.0);
assert.equal(cues[1]!.endTime, 8.5);
assert.equal(cues[1]!.text, '元気ですか');
});
// {...} override blocks are removed from the cue text.
test('parseAssCues strips override tags from text', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\b1}太字{\\b0}テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '太字テスト');
});
// Checks that the trailing Text field survives the comma-based field split.
test('parseAssCues handles text containing commas', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,はい、そうです、ね',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, 'はい、そうです、ね');
});
// The parser is expected to keep the literal backslash-N sequence in the cue
// text; it is not converted to a real newline here.
test('parseAssCues handles \\N line breaks', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,一行目\\N二行目',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.text, '一行目\\N二行目');
});
test('parseAssCues returns empty for content without Events section', () => {
const content = ['[Script Info]', 'Title: Test'].join('\n');
assert.deepEqual(parseAssCues(content), []);
});
// Only 'Dialogue:' lines become cues; 'Comment:' lines are ignored.
test('parseAssCues skips Comment lines', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Comment: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,これはコメント',
'Dialogue: 0,0:00:05.00,0:00:08.00,Default,,0,0,0,,これは字幕',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'これは字幕');
});
// 1:30:00 is 5400 seconds.
test('parseAssCues handles hour timestamps', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,1:30:00.00,1:30:05.00,Default,,0,0,0,,テスト',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues[0]!.startTime, 5400.0);
assert.equal(cues[0]!.endTime, 5405.0);
});
// The Format row here lists Style before Start/End; field positions must come
// from the Format row rather than from fixed indexes.
test('parseAssCues respects dynamic field ordering from the Format row', () => {
const content = [
'[Events]',
'Format: Layer, Style, Start, End, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,Default,0:00:01.00,0:00:04.00,,0,0,0,,順番が違う',
].join('\n');
const cues = parseAssCues(content);
assert.equal(cues.length, 1);
assert.equal(cues[0]!.startTime, 1.0);
assert.equal(cues[0]!.endTime, 4.0);
assert.equal(cues[0]!.text, '順番が違う');
});
// The tests below pick the parser from the extension of the second argument.
test('parseSubtitleCues auto-detects SRT format', () => {
const content = ['1', '00:00:01,000 --> 00:00:04,000', 'SRTテスト', ''].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'SRTテスト');
});
test('parseSubtitleCues auto-detects ASS format', () => {
const content = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,ASSテスト',
].join('\n');
const cues = parseSubtitleCues(content, 'test.ass');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'ASSテスト');
});
test('parseSubtitleCues auto-detects VTT format', () => {
const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTテスト', ''].join('\n');
const cues = parseSubtitleCues(content, 'test.vtt');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'VTTテスト');
});
// An unrecognised extension yields no cues, whatever the content is.
test('parseSubtitleCues returns empty for unknown format', () => {
assert.deepEqual(parseSubtitleCues('random content', 'test.xyz'), []);
});
// Cues appear out of order in the file; the result must be ordered by start time.
test('parseSubtitleCues returns cues sorted by start time', () => {
const content = [
'1',
'00:00:10,000 --> 00:00:14,000',
'二番目',
'',
'2',
'00:00:01,000 --> 00:00:04,000',
'一番目',
'',
].join('\n');
const cues = parseSubtitleCues(content, 'test.srt');
assert.equal(cues[0]!.text, '一番目');
assert.equal(cues[1]!.text, '二番目');
});
// The source may be a URL; the query string and fragment must not hide the extension.
test('parseSubtitleCues detects subtitle formats from remote URLs', () => {
const assContent = [
'[Events]',
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
'Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,URLテスト',
].join('\n');
const cues = parseSubtitleCues(assContent, 'https://host/subs.ass?lang=ja#track');
assert.equal(cues.length, 1);
assert.equal(cues[0]!.text, 'URLテスト');
});
+191
View File
@@ -0,0 +1,191 @@
/** One timed subtitle entry produced by the SRT/VTT and ASS parsers in this module. */
export interface SubtitleCue {
/** Cue start, in seconds from the beginning of the media. */
startTime: number;
/** Cue end, in seconds from the beginning of the media. */
endTime: number;
/** Cue text; lines of a multi-line SRT cue are joined with '\n'. */
text: string;
}
// Matches an SRT/VTT timing line "[hh:]mm:ss,mmm --> [hh:]mm:ss,mmm".
// Hours are optional (1-2 digits), the fraction separator may be ',' or '.',
// and the fraction may have 1-3 digits. There is no end anchor, so anything
// following the end timestamp on the same line is ignored.
// Groups 1-4 are the start h/m/s/fraction, groups 5-8 the end h/m/s/fraction.
const SRT_TIMING_PATTERN =
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
/**
 * Convert the captured pieces of an SRT/VTT timestamp into seconds.
 *
 * @param hours   Hour digits, or undefined/empty when the timestamp has no hour part.
 * @param minutes Minute digits.
 * @param seconds Second digits.
 * @param millis  Fractional digits (1-3); right-padded with zeros so "5" means 500 ms.
 * @returns The timestamp as a number of seconds.
 */
function parseTimestamp(
  hours: string | undefined,
  minutes: string,
  seconds: string,
  millis: string,
): number {
  const wholeSeconds = Number(hours || 0) * 3600 + Number(minutes) * 60 + Number(seconds);
  const fractionalSeconds = Number(millis.padEnd(3, '0')) / 1000;
  return wholeSeconds + fractionalSeconds;
}
/**
 * Parse SRT (or VTT-style) subtitle text into cues.
 *
 * Every line matching the timing pattern starts a cue; all following lines up
 * to the next blank line (or end of input) form its text. Lines that are not
 * timing lines (cue numbers, headers, stray text) are skipped. Cues whose text
 * is empty after trimming are dropped. Cues are returned in file order.
 *
 * @param content Raw subtitle file content; both LF and CRLF line endings are accepted.
 * @returns The parsed cues, with times in seconds.
 */
export function parseSrtCues(content: string): SubtitleCue[] {
  const lines = content.split(/\r?\n/);
  const cues: SubtitleCue[] = [];
  let cursor = 0;

  while (cursor < lines.length) {
    const timing = SRT_TIMING_PATTERN.exec(lines[cursor]!);
    cursor += 1;
    if (timing === null) {
      continue;
    }

    const [, startH, startM, startS, startFrac, endH, endM, endS, endFrac] = timing;

    // The cue text runs from the line after the timing line to the next blank line.
    let textEnd = cursor;
    while (textEnd < lines.length && lines[textEnd]!.trim() !== '') {
      textEnd += 1;
    }
    const text = lines.slice(cursor, textEnd).join('\n').trim();
    cursor = textEnd;

    if (text) {
      cues.push({
        startTime: parseTimestamp(startH, startM!, startS!, startFrac!),
        endTime: parseTimestamp(endH, endM!, endS!, endFrac!),
        text,
      });
    }
  }

  return cues;
}
// Any "{...}" block in dialogue text (ASS override tags such as {\b1}).
const ASS_OVERRIDE_TAG_PATTERN = /\{[^}]*\}/g;
// An ASS timestamp "h:mm:ss.cc": any number of hour digits, 1-2 fractional digits.
const ASS_TIMING_PATTERN = /^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/;
// Line prefixes recognised inside the [Events] section.
const ASS_FORMAT_PREFIX = 'Format:';
const ASS_DIALOGUE_PREFIX = 'Dialogue:';

/**
 * Convert an ASS timestamp ("h:mm:ss.cc") into seconds.
 *
 * @param raw Timestamp field as found in a Dialogue line; surrounding whitespace is ignored.
 * @returns Seconds, or null when the field does not look like an ASS timestamp.
 */
function parseAssTimestamp(raw: string): number | null {
  const parts = ASS_TIMING_PATTERN.exec(raw.trim());
  if (parts === null) {
    return null;
  }

  const [, hourText, minuteText, secondText, fractionText] = parts;
  const wholeSeconds = Number(hourText) * 3600 + Number(minuteText) * 60 + Number(secondText);
  // A single fractional digit is tenths of a second: pad to centiseconds first.
  return wholeSeconds + Number(fractionText!.padEnd(2, '0')) / 100;
}
/**
 * Parse the Dialogue lines of an ASS/SSA script into cues.
 *
 * Only lines inside an "[Events]" section are considered. The positions of the
 * Start, End and Text columns are taken from the section's "Format:" row, so
 * Dialogue lines that appear before a usable Format row are skipped. Everything
 * from the Text column onwards is treated as the cue text (so commas in the
 * text are preserved), "{...}" override blocks are removed, and cues with
 * unparsable timestamps or empty text are dropped.
 *
 * @param content Raw script content; both LF and CRLF line endings are accepted.
 * @returns The parsed cues in file order, with times in seconds.
 */
export function parseAssCues(content: string): SubtitleCue[] {
  const unknownLayout = { start: -1, end: -1, text: -1 };
  let layout = { ...unknownLayout };
  let insideEvents = false;
  const result: SubtitleCue[] = [];

  content.split(/\r?\n/).forEach((rawLine) => {
    const entry = rawLine.trim();

    // Section header: track whether we are in [Events]; leaving it forgets the column layout.
    if (entry.startsWith('[') && entry.endsWith(']')) {
      insideEvents = entry.toLowerCase() === '[events]';
      if (!insideEvents) {
        layout = { ...unknownLayout };
      }
      return;
    }
    if (!insideEvents) {
      return;
    }

    if (entry.startsWith(ASS_FORMAT_PREFIX)) {
      const columns = entry
        .slice(ASS_FORMAT_PREFIX.length)
        .split(',')
        .map((column) => column.trim().toLowerCase());
      layout = {
        start: columns.indexOf('start'),
        end: columns.indexOf('end'),
        text: columns.indexOf('text'),
      };
      return;
    }

    if (!entry.startsWith(ASS_DIALOGUE_PREFIX)) {
      return;
    }

    // Every required column must be known and present on this line.
    const values = entry.slice(ASS_DIALOGUE_PREFIX.length).split(',');
    const requiredColumns = [layout.start, layout.end, layout.text];
    if (requiredColumns.some((index) => index < 0 || index >= values.length)) {
      return;
    }

    const startTime = parseAssTimestamp(values[layout.start]!);
    const endTime = parseAssTimestamp(values[layout.end]!);
    if (startTime === null || endTime === null) {
      return;
    }

    // Re-join from the Text column onwards so commas inside the text survive the split.
    const joinedText = values.slice(layout.text).join(',');
    const text = joinedText.replace(ASS_OVERRIDE_TAG_PATTERN, '').trim();
    if (text) {
      result.push({ startTime, endTime, text });
    }
  });

  return result;
}
/**
 * Guess the subtitle format from the extension of a file name, path or URL.
 *
 * For sources that look like "scheme://..." the URL's pathname is used; if the
 * URL cannot be parsed the raw string is used instead. Anything from the first
 * '?' or '#' onwards is ignored, and the extension comparison is
 * case-insensitive.
 *
 * @param source File name, path or URL of the subtitle track.
 * @returns 'srt', 'vtt' or 'ass' ('.ssa' is reported as 'ass'), or null when
 *          the extension is not recognised.
 */
function detectSubtitleFormat(source: string): 'srt' | 'vtt' | 'ass' | 'ssa' | null {
  let pathLike = source;
  if (/^[a-z]+:\/\//i.test(source)) {
    try {
      pathLike = new URL(source).pathname;
    } catch {
      // Not a parseable URL after all: fall back to the raw string.
    }
  }

  // Take the part before any query string or fragment. The previous code
  // array-destructured that string, which yields only its first character,
  // so no extension was ever recognised.
  const normalizedSource = pathLike.split(/[?#]/, 1)[0] ?? pathLike;
  const ext = normalizedSource.split('.').pop()?.toLowerCase() ?? '';

  if (ext === 'srt') return 'srt';
  if (ext === 'vtt') return 'vtt';
  if (ext === 'ass' || ext === 'ssa') return 'ass';
  return null;
}
/**
 * Parse subtitle content with the parser matching the source's extension.
 *
 * SRT and VTT sources go through parseSrtCues, ASS/SSA sources through
 * parseAssCues. Sources with an unrecognised extension yield no cues.
 *
 * @param content  Raw subtitle file content.
 * @param filename File name, path or URL used only to detect the format.
 * @returns The parsed cues ordered by ascending start time.
 */
export function parseSubtitleCues(content: string, filename: string): SubtitleCue[] {
  const format = detectSubtitleFormat(filename);
  if (format === null) {
    return [];
  }

  const usesSrtSyntax = format === 'srt' || format === 'vtt';
  const parsed = usesSrtSyntax ? parseSrtCues(content) : parseAssCues(content);
  return parsed.sort((left, right) => left.startTime - right.startTime);
}
+244
View File
@@ -0,0 +1,244 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { computePriorityWindow, createSubtitlePrefetchService } from './subtitle-prefetch';
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
/**
 * Build `count` synthetic cues named "line-0", "line-1", ...
 * Cue i starts at startOffset + 5 * i seconds and lasts 4 seconds, leaving a
 * one-second gap before the next cue.
 */
function makeCues(count: number, startOffset = 0): SubtitleCue[] {
  const indexes = Array.from({ length: count }, (_, index) => index);
  return indexes.map((index) => {
    const startTime = startOffset + index * 5;
    return { startTime, endTime: startTime + 4, text: `line-${index}` };
  });
}
// makeCues produces cue i spanning [5i, 5i + 4] seconds, which the positions
// used below rely on.
test('computePriorityWindow returns next N cues from current position', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 12.0, 5);
assert.equal(window.length, 5);
// Position 12.0 falls during cue 2, so the active cue should be warmed first.
assert.equal(window[0]!.text, 'line-2');
assert.equal(window[4]!.text, 'line-6');
});
test('computePriorityWindow clamps to remaining cues at end of file', () => {
const cues = makeCues(5);
const window = computePriorityWindow(cues, 18.0, 10);
// Position 18.0 is during cue 3 (start=15), so cue 3 and cue 4 remain.
assert.equal(window.length, 2);
assert.equal(window[0]!.text, 'line-3');
assert.equal(window[1]!.text, 'line-4');
});
// A position after the last cue's end leaves nothing to prioritise.
test('computePriorityWindow returns empty when past all cues', () => {
const cues = makeCues(3);
const window = computePriorityWindow(cues, 999.0, 10);
assert.equal(window.length, 0);
});
test('computePriorityWindow at position 0 returns first N cues', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 0, 5);
assert.equal(window.length, 5);
assert.equal(window[0]!.text, 'line-0');
});
// Position 18.0 is inside cue 3 (15-19): the cue being shown is part of the window.
test('computePriorityWindow includes the active cue when current position is mid-line', () => {
const cues = makeCues(20);
const window = computePriorityWindow(cues, 18.0, 3);
assert.equal(window.length, 3);
assert.equal(window[0]!.text, 'line-3');
assert.equal(window[1]!.text, 'line-4');
assert.equal(window[2]!.text, 'line-5');
});
/**
 * Resolve after a zero-delay timer has fired.
 * Despite the name this waits for a setTimeout(0) turn, not just the
 * microtask queue, so timer-based yields in the code under test can run.
 */
function flushMicrotasks(): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, 0);
  });
}
// NOTE(review): tokenizeCalls is incremented in this test but never asserted.
test('prefetch service tokenizes priority window cues and caches them', async () => {
const cues = makeCues(20);
const cached: Map<string, SubtitleData> = new Map();
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: (text, data) => {
cached.set(text, data);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Allow all async tokenization to complete
for (let i = 0; i < 25; i += 1) {
await flushMicrotasks();
}
service.stop();
// Priority window (first 3) should be cached
assert.ok(cached.has('line-0'));
assert.ok(cached.has('line-1'));
assert.ok(cached.has('line-2'));
});
// The fake cache reports full after 5 entries; background prefetching must stop there.
test('prefetch service stops when cache is full', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
let cacheSize = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {
cacheSize += 1;
},
isCacheFull: () => cacheSize >= 5,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// Should have stopped at 5 (cache full), not tokenized all 20
assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
});
// stop() must prevent any tokenize call from being issued afterwards.
test('prefetch service can be stopped mid-flight', async () => {
const cues = makeCues(100);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.stop();
const callsAtStop = tokenizeCalls;
// Wait more to confirm no further calls
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
});
test('prefetch service onSeek re-prioritizes from new position', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
// Let a few cues process
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Seek to near the end
service.onSeek(80.0);
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
// After seek to 80.0, cues starting after 80.0 (line-17, line-18, line-19) should appear in cached
const hasPostSeekCue = cachedTexts.some(
(t) => t === 'line-17' || t === 'line-18' || t === 'line-19',
);
assert.ok(hasPostSeekCue, 'Should have cached cues after seek position');
});
// Even with a cache that always reports full, the first priority-window cues are still cached.
test('prefetch service still warms the priority window when cache is full', async () => {
const cues = makeCues(20);
const cachedTexts: string[] = [];
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
preCacheTokenization: (text) => {
cachedTexts.push(text);
},
isCacheFull: () => true,
priorityWindowSize: 3,
});
service.start(0);
for (let i = 0; i < 10; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.deepEqual(cachedTexts.slice(0, 3), ['line-0', 'line-1', 'line-2']);
});
test('prefetch service pause/resume halts and continues tokenization', async () => {
const cues = makeCues(20);
let tokenizeCalls = 0;
const service = createSubtitlePrefetchService({
cues,
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
preCacheTokenization: () => {},
isCacheFull: () => false,
priorityWindowSize: 3,
});
service.start(0);
await flushMicrotasks();
await flushMicrotasks();
service.pause();
const callsWhenPaused = tokenizeCalls;
// Wait while paused
for (let i = 0; i < 5; i += 1) {
await flushMicrotasks();
}
// Should not have advanced much (may have 1 in-flight)
assert.ok(tokenizeCalls <= callsWhenPaused + 1, 'Should not tokenize much while paused');
service.resume();
for (let i = 0; i < 30; i += 1) {
await flushMicrotasks();
}
service.stop();
assert.ok(tokenizeCalls > callsWhenPaused + 1, 'Should resume tokenizing after unpause');
});
+153
View File
@@ -0,0 +1,153 @@
import type { SubtitleCue } from './subtitle-cue-parser';
import type { SubtitleData } from '../../types';
/** Collaborators and settings needed by createSubtitlePrefetchService. */
export interface SubtitlePrefetchServiceDeps {
/** All cues of the current subtitle track. Presumably sorted by start time; verify against caller. */
cues: SubtitleCue[];
/** Tokenize one cue's text; a null result (or a rejection) means nothing is cached for that cue. */
tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
/** Store a tokenization result under the cue text it was computed from. */
preCacheTokenization: (text: string, data: SubtitleData) => void;
/** Checked before each background cue; the priority window is tokenized regardless. */
isCacheFull: () => boolean;
/** Number of cues in the priority window; defaults to DEFAULT_PRIORITY_WINDOW_SIZE. */
priorityWindowSize?: number;
}
/** Control surface returned by createSubtitlePrefetchService. */
export interface SubtitlePrefetchService {
/** Begin (or restart) prefetching from the given playback position; also clears a pending pause. */
start: (currentTimeSeconds: number) => void;
/** Cancel the current run; no further cues are tokenized until start is called again. */
stop: () => void;
/** Cancel the current run and restart from the new position (does no work while stopped). */
onSeek: (newTimeSeconds: number) => void;
/** Hold the current run before its next cue until resume is called. */
pause: () => void;
/** Let a paused run continue. */
resume: () => void;
}
// Number of cues in the priority window when the caller does not supply priorityWindowSize.
const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
/**
 * Select the cues that should be tokenized first for a playback position.
 *
 * The window starts at the first cue (in array order) whose end time is after
 * the current position. That includes a cue that is on screen right now, and
 * skips cues that have already finished. The window then extends for up to
 * `windowSize` cues.
 *
 * @param cues               Cues of the track, in playback order.
 * @param currentTimeSeconds Current playback position in seconds.
 * @param windowSize         Maximum number of cues to return.
 * @returns A new array with the selected cues; empty when there are no cues or
 *          every cue has already ended.
 */
export function computePriorityWindow(
  cues: SubtitleCue[],
  currentTimeSeconds: number,
  windowSize: number,
): SubtitleCue[] {
  const firstUnfinished = cues.findIndex((cue) => cue.endTime > currentTimeSeconds);
  return firstUnfinished === -1 ? [] : cues.slice(firstUnfinished, firstUnfinished + windowSize);
}
/**
 * Create a service that tokenizes subtitle cues ahead of playback and hands
 * the results to the tokenization cache.
 *
 * A run works through three phases, one cue at a time:
 *   1. the priority window around the current position (even if the cache is full),
 *   2. the remaining cues that start after the current position,
 *   3. the cues that start at or before it (for rewinds).
 * Phases 2 and 3 skip cues whose text is already in the priority window and
 * end as soon as the cache reports full. Each run carries an id; start, stop
 * and onSeek bump the id, so an older run notices it is stale and exits.
 *
 * @param deps Cues, tokenizer, cache hooks and optional window size.
 * @returns Controls to start, stop, seek, pause and resume prefetching.
 */
export function createSubtitlePrefetchService(
  deps: SubtitlePrefetchServiceDeps,
): SubtitlePrefetchService {
  const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
  let stopped = true;
  let paused = false;
  let currentRunId = 0;

  // A run is stale once the service is stopped or a newer run has been started.
  const isStale = (runId: number): boolean => stopped || runId !== currentRunId;

  const sleep = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  // Tokenize the given cues in order, stopping early when the run goes stale
  // or (unless allowWhenCacheFull is set) when the cache reports full.
  async function tokenizeCueList(
    cuesToProcess: SubtitleCue[],
    runId: number,
    options: { allowWhenCacheFull?: boolean } = {},
  ): Promise<void> {
    const respectCacheLimit = !options.allowWhenCacheFull;

    for (const cue of cuesToProcess) {
      if (isStale(runId)) {
        return;
      }

      // Poll while paused; a stop or a newer run also ends the wait.
      while (paused && !isStale(runId)) {
        await sleep(10);
      }
      if (isStale(runId)) {
        return;
      }

      if (respectCacheLimit && deps.isCacheFull()) {
        return;
      }

      try {
        const tokenized = await deps.tokenizeSubtitle(cue.text);
        // The run may have gone stale while tokenizing; do not cache in that case.
        if (tokenized && !isStale(runId)) {
          deps.preCacheTokenization(cue.text, tokenized);
        }
      } catch {
        // Skip failed cues, continue prefetching
      }

      // Yield to allow live processing to take priority
      await sleep(0);
    }
  }

  // Run the three prefetch phases for one run id.
  async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
    const allCues = deps.cues;

    // Phase 1: priority window, tokenized even when the cache is full.
    const priorityCues = computePriorityWindow(allCues, currentTimeSeconds, windowSize);
    await tokenizeCueList(priorityCues, runId, { allowWhenCacheFull: true });
    if (isStale(runId)) {
      return;
    }

    const priorityTexts = new Set(priorityCues.map((cue) => cue.text));
    const isOutsidePriorityWindow = (cue: SubtitleCue): boolean => !priorityTexts.has(cue.text);

    // Phase 2: remaining cues forward from the current position.
    const upcomingCues = allCues.filter(
      (cue) => cue.startTime > currentTimeSeconds && isOutsidePriorityWindow(cue),
    );
    await tokenizeCueList(upcomingCues, runId);
    if (isStale(runId)) {
      return;
    }

    // Phase 3: earlier cues (for rewind support).
    const earlierCues = allCues.filter(
      (cue) => cue.startTime <= currentTimeSeconds && isOutsidePriorityWindow(cue),
    );
    await tokenizeCueList(earlierCues, runId);
  }

  // Invalidate any run in progress and begin a new one from the given position.
  const beginRun = (positionSeconds: number): void => {
    currentRunId += 1;
    void startPrefetching(positionSeconds, currentRunId);
  };

  return {
    start(currentTimeSeconds: number) {
      stopped = false;
      paused = false;
      beginRun(currentTimeSeconds);
    },
    stop() {
      stopped = true;
      currentRunId += 1;
    },
    onSeek(newTimeSeconds: number) {
      // Cancel current run and restart from new position
      beginRun(newTimeSeconds);
    },
    pause() {
      paused = true;
    },
    resume() {
      paused = false;
    },
  };
}
@@ -170,3 +170,87 @@ test('subtitle processing cache invalidation only affects future subtitle events
assert.equal(callsByText.get('same'), 2);
});
// A pre-cached entry must be emitted for a matching subtitle change without calling the tokenizer.
test('preCacheTokenization stores entry that is returned on next subtitle change', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('予め', { text: '予め', tokens: [] });
controller.onSubtitleChange('予め');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'should not call tokenize when pre-cached');
assert.deepEqual(emitted, [{ text: '予め', tokens: [] }]);
});
// The entry is stored under text containing a literal backslash-N and looked up
// with a real newline; both must resolve to the same cache entry.
test('preCacheTokenization reuses normalized subtitle text across ASS linebreak variants', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('一行目\\N二行目', { text: '一行目\n二行目', tokens: [] });
controller.onSubtitleChange('一行目\n二行目');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'should not call tokenize when normalized text matches');
assert.deepEqual(emitted, [{ text: '一行目\n二行目', tokens: [] }]);
});
// After consumeCachedSubtitle returns the payload, a subtitle change for the same
// text must neither tokenize nor emit again.
test('consumeCachedSubtitle returns prefetched payload and prevents reprocessing same line', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => {
tokenizeCalls += 1;
return { text, tokens: [] };
},
emitSubtitle: (payload) => emitted.push(payload),
});
controller.preCacheTokenization('猫\\Nです', { text: '猫\nです', tokens: [] });
const immediate = controller.consumeCachedSubtitle('猫\nです');
assert.deepEqual(immediate, { text: '猫\nです', tokens: [] });
controller.onSubtitleChange('猫\nです');
await flushMicrotasks();
assert.equal(tokenizeCalls, 0, 'same cached subtitle should not reprocess after immediate consume');
assert.deepEqual(emitted, []);
});
// A freshly created controller has an empty cache.
test('isCacheFull returns false when cache is below limit', () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: null }),
emitSubtitle: () => {},
});
assert.equal(controller.isCacheFull(), false);
});
test('isCacheFull returns true when cache reaches limit', async () => {
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
emitSubtitle: () => {},
});
// Fill cache to the 256 limit
for (let i = 0; i < 256; i += 1) {
controller.preCacheTokenization(`line-${i}`, { text: `line-${i}`, tokens: [] });
}
assert.equal(controller.isCacheFull(), true);
});
@@ -11,6 +11,13 @@ export interface SubtitleProcessingController {
onSubtitleChange: (text: string) => void;
refreshCurrentSubtitle: (textOverride?: string) => void;
invalidateTokenizationCache: () => void;
preCacheTokenization: (text: string, data: SubtitleData) => void;
consumeCachedSubtitle: (text: string) => SubtitleData | null;
isCacheFull: () => boolean;
}
/**
 * Build the cache key for a subtitle line.
 *
 * CRLF pairs and the literal two-character sequences "\N" and "\n" (ASS line
 * breaks) are all replaced by a real newline, then surrounding whitespace is
 * trimmed, so different spellings of the same line share one cache entry.
 */
function normalizeSubtitleCacheKey(text: string): string {
  const lineBreakVariants = /\r\n|\\[Nn]/g;
  const unified = text.replace(lineBreakVariants, '\n');
  return unified.trim();
}
export function createSubtitleProcessingController(
@@ -26,18 +33,19 @@ export function createSubtitleProcessingController(
const now = deps.now ?? (() => Date.now());
const getCachedTokenization = (text: string): SubtitleData | null => {
const cached = tokenizationCache.get(text);
const cacheKey = normalizeSubtitleCacheKey(text);
const cached = tokenizationCache.get(cacheKey);
if (!cached) {
return null;
}
tokenizationCache.delete(text);
tokenizationCache.set(text, cached);
tokenizationCache.delete(cacheKey);
tokenizationCache.set(cacheKey, cached);
return cached;
};
const setCachedTokenization = (text: string, payload: SubtitleData): void => {
tokenizationCache.set(text, payload);
tokenizationCache.set(normalizeSubtitleCacheKey(text), payload);
while (tokenizationCache.size > SUBTITLE_TOKENIZATION_CACHE_LIMIT) {
const firstKey = tokenizationCache.keys().next().value;
if (firstKey !== undefined) {
@@ -130,5 +138,22 @@ export function createSubtitleProcessingController(
invalidateTokenizationCache: () => {
tokenizationCache.clear();
},
preCacheTokenization: (text: string, data: SubtitleData) => {
setCachedTokenization(text, data);
},
consumeCachedSubtitle: (text: string) => {
const cached = getCachedTokenization(text);
if (!cached) {
return null;
}
latestText = text;
lastEmittedText = text;
refreshRequested = false;
return cached;
},
isCacheFull: () => {
return tokenizationCache.size >= SUBTITLE_TOKENIZATION_CACHE_LIMIT;
},
};
}
+29 -2
View File
@@ -108,8 +108,9 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
partOfSpeech: PartOfSpeech.other,
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
isNPlusOneTarget: true,
isNameMatch: true,
jlptLevel: 'N5',
frequencyRank: 12,
},
],
@@ -122,9 +123,35 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
);
assert.match(
markup,
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア" data-frequency-rank="12">アレクシア<\/span>/,
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア">アレクシア<\/span>/,
);
assert.doesNotMatch(markup, /word-name-match word-known|word-known word-name-match/);
assert.doesNotMatch(markup, /word-name-match word-n-plus-one|word-n-plus-one word-name-match/);
assert.doesNotMatch(markup, /data-frequency-rank="12"|data-jlpt-level="N5"|word-jlpt-n5/);
});
// A particle token with no known/N+1/name-match flags and no JLPT or frequency
// data must still be wrapped in a plain "word" span carrying only the reading
// and headword attributes.
test('serializeSubtitleMarkup keeps filtered tokens hoverable without annotation attrs', () => {
const payload: SubtitleData = {
text: 'は',
tokens: [
{
surface: 'は',
reading: 'は',
headword: 'は',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: false,
},
],
};
const markup = serializeSubtitleMarkup(payload, frequencyOptions);
assert.equal(markup, '<span class="word" data-reading="は" data-headword="は">は</span>');
});
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
+19 -7
View File
@@ -47,10 +47,15 @@ function escapeHtml(text: string): string {
.replaceAll("'", '&#39;');
}
/**
 * Whether the token is flagged as a name match.
 * Only an explicit `true` counts; a missing or false flag returns false.
 */
function hasPrioritizedNameMatch(token: MergedToken): boolean {
  const { isNameMatch } = token;
  return isNameMatch === true;
}
function computeFrequencyClass(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): string | null {
if (hasPrioritizedNameMatch(token)) return null;
if (!options.enabled) return null;
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
@@ -70,6 +75,7 @@ function getFrequencyRankLabel(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): string | null {
if (hasPrioritizedNameMatch(token)) return null;
if (!options.enabled) return null;
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
@@ -79,21 +85,25 @@ function getFrequencyRankLabel(
}
function getJlptLevelLabel(token: MergedToken): string | null {
if (hasPrioritizedNameMatch(token)) {
return null;
}
return token.jlptLevel ?? null;
}
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
const classes = ['word'];
if (token.isNPlusOneTarget) {
classes.push('word-n-plus-one');
} else if (token.isNameMatch) {
if (hasPrioritizedNameMatch(token)) {
classes.push('word-name-match');
} else if (token.isNPlusOneTarget) {
classes.push('word-n-plus-one');
} else if (token.isKnown) {
classes.push('word-known');
}
if (token.jlptLevel) {
if (!hasPrioritizedNameMatch(token) && token.jlptLevel) {
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
}
@@ -137,6 +147,8 @@ function serializeSubtitleToken(
token: MergedToken,
options: SubtitleWebsocketFrequencyOptions,
): SerializedSubtitleToken {
const prioritizedNameMatch = hasPrioritizedNameMatch(token);
return {
surface: token.surface,
reading: token.reading,
@@ -146,10 +158,10 @@ function serializeSubtitleToken(
partOfSpeech: token.partOfSpeech,
isMerged: token.isMerged,
isKnown: token.isKnown,
isNPlusOneTarget: token.isNPlusOneTarget,
isNPlusOneTarget: prioritizedNameMatch ? false : token.isNPlusOneTarget,
isNameMatch: token.isNameMatch ?? false,
jlptLevel: token.jlptLevel,
frequencyRank: token.frequencyRank,
jlptLevel: prioritizedNameMatch ? undefined : token.jlptLevel,
frequencyRank: prioritizedNameMatch ? undefined : token.frequencyRank,
className: computeWordClass(token, options),
frequencyRankLabel: getFrequencyRankLabel(token, options),
jlptLevelLabel: getJlptLevelLabel(token),
+57 -8
View File
@@ -1,23 +1,72 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { injectTexthookerBootstrapHtml } from './texthooker';
import { injectTexthookerBootstrapHtml, type TexthookerBootstrapSettings } from './texthooker';
test('injectTexthookerBootstrapHtml injects websocket bootstrap before head close', () => {
const html = '<html><head><title>Texthooker</title></head><body></body></html>';
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678');
const settings: TexthookerBootstrapSettings = {
enableKnownWordColoring: true,
enableNPlusOneColoring: true,
enableNameMatchColoring: true,
enableFrequencyColoring: true,
enableJlptColoring: true,
characterDictionaryEnabled: true,
knownWordColor: '#a6da95',
nPlusOneColor: '#c6a0f6',
nameMatchColor: '#f5bde6',
hoverTokenColor: '#f4dbd6',
hoverTokenBackgroundColor: 'rgba(54, 58, 79, 0.84)',
jlptColors: {
N1: '#ed8796',
N2: '#f5a97f',
N3: '#f9e2af',
N4: '#a6e3a1',
N5: '#8aadf4',
},
frequencyDictionary: {
singleColor: '#f5a97f',
bandedColors: ['#ed8796', '#f5a97f', '#f9e2af', '#8bd5ca', '#8aadf4'],
},
};
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678', settings);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-websocketUrl', "ws:\/\/127\.0\.0\.1:6678"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableKnownWordColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableNPlusOneColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableNameMatchColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableFrequencyColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-enableJlptColoring', "1"\)/,
);
assert.match(
actual,
/window\.localStorage\.setItem\('bannou-texthooker-characterDictionaryEnabled', "1"\)/,
);
assert.match(actual, /--subminer-known-word-color:\s*#a6da95;/);
assert.match(actual, /--subminer-n-plus-one-color:\s*#c6a0f6;/);
assert.match(actual, /--subminer-name-match-color:\s*#f5bde6;/);
assert.match(actual, /--subminer-jlpt-n1-color:\s*#ed8796;/);
assert.match(actual, /--subminer-frequency-band-4-color:\s*#8bd5ca;/);
assert.match(actual, /--sm-token-hover-bg:\s*rgba\(54, 58, 79, 0\.84\);/);
assert.doesNotMatch(actual, /p \.word\.word-known\s*\{/);
assert.ok(actual.indexOf('</script></head>') !== -1);
assert.ok(actual.includes('bannou-texthooker-websocketUrl'));
assert.ok(!actual.includes('bannou-texthooker-enableKnownWordColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableNPlusOneColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableNameMatchColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableFrequencyColoring'));
assert.ok(!actual.includes('bannou-texthooker-enableJlptColoring'));
});
test('injectTexthookerBootstrapHtml leaves html unchanged without websocketUrl', () => {
+85 -9
View File
@@ -5,23 +5,92 @@ import { createLogger } from '../../logger';
const logger = createLogger('main:texthooker');
export function injectTexthookerBootstrapHtml(html: string, websocketUrl?: string): string {
if (!websocketUrl) {
/**
 * Settings injected into the served texthooker page.
 *
 * The boolean flags are written to `bannou-texthooker-*` localStorage keys by
 * the bootstrap script; the color values are emitted as CSS custom properties
 * by the bootstrap style element.
 */
export type TexthookerBootstrapSettings = {
// Feature toggles, each serialized as "1" or "0" in localStorage.
enableKnownWordColoring: boolean;
enableNPlusOneColoring: boolean;
enableNameMatchColoring: boolean;
enableFrequencyColoring: boolean;
enableJlptColoring: boolean;
characterDictionaryEnabled: boolean;
// Color values interpolated verbatim into CSS custom properties.
knownWordColor: string;
nPlusOneColor: string;
nameMatchColor: string;
hoverTokenColor: string;
hoverTokenBackgroundColor: string;
// One color per JLPT level (--subminer-jlpt-n1-color … n5-color).
jlptColors: {
N1: string;
N2: string;
N3: string;
N4: string;
N5: string;
};
frequencyDictionary: {
singleColor: string;
// Exactly five colors, mapped in order to --subminer-frequency-band-1..5-color.
bandedColors: readonly [string, string, string, string, string];
};
};
/**
 * Builds the inline `<script>` element that seeds the texthooker page's
 * localStorage before the page's own code runs.
 *
 * @param websocketUrl - When non-empty, stored under
 *   `bannou-texthooker-websocketUrl`.
 * @param settings - When provided, each boolean toggle is stored as `"1"` or
 *   `"0"` under its `bannou-texthooker-<name>` key.
 * @returns The `<script>…</script>` markup, or an empty string when there is
 *   nothing to store.
 */
function buildTexthookerBootstrapScript(
  websocketUrl?: string,
  settings?: TexthookerBootstrapSettings,
): string {
  // JSON.stringify leaves `<` untouched, so a value containing `</script>`
  // would terminate the inline script early. `\u003c` is an equivalent JS
  // string escape that the HTML parser does not treat as markup.
  const toInlineScriptLiteral = (value: string): string =>
    JSON.stringify(value).replace(/</g, '\\u003c');

  const statements: string[] = [];

  if (websocketUrl) {
    statements.push(
      `window.localStorage.setItem('bannou-texthooker-websocketUrl', ${toInlineScriptLiteral(websocketUrl)});`,
    );
  }

  if (settings) {
    // Emission order is fixed so the generated markup stays stable.
    const booleanSettingKeys = [
      'enableKnownWordColoring',
      'enableNPlusOneColoring',
      'enableNameMatchColoring',
      'enableFrequencyColoring',
      'enableJlptColoring',
      'characterDictionaryEnabled',
    ] as const;
    for (const key of booleanSettingKeys) {
      const storedValue = settings[key] ? '"1"' : '"0"';
      statements.push(
        `window.localStorage.setItem('bannou-texthooker-${key}', ${storedValue});`,
      );
    }
  }

  return statements.length > 0 ? `<script>${statements.join('')}</script>` : '';
}
/**
 * Builds the `<style>` element that exposes the configured token colors as CSS
 * custom properties on `:root`.
 *
 * @returns The style markup, or an empty string when no settings are given.
 */
function buildTexthookerBootstrapStyle(settings?: TexthookerBootstrapSettings): string {
  if (!settings) {
    return '';
  }

  const jlpt = settings.jlptColors;
  const bands = settings.frequencyDictionary.bandedColors;

  // Property order matches the emitted markup exactly.
  const customProperties: ReadonlyArray<readonly [string, string]> = [
    ['--subminer-known-word-color', settings.knownWordColor],
    ['--subminer-n-plus-one-color', settings.nPlusOneColor],
    ['--subminer-name-match-color', settings.nameMatchColor],
    ['--subminer-jlpt-n1-color', jlpt.N1],
    ['--subminer-jlpt-n2-color', jlpt.N2],
    ['--subminer-jlpt-n3-color', jlpt.N3],
    ['--subminer-jlpt-n4-color', jlpt.N4],
    ['--subminer-jlpt-n5-color', jlpt.N5],
    ['--subminer-frequency-single-color', settings.frequencyDictionary.singleColor],
    ['--subminer-frequency-band-1-color', bands[0]],
    ['--subminer-frequency-band-2-color', bands[1]],
    ['--subminer-frequency-band-3-color', bands[2]],
    ['--subminer-frequency-band-4-color', bands[3]],
    ['--subminer-frequency-band-5-color', bands[4]],
    ['--sm-token-hover-bg', settings.hoverTokenBackgroundColor],
    ['--sm-token-hover-text', settings.hoverTokenColor],
  ];

  const declarations = customProperties
    .map(([property, value]) => `${property}:${value};`)
    .join('');

  return `<style id="subminer-texthooker-bootstrap-style">:root{${declarations}}</style>`;
}
/**
 * Injects the texthooker bootstrap `<style>` and `<script>` elements into an
 * HTML document.
 *
 * The markup is placed immediately before the first `</head>`; when the
 * document has no `</head>` it is prepended to the document instead.
 *
 * @param html - The page markup to augment.
 * @param websocketUrl - Optional websocket URL forwarded to the bootstrap script.
 * @param settings - Optional settings forwarded to both bootstrap builders.
 * @returns The augmented markup, or `html` unchanged when neither builder
 *   produced any output.
 */
export function injectTexthookerBootstrapHtml(
  html: string,
  websocketUrl?: string,
  settings?: TexthookerBootstrapSettings,
): string {
  const bootstrapStyle = buildTexthookerBootstrapStyle(settings);
  const bootstrapScript = buildTexthookerBootstrapScript(websocketUrl, settings);
  const bootstrapMarkup = `${bootstrapStyle}${bootstrapScript}`;

  if (!bootstrapMarkup) {
    return html;
  }

  if (html.includes('</head>')) {
    // A replacer function is used on purpose: with a string replacement,
    // `$&`, `$'`, `$$` etc. inside the injected markup (for example in the
    // websocket URL) would be expanded as replacement patterns.
    return html.replace('</head>', () => `${bootstrapMarkup}</head>`);
  }

  return `${bootstrapMarkup}${html}`;
}
export class Texthooker {
constructor(
private readonly getBootstrapSettings?: () => TexthookerBootstrapSettings | undefined,
) {}
private server: http.Server | null = null;
public isRunning(): boolean {
@@ -62,9 +131,16 @@ export class Texthooker {
res.end('Not found');
return;
}
const bootstrapSettings = this.getBootstrapSettings?.();
const responseData =
urlPath === '/' || urlPath === '/index.html'
? Buffer.from(injectTexthookerBootstrapHtml(data.toString('utf-8'), websocketUrl))
? Buffer.from(
injectTexthookerBootstrapHtml(
data.toString('utf-8'),
websocketUrl,
bootstrapSettings,
),
)
: data;
res.writeHead(200, { 'Content-Type': mimeTypes[ext] || 'text/plain' });
res.end(responseData);
File diff suppressed because it is too large Load Diff
+177 -40
View File
@@ -23,6 +23,7 @@ import {
requestYomitanScanTokens,
requestYomitanTermFrequencies,
} from './tokenizer/yomitan-parser-runtime';
import type { YomitanTermFrequency } from './tokenizer/yomitan-parser-runtime';
const logger = createLogger('main:tokenizer');
@@ -177,6 +178,19 @@ async function applyAnnotationStage(
);
}
/**
 * Runs the annotation stage's per-token `stripSubtitleAnnotationMetadata`
 * over every token.
 *
 * The annotation-stage module is imported on first use and the import promise
 * is cached in `annotationStageModulePromise`. An empty input is returned
 * as-is without triggering the import.
 */
async function stripSubtitleAnnotationMetadata(tokens: MergedToken[]): Promise<MergedToken[]> {
  if (tokens.length === 0) {
    return tokens;
  }

  if (!annotationStageModulePromise) {
    annotationStageModulePromise = import('./tokenizer/annotation-stage');
  }
  const annotationStageModule = await annotationStageModulePromise;

  return tokens.map((candidate) =>
    annotationStageModule.stripSubtitleAnnotationMetadata(candidate),
  );
}
export function createTokenizerDepsRuntime(
options: TokenizerDepsRuntimeOptions,
): TokenizerServiceDeps {
@@ -225,7 +239,13 @@ export function createTokenizerDepsRuntime(
return null;
}
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode(), false);
return mergeTokens(
rawTokens,
options.isKnownWord,
options.getKnownWordMatchMode(),
false,
text,
);
},
enrichTokensWithMecab: async (tokens, mecabTokens) =>
enrichTokensWithMecabAsync(tokens, mecabTokens),
@@ -336,56 +356,162 @@ function resolveFrequencyLookupText(
return token.surface;
}
/**
 * Lists the texts to try, in order, when looking up a token's Yomitan
 * frequency.
 *
 * The first entry is always the trimmed primary lookup text. In `headword`
 * mode the trimmed surface form is appended as a second candidate when it is
 * non-empty and differs from both the headword and the primary text.
 *
 * @returns An empty array when the primary lookup text is blank.
 */
function resolveYomitanFrequencyLookupTexts(
  token: MergedToken,
  matchMode: FrequencyDictionaryMatchMode,
): string[] {
  const primaryText = resolveFrequencyLookupText(token, matchMode).trim();
  if (primaryText.length === 0) {
    return [];
  }

  const lookupTexts = [primaryText];
  if (matchMode === 'headword') {
    const headword = token.headword.trim();
    const surface = token.surface.trim();
    const surfaceIsDistinctCandidate =
      headword.length > 0 &&
      surface.length > 0 &&
      surface !== headword &&
      surface !== primaryText;
    if (surfaceIsDistinctCandidate) {
      lookupTexts.push(surface);
    }
  }

  return lookupTexts;
}
/**
 * Builds the term/reading pairs sent to the Yomitan frequency lookup.
 *
 * Each token contributes one pair per candidate text returned by
 * `resolveYomitanFrequencyLookupTexts`; tokens without a usable lookup text
 * contribute nothing. The reading is the token's trimmed reading, or `null`
 * when it is missing or blank.
 */
function buildYomitanFrequencyTermReadingList(
  tokens: MergedToken[],
  matchMode: FrequencyDictionaryMatchMode,
): Array<{ term: string; reading: string | null }> {
  const termReadingList: Array<{ term: string; reading: string | null }> = [];

  for (const token of tokens) {
    const reading =
      token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null;

    // The candidate list already starts with the primary lookup text, so it is
    // the only source of pairs; pushing the primary text separately would
    // duplicate it.
    for (const term of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
      termReadingList.push({ term, reading });
    }
  }

  return termReadingList;
}
function buildYomitanFrequencyRankMap(
frequencies: ReadonlyArray<{ term: string; frequency: number; dictionaryPriority?: number }>,
): Map<string, number> {
const rankByTerm = new Map<string, { rank: number; dictionaryPriority: number }>();
/**
 * Builds the composite map key for a term/reading pair: the term and the
 * reading joined by a NUL character, with a `null` reading encoded as an
 * empty string.
 */
function makeYomitanFrequencyPairKey(term: string, reading: string | null): string {
  const readingPart = reading === null ? '' : reading;
  return [term, readingPart].join('\u0000');
}
/**
 * A Yomitan frequency entry after normalization for indexing.
 */
interface NormalizedYomitanTermFrequency extends YomitanTermFrequency {
// Trimmed reading, or null when the source reading was missing or blank.
reading: string | null;
// Normalized positive frequency rank (lower values are preferred when selecting).
frequency: number;
}
/**
 * Lookup tables over normalized Yomitan frequency entries.
 */
interface YomitanFrequencyIndex {
// Entries keyed by the composite term + reading key (see makeYomitanFrequencyPairKey).
byPair: Map<string, NormalizedYomitanTermFrequency[]>;
// Entries keyed by trimmed term only; used when no term + reading match exists.
byTerm: Map<string, NormalizedYomitanTermFrequency[]>;
}
/**
 * Adds `entry` to the list stored under `key`, creating the list when the key
 * is not present yet. Mutates `map` in place.
 */
function appendYomitanFrequencyEntry(
  map: Map<string, NormalizedYomitanTermFrequency[]>,
  key: string,
  entry: NormalizedYomitanTermFrequency,
): void {
  const bucket = map.get(key);
  if (bucket === undefined) {
    map.set(key, [entry]);
  } else {
    bucket.push(entry);
  }
}
/**
 * Indexes Yomitan frequency results by term + reading and by term alone.
 *
 * Entries whose trimmed term is empty, or whose frequency does not normalize
 * to a positive rank, are skipped. Each kept entry is stored with its trimmed
 * term, its trimmed reading (`null` when missing or blank) and its normalized
 * rank in `frequency`; all other fields are copied through unchanged.
 */
function buildYomitanFrequencyIndex(
  frequencies: ReadonlyArray<YomitanTermFrequency>,
): YomitanFrequencyIndex {
  const byPair = new Map<string, NormalizedYomitanTermFrequency[]>();
  const byTerm = new Map<string, NormalizedYomitanTermFrequency[]>();

  for (const frequency of frequencies) {
    const term = frequency.term.trim();
    const rank = normalizePositiveFrequencyRank(frequency.frequency);
    if (!term || rank === null) {
      continue;
    }

    const reading =
      typeof frequency.reading === 'string' && frequency.reading.trim().length > 0
        ? frequency.reading.trim()
        : null;
    const normalizedEntry: NormalizedYomitanTermFrequency = {
      ...frequency,
      term,
      reading,
      frequency: rank,
    };

    // The same entry object is shared by both indexes.
    appendYomitanFrequencyEntry(
      byPair,
      makeYomitanFrequencyPairKey(term, reading),
      normalizedEntry,
    );
    appendYomitanFrequencyEntry(byTerm, term, normalizedEntry);
  }

  return { byPair, byTerm };
}
/**
 * Picks the rank of the preferred entry among `entries`.
 *
 * The entry with the lowest `dictionaryPriority` wins; ties are broken by the
 * lowest `frequency`. When several entries tie on both, the earliest one is
 * kept.
 *
 * @returns The winning entry's `frequency`, or `null` when `entries` is empty.
 */
function selectBestYomitanFrequencyRank(
  entries: ReadonlyArray<NormalizedYomitanTermFrequency>,
): number | null {
  let bestEntry: NormalizedYomitanTermFrequency | null = null;

  for (const entry of entries) {
    if (
      bestEntry === null ||
      entry.dictionaryPriority < bestEntry.dictionaryPriority ||
      (entry.dictionaryPriority === bestEntry.dictionaryPriority &&
        entry.frequency < bestEntry.frequency)
    ) {
      bestEntry = entry;
    }
  }

  return bestEntry?.frequency ?? null;
}
/**
 * Looks up the best Yomitan frequency rank for one candidate text of a token.
 *
 * Entries matching both the candidate text and the token's trimmed reading
 * are preferred; when none exist, entries matching the candidate text alone
 * are used.
 *
 * @param token - Token whose reading narrows the lookup.
 * @param candidateText - Text to look up; trimmed before use.
 * @param matchMode - Not consulted by the lookup itself; kept so existing
 *   call sites keep their signature.
 * @param frequencyIndex - Index built from the Yomitan frequency results.
 * @returns The selected rank, or `null` when the candidate text is blank or
 *   has no entries.
 */
function getYomitanFrequencyRank(
  token: MergedToken,
  candidateText: string,
  matchMode: FrequencyDictionaryMatchMode,
  frequencyIndex: YomitanFrequencyIndex,
): number | null {
  const normalizedCandidateText = candidateText.trim();
  if (!normalizedCandidateText) {
    return null;
  }

  const reading =
    typeof token.reading === 'string' && token.reading.trim().length > 0
      ? token.reading.trim()
      : null;

  const pairEntries =
    frequencyIndex.byPair.get(makeYomitanFrequencyPairKey(normalizedCandidateText, reading)) ?? [];
  // Fall back to term-only entries when no term + reading entry exists.
  const candidateEntries =
    pairEntries.length > 0
      ? pairEntries
      : (frequencyIndex.byTerm.get(normalizedCandidateText) ?? []);
  if (candidateEntries.length === 0) {
    return null;
  }

  return selectBestYomitanFrequencyRank(candidateEntries);
}
function getLocalFrequencyRank(
@@ -416,7 +542,7 @@ function getLocalFrequencyRank(
function applyFrequencyRanks(
tokens: MergedToken[],
matchMode: FrequencyDictionaryMatchMode,
yomitanRankByTerm: Map<string, number>,
yomitanFrequencyIndex: YomitanFrequencyIndex,
getFrequencyRank: FrequencyDictionaryLookup | undefined,
): MergedToken[] {
if (tokens.length === 0) {
@@ -441,12 +567,19 @@ function applyFrequencyRanks(
};
}
const yomitanRank = yomitanRankByTerm.get(lookupText);
if (yomitanRank !== undefined) {
return {
...token,
frequencyRank: yomitanRank,
};
for (const candidateText of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
const yomitanRank = getYomitanFrequencyRank(
token,
candidateText,
matchMode,
yomitanFrequencyIndex,
);
if (yomitanRank !== null) {
return {
...token,
frequencyRank: yomitanRank,
};
}
}
if (!getFrequencyRank) {
@@ -501,6 +634,7 @@ async function parseWithYomitanInternalParser(
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: token.isNameMatch ?? false,
frequencyRank: token.frequencyRank,
}),
),
);
@@ -510,7 +644,7 @@ async function parseWithYomitanInternalParser(
}
deps.onTokenizationReady?.(text);
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
const frequencyRankPromise: Promise<YomitanFrequencyIndex> = options.frequencyEnabled
? (async () => {
const frequencyMatchMode = options.frequencyMatchMode;
const termReadingList = buildYomitanFrequencyTermReadingList(
@@ -522,9 +656,9 @@ async function parseWithYomitanInternalParser(
deps,
logger,
);
return buildYomitanFrequencyRankMap(yomitanFrequencies);
return buildYomitanFrequencyIndex(yomitanFrequencies);
})()
: Promise.resolve(new Map<string, number>());
: Promise.resolve({ byPair: new Map(), byTerm: new Map() });
const mecabEnrichmentPromise: Promise<MergedToken[]> = needsMecabPosEnrichment(options)
? (async () => {
@@ -545,7 +679,7 @@ async function parseWithYomitanInternalParser(
})()
: Promise.resolve(normalizedSelectedTokens);
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
const [yomitanFrequencyIndex, enrichedTokens] = await Promise.all([
frequencyRankPromise,
mecabEnrichmentPromise,
]);
@@ -554,7 +688,7 @@ async function parseWithYomitanInternalParser(
return applyFrequencyRanks(
enrichedTokens,
options.frequencyMatchMode,
yomitanRankByTerm,
yomitanFrequencyIndex,
deps.getFrequencyRank,
);
}
@@ -585,9 +719,12 @@ export async function tokenizeSubtitle(
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
if (yomitanTokens && yomitanTokens.length > 0) {
const annotatedTokens = await stripSubtitleAnnotationMetadata(
await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
);
return {
text: displayText,
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
};
}
@@ -1,7 +1,12 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { MergedToken, PartOfSpeech } from '../../../types';
import { annotateTokens, AnnotationStageDeps } from './annotation-stage';
import {
annotateTokens,
AnnotationStageDeps,
shouldExcludeTokenFromSubtitleAnnotations,
stripSubtitleAnnotationMetadata,
} from './annotation-stage';
function makeToken(overrides: Partial<MergedToken> = {}): MergedToken {
return {
@@ -50,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
assert.equal(surfaceResult[0]?.isKnown, false);
});
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
const tokens = [
makeToken({
surface: '大体',
headword: '大体',
reading: 'だいたい',
frequencyRank: 1895,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
isKnownWord: (text) => text === 'だいたい',
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
}),
);
assert.equal(result[0]?.isKnown, true);
assert.equal(result[0]?.jlptLevel, 'N4');
assert.equal(result[0]?.frequencyRank, 1895);
});
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
const tokens = [
makeToken({
@@ -150,6 +178,278 @@ test('annotateTokens handles JLPT disabled and eligibility exclusion paths', ()
assert.equal(excludedLookupCalls, 0);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory ending variants', () => {
const tokens = [
makeToken({
surface: 'んです',
headword: 'ん',
reading: 'ンデス',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'のだ',
headword: 'の',
reading: 'ノダ',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'んだ',
headword: 'ん',
reading: 'ンダ',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'のです',
headword: 'の',
reading: 'ノデス',
pos1: '名詞|助動詞',
pos2: '非自立',
}),
makeToken({
surface: 'なんです',
headword: 'だ',
reading: 'ナンデス',
pos1: '助動詞|名詞|助動詞',
pos2: '|非自立',
}),
makeToken({
surface: 'んでした',
headword: 'ん',
reading: 'ンデシタ',
pos1: '助動詞|助動詞|助動詞',
}),
makeToken({
surface: 'のでは',
headword: 'の',
reading: 'ノデハ',
pos1: '助詞|接続詞',
}),
];
for (const token of tokens) {
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
}
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory pondering endings', () => {
const token = makeToken({
surface: 'のかな',
headword: 'の',
reading: 'ノカナ',
pos1: '名詞|助動詞',
pos2: '非自立',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes auxiliary-stem そうだ grammar tails', () => {
const token = makeToken({
surface: 'そうだ',
headword: 'そうだ',
reading: 'ソウダ',
pos1: '名詞|助動詞',
pos2: '特殊',
pos3: '助動詞語幹',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside explanatory ending family', () => {
const token = makeToken({
surface: '問題',
headword: '問題',
reading: 'モンダイ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '一般',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
const tokens = [
makeToken({
surface: 'は',
headword: 'は',
reading: 'ハ',
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
}),
makeToken({
surface: 'です',
headword: 'です',
reading: 'デス',
partOfSpeech: PartOfSpeech.bound_auxiliary,
pos1: '助動詞',
}),
makeToken({
surface: 'この',
headword: 'この',
reading: 'コノ',
partOfSpeech: PartOfSpeech.other,
pos1: '連体詞',
}),
];
for (const token of tokens) {
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
}
});
test('shouldExcludeTokenFromSubtitleAnnotations keeps mixed content tokens with trailing helpers', () => {
const token = makeToken({
surface: '行きます',
headword: '行く',
reading: 'イキマス',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞|助動詞',
pos2: '自立',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes merged lexical tokens with trailing quote particles', () => {
const token = makeToken({
surface: 'どうしてもって',
headword: 'どうしても',
reading: 'ドウシテモッテ',
partOfSpeech: PartOfSpeech.other,
pos1: '副詞|助詞',
pos2: '一般|格助詞',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('shouldExcludeTokenFromSubtitleAnnotations excludes kana-only demonstrative helper merges', () => {
const token = makeToken({
surface: 'これで',
headword: 'これ',
reading: 'コレデ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞|助詞',
pos2: '代名詞|格助詞',
});
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
});
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
const token = makeToken({
surface: 'は',
headword: 'は',
reading: 'ハ',
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
isKnown: true,
isNPlusOneTarget: true,
isNameMatch: true,
jlptLevel: 'N5',
frequencyRank: 12,
});
assert.deepEqual(stripSubtitleAnnotationMetadata(token), {
...token,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: false,
jlptLevel: undefined,
frequencyRank: undefined,
});
});
test('stripSubtitleAnnotationMetadata leaves content tokens unchanged', () => {
const token = makeToken({
surface: '猫',
headword: '猫',
reading: 'ネコ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
isKnown: true,
jlptLevel: 'N5',
frequencyRank: 42,
});
assert.strictEqual(stripSubtitleAnnotationMetadata(token), token);
});
test('annotateTokens prioritizes name matches over n+1, frequency, and JLPT when enabled', () => {
let jlptLookupCalls = 0;
const tokens = [
makeToken({
surface: 'オリヴィア',
reading: 'オリヴィア',
headword: 'オリヴィア',
isNameMatch: true,
frequencyRank: 42,
startPos: 0,
endPos: 5,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
getJlptLevel: () => {
jlptLookupCalls += 1;
return 'N2';
},
}),
{
nameMatchEnabled: true,
minSentenceWordsForNPlusOne: 1,
},
);
assert.equal(result[0]?.isNameMatch, true);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
assert.equal(jlptLookupCalls, 0);
});
test('annotateTokens keeps other annotations for name matches when name highlighting is disabled', () => {
let jlptLookupCalls = 0;
const tokens = [
makeToken({
surface: 'オリヴィア',
reading: 'オリヴィア',
headword: 'オリヴィア',
isNameMatch: true,
frequencyRank: 42,
startPos: 0,
endPos: 5,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
getJlptLevel: () => {
jlptLookupCalls += 1;
return 'N2';
},
}),
{
nameMatchEnabled: false,
minSentenceWordsForNPlusOne: 1,
},
);
assert.equal(result[0]?.isNameMatch, true);
assert.equal(result[0]?.isNPlusOneTarget, true);
assert.equal(result[0]?.frequencyRank, 42);
assert.equal(result[0]?.jlptLevel, 'N2');
assert.equal(jlptLookupCalls, 1);
});
test('annotateTokens N+1 handoff marks expected target when threshold is satisfied', () => {
const tokens = [
makeToken({ surface: '私', headword: '私', startPos: 0, endPos: 1 }),
@@ -206,8 +506,8 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
);
assert.equal(result[0]?.isKnown, false);
assert.equal(result[1]?.isKnown, true);
assert.equal(result[2]?.isKnown, true);
assert.equal(result[1]?.isKnown, false);
assert.equal(result[2]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
});
@@ -293,6 +593,32 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+
assert.equal(result[0]?.isNPlusOneTarget, false);
});
test('annotateTokens clears all annotations for non-independent kanji noun tokens under unified gate', () => {
const tokens = [
makeToken({
surface: '者',
reading: 'もの',
headword: '者',
partOfSpeech: PartOfSpeech.other,
pos1: '名詞',
pos2: '非自立',
pos3: '一般',
startPos: 0,
endPos: 1,
frequencyRank: 475,
}),
];
const result = annotateTokens(tokens, makeDeps(), {
minSentenceWordsForNPlusOne: 1,
});
assert.equal(result[0]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
});
test('annotateTokens excludes likely kana SFX tokens from frequency when POS tags are missing', () => {
const tokens = [
makeToken({
@@ -444,3 +770,33 @@ test('annotateTokens excludes composite tokens when all component pos tags are e
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.isNPlusOneTarget, false);
});
test('annotateTokens applies one shared exclusion gate across known N+1 frequency and JLPT', () => {
const tokens = [
makeToken({
surface: 'これで',
headword: 'これ',
reading: 'コレデ',
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞|助詞',
pos2: '代名詞|格助詞',
startPos: 0,
endPos: 3,
frequencyRank: 9,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
isKnownWord: (text) => text === 'これ',
getJlptLevel: (text) => (text === 'これ' ? 'N5' : null),
}),
{ minSentenceWordsForNPlusOne: 1 },
);
assert.equal(result[0]?.isKnown, false);
assert.equal(result[0]?.isNPlusOneTarget, false);
assert.equal(result[0]?.frequencyRank, undefined);
assert.equal(result[0]?.jlptLevel, undefined);
});
+387 -88
View File
@@ -9,11 +9,65 @@ import {
} from '../../../token-pos2-exclusions';
import { JlptLevel, MergedToken, NPlusOneMatchMode, PartOfSpeech } from '../../../types';
import { shouldIgnoreJlptByTerm, shouldIgnoreJlptForMecabPos1 } from '../jlpt-token-filter';
import {
shouldExcludeTokenFromSubtitleAnnotations as sharedShouldExcludeTokenFromSubtitleAnnotations,
stripSubtitleAnnotationMetadata as sharedStripSubtitleAnnotationMetadata,
} from './subtitle-annotation-filter';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
// Two-character kana terms excluded from subtitle annotations.
// NOTE(review): no use of this set is visible in this section; the file also
// imports shared filter helpers from './subtitle-annotation-filter' - confirm
// whether this table is still consulted here.
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'ああ',
'ええ',
'うう',
'おお',
'はあ',
'はは',
'へえ',
'ふう',
'ほう',
]);
// Building blocks for the explanatory-ending table below: every ending is
// prefix + core + trailing particle.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
'だ',
'です',
'でした',
'だった',
'では',
'じゃ',
'でしょう',
'だろう',
] as const;
// The empty string keeps the bare prefix + core form in the generated set.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
'',
'か',
'ね',
'よ',
'な',
'よね',
'かな',
'かね',
] as const;
// Cartesian product of the three lists above (4 x 8 x 8 concatenations),
// e.g. 'んです', 'のだ', 'のかな'.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
(particle) => `${prefix}${core}${particle}`,
),
),
),
);
// Quote-particle tails checked against the part of a merged token's surface
// that follows its headword (see isExcludedTrailingParticleMergedToken).
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
'って',
'ってよ',
'ってね',
'ってな',
'ってさ',
'ってか',
'ってば',
]);
const jlptLevelLookupCaches = new WeakMap<
(text: string) => JlptLevel | null,
@@ -28,6 +82,7 @@ export interface AnnotationStageDeps {
export interface AnnotationStageOptions {
nPlusOneEnabled?: boolean;
nameMatchEnabled?: boolean;
jlptEnabled?: boolean;
frequencyEnabled?: boolean;
minSentenceWordsForNPlusOne?: number;
@@ -43,33 +98,27 @@ function resolveKnownWordText(
return matchMode === 'surface' ? surface : headword;
}
/**
 * Returns copies of `tokens` with `isKnown` refreshed.
 *
 * A token stays known when it already was; otherwise it becomes known when
 * its match text (surface or headword, per `knownWordMatchMode`) is non-empty
 * and accepted by `isKnownWord`.
 */
function applyKnownWordMarking(
  tokens: MergedToken[],
  isKnownWord: (text: string) => boolean,
  knownWordMatchMode: NPlusOneMatchMode,
): MergedToken[] {
  const markToken = (token: MergedToken): MergedToken => {
    const lookupText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
    const matchesKnownWord = lookupText ? isKnownWord(lookupText) : false;
    return { ...token, isKnown: token.isKnown || matchesKnownWord };
  };

  return tokens.map((token) => markToken(token));
}
/**
 * Normalizes a pos1 tag: trims surrounding whitespace and maps a missing
 * (non-string) value to an empty string.
 */
function normalizePos1Tag(pos1: string | undefined): string {
  if (typeof pos1 !== 'string') {
    return '';
  }
  return pos1.trim();
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const SUBTITLE_ANNOTATION_EXCLUDED_POS1 = new Set(['感動詞']);
const SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1 = new Set(['助詞', '助動詞', '連体詞']);
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
/**
 * Splits a `|`-separated tag string into its trimmed, non-empty parts.
 *
 * @returns An empty array for an empty tag string or one that contains only
 *   separators and whitespace.
 */
function splitNormalizedTagParts(normalizedTag: string): string[] {
  if (!normalizedTag) {
    return [];
  }

  return normalizedTag
    .split('|')
    .map((part) => part.trim())
    .filter((part) => part.length > 0);
}
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
const parts = splitNormalizedTagParts(normalizedTag);
if (parts.length === 0) {
return false;
}
@@ -78,6 +127,50 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
return parts.some((part) => exclusions.has(part));
}
/**
 * Decides from pos1 alone whether a token is excluded from subtitle
 * annotations.
 *
 * A token is excluded when any pos1 part is in
 * `SUBTITLE_ANNOTATION_EXCLUDED_POS1`, or when it has at least one part and
 * every part is in `SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1`.
 */
function isExcludedFromSubtitleAnnotationsByPos1(normalizedPos1: string): boolean {
  const pos1Parts = splitNormalizedTagParts(normalizedPos1);
  if (pos1Parts.length === 0) {
    return false;
  }

  let everyPartIsGrammarOnly = true;
  for (const part of pos1Parts) {
    if (SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part)) {
      return true;
    }
    if (!SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(part)) {
      everyPartIsGrammarOnly = false;
    }
  }

  return everyPartIsGrammarOnly;
}
/**
 * Detects merged tokens made of a non-grammar lead word followed only by
 * particles whose surface tail is one of the excluded quote-particle suffixes.
 *
 * All of the following must hold:
 * - the normalized surface starts with the normalized headword (both non-empty);
 * - the remainder of the surface is in
 *   `SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES`;
 * - pos1 has at least two parts, the first is not a grammar-only tag, and
 *   every later part is `助詞`.
 */
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
  const surface = normalizeJlptTextForExclusion(token.surface);
  const headword = normalizeJlptTextForExclusion(token.headword);
  if (!(surface && headword && surface.startsWith(headword))) {
    return false;
  }

  const surfaceTail = surface.slice(headword.length);
  if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(surfaceTail)) {
    return false;
  }

  const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
  if (pos1Parts.length < 2) {
    return false;
  }

  const leadPos1 = pos1Parts[0];
  if (!leadPos1 || SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(leadPos1)) {
    return false;
  }

  // With two or more parts there is always at least one trailing part to check.
  return pos1Parts.slice(1).every((part) => part === '助詞');
}
/**
 * Detects tokens whose pos1 parts are all in
 * `AUXILIARY_STEM_GRAMMAR_TAIL_POS1` (at least one part required) and whose
 * pos3 parts include `助動詞語幹`.
 */
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
  const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
  const hasOnlyGrammarTailPos1 =
    pos1Parts.length > 0 && pos1Parts.every((part) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(part));

  // pos3 goes through normalizePos2Tag, which only trims the tag string.
  return (
    hasOnlyGrammarTailPos1 &&
    splitNormalizedTagParts(normalizePos2Tag(token.pos3)).includes('助動詞語幹')
  );
}
function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
if (options.pos1Exclusions) {
return options.pos1Exclusions;
@@ -98,6 +191,61 @@ function normalizePos2Tag(pos2: string | undefined): string {
return typeof pos2 === 'string' ? pos2.trim() : '';
}
/**
 * Reports whether `text` contains at least one character in the ranges
 * U+3400-U+4DBF, U+4E00-U+9FFF or U+F900-U+FAFF.
 *
 * All three ranges lie in the Basic Multilingual Plane, so a code-unit regex
 * matches exactly the same characters as a per-code-point scan.
 */
function hasKanjiChar(text: string): boolean {
  return /[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/.test(text);
}
/**
 * Reports whether a single token component is excluded: its pos1 tag is in
 * `pos1Exclusions` or its pos2 tag is in `pos2Exclusions`. Missing tags never
 * match.
 */
function isExcludedComponent(
  pos1: string | undefined,
  pos2: string | undefined,
  pos1Exclusions: ReadonlySet<string>,
  pos2Exclusions: ReadonlySet<string>,
): boolean {
  if (typeof pos1 === 'string' && pos1Exclusions.has(pos1)) {
    return true;
  }
  return typeof pos2 === 'string' && pos2Exclusions.has(pos2);
}
/**
 * Reports whether a merged token is "content-led": it has at least two pos1
 * components, its first component is not excluded, and every later component
 * is excluded.
 *
 * Components are paired by index across the pos1 and pos2 part lists; the
 * longer list decides how many components are checked.
 */
function shouldAllowContentLedMergedTokenFrequency(
  normalizedPos1: string,
  normalizedPos2: string,
  pos1Exclusions: ReadonlySet<string>,
  pos2Exclusions: ReadonlySet<string>,
): boolean {
  const pos1Parts = splitNormalizedTagParts(normalizedPos1);
  if (pos1Parts.length < 2) {
    return false;
  }
  const pos2Parts = splitNormalizedTagParts(normalizedPos2);

  const isComponentExcluded = (index: number): boolean =>
    isExcludedComponent(pos1Parts[index], pos2Parts[index], pos1Exclusions, pos2Exclusions);

  if (isComponentExcluded(0)) {
    return false;
  }

  const componentCount = Math.max(pos1Parts.length, pos2Parts.length);
  const trailingIndexes = Array.from({ length: componentCount - 1 }, (_, offset) => offset + 1);
  return trailingIndexes.every((index) => isComponentExcluded(index));
}
function isFrequencyExcludedByPos(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
@@ -109,13 +257,20 @@ function isFrequencyExcludedByPos(
const normalizedPos1 = normalizePos1Tag(token.pos1);
const hasPos1 = normalizedPos1.length > 0;
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
const allowContentLedMergedToken = shouldAllowContentLedMergedTokenFrequency(
normalizedPos1,
normalizedPos2,
pos1Exclusions,
pos2Exclusions,
);
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions) && !allowContentLedMergedToken) {
return true;
}
const normalizedPos2 = normalizePos2Tag(token.pos2);
const hasPos2 = normalizedPos2.length > 0;
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions) && !allowContentLedMergedToken) {
return true;
}
@@ -133,26 +288,43 @@ function isFrequencyExcludedByPos(
);
}
function applyFrequencyMarking(
tokens: MergedToken[],
function shouldKeepFrequencyForNonIndependentKanjiNoun(
token: MergedToken,
pos1Exclusions: ReadonlySet<string>,
pos2Exclusions: ReadonlySet<string>,
): MergedToken[] {
return tokens.map((token) => {
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
return { ...token, frequencyRank: undefined };
}
): boolean {
if (pos1Exclusions.has('名詞')) {
return false;
}
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
const rank = Math.max(1, Math.floor(token.frequencyRank));
return { ...token, frequencyRank: rank };
}
const rank =
typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)
? Math.max(1, Math.floor(token.frequencyRank))
: null;
if (rank === null) {
return false;
}
return {
...token,
frequencyRank: undefined,
};
});
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
const pos2Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos2));
if (pos1Parts.length !== 1 || pos2Parts.length !== 1) {
return false;
}
if (pos1Parts[0] !== '名詞' || pos2Parts[0] !== '非自立') {
return false;
}
return hasKanjiChar(token.surface) || hasKanjiChar(token.headword);
}
/**
 * Tells callers whether a token should be left out of vocabulary
 * persistence, using the same part-of-speech rule as frequency filtering
 * (isFrequencyExcludedByPos).
 *
 * @param token Token to classify.
 * @param options Optional pos1 / pos2 exclusion sets; each one is resolved
 *   through resolvePos1Exclusions / resolvePos2Exclusions.
 * @returns The result of isFrequencyExcludedByPos for the resolved sets.
 */
export function shouldExcludeTokenFromVocabularyPersistence(
  token: MergedToken,
  options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
  const pos1Exclusions = resolvePos1Exclusions(options);
  const pos2Exclusions = resolvePos2Exclusions(options);
  return isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions);
}
function getCachedJlptLevel(
@@ -312,6 +484,23 @@ function isReduplicatedKanaSfx(text: string): boolean {
return chars.slice(0, half).join('') === chars.slice(half).join('');
}
/**
 * Like isReduplicatedKanaSfx, but also accepts the reduplicated form
 * followed by a single trailing 'と'.
 *
 * @param text Raw text; it is normalized with normalizeJlptTextForExclusion
 *   before any check.
 * @returns true when the normalized text, or the normalized text without a
 *   final 'と', satisfies isReduplicatedKanaSfx.
 */
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
  const normalized = normalizeJlptTextForExclusion(text);
  if (!normalized) {
    return false;
  }
  if (isReduplicatedKanaSfx(normalized)) {
    return true;
  }

  const hasDetachableTo = normalized.length > 1 && normalized.endsWith('と');
  return hasDetachableTo && isReduplicatedKanaSfx(normalized.slice(0, -1));
}
function hasAdjacentKanaRepeat(text: string): boolean {
const normalized = normalizeJlptTextForExclusion(text);
if (!normalized) {
@@ -386,12 +575,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return false;
}
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.reading,
token.headword,
].filter(
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -414,24 +598,110 @@ function isJlptEligibleToken(token: MergedToken): boolean {
return true;
}
function applyJlptMarking(
tokens: MergedToken[],
getJlptLevel: (text: string) => JlptLevel | null,
): MergedToken[] {
return tokens.map((token) => {
if (!isJlptEligibleToken(token)) {
return { ...token, jlptLevel: undefined };
function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
const candidates = [token.surface, token.reading, resolveJlptLookupText(token)].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
for (const candidate of candidates) {
const trimmedCandidate = candidate.trim();
if (!trimmedCandidate) {
continue;
}
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
const fallbackLevel =
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
const normalizedCandidate = normalizeJlptTextForExclusion(trimmedCandidate);
if (!normalizedCandidate) {
continue;
}
return {
...token,
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
};
});
if (
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalizedCandidate)
) {
return true;
}
if (
isTrailingSmallTsuKanaSfx(trimmedCandidate) ||
isTrailingSmallTsuKanaSfx(normalizedCandidate) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(trimmedCandidate) ||
isReduplicatedKanaSfxWithOptionalTrailingTo(normalizedCandidate)
) {
return true;
}
}
return false;
}
/**
 * Public entry point that forwards to
 * sharedShouldExcludeTokenFromSubtitleAnnotations.
 *
 * Only the token is forwarded; no exclusion options are passed along.
 *
 * @param token Token to classify.
 * @returns Whatever the shared implementation returns for `token`.
 */
export function shouldExcludeTokenFromSubtitleAnnotations(token: MergedToken): boolean {
return sharedShouldExcludeTokenFromSubtitleAnnotations(token);
}
/**
 * Public entry point that forwards to sharedStripSubtitleAnnotationMetadata.
 *
 * Only the token is forwarded; no exclusion options are passed along.
 *
 * @param token Token whose annotation metadata may be stripped.
 * @returns The token returned by the shared implementation.
 */
export function stripSubtitleAnnotationMetadata(token: MergedToken): MergedToken {
return sharedStripSubtitleAnnotationMetadata(token);
}
/**
 * Determines whether a token counts as a known word.
 *
 * The token is known when it is already flagged `isKnown`, when the text
 * chosen by resolveKnownWordText is reported as known, or — as a last
 * resort — when its trimmed reading differs from that text and is itself
 * reported as known.
 *
 * @param token Token to evaluate.
 * @param isKnownWord Lookup that reports whether a text is a known word.
 * @param knownWordMatchMode Mode handed to resolveKnownWordText to choose
 *   the text to look up.
 * @returns true when any of the checks above succeeds.
 */
function computeTokenKnownStatus(
  token: MergedToken,
  isKnownWord: (text: string) => boolean,
  knownWordMatchMode: NPlusOneMatchMode,
): boolean {
  const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
  const knownByMatchText = token.isKnown || (matchText ? isKnownWord(matchText) : false);
  if (knownByMatchText) {
    return true;
  }

  const readingText = token.reading.trim();
  if (readingText.length === 0) {
    return false;
  }
  // Skip the reading lookup when it would repeat the lookup already made.
  if (readingText === matchText.trim()) {
    return false;
  }
  return isKnownWord(readingText);
}
/**
 * Produces the frequency rank to keep on a token, or undefined to drop it.
 *
 * The rank is dropped when the token is excluded by part of speech and
 * shouldKeepFrequencyForNonIndependentKanjiNoun does not rescue it.
 * Otherwise a finite numeric rank is floored and clamped to at least 1;
 * anything else yields undefined.
 *
 * @param token Token carrying the candidate `frequencyRank`.
 * @param pos1Exclusions Excluded pos1 values.
 * @param pos2Exclusions Excluded pos2 values.
 * @returns A positive integer rank, or undefined.
 */
function filterTokenFrequencyRank(
  token: MergedToken,
  pos1Exclusions: ReadonlySet<string>,
  pos2Exclusions: ReadonlySet<string>,
): number | undefined {
  const excludedByPos = isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions);
  if (excludedByPos && !shouldKeepFrequencyForNonIndependentKanjiNoun(token, pos1Exclusions)) {
    return undefined;
  }

  const rawRank = token.frequencyRank;
  return typeof rawRank === 'number' && Number.isFinite(rawRank)
    ? Math.max(1, Math.floor(rawRank))
    : undefined;
}
/**
 * Resolves the JLPT level to attach to a token.
 *
 * Ineligible tokens (per isJlptEligibleToken) get undefined. Otherwise the
 * level is taken, in order, from: the lookup text returned by
 * resolveJlptLookupText, the token surface (only consulted when the first
 * lookup returned null), and finally the level already on the token.
 *
 * @param token Token to annotate.
 * @param getJlptLevel Level lookup, invoked through getCachedJlptLevel.
 * @returns The resolved level, or undefined when none is available.
 */
function computeTokenJlptLevel(
  token: MergedToken,
  getJlptLevel: (text: string) => JlptLevel | null,
): JlptLevel | undefined {
  if (!isJlptEligibleToken(token)) {
    return undefined;
  }

  const lookupLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
  if (lookupLevel != null) {
    return lookupLevel;
  }

  // The surface lookup only happens for an explicit null miss.
  const surfaceLevel =
    lookupLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
  return surfaceLevel ?? token.jlptLevel ?? undefined;
}
/**
 * Reports whether a token's name match should take priority.
 *
 * @param token Token whose `isNameMatch` flag is read.
 * @param options Name matching counts as enabled unless `nameMatchEnabled`
 *   is explicitly false.
 * @returns true when name matching is enabled and `token.isNameMatch` is
 *   strictly true.
 */
function hasPrioritizedNameMatch(
  token: MergedToken,
  options: Pick<AnnotationStageOptions, 'nameMatchEnabled'>,
): boolean {
  if (options.nameMatchEnabled === false) {
    return false;
  }
  return token.isNameMatch === true;
}
export function annotateTokens(
@@ -442,36 +712,50 @@ export function annotateTokens(
const pos1Exclusions = resolvePos1Exclusions(options);
const pos2Exclusions = resolvePos2Exclusions(options);
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
const knownMarkedTokens = nPlusOneEnabled
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
: tokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
const nameMatchEnabled = options.nameMatchEnabled !== false;
const frequencyEnabled = options.frequencyEnabled !== false;
const frequencyMarkedTokens = frequencyEnabled
? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
: knownMarkedTokens.map((token) => ({
...token,
frequencyRank: undefined,
}));
const jlptEnabled = options.jlptEnabled !== false;
const jlptMarkedTokens = jlptEnabled
? applyJlptMarking(frequencyMarkedTokens, deps.getJlptLevel)
: frequencyMarkedTokens.map((token) => ({
...token,
jlptLevel: undefined,
}));
// Single pass: compute known word status, frequency filtering, and JLPT level together
const annotated = tokens.map((token) => {
if (
sharedShouldExcludeTokenFromSubtitleAnnotations(token, {
pos1Exclusions,
pos2Exclusions,
})
) {
return sharedStripSubtitleAnnotationMetadata(token, {
pos1Exclusions,
pos2Exclusions,
});
}
const prioritizedNameMatch = nameMatchEnabled && token.isNameMatch === true;
const isKnown = nPlusOneEnabled
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
: false;
const frequencyRank =
frequencyEnabled && !prioritizedNameMatch
? filterTokenFrequencyRank(token, pos1Exclusions, pos2Exclusions)
: undefined;
const jlptLevel =
jlptEnabled && !prioritizedNameMatch
? computeTokenJlptLevel(token, deps.getJlptLevel)
: undefined;
return {
...token,
isKnown,
isNPlusOneTarget: nPlusOneEnabled && !prioritizedNameMatch ? token.isNPlusOneTarget : false,
frequencyRank,
jlptLevel,
};
});
if (!nPlusOneEnabled) {
return jlptMarkedTokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
return annotated;
}
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
@@ -482,10 +766,25 @@ export function annotateTokens(
? minSentenceWordsForNPlusOne
: 3;
return markNPlusOneTargets(
jlptMarkedTokens,
const nPlusOneMarked = markNPlusOneTargets(
annotated,
sanitizedMinSentenceWordsForNPlusOne,
pos1Exclusions,
pos2Exclusions,
);
if (!nameMatchEnabled) {
return nPlusOneMarked;
}
return nPlusOneMarked.map((token) =>
hasPrioritizedNameMatch(token, options)
? {
...token,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
}
: token,
);
}
@@ -212,3 +212,57 @@ test('merges trailing katakana continuation without headword into previous token
],
);
});
// Regression: merged content+function token candidate must not beat a multi-token split
// candidate that preserves the content token as a standalone frequency-eligible unit.
// Background: Yomitan scanning can produce a single-token candidate where a content word
// is merged with trailing function particles (e.g. かかってこいよ → headword かかってくる).
// When a competing multi-token candidate splits content and function separately, the
// multi-token candidate should win so the content token remains frequency-highlightable.
test('multi-token candidate beats single merged content+function token candidate (frequency regression)', () => {
// Candidate A: single merged token — content verb fused with trailing sentence-final particle
// This is the "bad" candidate: downstream annotation would exclude frequency for the whole
// token because the merged pos1 would contain a function-word component.
const mergedCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
]);
// Candidate B: two tokens — content verb surface + particle separately.
// The content token is frequency-eligible on its own.
const splitCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
]);
// When merged candidate comes first in the array, multi-token split still wins.
const tokens = selectYomitanParseTokens(
[mergedCandidate, splitCandidate],
() => false,
'headword',
);
assert.equal(tokens?.length, 2);
assert.equal(tokens?.[0]?.surface, 'かかってこい');
assert.equal(tokens?.[0]?.headword, 'かかってくる');
assert.equal(tokens?.[1]?.surface, 'よ');
});
// Order-independence check: the two-token split candidate must be selected
// even when it is listed before the single merged candidate.
test('multi-token candidate beats single merged content+function token regardless of input order', () => {
// Single token covering the whole surface, including the trailing よ.
const mergedCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
]);
// Two tokens: the verb surface and the trailing よ as separate tokens.
const splitCandidate = makeParseItem('scanning-parser', [
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
]);
// Split candidate comes first — should still win over merged.
const tokens = selectYomitanParseTokens(
[splitCandidate, mergedCandidate],
() => false,
'headword',
);
// Two tokens in the result means the split candidate was chosen.
assert.equal(tokens?.length, 2);
assert.equal(tokens?.[0]?.surface, 'かかってこい');
assert.equal(tokens?.[1]?.surface, 'よ');
});
@@ -0,0 +1,56 @@
import { PartOfSpeech } from '../../../types';
/**
 * Normalizes a raw part-of-speech tag.
 *
 * @param value Raw tag; may be null or undefined.
 * @returns The trimmed tag, or '' when `value` is not a string.
 */
function normalizePosTag(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Type guard: checks that `value` is a string equal to one of the values of
 * the PartOfSpeech enum object.
 *
 * @param value Arbitrary input to test.
 * @returns true when `value` is a string found in Object.values(PartOfSpeech).
 */
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
  if (typeof value !== 'string') {
    return false;
  }
  const members = Object.values(PartOfSpeech);
  return members.includes(value as PartOfSpeech);
}
/** Lookup from a trimmed MeCab pos1 tag to the PartOfSpeech it maps to. */
const MECAB_POS1_PART_OF_SPEECH: ReadonlyMap<string, PartOfSpeech> = new Map<
  string,
  PartOfSpeech
>([
  ['名詞', PartOfSpeech.noun],
  ['動詞', PartOfSpeech.verb],
  ['形容詞', PartOfSpeech.i_adjective],
  ['形状詞', PartOfSpeech.na_adjective],
  ['形容動詞', PartOfSpeech.na_adjective],
  ['助詞', PartOfSpeech.particle],
  ['助動詞', PartOfSpeech.bound_auxiliary],
  ['記号', PartOfSpeech.symbol],
  ['補助記号', PartOfSpeech.symbol],
]);

/**
 * Maps a single MeCab pos1 tag to a PartOfSpeech value.
 *
 * @param pos1 Raw pos1 tag; it is trimmed before lookup, and null/undefined
 *   behave like an empty tag.
 * @returns The mapped value, or PartOfSpeech.other for any tag that is not
 *   in the table (including the empty tag).
 */
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
  return MECAB_POS1_PART_OF_SPEECH.get(normalizePosTag(pos1)) ?? PartOfSpeech.other;
}
/**
 * Derives the PartOfSpeech to store for a token.
 *
 * When `pos1` has at least one non-empty '|'-separated part, each part is
 * mapped with mapMecabPos1ToPartOfSpeech: if every part maps to the same
 * value, that value is returned, otherwise PartOfSpeech.other. In this case
 * `partOfSpeech` is ignored.
 *
 * When `pos1` yields no parts, `partOfSpeech` is returned if it is a valid
 * PartOfSpeech value, otherwise PartOfSpeech.other.
 *
 * @param input Object carrying the optional `partOfSpeech` and `pos1` tags.
 * @returns The derived PartOfSpeech.
 */
export function deriveStoredPartOfSpeech(input: {
  partOfSpeech?: string | null;
  pos1?: string | null;
}): PartOfSpeech {
  const mecabTags = normalizePosTag(input.pos1)
    .split('|')
    .map((tag) => tag.trim())
    .filter((tag) => tag !== '');

  if (mecabTags.length === 0) {
    return isPartOfSpeechValue(input.partOfSpeech) ? input.partOfSpeech : PartOfSpeech.other;
  }

  // All components must agree on one value; mixed components collapse to "other".
  const [firstMapped, ...otherMapped] = mecabTags.map((tag) => mapMecabPos1ToPartOfSpeech(tag));
  const allAgree = otherMapped.every((candidate) => candidate === firstMapped);
  return allAgree && firstMapped !== undefined ? firstMapped : PartOfSpeech.other;
}
@@ -0,0 +1,352 @@
import {
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
resolveAnnotationPos1ExclusionSet,
} from '../../../token-pos1-exclusions';
import {
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
resolveAnnotationPos2ExclusionSet,
} from '../../../token-pos2-exclusions';
import { MergedToken, PartOfSpeech } from '../../../types';
import { shouldIgnoreJlptByTerm } from '../jlpt-token-filter';
// Subtracted from a katakana code point in normalizeKana to get the
// corresponding hiragana code point.
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
// Inclusive code-point range that normalizeKana converts to hiragana.
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
// Exact terms that exclude a token when its surface, reading or headword
// equals one of them (compared both trimmed and kana-normalized).
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'ああ',
'ええ',
'うう',
'おお',
'はあ',
'はは',
'へえ',
'ふう',
'ほう',
]);
// Building blocks for the explanatory-ending exclusion set below. An excluded
// ending has the shape prefix + core + trailing particle.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
'だ',
'です',
'でした',
'だった',
'では',
'じゃ',
'でしょう',
'だろう',
] as const;
// The empty string lets a bare prefix + core (no particle) be part of the set.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
'',
'か',
'ね',
'よ',
'な',
'よね',
'かな',
'かね',
] as const;
// Suffixes that isExcludedByTerm joins directly to a prefix (prefix + suffix,
// with no core in between).
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES = ['か', 'かな', 'かね'] as const;
// Every prefix + core + trailing-particle combination, precomputed once.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
(particle) => `${prefix}${core}${particle}`,
),
),
),
);
// Text that may follow the headword inside a merged token's surface for
// isExcludedTrailingParticleMergedToken to consider the token excluded.
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
'って',
'ってよ',
'ってね',
'ってな',
'ってさ',
'ってか',
'ってば',
]);
// pos1 components allowed in a token that isAuxiliaryStemGrammarTailToken
// treats as a grammar-only tail.
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
/**
 * Optional overrides for the part-of-speech exclusion sets used by the
 * subtitle-annotation filter.
 */
export interface SubtitleAnnotationFilterOptions {
/** Excluded pos1 values; when omitted, the set resolved from DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG is used. */
pos1Exclusions?: ReadonlySet<string>;
/** Excluded pos2 values; when omitted, the set resolved from DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG is used. */
pos2Exclusions?: ReadonlySet<string>;
}
/**
 * Normalizes a raw part-of-speech tag.
 *
 * @param pos Raw tag, possibly undefined.
 * @returns The trimmed tag, or '' when `pos` is not a string.
 */
function normalizePosTag(pos: string | undefined): string {
  if (typeof pos !== 'string') {
    return '';
  }
  return pos.trim();
}
/**
 * Splits a '|'-separated tag into its trimmed, non-empty components.
 *
 * @param normalizedTag Tag string; an empty string yields no components.
 * @returns Components in their original order, each trimmed, with blank
 *   components removed.
 */
function splitNormalizedTagParts(normalizedTag: string): string[] {
  const components: string[] = [];
  if (!normalizedTag) {
    return components;
  }
  for (const rawComponent of normalizedTag.split('|')) {
    const component = rawComponent.trim();
    if (component.length > 0) {
      components.push(component);
    }
  }
  return components;
}
/**
 * Checks whether a (possibly multi-part) tag is fully excluded.
 *
 * @param normalizedTag '|'-separated tag, already normalized.
 * @param exclusions Excluded tag values.
 * @returns true when the tag has at least one component and every
 *   component is in `exclusions`; false for a tag with no components.
 */
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
  const components = splitNormalizedTagParts(normalizedTag);
  const hasKeptComponent = components.some((component) => !exclusions.has(component));
  return components.length > 0 && !hasKeptComponent;
}
/**
 * Picks the pos1 exclusion set to use.
 *
 * @param options Filter options; defaults to an empty object.
 * @returns `options.pos1Exclusions` when provided, otherwise the set
 *   resolved from DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG.
 */
function resolvePos1Exclusions(
  options: SubtitleAnnotationFilterOptions = {},
): ReadonlySet<string> {
  return (
    options.pos1Exclusions ??
    resolveAnnotationPos1ExclusionSet(DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG)
  );
}
/**
 * Picks the pos2 exclusion set to use.
 *
 * @param options Filter options; defaults to an empty object.
 * @returns `options.pos2Exclusions` when provided, otherwise the set
 *   resolved from DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG.
 */
function resolvePos2Exclusions(
  options: SubtitleAnnotationFilterOptions = {},
): ReadonlySet<string> {
  return (
    options.pos2Exclusions ??
    resolveAnnotationPos2ExclusionSet(DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG)
  );
}
/**
 * Trims `text` and converts katakana to hiragana.
 *
 * Characters whose code point lies in KATAKANA_CODEPOINT_START..
 * KATAKANA_CODEPOINT_END are shifted down by KATAKANA_TO_HIRAGANA_OFFSET;
 * every other character is kept unchanged.
 *
 * @param text Raw text.
 * @returns The converted text, or '' when the trimmed input is empty.
 */
function normalizeKana(text: string): string {
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return '';
  }

  // Array.from walks the string by code point, like for...of.
  return Array.from(trimmed, (char) => {
    const codePoint = char.codePointAt(0);
    if (codePoint === undefined) {
      return '';
    }
    const isConvertibleKatakana =
      codePoint >= KATAKANA_CODEPOINT_START && codePoint <= KATAKANA_CODEPOINT_END;
    return isConvertibleKatakana
      ? String.fromCodePoint(codePoint - KATAKANA_TO_HIRAGANA_OFFSET)
      : char;
  }).join('');
}
/**
 * Tests whether the first code point of `char` is treated as kana.
 *
 * Accepted code points: U+3041–U+3096, U+309B–U+309F, and U+30A0–U+30FF
 * except U+30FB.
 *
 * @param char String whose first code point is inspected; '' yields false.
 * @returns true when that code point is in one of the accepted ranges.
 */
function isKanaChar(char: string): boolean {
  const codePoint = char.codePointAt(0);
  if (codePoint === undefined) {
    return false;
  }

  const inLowerHiraganaRange = codePoint >= 0x3041 && codePoint <= 0x3096;
  const inUpperHiraganaRange = codePoint >= 0x309b && codePoint <= 0x309f;
  // 0x30a0–0x30fa, 0x30fc and 0x30fd–0x30ff together: the whole span minus 0x30fb.
  const inKatakanaRange = codePoint >= 0x30a0 && codePoint <= 0x30ff && codePoint !== 0x30fb;
  return inLowerHiraganaRange || inUpperHiraganaRange || inKatakanaRange;
}
/**
 * Detects a short all-kana string that ends in a small っ.
 *
 * After normalizeKana, the text must be 2 to 4 characters long, consist
 * only of characters accepted by isKanaChar, and end with 'っ'.
 *
 * @param text Raw text.
 * @returns true when all three conditions hold.
 */
function isTrailingSmallTsuKanaSfx(text: string): boolean {
  const chars = Array.from(normalizeKana(text));
  const withinLength = chars.length >= 2 && chars.length <= 4;
  if (!withinLength) {
    return false;
  }
  const allKana = chars.every((char) => isKanaChar(char));
  return allKana && chars[chars.length - 1] === 'っ';
}
/**
 * Detects an all-kana string whose second half repeats its first half.
 *
 * After normalizeKana, the text must have an even length of at least 4
 * characters, consist only of characters accepted by isKanaChar, and its
 * two halves must be identical.
 *
 * @param text Raw text.
 * @returns true when the normalized text is such a doubled kana sequence.
 */
function isReduplicatedKanaSfx(text: string): boolean {
  const chars = Array.from(normalizeKana(text));
  const count = chars.length;
  if (count < 4 || count % 2 === 1) {
    return false;
  }
  if (chars.some((char) => !isKanaChar(char))) {
    return false;
  }

  // Compare the halves position by position.
  const half = count / 2;
  for (let offset = 0; offset < half; offset += 1) {
    if (chars[offset] !== chars[offset + half]) {
      return false;
    }
  }
  return true;
}
/**
 * Like isReduplicatedKanaSfx, but also accepts the doubled form followed by
 * a single trailing 'と'.
 *
 * @param text Raw text; it is passed through normalizeKana first.
 * @returns true when the normalized text, or the normalized text without a
 *   final 'と', satisfies isReduplicatedKanaSfx.
 */
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
  const normalized = normalizeKana(text);
  if (!normalized) {
    return false;
  }
  if (isReduplicatedKanaSfx(normalized)) {
    return true;
  }

  const hasDetachableTo = normalized.length > 1 && normalized.endsWith('と');
  return hasDetachableTo && isReduplicatedKanaSfx(normalized.slice(0, -1));
}
/**
 * Detects a merged token made of a headword followed by one of the
 * SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.
 *
 * Conditions, all evaluated on kana-normalized text:
 *  - the surface starts with the headword and the remainder is one of the
 *    listed suffixes;
 *  - pos1 has at least two components;
 *  - the first pos1 component is not in the default pos1 exclusion set;
 *  - every remaining pos1 component is '助詞'.
 *
 * NOTE(review): the leading component is checked against the default
 * exclusion set (resolvePos1Exclusions() with no options), not against
 * caller-supplied exclusions — confirm this is intended.
 *
 * @param token Merged token to inspect.
 * @returns true when every condition above holds.
 */
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
  const surface = normalizeKana(token.surface);
  const headword = normalizeKana(token.headword);
  const surfaceExtendsHeadword =
    surface !== '' && headword !== '' && surface.startsWith(headword);
  if (!surfaceExtendsHeadword) {
    return false;
  }

  const trailingText = surface.slice(headword.length);
  if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(trailingText)) {
    return false;
  }

  const posTags = splitNormalizedTagParts(normalizePosTag(token.pos1));
  if (posTags.length < 2) {
    return false;
  }

  const leadTag = posTags[0];
  if (!leadTag || resolvePos1Exclusions().has(leadTag)) {
    return false;
  }
  return posTags.slice(1).every((tag) => tag === '助詞');
}
/**
 * Reports whether a token is a grammar-only "auxiliary stem" tail.
 *
 * A token qualifies when it has at least one pos1 component, every pos1
 * component is in AUXILIARY_STEM_GRAMMAR_TAIL_POS1, and one of its pos3
 * components is exactly '助動詞語幹'.
 *
 * @param token Merged token whose pos1 / pos3 tags are inspected.
 * @returns true when both conditions hold.
 */
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
  const leadingTags = splitNormalizedTagParts(normalizePosTag(token.pos1));
  const everyTagIsGrammarTail =
    leadingTags.length > 0 &&
    leadingTags.every((tag) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(tag));
  if (!everyTagIsGrammarTail) {
    return false;
  }

  const detailTags = splitNormalizedTagParts(normalizePosTag(token.pos3));
  return detailTags.some((tag) => tag === '助動詞語幹');
}
/**
 * Checks whether any of the token's texts (surface, reading, headword)
 * matches a term-based exclusion rule.
 *
 * Each non-empty text is examined both trimmed and kana-normalized. It is
 * excluded when:
 *  - the normalized text is an explanatory prefix joined to a "thought"
 *    suffix;
 *  - either form is in SUBTITLE_ANNOTATION_EXCLUDED_TERMS or
 *    SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS, or is rejected by
 *    shouldIgnoreJlptByTerm;
 *  - either form is a trailing-small-っ or reduplicated kana sequence.
 *
 * @param token Token to inspect.
 * @returns true as soon as one text matches one rule.
 */
function isExcludedByTerm(token: MergedToken): boolean {
  const isThoughtEnding = (text: string): boolean =>
    SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.some((prefix) =>
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES.some(
        (suffix) => text === `${prefix}${suffix}`,
      ),
    );

  return [token.surface, token.reading, token.headword].some((candidate) => {
    if (typeof candidate !== 'string' || candidate.length === 0) {
      return false;
    }
    const trimmed = candidate.trim();
    if (!trimmed) {
      return false;
    }
    const normalized = normalizeKana(trimmed);
    if (!normalized) {
      return false;
    }

    return (
      isThoughtEnding(normalized) ||
      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmed) ||
      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalized) ||
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmed) ||
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalized) ||
      shouldIgnoreJlptByTerm(trimmed) ||
      shouldIgnoreJlptByTerm(normalized) ||
      isTrailingSmallTsuKanaSfx(trimmed) ||
      isTrailingSmallTsuKanaSfx(normalized) ||
      isReduplicatedKanaSfxWithOptionalTrailingTo(trimmed) ||
      isReduplicatedKanaSfxWithOptionalTrailingTo(normalized)
    );
  });
}
/**
 * Decides whether a token should receive no subtitle annotations.
 *
 * A token is excluded when any of these holds, checked in order:
 *  1. every pos1 component is in the pos1 exclusion set;
 *  2. every pos2 component is in the pos2 exclusion set;
 *  3. it has neither pos1 nor pos2 and its `partOfSpeech` is particle,
 *     bound_auxiliary or symbol;
 *  4. it is an auxiliary-stem grammar tail;
 *  5. it is a headword merged with an excluded trailing particle suffix;
 *  6. one of its texts matches a term-based exclusion.
 *
 * @param token Token to classify.
 * @param options Optional exclusion-set overrides; defaults are resolved
 *   when a set is omitted.
 * @returns true when the token should be excluded.
 */
export function shouldExcludeTokenFromSubtitleAnnotations(
  token: MergedToken,
  options: SubtitleAnnotationFilterOptions = {},
): boolean {
  const pos1Exclusions = resolvePos1Exclusions(options);
  const pos2Exclusions = resolvePos2Exclusions(options);
  const pos1Tag = normalizePosTag(token.pos1);
  const pos2Tag = normalizePosTag(token.pos2);

  if (
    isExcludedByTagSet(pos1Tag, pos1Exclusions) ||
    isExcludedByTagSet(pos2Tag, pos2Exclusions)
  ) {
    return true;
  }

  // Without any pos tags, fall back to the coarse partOfSpeech value.
  const lacksPosTags = pos1Tag.length === 0 && pos2Tag.length === 0;
  const isGrammarPartOfSpeech =
    token.partOfSpeech === PartOfSpeech.particle ||
    token.partOfSpeech === PartOfSpeech.bound_auxiliary ||
    token.partOfSpeech === PartOfSpeech.symbol;
  if (lacksPosTags && isGrammarPartOfSpeech) {
    return true;
  }

  return (
    isAuxiliaryStemGrammarTailToken(token) ||
    isExcludedTrailingParticleMergedToken(token) ||
    isExcludedByTerm(token)
  );
}
/**
 * Clears annotation metadata from tokens that are excluded from subtitle
 * annotations.
 *
 * @param token Token to process.
 * @param options Optional exclusion-set overrides, forwarded to
 *   shouldExcludeTokenFromSubtitleAnnotations.
 * @returns The same token object when it is not excluded; otherwise a copy
 *   with `isKnown`, `isNPlusOneTarget` and `isNameMatch` set to false and
 *   `jlptLevel` / `frequencyRank` set to undefined.
 */
export function stripSubtitleAnnotationMetadata(
  token: MergedToken,
  options: SubtitleAnnotationFilterOptions = {},
): MergedToken {
  const excluded = shouldExcludeTokenFromSubtitleAnnotations(token, options);
  return excluded
    ? {
        ...token,
        isKnown: false,
        isNPlusOneTarget: false,
        isNameMatch: false,
        jlptLevel: undefined,
        frequencyRank: undefined,
      }
    : token;
}
@@ -188,6 +188,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
{
term: '猫',
reading: 'ねこ',
hasReading: true,
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 77,
@@ -197,6 +198,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
{
term: '鍛える',
reading: 'きたえる',
hasReading: false,
dictionary: 'freq-dict',
dictionaryPriority: 1,
frequency: 46961,
@@ -217,9 +219,11 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
assert.equal(result.length, 2);
assert.equal(result[0]?.term, '猫');
assert.equal(result[0]?.hasReading, true);
assert.equal(result[0]?.frequency, 77);
assert.equal(result[0]?.dictionaryPriority, 0);
assert.equal(result[1]?.term, '鍛える');
assert.equal(result[1]?.hasReading, false);
assert.equal(result[1]?.frequency, 2847);
assert.match(scriptValue, /getTermFrequencies/);
assert.match(scriptValue, /optionsGetFull/);
@@ -247,6 +251,96 @@ test('requestYomitanTermFrequencies prefers primary rank from displayValue array
assert.equal(result[0]?.frequency, 7141);
});
// The raw `frequency` (121) equals the second number in displayValue
// ('118,121'); the expected result is the first number, 118.
test('requestYomitanTermFrequencies prefers primary rank from displayValue string pair when raw frequency matches trailing count', async () => {
const deps = createDeps(async () => [
{
term: '潜む',
reading: 'ひそむ',
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 121,
displayValue: '118,121',
displayValueParsed: false,
},
]);
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
error: () => undefined,
});
assert.equal(result.length, 1);
assert.equal(result[0]?.term, '潜む');
assert.equal(result[0]?.frequency, 118);
});
// Pins that only the digits before the first comma of displayValue are used:
// '1,234' yields 1, not 1234, even though the raw frequency is 1234.
test('requestYomitanTermFrequencies uses leading display digits for displayValue strings', async () => {
const deps = createDeps(async () => [
{
term: '例',
reading: 'れい',
dictionary: 'freq-dict',
dictionaryPriority: 0,
frequency: 1234,
displayValue: '1,234',
displayValueParsed: false,
},
]);
const result = await requestYomitanTermFrequencies([{ term: '例', reading: 'れい' }], deps, {
error: () => undefined,
});
assert.equal(result.length, 1);
assert.equal(result[0]?.term, '例');
assert.equal(result[0]?.frequency, 1);
});
// A dictionary whose frequency mode is 'occurrence-based' must contribute no
// entries, and the metadata script must query getDictionaryInfo.
test('requestYomitanTermFrequencies ignores occurrence-based dictionaries for rank tagging', async () => {
let metadataScript = '';
// The stub dispatches on a marker substring found in the injected script.
const deps = createDeps(async (script) => {
if (script.includes('getTermFrequencies')) {
return [
{
term: '潜む',
reading: 'ひそむ',
dictionary: 'CC100',
frequency: 118121,
displayValue: null,
displayValueParsed: false,
},
];
}
if (script.includes('optionsGetFull')) {
// Keep the metadata script so its contents can be asserted below.
metadataScript = script;
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['CC100'],
dictionaryPriorityByName: { CC100: 0 },
dictionaryFrequencyModeByName: { CC100: 'occurrence-based' },
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [{ name: 'CC100', enabled: true, id: 0 }],
},
},
],
};
}
return [];
});
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
error: () => undefined,
});
assert.deepEqual(result, []);
assert.match(metadataScript, /getDictionaryInfo/);
});
test('requestYomitanTermFrequencies requests term-only fallback only after reading miss', async () => {
const frequencyScripts: string[] = [];
const deps = createDeps(async (script) => {
@@ -485,6 +579,317 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of
assert.match(scannerScript ?? '', /deinflect:\s*true/);
});
// Two-phase test: first capture the scanner script that
// requestYomitanScanTokens would inject, then run that script against a
// stubbed termsFind backend and check the token it produces.
test('requestYomitanScanTokens extracts best frequency rank from selected termsFind entry', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
// Capture the scanner script; its result here is irrelevant.
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('潜み', deps, {
error: () => undefined,
});
// Execute the captured script with a fake termsFind handler.
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('潜み')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 2,
dictionaryEntries: [
{
headwords: [
{
term: '潜む',
reading: 'ひそむ',
sources: [{ originalText: '潜み', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 20181,
displayValue: '4073,20181句',
},
{
headwordIndex: 0,
dictionary: 'Jiten',
frequency: 28594,
displayValue: '4592,28594句',
},
{
headwordIndex: 0,
dictionary: 'CC100',
frequency: 118121,
displayValue: null,
},
],
},
],
};
});
// Expected rank 4073 is the leading number of the displayValue belonging to
// the priority-0 dictionary (JPDBv2㋕).
assert.deepEqual(result, [
{
surface: '潜み',
reading: 'ひそ',
headword: '潜む',
startPos: 0,
endPos: 2,
isNameMatch: false,
frequencyRank: 4073,
},
]);
});
// The first exact-match entry carries no frequencies; the rank must come from
// the second entry for the same headword.
test('requestYomitanScanTokens uses frequency from later exact-match entry when first exact entry has none', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
// Capture the scanner script; its result here is irrelevant.
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('者', deps, {
error: () => undefined,
});
// Execute the captured script with a fake termsFind handler.
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('者')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 1,
dictionaryEntries: [
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [],
},
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 79601,
displayValue: '475,79601句',
},
{
headwordIndex: 0,
dictionary: 'Jiten',
frequency: 338,
displayValue: '338',
},
],
},
],
};
});
// Expected rank is 475 (leading displayValue number of the priority-0
// dictionary), not the numerically smaller Jiten value 338.
assert.deepEqual(result, [
{
surface: '者',
reading: 'もの',
headword: '者',
startPos: 0,
endPos: 1,
isNameMatch: false,
frequencyRank: 475,
},
]);
});
// Same setup as the "later exact-match entry" case, except the second entry's
// source has isPrimary: false; its frequency must still be used.
test('requestYomitanScanTokens can use frequency from later exact secondary-match entry', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
// Capture the scanner script; its result here is irrelevant.
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profileIndex: 0,
scanLength: 40,
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
dictionaryPriorityByName: {
'JPDBv2㋕': 0,
Jiten: 1,
CC100: 2,
},
dictionaryFrequencyModeByName: {
'JPDBv2㋕': 'rank-based',
Jiten: 'rank-based',
CC100: 'rank-based',
},
profiles: [
{
options: {
scanning: { length: 40 },
dictionaries: [
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
{ name: 'Jiten', enabled: true, id: 1 },
{ name: 'CC100', enabled: true, id: 2 },
],
},
},
],
};
}
return null;
});
await requestYomitanScanTokens('者', deps, {
error: () => undefined,
});
// Execute the captured script with a fake termsFind handler.
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action !== 'termsFind') {
throw new Error(`unexpected action: ${action}`);
}
const text = (params as { text?: string } | undefined)?.text ?? '';
if (!text.startsWith('者')) {
return { originalTextLength: 0, dictionaryEntries: [] };
}
return {
originalTextLength: 1,
dictionaryEntries: [
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
},
],
frequencies: [],
},
{
headwords: [
{
term: '者',
reading: 'もの',
sources: [{ originalText: '者', isPrimary: false, matchType: 'exact' }],
},
],
frequencies: [
{
headwordIndex: 0,
dictionary: 'JPDBv2㋕',
frequency: 79601,
displayValue: '475,79601句',
},
],
},
],
};
});
// Expected rank 475 is the leading number of the second entry's displayValue.
assert.deepEqual(result, [
{
surface: '者',
reading: 'もの',
headword: '者',
startPos: 0,
endPos: 1,
isNameMatch: false,
frequencyRank: 475,
},
]);
});
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
const deps = createDeps(async (script) => {
if (script.includes('optionsGetFull')) {
@@ -20,19 +20,24 @@ interface YomitanParserRuntimeDeps {
createYomitanExtensionWindow?: (pageName: string) => Promise<BrowserWindow | null>;
}
/** Frequency mode a dictionary can declare: occurrence counts or ranks. */
type YomitanFrequencyMode = 'occurrence-based' | 'rank-based';
/** Descriptive information about one Yomitan dictionary. */
export interface YomitanDictionaryInfo {
/** Dictionary title. */
title: string;
/** Dictionary revision, when available; may be a string or a number. */
revision?: string | number;
/** Declared frequency mode, when the dictionary specifies one. */
frequencyMode?: YomitanFrequencyMode;
}
/** One normalized term-frequency entry returned by requestYomitanTermFrequencies. */
export interface YomitanTermFrequency {
/** Term the entry belongs to. */
term: string;
/** Reading associated with the entry, or null. */
reading: string | null;
// NOTE(review): presumably tells whether the frequency data itself was tied
// to a reading (as opposed to `reading` merely being echoed) — confirm.
hasReading: boolean;
/** Name of the dictionary that supplied the entry. */
dictionary: string;
// NOTE(review): presumably lower numbers mean higher priority — confirm.
dictionaryPriority: number;
/** Normalized frequency value for the term. */
frequency: number;
/** Display string supplied with the frequency, or null. */
displayValue: string | null;
displayValueParsed: boolean;
// NOTE(review): by its name, set when `frequency` was taken from
// `displayValue` rather than the raw value — confirm against the producer.
frequencyDerivedFromDisplayValue: boolean;
}
export interface YomitanTermReadingPair {
@@ -47,6 +52,7 @@ export interface YomitanScanToken {
startPos: number;
endPos: number;
isNameMatch?: boolean;
frequencyRank?: number;
}
interface YomitanProfileMetadata {
@@ -54,6 +60,7 @@ interface YomitanProfileMetadata {
scanLength: number;
dictionaries: string[];
dictionaryPriorityByName: Record<string, number>;
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>;
}
// Default Yomitan scan length.
// NOTE(review): the code that reads this constant is outside this view; presumably it is the
// fallback used when profile metadata does not provide a usable scan length — confirm.
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
@@ -78,7 +85,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
typeof entry.headword === 'string' &&
typeof entry.startPos === 'number' &&
typeof entry.endPos === 'number' &&
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'),
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean') &&
(entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number'),
)
);
}
@@ -117,24 +125,22 @@ function parsePositiveFrequencyString(value: string): number | null {
return null;
}
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
if (!numericPrefix) {
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
if (!numericMatch) {
return null;
}
const chunks = numericPrefix.split(',');
const normalizedNumber =
chunks.length <= 1
? (chunks[0] ?? '')
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
? chunks.join('')
: (chunks[0] ?? '');
const parsed = Number.parseInt(normalizedNumber, 10);
const parsed = Number.parseFloat(numericMatch);
if (!Number.isFinite(parsed) || parsed <= 0) {
return null;
}
return parsed;
const normalized = Math.floor(parsed);
if (!Number.isFinite(normalized) || normalized <= 0) {
return null;
}
return normalized;
}
function parsePositiveFrequencyValue(value: unknown): number | null {
@@ -159,6 +165,19 @@ function parsePositiveFrequencyValue(value: unknown): number | null {
return null;
}
/**
 * Extracts a positive integer from a frequency display value.
 *
 * For strings, only the run of ASCII digits at the very start of the trimmed
 * text is read, so a composite label such as "475,79601句" yields 475 and a
 * label that does not start with a digit yields null. Every non-string value
 * is handed to parsePositiveFrequencyValue unchanged.
 *
 * @param value - Raw display value taken from a frequency record.
 * @returns The parsed positive integer, or null when none can be read.
 */
function parseDisplayFrequencyValue(value: unknown): number | null {
  if (typeof value !== 'string') {
    return parsePositiveFrequencyValue(value);
  }

  const digitPrefix = /^\d+/.exec(value.trim());
  if (digitPrefix === null) {
    return null;
  }

  const rank = Number.parseInt(digitPrefix[0], 10);
  if (!Number.isFinite(rank) || rank <= 0) {
    return null;
  }
  return rank;
}
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
if (!isObject(value)) {
return null;
@@ -170,7 +189,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
const displayValueRaw = value.displayValue;
const parsedDisplayFrequency =
displayValueRaw !== null && displayValueRaw !== undefined
? parsePositiveFrequencyValue(displayValueRaw)
? parseDisplayFrequencyValue(displayValueRaw)
: null;
const frequency = parsedDisplayFrequency ?? rawFrequency;
if (!term || !dictionary || frequency === null) {
@@ -184,17 +203,20 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
const reading =
value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
const hasReading = value.hasReading === false ? false : reading !== null;
const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
const displayValueParsed = value.displayValueParsed === true;
return {
term,
reading,
hasReading,
dictionary,
dictionaryPriority,
frequency,
displayValue,
displayValueParsed,
frequencyDerivedFromDisplayValue: parsedDisplayFrequency !== null,
};
}
@@ -300,17 +322,34 @@ function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null
}
}
const dictionaryFrequencyModeByNameRaw = value.dictionaryFrequencyModeByName;
const dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>> = {};
if (isObject(dictionaryFrequencyModeByNameRaw)) {
for (const [name, frequencyModeRaw] of Object.entries(dictionaryFrequencyModeByNameRaw)) {
const normalizedName = name.trim();
if (!normalizedName) {
continue;
}
if (frequencyModeRaw !== 'occurrence-based' && frequencyModeRaw !== 'rank-based') {
continue;
}
dictionaryFrequencyModeByName[normalizedName] = frequencyModeRaw;
}
}
return {
profileIndex,
scanLength,
dictionaries,
dictionaryPriorityByName,
dictionaryFrequencyModeByName,
};
}
function normalizeFrequencyEntriesWithPriority(
rawResult: unknown[],
dictionaryPriorityByName: Record<string, number>,
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
): YomitanTermFrequency[] {
const normalized: YomitanTermFrequency[] = [];
for (const entry of rawResult) {
@@ -319,6 +358,10 @@ function normalizeFrequencyEntriesWithPriority(
continue;
}
if (dictionaryFrequencyModeByName[frequency.dictionary] === 'occurrence-based') {
continue;
}
const dictionaryPriority = dictionaryPriorityByName[frequency.dictionary];
normalized.push({
...frequency,
@@ -425,8 +468,34 @@ async function requestYomitanProfileMetadata(
acc[entry.name] = index;
return acc;
}, {});
let dictionaryFrequencyModeByName = {};
try {
const dictionaryInfo = await invoke("getDictionaryInfo", undefined);
dictionaryFrequencyModeByName = Array.isArray(dictionaryInfo)
? dictionaryInfo.reduce((acc, entry) => {
if (!entry || typeof entry !== "object" || typeof entry.title !== "string") {
return acc;
}
if (
entry.frequencyMode === "occurrence-based" ||
entry.frequencyMode === "rank-based"
) {
acc[entry.title] = entry.frequencyMode;
}
return acc;
}, {})
: {};
} catch {
dictionaryFrequencyModeByName = {};
}
return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
return {
profileIndex,
scanLength,
dictionaries,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
};
})();
`;
@@ -774,7 +843,133 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
}
return segments;
}
function getPreferredHeadword(dictionaryEntries, token) {
/**
 * Reads a positive whole number out of a loosely typed frequency value.
 *
 * - number: must be finite and greater than zero; floored, never below 1.
 * - string: the first numeric token found in the trimmed text is parsed as a
 *   float and then treated like a number.
 * - array: the first element (searched recursively) that parses wins.
 * - anything else: null.
 */
function parsePositiveFrequencyNumber(value) {
  const toRank = (candidate) =>
    Number.isFinite(candidate) && candidate > 0 ? Math.max(1, Math.floor(candidate)) : null;

  if (typeof value === 'number') {
    return toRank(value);
  }
  if (typeof value === 'string') {
    const found = /[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/.exec(value.trim());
    return found === null ? null : toRank(Number.parseFloat(found[0]));
  }
  if (!Array.isArray(value)) {
    return null;
  }
  for (const element of value) {
    const rank = parsePositiveFrequencyNumber(element);
    if (rank !== null) {
      return rank;
    }
  }
  return null;
}
/**
 * Reads a rank from a frequency display value.
 *
 * Strings contribute only the digits at the very start of the trimmed text
 * (so "475,79601句" gives 475); a string that does not start with a digit, or
 * whose leading number is zero, gives null. Non-string values are passed on
 * to parsePositiveFrequencyNumber.
 */
function parseDisplayFrequencyNumber(value) {
  if (typeof value !== 'string') {
    return parsePositiveFrequencyNumber(value);
  }
  const digitPrefix = /^\d+/.exec(value.trim());
  if (digitPrefix === null) {
    return null;
  }
  const rank = Number.parseInt(digitPrefix[0], 10);
  return Number.isFinite(rank) && rank > 0 ? rank : null;
}
/**
 * Finds the dictionary name attached to a frequency record.
 *
 * The fields are probed in a fixed order and the first one holding a string
 * that is non-empty after trimming is returned (trimmed). Returns null when
 * no field qualifies or when the record itself is missing.
 */
function getFrequencyDictionaryName(frequency) {
  const nameFields = [
    'dictionary',
    'dictionaryName',
    'name',
    'title',
    'dictionaryTitle',
    'dictionaryAlias'
  ];
  for (const field of nameFields) {
    const raw = frequency?.[field];
    if (typeof raw !== 'string') {
      continue;
    }
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return null;
}
/**
 * Chooses the frequency rank for one headword of a dictionary entry.
 *
 * A frequency record is considered only when:
 *  - it is an object;
 *  - its numeric headwordIndex equals the requested one, or it carries no
 *    numeric headwordIndex and the entry has at most one headword;
 *  - it names a dictionary that is not in occurrence-based mode;
 *  - a rank can be read from displayValue, or failing that from frequency.
 *
 * Among the remaining records the one with the smallest priority number wins
 * (taken from dictionaryPriorityByName, else the record's dictionaryIndex,
 * else Number.MAX_SAFE_INTEGER); equal priorities are settled by the smaller
 * rank. Returns the winning rank, or null when nothing qualifies.
 */
function getBestFrequencyRank(dictionaryEntry, headwordIndex, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
  // Normalizes a priority candidate to a non-negative integer, or null when unusable.
  const toPriority = (raw) =>
    typeof raw === 'number' && Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : null;
  const hasMultipleHeadwords =
    Array.isArray(dictionaryEntry?.headwords) && dictionaryEntry.headwords.length > 1;

  let winner = null;
  for (const frequency of dictionaryEntry?.frequencies || []) {
    if (!frequency || typeof frequency !== 'object') {
      continue;
    }

    // Records without an explicit index are ambiguous once several headwords share the entry.
    const ownIndex = frequency.headwordIndex;
    const belongsToHeadword =
      typeof ownIndex === 'number' ? ownIndex === headwordIndex : !hasMultipleHeadwords;
    if (!belongsToHeadword) {
      continue;
    }

    const dictionary = getFrequencyDictionaryName(frequency);
    if (!dictionary || dictionaryFrequencyModeByName[dictionary] === 'occurrence-based') {
      continue;
    }

    const rank =
      parseDisplayFrequencyNumber(frequency.displayValue) ??
      parsePositiveFrequencyNumber(frequency.frequency);
    if (rank === null) {
      continue;
    }

    const priority =
      toPriority(dictionaryPriorityByName[dictionary]) ??
      toPriority(frequency.dictionaryIndex) ??
      Number.MAX_SAFE_INTEGER;
    const improves =
      winner === null ||
      priority < winner.priority ||
      (priority === winner.priority && rank < winner.rank);
    if (improves) {
      winner = { priority, rank };
    }
  }
  return winner === null ? null : winner.rank;
}
/**
 * Tells whether a headword has a source that matches the token exactly.
 *
 * A source qualifies when its originalText equals the token and its
 * matchType is 'exact'; with requirePrimary set it must also be flagged
 * isPrimary. A headword without sources never matches.
 */
function hasExactSource(headword, token, requirePrimary) {
  for (const source of headword.sources || []) {
    if (
      source.originalText === token &&
      (!requirePrimary || source.isPrimary) &&
      source.matchType === 'exact'
    ) {
      return true;
    }
  }
  return false;
}
/**
 * Gathers every headword, across all dictionary entries, that has an exact
 * source for the token (see hasExactSource for the matching rule).
 *
 * Each hit is reported as { dictionaryEntry, headword, headwordIndex }, in
 * entry order and then headword order. Entries whose headwords field is not
 * an array contribute nothing; a missing entry list yields an empty result.
 */
function collectExactHeadwordMatches(dictionaryEntries, token, requirePrimary) {
  const found = [];
  for (const dictionaryEntry of dictionaryEntries || []) {
    if (!Array.isArray(dictionaryEntry?.headwords)) {
      continue;
    }
    for (const [headwordIndex, headword] of dictionaryEntry.headwords.entries()) {
      if (hasExactSource(headword, token, requirePrimary)) {
        found.push({ dictionaryEntry, headword, headwordIndex });
      }
    }
  }
  return found;
}
/**
 * Compares two headword matches by term and reading.
 *
 * Both matches must be present. Terms are compared strictly; a reading that
 * is not a string is treated as the empty string before comparing.
 */
function sameHeadword(match, preferredMatch) {
  const readingOf = (candidate) => {
    const reading = candidate.headword?.reading;
    return typeof reading === 'string' ? reading : '';
  };

  if (match && preferredMatch) {
    const sameTerm = match.headword?.term === preferredMatch.headword?.term;
    return sameTerm && readingOf(match) === readingOf(preferredMatch);
  }
  return false;
}
/**
 * Returns the smallest frequency rank found across a list of headword
 * matches, asking getBestFrequencyRank for each match's own best rank.
 * Matches without a rank are ignored; null is returned when none has one.
 */
function getBestFrequencyRankForMatches(matches, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
  let lowest = null;
  for (const { dictionaryEntry, headwordIndex } of matches) {
    const candidate = getBestFrequencyRank(
      dictionaryEntry,
      headwordIndex,
      dictionaryPriorityByName,
      dictionaryFrequencyModeByName
    );
    if (candidate !== null && (lowest === null || candidate < lowest)) {
      lowest = candidate;
    }
  }
  return lowest;
}
function getPreferredHeadword(dictionaryEntries, token, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
function appendDictionaryNames(target, value) {
if (!value || typeof value !== 'object') {
return;
@@ -813,36 +1008,33 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
}
return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
}
function hasExactPrimarySource(headword, token) {
for (const src of headword.sources || []) {
if (src.originalText !== token) { continue; }
if (!src.isPrimary) { continue; }
if (src.matchType !== 'exact') { continue; }
return true;
}
return false;
}
const exactPrimaryMatches = collectExactHeadwordMatches(dictionaryEntries, token, true);
let matchedNameDictionary = false;
if (includeNameMatchMetadata) {
for (const dictionaryEntry of dictionaryEntries || []) {
if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
for (const headword of dictionaryEntry.headwords || []) {
if (!hasExactPrimarySource(headword, token)) { continue; }
for (const match of exactPrimaryMatches) {
if (match.dictionaryEntry !== dictionaryEntry) { continue; }
matchedNameDictionary = true;
break;
}
if (matchedNameDictionary) { break; }
}
}
for (const dictionaryEntry of dictionaryEntries || []) {
for (const headword of dictionaryEntry.headwords || []) {
if (!hasExactPrimarySource(headword, token)) { continue; }
return {
term: headword.term,
reading: headword.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
};
}
const preferredMatch = exactPrimaryMatches[0];
if (preferredMatch) {
const exactFrequencyMatches = collectExactHeadwordMatches(dictionaryEntries, token, false)
.filter((match) => sameHeadword(match, preferredMatch));
return {
term: preferredMatch.headword.term,
reading: preferredMatch.headword.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(preferredMatch.dictionaryEntry),
frequencyRank: getBestFrequencyRankForMatches(
exactFrequencyMatches.length > 0 ? exactFrequencyMatches : exactPrimaryMatches,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
)
};
}
return null;
}
@@ -853,6 +1045,8 @@ function buildYomitanScanningScript(
profileIndex: number,
scanLength: number,
includeNameMatchMetadata: boolean,
dictionaryPriorityByName: Record<string, number>,
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
): string {
return `
(async () => {
@@ -876,6 +1070,8 @@ function buildYomitanScanningScript(
});
${YOMITAN_SCANNING_HELPERS}
const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'};
const dictionaryPriorityByName = ${JSON.stringify(dictionaryPriorityByName)};
const dictionaryFrequencyModeByName = ${JSON.stringify(dictionaryFrequencyModeByName)};
const text = ${JSON.stringify(text)};
const details = {matchType: "exact", deinflect: true};
const tokens = [];
@@ -889,7 +1085,12 @@ ${YOMITAN_SCANNING_HELPERS}
const originalTextLength = typeof result?.originalTextLength === "number" ? result.originalTextLength : 0;
if (dictionaryEntries.length > 0 && originalTextLength > 0 && (originalTextLength !== character.length || isCodePointJapanese(codePoint))) {
const source = substring.substring(0, originalTextLength);
const preferredHeadword = getPreferredHeadword(dictionaryEntries, source);
const preferredHeadword = getPreferredHeadword(
dictionaryEntries,
source,
dictionaryPriorityByName,
dictionaryFrequencyModeByName
);
if (preferredHeadword && typeof preferredHeadword.term === "string") {
const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : "";
const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source);
@@ -900,6 +1101,10 @@ ${YOMITAN_SCANNING_HELPERS}
startPos: i,
endPos: i + originalTextLength,
isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
frequencyRank:
typeof preferredHeadword.frequencyRank === "number" && Number.isFinite(preferredHeadword.frequencyRank)
? Math.max(1, Math.floor(preferredHeadword.frequencyRank))
: undefined,
});
i += originalTextLength;
continue;
@@ -1036,6 +1241,8 @@ export async function requestYomitanScanTokens(
profileIndex,
scanLength,
options?.includeNameMatchMetadata === true,
metadata?.dictionaryPriorityByName ?? {},
metadata?.dictionaryFrequencyModeByName ?? {},
),
true,
);
@@ -1099,7 +1306,11 @@ async function fetchYomitanTermFrequencies(
try {
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
return Array.isArray(rawResult)
? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName)
? normalizeFrequencyEntriesWithPriority(
rawResult,
metadata.dictionaryPriorityByName,
metadata.dictionaryFrequencyModeByName,
)
: [];
} catch (err) {
logger.error('Yomitan term frequency request failed:', (err as Error).message);
@@ -1541,10 +1752,15 @@ export async function getYomitanDictionaryInfo(
.map((entry) => {
const title = typeof entry.title === 'string' ? entry.title.trim() : '';
const revision = entry.revision;
const frequencyMode: YomitanFrequencyMode | undefined =
entry.frequencyMode === 'occurrence-based' || entry.frequencyMode === 'rank-based'
? entry.frequencyMode
: undefined;
return {
title,
revision:
typeof revision === 'string' || typeof revision === 'number' ? revision : undefined,
frequencyMode,
};
})
.filter((entry) => entry.title.length > 0);
@@ -1763,3 +1979,34 @@ export async function removeYomitanDictionarySettings(
return await setYomitanSettingsFull(optionsFull, deps, logger);
}
/**
 * Asks the Yomitan parser window to add a note for a word through the
 * window.__subminerAddNote bridge function expected on that page.
 *
 * @param word - Word handed to the bridge; embedded in the script as a JSON string literal.
 * @param deps - Runtime dependencies used to ensure and obtain the parser window.
 * @param logger - Receives an error message when the injected script fails.
 * @returns The value returned by the bridge when it is a number; null when
 *   the window is unavailable or destroyed, the script throws (including a
 *   missing bridge), or the bridge returns anything that is not a number.
 */
export async function addYomitanNoteViaSearch(
  word: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<number | null> {
  const windowReady = await ensureYomitanParserWindow(deps, logger);
  const searchWindow = deps.getYomitanParserWindow();
  if (!(windowReady && searchWindow && !searchWindow.isDestroyed())) {
    return null;
  }

  // JSON.stringify yields a safely quoted JavaScript string literal for the word.
  const bridgeCall = `
(async () => {
if (typeof window.__subminerAddNote !== 'function') {
throw new Error('Yomitan search page bridge not initialized');
}
return await window.__subminerAddNote(${JSON.stringify(word)});
})();
`;

  let outcome: unknown;
  try {
    outcome = await searchWindow.webContents.executeJavaScript(bridgeCall, true);
  } catch (err) {
    logger.error('Yomitan addNoteFromWord failed:', (err as Error).message);
    return null;
  }
  return typeof outcome === 'number' ? outcome : null;
}