mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-22 12:11:27 -07:00
feat(stats): add v1 immersion stats dashboard (#19)
This commit is contained in:
1113
src/core/services/__tests__/stats-server.test.ts
Normal file
1113
src/core/services/__tests__/stats-server.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -16,6 +16,7 @@ test('guessAnilistMediaInfo uses guessit output when available', async () => {
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'Guessit Title',
|
||||
season: null,
|
||||
episode: 7,
|
||||
source: 'guessit',
|
||||
});
|
||||
@@ -29,6 +30,7 @@ test('guessAnilistMediaInfo falls back to parser when guessit fails', async () =
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'My Anime',
|
||||
season: 1,
|
||||
episode: 3,
|
||||
source: 'fallback',
|
||||
});
|
||||
@@ -52,6 +54,7 @@ test('guessAnilistMediaInfo uses basename for guessit input', async () => {
|
||||
]);
|
||||
assert.deepEqual(result, {
|
||||
title: 'Rascal Does Not Dream of Bunny Girl Senpai',
|
||||
season: null,
|
||||
episode: 1,
|
||||
source: 'guessit',
|
||||
});
|
||||
@@ -67,6 +70,7 @@ test('guessAnilistMediaInfo joins multi-part guessit titles', async () => {
|
||||
});
|
||||
assert.deepEqual(result, {
|
||||
title: 'Rascal Does not Dream of Bunny Girl Senpai',
|
||||
season: null,
|
||||
episode: 1,
|
||||
source: 'guessit',
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
|
||||
|
||||
export interface AnilistMediaGuess {
|
||||
title: string;
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
source: 'guessit' | 'fallback';
|
||||
}
|
||||
@@ -56,7 +57,7 @@ interface AnilistSaveEntryData {
|
||||
};
|
||||
}
|
||||
|
||||
function runGuessit(target: string): Promise<string> {
|
||||
export function runGuessit(target: string): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
childProcess.execFile(
|
||||
'guessit',
|
||||
@@ -73,9 +74,9 @@ function runGuessit(target: string): Promise<string> {
|
||||
});
|
||||
}
|
||||
|
||||
type GuessAnilistMediaInfoDeps = {
|
||||
export interface GuessAnilistMediaInfoDeps {
|
||||
runGuessit: (target: string) => Promise<string>;
|
||||
};
|
||||
}
|
||||
|
||||
function firstString(value: unknown): string | null {
|
||||
if (typeof value === 'string') {
|
||||
@@ -215,8 +216,9 @@ export async function guessAnilistMediaInfo(
|
||||
const parsed = JSON.parse(stdout) as Record<string, unknown>;
|
||||
const title = readGuessitTitle(parsed.title);
|
||||
const episode = firstPositiveInteger(parsed.episode);
|
||||
const season = firstPositiveInteger(parsed.season);
|
||||
if (title) {
|
||||
return { title, episode, source: 'guessit' };
|
||||
return { title, season, episode, source: 'guessit' };
|
||||
}
|
||||
} catch {
|
||||
// Ignore guessit failures and fall back to internal parser.
|
||||
@@ -230,6 +232,7 @@ export async function guessAnilistMediaInfo(
|
||||
}
|
||||
return {
|
||||
title: parsed.title.trim(),
|
||||
season: parsed.season,
|
||||
episode: parsed.episode,
|
||||
source: 'fallback',
|
||||
};
|
||||
|
||||
244
src/core/services/anilist/cover-art-fetcher.test.ts
Normal file
244
src/core/services/anilist/cover-art-fetcher.test.ts
Normal file
@@ -0,0 +1,244 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
import { createCoverArtFetcher, stripFilenameTags } from './cover-art-fetcher.js';
|
||||
import { Database } from '../immersion-tracker/sqlite.js';
|
||||
import { ensureSchema, getOrCreateVideoRecord } from '../immersion-tracker/storage.js';
|
||||
import { getCoverArt, upsertCoverArt } from '../immersion-tracker/query.js';
|
||||
import { SOURCE_TYPE_LOCAL } from '../immersion-tracker/types.js';
|
||||
|
||||
function makeDbPath(): string {
|
||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-cover-art-test-'));
|
||||
return path.join(dir, 'immersion.sqlite');
|
||||
}
|
||||
|
||||
function cleanupDbPath(dbPath: string): void {
|
||||
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
|
||||
}
|
||||
|
||||
test('stripFilenameTags normalizes common media-title formats', () => {
|
||||
assert.equal(
|
||||
stripFilenameTags('[Jellyfin/direct] The Eminence in Shadow S01E05 I Am...'),
|
||||
'The Eminence in Shadow',
|
||||
);
|
||||
assert.equal(
|
||||
stripFilenameTags(
|
||||
'[Foxtrot] Kono Subarashii Sekai ni Shukufuku wo! S2 - 05: Servitude for this Masked Knight!',
|
||||
),
|
||||
'Kono Subarashii Sekai ni Shukufuku wo!',
|
||||
);
|
||||
assert.equal(
|
||||
stripFilenameTags('Kono Subarashii Sekai ni Shukufuku wo! E03: A Panty Treasure'),
|
||||
'Kono Subarashii Sekai ni Shukufuku wo!',
|
||||
);
|
||||
assert.equal(
|
||||
stripFilenameTags(
|
||||
'Little Witch Academia (2017) - S01E05 - 005 - Pact of the Dragon [Bluray-1080p][10bit][h265][FLAC 2.0][JA]-FumeiRaws.mkv',
|
||||
),
|
||||
'Little Witch Academia',
|
||||
);
|
||||
});
|
||||
|
||||
test('fetchIfMissing backfills a missing blob from an existing cover URL', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-test.mkv', {
|
||||
canonicalTitle: 'Cover Fetcher Test',
|
||||
sourcePath: '/tmp/cover-fetcher-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: 7,
|
||||
coverUrl: 'https://images.test/cover.jpg',
|
||||
coverBlob: null,
|
||||
titleRomaji: 'Test Title',
|
||||
titleEnglish: 'Test Title',
|
||||
episodesTotal: 12,
|
||||
});
|
||||
|
||||
const fetchCalls: string[] = [];
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = (async (input: RequestInfo | URL) => {
|
||||
const url = String(input);
|
||||
fetchCalls.push(url);
|
||||
assert.equal(url, 'https://images.test/cover.jpg');
|
||||
return new Response(new Uint8Array([1, 2, 3, 4]), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'image/jpeg' },
|
||||
});
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(
|
||||
db,
|
||||
videoId,
|
||||
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
|
||||
);
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(fetchCalls.length, 1);
|
||||
assert.equal(stored?.coverBlob?.length, 4);
|
||||
assert.equal(stored?.titleEnglish, 'Test Title');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
function createJsonResponse(payload: unknown): Response {
|
||||
return new Response(JSON.stringify(payload), {
|
||||
status: 200,
|
||||
headers: { 'content-type': 'application/json' },
|
||||
});
|
||||
}
|
||||
|
||||
test('fetchIfMissing uses guessit primary title and season when available', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-season-test.mkv', {
|
||||
canonicalTitle:
|
||||
'[Jellyfin] Little Witch Academia S02E05 - 025 - Pact of the Dragon (2020) [1080p].mkv',
|
||||
sourcePath: '/tmp/cover-fetcher-season-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
const searchCalls: Array<{ search: string }> = [];
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const raw = (init?.body as string | undefined) ?? '';
|
||||
const payload = JSON.parse(raw) as { variables: { search: string } };
|
||||
const search = payload.variables.search;
|
||||
searchCalls.push({ search });
|
||||
|
||||
if (search.includes('Season 2')) {
|
||||
return Promise.resolve(createJsonResponse({ data: { Page: { media: [] } } }));
|
||||
}
|
||||
|
||||
return Promise.resolve(
|
||||
createJsonResponse({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 19,
|
||||
episodes: 24,
|
||||
coverImage: { large: 'https://images.test/cover.jpg', medium: null },
|
||||
title: {
|
||||
romaji: 'Little Witch Academia',
|
||||
english: 'Little Witch Academia',
|
||||
native: null,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
{
|
||||
runGuessit: async () =>
|
||||
JSON.stringify({ title: 'Little Witch Academia', season: 2, episode: 5 }),
|
||||
},
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(db, videoId, 'School Vlog S01E01');
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(searchCalls.length, 2);
|
||||
assert.equal(searchCalls[0]!.search, 'Little Witch Academia Season 2');
|
||||
assert.equal(stored?.anilistId, 19);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('fetchIfMissing falls back to internal parser when guessit throws', async () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/cover-fetcher-fallback-test.mkv', {
|
||||
canonicalTitle: 'School Vlog S01E01',
|
||||
sourcePath: '/tmp/cover-fetcher-fallback-test.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
let requestCount = 0;
|
||||
const originalFetch = globalThis.fetch;
|
||||
globalThis.fetch = ((input: RequestInfo | URL, init?: RequestInit) => {
|
||||
requestCount += 1;
|
||||
const raw = (init?.body as string | undefined) ?? '';
|
||||
const payload = JSON.parse(raw) as { variables: { search: string } };
|
||||
assert.equal(payload.variables.search, 'School Vlog');
|
||||
|
||||
return Promise.resolve(
|
||||
createJsonResponse({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 21,
|
||||
episodes: 12,
|
||||
coverImage: { large: 'https://images.test/fallback-cover.jpg', medium: null },
|
||||
title: { romaji: 'School Vlog', english: 'School Vlog', native: null },
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
}) as typeof fetch;
|
||||
|
||||
try {
|
||||
const fetcher = createCoverArtFetcher(
|
||||
{
|
||||
acquire: async () => {},
|
||||
recordResponse: () => {},
|
||||
},
|
||||
console,
|
||||
{
|
||||
runGuessit: async () => {
|
||||
throw new Error('guessit unavailable');
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
const fetched = await fetcher.fetchIfMissing(db, videoId, 'Ignored Title');
|
||||
const stored = getCoverArt(db, videoId);
|
||||
|
||||
assert.equal(fetched, true);
|
||||
assert.equal(requestCount, 2);
|
||||
assert.equal(stored?.anilistId, 21);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
435
src/core/services/anilist/cover-art-fetcher.ts
Normal file
435
src/core/services/anilist/cover-art-fetcher.ts
Normal file
@@ -0,0 +1,435 @@
|
||||
import type { AnilistRateLimiter } from './rate-limiter';
|
||||
import type { DatabaseSync } from '../immersion-tracker/sqlite';
|
||||
import { getCoverArt, upsertCoverArt, updateAnimeAnilistInfo } from '../immersion-tracker/query';
|
||||
import {
|
||||
guessAnilistMediaInfo,
|
||||
runGuessit,
|
||||
type GuessAnilistMediaInfoDeps,
|
||||
} from './anilist-updater';
|
||||
|
||||
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';
|
||||
const NO_MATCH_RETRY_MS = 5 * 60 * 1000;
|
||||
|
||||
const SEARCH_QUERY = `
|
||||
query ($search: String!) {
|
||||
Page(perPage: 5) {
|
||||
media(search: $search, type: ANIME) {
|
||||
id
|
||||
episodes
|
||||
season
|
||||
seasonYear
|
||||
coverImage { large medium }
|
||||
title { romaji english native }
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
interface AnilistMedia {
|
||||
id: number;
|
||||
episodes: number | null;
|
||||
season: string | null;
|
||||
seasonYear: number | null;
|
||||
coverImage: { large: string | null; medium: string | null } | null;
|
||||
title: { romaji: string | null; english: string | null; native: string | null } | null;
|
||||
}
|
||||
|
||||
interface AnilistSearchResponse {
|
||||
data?: {
|
||||
Page?: {
|
||||
media?: AnilistMedia[];
|
||||
};
|
||||
};
|
||||
errors?: Array<{ message?: string }>;
|
||||
}
|
||||
|
||||
export interface CoverArtFetcher {
|
||||
fetchIfMissing(db: DatabaseSync, videoId: number, canonicalTitle: string): Promise<boolean>;
|
||||
}
|
||||
|
||||
interface Logger {
|
||||
info(msg: string, ...args: unknown[]): void;
|
||||
warn(msg: string, ...args: unknown[]): void;
|
||||
error(msg: string, ...args: unknown[]): void;
|
||||
}
|
||||
|
||||
interface CoverArtCandidate {
|
||||
title: string;
|
||||
source: 'guessit' | 'fallback';
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
}
|
||||
|
||||
interface CoverArtFetcherOptions {
|
||||
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
|
||||
}
|
||||
|
||||
export function stripFilenameTags(raw: string): string {
|
||||
let title = raw.replace(/\.[A-Za-z0-9]{2,4}$/, '');
|
||||
|
||||
title = title.replace(/^(?:\s*\[[^\]]*\]\s*)+/, '');
|
||||
title = title.replace(/[._]+/g, ' ');
|
||||
|
||||
// Remove everything from " - S##E##" or " - ###" onward (season/episode markers)
|
||||
title = title.replace(/\s+-\s+S\d+E\d+.*$/i, '');
|
||||
title = title.replace(/\s+-\s+\d{2,}(\s+-\s+\d+)?(\s+-.+)?$/, '');
|
||||
title = title.replace(/\s+S\d+E\d+.*$/i, '');
|
||||
title = title.replace(/\s+S\d+\s*[- ]\s*\d+[: -].*$/i, '');
|
||||
title = title.replace(/\s+E\d+[: -].*$/i, '');
|
||||
title = title.replace(/^S\d+E\d+\s*[- ]\s*/i, '');
|
||||
|
||||
// Remove bracketed/parenthesized tags: [WEBDL-1080p], (2022), etc.
|
||||
title = title.replace(/\s*\[[^\]]*\]\s*/g, ' ');
|
||||
title = title.replace(/\s*\([^)]*\d{4}[^)]*\)\s*/g, ' ');
|
||||
|
||||
// Remove common codec/source tags that may appear without brackets
|
||||
title = title.replace(
|
||||
/\b(WEBDL|WEBRip|BluRay|BDRip|HDTV|DVDRip|x264|x265|H\.?264|H\.?265|AV1|AAC|FLAC|Opus|10bit|8bit|1080p|720p|480p|2160p|4K)\b[-.\w]*/gi,
|
||||
'',
|
||||
);
|
||||
|
||||
// Remove trailing dashes and group tags like "-Retr0"
|
||||
title = title.replace(/\s*-\s*[\w]+$/, '');
|
||||
|
||||
return title.trim().replace(/\s{2,}/g, ' ');
|
||||
}
|
||||
|
||||
function removeSeasonHint(title: string): string {
|
||||
return title
|
||||
.replace(/\bseason\s*\d+\b/gi, '')
|
||||
.replace(/\s{2,}/g, ' ')
|
||||
.trim();
|
||||
}
|
||||
|
||||
function normalizeTitle(text: string): string {
|
||||
return text.trim().toLowerCase().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
function extractCandidateSeasonHints(text: string): Set<number> {
|
||||
const normalized = normalizeTitle(text);
|
||||
const matches = [
|
||||
...normalized.matchAll(/\bseason\s*(\d{1,2})\b/gi),
|
||||
...normalized.matchAll(/\bs(\d{1,2})(?:\b|\D)/gi),
|
||||
];
|
||||
const values = new Set<number>();
|
||||
for (const match of matches) {
|
||||
const value = Number.parseInt(match[1]!, 10);
|
||||
if (Number.isInteger(value)) {
|
||||
values.add(value);
|
||||
}
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
function isSeasonMentioned(titles: string[], season: number | null): boolean {
|
||||
if (!season) {
|
||||
return false;
|
||||
}
|
||||
const hints = titles.flatMap((title) => [...extractCandidateSeasonHints(title)]);
|
||||
return hints.includes(season);
|
||||
}
|
||||
|
||||
function pickBestSearchResult(
|
||||
title: string,
|
||||
episode: number | null,
|
||||
season: number | null,
|
||||
media: AnilistMedia[],
|
||||
): { id: number; title: string } | null {
|
||||
const cleanedTitle = removeSeasonHint(title);
|
||||
const targets = [title, cleanedTitle]
|
||||
.map(normalizeTitle)
|
||||
.map((value) => value.trim())
|
||||
.filter((value, index, all) => value.length > 0 && all.indexOf(value) === index);
|
||||
|
||||
const filtered =
|
||||
episode === null
|
||||
? media
|
||||
: media.filter((item) => {
|
||||
const total = item.episodes;
|
||||
return total === null || total >= episode;
|
||||
});
|
||||
const candidates = filtered.length > 0 ? filtered : media;
|
||||
if (candidates.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const scored = candidates.map((item) => {
|
||||
const candidateTitles = [item.title?.romaji, item.title?.english, item.title?.native]
|
||||
.filter((value): value is string => typeof value === 'string')
|
||||
.map((value) => normalizeTitle(value));
|
||||
|
||||
let score = 0;
|
||||
|
||||
for (const target of targets) {
|
||||
if (candidateTitles.includes(target)) {
|
||||
score += 120;
|
||||
continue;
|
||||
}
|
||||
if (candidateTitles.some((itemTitle) => itemTitle.includes(target))) {
|
||||
score += 30;
|
||||
}
|
||||
if (candidateTitles.some((itemTitle) => target.includes(itemTitle))) {
|
||||
score += 10;
|
||||
}
|
||||
}
|
||||
|
||||
if (episode !== null && item.episodes === episode) {
|
||||
score += 20;
|
||||
}
|
||||
|
||||
if (season !== null && isSeasonMentioned(candidateTitles, season)) {
|
||||
score += 15;
|
||||
}
|
||||
|
||||
return { item, score };
|
||||
});
|
||||
|
||||
scored.sort((a, b) => {
|
||||
if (b.score !== a.score) return b.score - a.score;
|
||||
return b.item.id - a.item.id;
|
||||
});
|
||||
|
||||
const selected = scored[0]!;
|
||||
const selectedTitle =
|
||||
selected.item.title?.english ??
|
||||
selected.item.title?.romaji ??
|
||||
selected.item.title?.native ??
|
||||
title;
|
||||
return { id: selected.item.id, title: selectedTitle };
|
||||
}
|
||||
|
||||
function buildSearchCandidates(parsed: CoverArtCandidate): string[] {
|
||||
const candidateTitles = [
|
||||
...(parsed.source === 'guessit' && parsed.season !== null && parsed.season > 1
|
||||
? [`${parsed.title} Season ${parsed.season}`]
|
||||
: []),
|
||||
parsed.title,
|
||||
];
|
||||
return candidateTitles
|
||||
.map((title) => title.trim())
|
||||
.filter((title, index, all) => title.length > 0 && all.indexOf(title) === index);
|
||||
}
|
||||
|
||||
async function searchAnilist(
|
||||
rateLimiter: AnilistRateLimiter,
|
||||
title: string,
|
||||
): Promise<{ media: AnilistMedia[]; rateLimited: boolean }> {
|
||||
await rateLimiter.acquire();
|
||||
|
||||
const res = await fetch(ANILIST_GRAPHQL_URL, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
|
||||
body: JSON.stringify({ query: SEARCH_QUERY, variables: { search: title } }),
|
||||
});
|
||||
|
||||
rateLimiter.recordResponse(res.headers);
|
||||
|
||||
if (res.status === 429) {
|
||||
return { media: [], rateLimited: true };
|
||||
}
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`Anilist search failed: ${res.status} ${res.statusText}`);
|
||||
}
|
||||
|
||||
const json = (await res.json()) as AnilistSearchResponse;
|
||||
const mediaList = json.data?.Page?.media;
|
||||
if (!mediaList || mediaList.length === 0) {
|
||||
return { media: [], rateLimited: false };
|
||||
}
|
||||
|
||||
return { media: mediaList, rateLimited: false };
|
||||
}
|
||||
|
||||
async function downloadImage(url: string): Promise<Buffer | null> {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) return null;
|
||||
const arrayBuf = await res.arrayBuffer();
|
||||
return Buffer.from(arrayBuf);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function createCoverArtFetcher(
|
||||
rateLimiter: AnilistRateLimiter,
|
||||
logger: Logger,
|
||||
options: CoverArtFetcherOptions = {},
|
||||
): CoverArtFetcher {
|
||||
const resolveCanonicalTitle = (
|
||||
db: DatabaseSync,
|
||||
videoId: number,
|
||||
fallbackTitle: string,
|
||||
): string => {
|
||||
const row = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT canonical_title AS canonicalTitle
|
||||
FROM imm_videos
|
||||
WHERE video_id = ?
|
||||
LIMIT 1
|
||||
`,
|
||||
)
|
||||
.get(videoId) as { canonicalTitle: string | null } | undefined;
|
||||
return row?.canonicalTitle?.trim() || fallbackTitle;
|
||||
};
|
||||
|
||||
const resolveMediaInfo = async (
|
||||
db: DatabaseSync,
|
||||
videoId: number,
|
||||
canonicalTitle: string,
|
||||
): Promise<CoverArtCandidate | null> => {
|
||||
const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
|
||||
const parsed = await guessAnilistMediaInfo(null, effectiveTitle, {
|
||||
runGuessit: options.runGuessit ?? runGuessit,
|
||||
});
|
||||
if (!parsed) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
title: parsed.title,
|
||||
season: parsed.season,
|
||||
episode: parsed.episode,
|
||||
source: parsed.source,
|
||||
};
|
||||
};
|
||||
|
||||
return {
|
||||
async fetchIfMissing(db, videoId, canonicalTitle): Promise<boolean> {
|
||||
const existing = getCoverArt(db, videoId);
|
||||
if (existing?.coverBlob) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (existing?.coverUrl) {
|
||||
const coverBlob = await downloadImage(existing.coverUrl);
|
||||
if (coverBlob) {
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: existing.anilistId,
|
||||
coverUrl: existing.coverUrl,
|
||||
coverBlob,
|
||||
titleRomaji: existing.titleRomaji,
|
||||
titleEnglish: existing.titleEnglish,
|
||||
episodesTotal: existing.episodesTotal,
|
||||
});
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
existing &&
|
||||
existing.coverUrl === null &&
|
||||
existing.anilistId === null &&
|
||||
Date.now() - existing.fetchedAtMs < NO_MATCH_RETRY_MS
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const effectiveTitle = resolveCanonicalTitle(db, videoId, canonicalTitle);
|
||||
const cleaned = stripFilenameTags(effectiveTitle);
|
||||
if (!cleaned) {
|
||||
logger.warn('cover-art: empty title after stripping tags for videoId=%d', videoId);
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: null,
|
||||
coverUrl: null,
|
||||
coverBlob: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
episodesTotal: null,
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
const parsedInfo = await resolveMediaInfo(db, videoId, canonicalTitle);
|
||||
const searchBase = parsedInfo?.title ?? cleaned;
|
||||
const searchCandidates = parsedInfo ? buildSearchCandidates(parsedInfo) : [cleaned];
|
||||
|
||||
const effectiveCandidates = searchCandidates.includes(cleaned)
|
||||
? searchCandidates
|
||||
: [...searchCandidates, cleaned];
|
||||
|
||||
let selected: AnilistMedia | null = null;
|
||||
let rateLimited = false;
|
||||
|
||||
for (const candidate of effectiveCandidates) {
|
||||
logger.info('cover-art: searching Anilist for "%s" (videoId=%d)', candidate, videoId);
|
||||
|
||||
try {
|
||||
const result = await searchAnilist(rateLimiter, candidate);
|
||||
rateLimited = result.rateLimited;
|
||||
if (result.media.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const picked = pickBestSearchResult(
|
||||
searchBase,
|
||||
parsedInfo?.episode ?? null,
|
||||
parsedInfo?.season ?? null,
|
||||
result.media,
|
||||
);
|
||||
if (picked) {
|
||||
const match = result.media.find((media) => media.id === picked.id);
|
||||
if (match) {
|
||||
selected = match;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error('cover-art: Anilist search error for "%s": %s', candidate, err);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (rateLimited) {
|
||||
logger.warn('cover-art: rate-limited by Anilist, skipping videoId=%d', videoId);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!selected) {
|
||||
logger.info('cover-art: no Anilist results for "%s", caching no-match', searchBase);
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: null,
|
||||
coverUrl: null,
|
||||
coverBlob: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
episodesTotal: null,
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
const coverUrl = selected.coverImage?.large ?? selected.coverImage?.medium ?? null;
|
||||
let coverBlob: Buffer | null = null;
|
||||
if (coverUrl) {
|
||||
coverBlob = await downloadImage(coverUrl);
|
||||
}
|
||||
|
||||
upsertCoverArt(db, videoId, {
|
||||
anilistId: selected.id,
|
||||
coverUrl,
|
||||
coverBlob,
|
||||
titleRomaji: selected.title?.romaji ?? null,
|
||||
titleEnglish: selected.title?.english ?? null,
|
||||
episodesTotal: selected.episodes ?? null,
|
||||
});
|
||||
|
||||
updateAnimeAnilistInfo(db, videoId, {
|
||||
anilistId: selected.id,
|
||||
titleRomaji: selected.title?.romaji ?? null,
|
||||
titleEnglish: selected.title?.english ?? null,
|
||||
titleNative: selected.title?.native ?? null,
|
||||
episodesTotal: selected.episodes ?? null,
|
||||
});
|
||||
|
||||
logger.info(
|
||||
'cover-art: cached art for videoId=%d anilistId=%d title="%s"',
|
||||
videoId,
|
||||
selected.id,
|
||||
selected.title?.romaji ?? searchBase,
|
||||
);
|
||||
|
||||
return true;
|
||||
},
|
||||
};
|
||||
}
|
||||
72
src/core/services/anilist/rate-limiter.ts
Normal file
72
src/core/services/anilist/rate-limiter.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
const DEFAULT_MAX_PER_MINUTE = 20;
|
||||
const WINDOW_MS = 60_000;
|
||||
const SAFETY_REMAINING_THRESHOLD = 5;
|
||||
|
||||
export interface AnilistRateLimiter {
|
||||
acquire(): Promise<void>;
|
||||
recordResponse(headers: Headers): void;
|
||||
}
|
||||
|
||||
export function createAnilistRateLimiter(
|
||||
maxPerMinute = DEFAULT_MAX_PER_MINUTE,
|
||||
): AnilistRateLimiter {
|
||||
const timestamps: number[] = [];
|
||||
let pauseUntilMs = 0;
|
||||
|
||||
function pruneOld(now: number): void {
|
||||
const cutoff = now - WINDOW_MS;
|
||||
while (timestamps.length > 0 && timestamps[0]! < cutoff) {
|
||||
timestamps.shift();
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
async acquire(): Promise<void> {
|
||||
const now = Date.now();
|
||||
|
||||
if (now < pauseUntilMs) {
|
||||
const waitMs = pauseUntilMs - now;
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
}
|
||||
|
||||
pruneOld(Date.now());
|
||||
|
||||
if (timestamps.length >= maxPerMinute) {
|
||||
const oldest = timestamps[0]!;
|
||||
const waitMs = oldest + WINDOW_MS - Date.now() + 100;
|
||||
if (waitMs > 0) {
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
}
|
||||
pruneOld(Date.now());
|
||||
}
|
||||
|
||||
timestamps.push(Date.now());
|
||||
},
|
||||
|
||||
recordResponse(headers: Headers): void {
|
||||
const remaining = headers.get('x-ratelimit-remaining');
|
||||
if (remaining !== null) {
|
||||
const n = parseInt(remaining, 10);
|
||||
if (Number.isFinite(n) && n < SAFETY_REMAINING_THRESHOLD) {
|
||||
const reset = headers.get('x-ratelimit-reset');
|
||||
if (reset) {
|
||||
const resetMs = parseInt(reset, 10) * 1000;
|
||||
if (Number.isFinite(resetMs)) {
|
||||
pauseUntilMs = Math.max(pauseUntilMs, resetMs);
|
||||
}
|
||||
} else {
|
||||
pauseUntilMs = Math.max(pauseUntilMs, Date.now() + WINDOW_MS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const retryAfter = headers.get('retry-after');
|
||||
if (retryAfter) {
|
||||
const seconds = parseInt(retryAfter, 10);
|
||||
if (Number.isFinite(seconds) && seconds > 0) {
|
||||
pauseUntilMs = Math.max(pauseUntilMs, Date.now() + seconds * 1000);
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||
anilistSetup: false,
|
||||
anilistRetryQueue: false,
|
||||
dictionary: false,
|
||||
stats: false,
|
||||
jellyfin: false,
|
||||
jellyfinLogin: false,
|
||||
jellyfinLogout: false,
|
||||
|
||||
@@ -176,6 +176,22 @@ test('runAppReadyRuntime skips heavy startup when shouldSkipHeavyStartup returns
|
||||
assert.ok(calls.indexOf('handleFirstRunSetup') < calls.indexOf('handleInitialArgs'));
|
||||
});
|
||||
|
||||
test('runAppReadyRuntime uses minimal startup for texthooker-only mode', async () => {
|
||||
const { deps, calls } = makeDeps({
|
||||
texthookerOnlyMode: true,
|
||||
reloadConfig: () => calls.push('reloadConfig'),
|
||||
handleInitialArgs: () => calls.push('handleInitialArgs'),
|
||||
});
|
||||
|
||||
await runAppReadyRuntime(deps);
|
||||
|
||||
assert.deepEqual(calls, [
|
||||
'ensureDefaultConfigBootstrap',
|
||||
'reloadConfig',
|
||||
'handleInitialArgs',
|
||||
]);
|
||||
});
|
||||
|
||||
test('runAppReadyRuntime skips Jellyfin remote startup when dependency is not wired', async () => {
|
||||
const { deps, calls } = makeDeps({
|
||||
startJellyfinRemoteSession: undefined,
|
||||
|
||||
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||
anilistSetup: false,
|
||||
anilistRetryQueue: false,
|
||||
dictionary: false,
|
||||
stats: false,
|
||||
jellyfin: false,
|
||||
jellyfinLogin: false,
|
||||
jellyfinLogout: false,
|
||||
@@ -177,6 +178,9 @@ function createDeps(overrides: Partial<CliCommandServiceDeps> = {}) {
|
||||
mediaTitle: 'Test',
|
||||
entryCount: 10,
|
||||
}),
|
||||
runStatsCommand: async () => {
|
||||
calls.push('runStatsCommand');
|
||||
},
|
||||
runJellyfinCommand: async () => {
|
||||
calls.push('runJellyfinCommand');
|
||||
},
|
||||
@@ -249,6 +253,21 @@ test('handleCliCommand opens first-run setup window for --setup', () => {
|
||||
assert.equal(calls.includes('openYomitanSettingsDelayed:1000'), false);
|
||||
});
|
||||
|
||||
test('handleCliCommand dispatches stats command without overlay startup', async () => {
|
||||
const { deps, calls } = createDeps({
|
||||
runStatsCommand: async () => {
|
||||
calls.push('runStatsCommand');
|
||||
},
|
||||
});
|
||||
|
||||
handleCliCommand(makeArgs({ stats: true }), 'initial', deps);
|
||||
await Promise.resolve();
|
||||
|
||||
assert.ok(calls.includes('runStatsCommand'));
|
||||
assert.equal(calls.includes('initializeOverlayRuntime'), false);
|
||||
assert.equal(calls.includes('connectMpvClient'), false);
|
||||
});
|
||||
|
||||
test('handleCliCommand applies cli log level for second-instance commands', () => {
|
||||
const { deps, calls } = createDeps({
|
||||
setLogLevel: (level) => {
|
||||
@@ -520,8 +539,21 @@ test('handleCliCommand runs refresh-known-words command', () => {
|
||||
assert.ok(calls.includes('refreshKnownWords'));
|
||||
});
|
||||
|
||||
test('handleCliCommand stops app after headless initial refresh-known-words completes', async () => {
|
||||
const { deps, calls } = createDeps({
|
||||
hasMainWindow: () => false,
|
||||
});
|
||||
|
||||
handleCliCommand(makeArgs({ refreshKnownWords: true }), 'initial', deps);
|
||||
await new Promise((resolve) => setImmediate(resolve));
|
||||
|
||||
assert.ok(calls.includes('refreshKnownWords'));
|
||||
assert.ok(calls.includes('stopApp'));
|
||||
});
|
||||
|
||||
test('handleCliCommand reports async refresh-known-words errors to OSD', async () => {
|
||||
const { deps, calls, osd } = createDeps({
|
||||
hasMainWindow: () => false,
|
||||
refreshKnownWords: async () => {
|
||||
throw new Error('refresh boom');
|
||||
},
|
||||
@@ -532,4 +564,5 @@ test('handleCliCommand reports async refresh-known-words errors to OSD', async (
|
||||
|
||||
assert.ok(calls.some((value) => value.startsWith('error:refreshKnownWords failed:')));
|
||||
assert.ok(osd.some((value) => value.includes('Refresh known words failed: refresh boom')));
|
||||
assert.ok(calls.includes('stopApp'));
|
||||
});
|
||||
|
||||
@@ -61,6 +61,7 @@ export interface CliCommandServiceDeps {
|
||||
mediaTitle: string;
|
||||
entryCount: number;
|
||||
}>;
|
||||
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
|
||||
runJellyfinCommand: (args: CliArgs) => Promise<void>;
|
||||
printHelp: () => void;
|
||||
hasMainWindow: () => boolean;
|
||||
@@ -154,6 +155,7 @@ export interface CliCommandDepsRuntimeOptions {
|
||||
};
|
||||
jellyfin: {
|
||||
openSetup: () => void;
|
||||
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
|
||||
runCommand: (args: CliArgs) => Promise<void>;
|
||||
};
|
||||
ui: UiCliRuntime;
|
||||
@@ -222,6 +224,7 @@ export function createCliCommandDepsRuntime(
|
||||
getAnilistQueueStatus: options.anilist.getQueueStatus,
|
||||
retryAnilistQueue: options.anilist.retryQueueNow,
|
||||
generateCharacterDictionary: options.dictionary.generate,
|
||||
runStatsCommand: options.jellyfin.runStatsCommand,
|
||||
runJellyfinCommand: options.jellyfin.runCommand,
|
||||
printHelp: options.ui.printHelp,
|
||||
hasMainWindow: options.app.hasMainWindow,
|
||||
@@ -331,12 +334,18 @@ export function handleCliCommand(
|
||||
'Update failed',
|
||||
);
|
||||
} else if (args.refreshKnownWords) {
|
||||
runAsyncWithOsd(
|
||||
() => deps.refreshKnownWords(),
|
||||
deps,
|
||||
'refreshKnownWords',
|
||||
'Refresh known words failed',
|
||||
);
|
||||
const shouldStopAfterRun = source === 'initial' && !deps.hasMainWindow();
|
||||
deps
|
||||
.refreshKnownWords()
|
||||
.catch((err) => {
|
||||
deps.error('refreshKnownWords failed:', err);
|
||||
deps.showMpvOsd(`Refresh known words failed: ${(err as Error).message}`);
|
||||
})
|
||||
.finally(() => {
|
||||
if (shouldStopAfterRun) {
|
||||
deps.stopApp();
|
||||
}
|
||||
});
|
||||
} else if (args.toggleSecondarySub) {
|
||||
deps.cycleSecondarySubMode();
|
||||
} else if (args.triggerFieldGrouping) {
|
||||
@@ -410,6 +419,8 @@ export function handleCliCommand(
|
||||
deps.stopApp();
|
||||
}
|
||||
});
|
||||
} else if (args.stats) {
|
||||
void deps.runStatsCommand(args, source);
|
||||
} else if (args.anilistRetryQueue) {
|
||||
const queueStatus = deps.getAnilistQueueStatus();
|
||||
deps.log(
|
||||
|
||||
@@ -130,6 +130,56 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r
|
||||
assert.equal(lookup('高み'), 9933);
|
||||
});
|
||||
|
||||
test('createFrequencyDictionaryLookup uses leading display digits for displayValue strings', async () => {
|
||||
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
|
||||
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
|
||||
fs.writeFileSync(
|
||||
bankPath,
|
||||
JSON.stringify([
|
||||
['潜む', 1, { frequency: { value: 121, displayValue: '118,121' } }],
|
||||
['例', 2, { frequency: { value: 1234, displayValue: '1,234' } }],
|
||||
]),
|
||||
);
|
||||
|
||||
const lookup = await createFrequencyDictionaryLookup({
|
||||
searchPaths: [tempDir],
|
||||
log: () => undefined,
|
||||
});
|
||||
|
||||
assert.equal(lookup('潜む'), 118);
|
||||
assert.equal(lookup('例'), 1);
|
||||
});
|
||||
|
||||
test('createFrequencyDictionaryLookup ignores occurrence-based Yomitan dictionaries', async () => {
|
||||
const logs: string[] = [];
|
||||
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
|
||||
fs.writeFileSync(
|
||||
path.join(tempDir, 'index.json'),
|
||||
JSON.stringify({
|
||||
title: 'CC100',
|
||||
revision: '1',
|
||||
frequencyMode: 'occurrence-based',
|
||||
}),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(tempDir, 'term_meta_bank_1.json'),
|
||||
JSON.stringify([['潜む', 1, { frequency: { value: 118121 } }]]),
|
||||
);
|
||||
|
||||
const lookup = await createFrequencyDictionaryLookup({
|
||||
searchPaths: [tempDir],
|
||||
log: (message) => {
|
||||
logs.push(message);
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(lookup('潜む'), null);
|
||||
assert.equal(
|
||||
logs.some((entry) => entry.includes('occurrence-based') && entry.includes('CC100')),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => {
|
||||
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
|
||||
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
|
||||
|
||||
@@ -6,6 +6,8 @@ export interface FrequencyDictionaryLookupOptions {
|
||||
log: (message: string) => void;
|
||||
}
|
||||
|
||||
type FrequencyDictionaryMode = 'occurrence-based' | 'rank-based';
|
||||
|
||||
interface FrequencyDictionaryEntry {
|
||||
rank: number;
|
||||
term: string;
|
||||
@@ -29,30 +31,67 @@ function normalizeFrequencyTerm(value: string): string {
|
||||
return value.trim().toLowerCase();
|
||||
}
|
||||
|
||||
async function readDictionaryMetadata(
|
||||
dictionaryPath: string,
|
||||
log: (message: string) => void,
|
||||
): Promise<{ title: string | null; frequencyMode: FrequencyDictionaryMode | null }> {
|
||||
const indexPath = path.join(dictionaryPath, 'index.json');
|
||||
let rawText: string;
|
||||
try {
|
||||
rawText = await fs.readFile(indexPath, 'utf-8');
|
||||
} catch (error) {
|
||||
if (isErrorCode(error, 'ENOENT')) {
|
||||
return { title: null, frequencyMode: null };
|
||||
}
|
||||
log(`Failed to read frequency dictionary index ${indexPath}: ${String(error)}`);
|
||||
return { title: null, frequencyMode: null };
|
||||
}
|
||||
|
||||
let rawIndex: unknown;
|
||||
try {
|
||||
rawIndex = JSON.parse(rawText) as unknown;
|
||||
} catch {
|
||||
log(`Failed to parse frequency dictionary index as JSON: ${indexPath}`);
|
||||
return { title: null, frequencyMode: null };
|
||||
}
|
||||
|
||||
if (!rawIndex || typeof rawIndex !== 'object') {
|
||||
return { title: null, frequencyMode: null };
|
||||
}
|
||||
|
||||
const titleRaw = (rawIndex as { title?: unknown }).title;
|
||||
const frequencyModeRaw = (rawIndex as { frequencyMode?: unknown }).frequencyMode;
|
||||
return {
|
||||
title: typeof titleRaw === 'string' && titleRaw.trim().length > 0 ? titleRaw.trim() : null,
|
||||
frequencyMode:
|
||||
frequencyModeRaw === 'occurrence-based' || frequencyModeRaw === 'rank-based'
|
||||
? frequencyModeRaw
|
||||
: null,
|
||||
};
|
||||
}
|
||||
|
||||
function parsePositiveFrequencyString(value: string): number | null {
|
||||
const trimmed = value.trim();
|
||||
if (!trimmed) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
|
||||
if (!numericPrefix) {
|
||||
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
|
||||
if (!numericMatch) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const chunks = numericPrefix.split(',');
|
||||
const normalizedNumber =
|
||||
chunks.length <= 1
|
||||
? (chunks[0] ?? '')
|
||||
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
|
||||
? chunks.join('')
|
||||
: (chunks[0] ?? '');
|
||||
const parsed = Number.parseInt(normalizedNumber, 10);
|
||||
const parsed = Number.parseFloat(numericMatch);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return parsed;
|
||||
const normalized = Math.floor(parsed);
|
||||
if (!Number.isFinite(normalized) || normalized <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function parsePositiveFrequencyNumber(value: unknown): number | null {
|
||||
@@ -68,18 +107,32 @@ function parsePositiveFrequencyNumber(value: unknown): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseDisplayFrequencyNumber(value: unknown): number | null {
|
||||
if (typeof value === 'string') {
|
||||
const leadingDigits = value.trim().match(/^\d+/)?.[0];
|
||||
if (!leadingDigits) {
|
||||
return null;
|
||||
}
|
||||
const parsed = Number.parseInt(leadingDigits, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
||||
}
|
||||
|
||||
return parsePositiveFrequencyNumber(value);
|
||||
}
|
||||
|
||||
function extractFrequencyDisplayValue(meta: unknown): number | null {
|
||||
if (!meta || typeof meta !== 'object') return null;
|
||||
const frequency = (meta as { frequency?: unknown }).frequency;
|
||||
if (!frequency || typeof frequency !== 'object') return null;
|
||||
const rawValue = (frequency as { value?: unknown }).value;
|
||||
const parsedRawValue = parsePositiveFrequencyNumber(rawValue);
|
||||
const displayValue = (frequency as { displayValue?: unknown }).displayValue;
|
||||
const parsedDisplayValue = parsePositiveFrequencyNumber(displayValue);
|
||||
const parsedDisplayValue = parseDisplayFrequencyNumber(displayValue);
|
||||
if (parsedDisplayValue !== null) {
|
||||
return parsedDisplayValue;
|
||||
}
|
||||
|
||||
const rawValue = (frequency as { value?: unknown }).value;
|
||||
return parsePositiveFrequencyNumber(rawValue);
|
||||
return parsedRawValue;
|
||||
}
|
||||
|
||||
function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null {
|
||||
@@ -141,6 +194,15 @@ async function collectDictionaryFromPath(
|
||||
log: (message: string) => void,
|
||||
): Promise<Map<string, number>> {
|
||||
const terms = new Map<string, number>();
|
||||
const metadata = await readDictionaryMetadata(dictionaryPath, log);
|
||||
if (metadata.frequencyMode === 'occurrence-based') {
|
||||
log(
|
||||
`Skipping occurrence-based frequency dictionary ${
|
||||
metadata.title ?? dictionaryPath
|
||||
}; SubMiner frequency tags require rank-based values.`,
|
||||
);
|
||||
return terms;
|
||||
}
|
||||
|
||||
let fileNames: string[];
|
||||
try {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
2681
src/core/services/immersion-tracker/__tests__/query.test.ts
Normal file
2681
src/core/services/immersion-tracker/__tests__/query.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
71
src/core/services/immersion-tracker/legacy-vocabulary-pos.ts
Normal file
71
src/core/services/immersion-tracker/legacy-vocabulary-pos.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import type { Token } from '../../../types';
|
||||
import type { LegacyVocabularyPosResolution } from './types';
|
||||
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
|
||||
|
||||
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
|
||||
function normalizeLookupText(value: string | null | undefined): string {
|
||||
return typeof value === 'string' ? value.trim() : '';
|
||||
}
|
||||
|
||||
function katakanaToHiragana(text: string): string {
|
||||
let normalized = '';
|
||||
for (const char of text) {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
continue;
|
||||
}
|
||||
if (code >= KATAKANA_CODEPOINT_START && code <= KATAKANA_CODEPOINT_END) {
|
||||
normalized += String.fromCodePoint(code - KATAKANA_TO_HIRAGANA_OFFSET);
|
||||
continue;
|
||||
}
|
||||
normalized += char;
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function toResolution(token: Token): LegacyVocabularyPosResolution {
|
||||
return {
|
||||
headword: normalizeLookupText(token.headword) || normalizeLookupText(token.word),
|
||||
reading: katakanaToHiragana(normalizeLookupText(token.katakanaReading)),
|
||||
partOfSpeech: deriveStoredPartOfSpeech({
|
||||
partOfSpeech: token.partOfSpeech,
|
||||
pos1: token.pos1,
|
||||
}),
|
||||
pos1: normalizeLookupText(token.pos1),
|
||||
pos2: normalizeLookupText(token.pos2),
|
||||
pos3: normalizeLookupText(token.pos3),
|
||||
};
|
||||
}
|
||||
|
||||
export function resolveLegacyVocabularyPosFromTokens(
|
||||
lookupText: string,
|
||||
tokens: Token[] | null,
|
||||
): LegacyVocabularyPosResolution | null {
|
||||
const normalizedLookup = normalizeLookupText(lookupText);
|
||||
if (!normalizedLookup || !tokens || tokens.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const exactSurfaceMatches = tokens.filter(
|
||||
(token) => normalizeLookupText(token.word) === normalizedLookup,
|
||||
);
|
||||
if (exactSurfaceMatches.length === 1) {
|
||||
return toResolution(exactSurfaceMatches[0]!);
|
||||
}
|
||||
|
||||
const exactHeadwordMatches = tokens.filter(
|
||||
(token) => normalizeLookupText(token.headword) === normalizedLookup,
|
||||
);
|
||||
if (exactHeadwordMatches.length === 1) {
|
||||
return toResolution(exactHeadwordMatches[0]!);
|
||||
}
|
||||
|
||||
if (tokens.length === 1) {
|
||||
return toResolution(tokens[0]!);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
569
src/core/services/immersion-tracker/lifetime.ts
Normal file
569
src/core/services/immersion-tracker/lifetime.ts
Normal file
@@ -0,0 +1,569 @@
|
||||
import type { DatabaseSync } from './sqlite';
|
||||
import { finalizeSessionRecord } from './session';
|
||||
import type { LifetimeRebuildSummary, SessionState } from './types';
|
||||
|
||||
interface TelemetryRow {
|
||||
active_watched_ms: number | null;
|
||||
cards_mined: number | null;
|
||||
lines_seen: number | null;
|
||||
tokens_seen: number | null;
|
||||
}
|
||||
|
||||
interface VideoRow {
|
||||
anime_id: number | null;
|
||||
watched: number;
|
||||
}
|
||||
|
||||
interface AnimeRow {
|
||||
episodes_total: number | null;
|
||||
}
|
||||
|
||||
function asPositiveNumber(value: number | null, fallback: number): number {
|
||||
if (value === null || !Number.isFinite(value)) {
|
||||
return fallback;
|
||||
}
|
||||
return Math.max(0, Math.floor(value));
|
||||
}
|
||||
|
||||
interface ExistenceRow {
|
||||
count: number;
|
||||
}
|
||||
|
||||
interface LifetimeMediaStateRow {
|
||||
completed: number;
|
||||
}
|
||||
|
||||
interface LifetimeAnimeStateRow {
|
||||
episodes_completed: number;
|
||||
}
|
||||
|
||||
interface RetainedSessionRow {
|
||||
sessionId: number;
|
||||
videoId: number;
|
||||
startedAtMs: number;
|
||||
endedAtMs: number;
|
||||
lastMediaMs: number | null;
|
||||
totalWatchedMs: number;
|
||||
activeWatchedMs: number;
|
||||
linesSeen: number;
|
||||
tokensSeen: number;
|
||||
cardsMined: number;
|
||||
lookupCount: number;
|
||||
lookupHits: number;
|
||||
yomitanLookupCount: number;
|
||||
pauseCount: number;
|
||||
pauseMs: number;
|
||||
seekForwardCount: number;
|
||||
seekBackwardCount: number;
|
||||
mediaBufferEvents: number;
|
||||
}
|
||||
|
||||
function hasRetainedPriorSession(
|
||||
db: DatabaseSync,
|
||||
videoId: number,
|
||||
startedAtMs: number,
|
||||
currentSessionId: number,
|
||||
): boolean {
|
||||
return (
|
||||
Number(
|
||||
(
|
||||
db
|
||||
.prepare(
|
||||
`
|
||||
SELECT COUNT(*) AS count
|
||||
FROM imm_sessions
|
||||
WHERE video_id = ?
|
||||
AND (
|
||||
started_at_ms < ?
|
||||
OR (started_at_ms = ? AND session_id < ?)
|
||||
)
|
||||
`,
|
||||
)
|
||||
.get(videoId, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
|
||||
)?.count ?? 0,
|
||||
) > 0
|
||||
);
|
||||
}
|
||||
|
||||
function isFirstSessionForLocalDay(
|
||||
db: DatabaseSync,
|
||||
currentSessionId: number,
|
||||
startedAtMs: number,
|
||||
): boolean {
|
||||
return (
|
||||
(
|
||||
db
|
||||
.prepare(
|
||||
`
|
||||
SELECT COUNT(*) AS count
|
||||
FROM imm_sessions
|
||||
WHERE CAST(strftime('%s', started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
|
||||
= CAST(strftime('%s', ? / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
|
||||
AND (
|
||||
started_at_ms < ?
|
||||
OR (started_at_ms = ? AND session_id < ?)
|
||||
)
|
||||
`,
|
||||
)
|
||||
.get(startedAtMs, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
|
||||
)?.count === 0
|
||||
);
|
||||
}
|
||||
|
||||
function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
|
||||
db.exec(`
|
||||
DELETE FROM imm_lifetime_anime;
|
||||
DELETE FROM imm_lifetime_media;
|
||||
DELETE FROM imm_lifetime_applied_sessions;
|
||||
`);
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_lifetime_global
|
||||
SET
|
||||
total_sessions = 0,
|
||||
total_active_ms = 0,
|
||||
total_cards = 0,
|
||||
active_days = 0,
|
||||
episodes_started = 0,
|
||||
episodes_completed = 0,
|
||||
anime_completed = 0,
|
||||
last_rebuilt_ms = ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE global_id = 1
|
||||
`,
|
||||
).run(nowMs, nowMs);
|
||||
}
|
||||
|
||||
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
|
||||
return {
|
||||
sessionId: row.sessionId,
|
||||
videoId: row.videoId,
|
||||
startedAtMs: row.startedAtMs,
|
||||
currentLineIndex: 0,
|
||||
lastWallClockMs: row.endedAtMs,
|
||||
lastMediaMs: row.lastMediaMs,
|
||||
lastPauseStartMs: null,
|
||||
isPaused: false,
|
||||
pendingTelemetry: false,
|
||||
markedWatched: false,
|
||||
totalWatchedMs: Math.max(0, row.totalWatchedMs),
|
||||
activeWatchedMs: Math.max(0, row.activeWatchedMs),
|
||||
linesSeen: Math.max(0, row.linesSeen),
|
||||
tokensSeen: Math.max(0, row.tokensSeen),
|
||||
cardsMined: Math.max(0, row.cardsMined),
|
||||
lookupCount: Math.max(0, row.lookupCount),
|
||||
lookupHits: Math.max(0, row.lookupHits),
|
||||
yomitanLookupCount: Math.max(0, row.yomitanLookupCount),
|
||||
pauseCount: Math.max(0, row.pauseCount),
|
||||
pauseMs: Math.max(0, row.pauseMs),
|
||||
seekForwardCount: Math.max(0, row.seekForwardCount),
|
||||
seekBackwardCount: Math.max(0, row.seekBackwardCount),
|
||||
mediaBufferEvents: Math.max(0, row.mediaBufferEvents),
|
||||
};
|
||||
}
|
||||
|
||||
function getRetainedStaleActiveSessions(db: DatabaseSync): RetainedSessionRow[] {
|
||||
return db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
s.session_id AS sessionId,
|
||||
s.video_id AS videoId,
|
||||
s.started_at_ms AS startedAtMs,
|
||||
COALESCE(t.sample_ms, s.LAST_UPDATE_DATE, s.started_at_ms) AS endedAtMs,
|
||||
s.ended_media_ms AS lastMediaMs,
|
||||
COALESCE(t.total_watched_ms, s.total_watched_ms, 0) AS totalWatchedMs,
|
||||
COALESCE(t.active_watched_ms, s.active_watched_ms, 0) AS activeWatchedMs,
|
||||
COALESCE(t.lines_seen, s.lines_seen, 0) AS linesSeen,
|
||||
COALESCE(t.tokens_seen, s.tokens_seen, 0) AS tokensSeen,
|
||||
COALESCE(t.cards_mined, s.cards_mined, 0) AS cardsMined,
|
||||
COALESCE(t.lookup_count, s.lookup_count, 0) AS lookupCount,
|
||||
COALESCE(t.lookup_hits, s.lookup_hits, 0) AS lookupHits,
|
||||
COALESCE(t.yomitan_lookup_count, s.yomitan_lookup_count, 0) AS yomitanLookupCount,
|
||||
COALESCE(t.pause_count, s.pause_count, 0) AS pauseCount,
|
||||
COALESCE(t.pause_ms, s.pause_ms, 0) AS pauseMs,
|
||||
COALESCE(t.seek_forward_count, s.seek_forward_count, 0) AS seekForwardCount,
|
||||
COALESCE(t.seek_backward_count, s.seek_backward_count, 0) AS seekBackwardCount,
|
||||
COALESCE(t.media_buffer_events, s.media_buffer_events, 0) AS mediaBufferEvents
|
||||
FROM imm_sessions s
|
||||
LEFT JOIN imm_session_telemetry t
|
||||
ON t.telemetry_id = (
|
||||
SELECT telemetry_id
|
||||
FROM imm_session_telemetry
|
||||
WHERE session_id = s.session_id
|
||||
ORDER BY sample_ms DESC, telemetry_id DESC
|
||||
LIMIT 1
|
||||
)
|
||||
WHERE s.ended_at_ms IS NULL
|
||||
ORDER BY s.started_at_ms ASC, s.session_id ASC
|
||||
`,
|
||||
)
|
||||
.all() as RetainedSessionRow[];
|
||||
}
|
||||
|
||||
function upsertLifetimeMedia(
|
||||
db: DatabaseSync,
|
||||
videoId: number,
|
||||
nowMs: number,
|
||||
activeMs: number,
|
||||
cardsMined: number,
|
||||
linesSeen: number,
|
||||
tokensSeen: number,
|
||||
completed: number,
|
||||
startedAtMs: number,
|
||||
endedAtMs: number,
|
||||
): void {
|
||||
db.prepare(
|
||||
`
|
||||
INSERT INTO imm_lifetime_media(
|
||||
video_id,
|
||||
total_sessions,
|
||||
total_active_ms,
|
||||
total_cards,
|
||||
total_lines_seen,
|
||||
total_tokens_seen,
|
||||
completed,
|
||||
first_watched_ms,
|
||||
last_watched_ms,
|
||||
CREATED_DATE,
|
||||
LAST_UPDATE_DATE
|
||||
)
|
||||
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(video_id) DO UPDATE SET
|
||||
total_sessions = total_sessions + 1,
|
||||
total_active_ms = total_active_ms + excluded.total_active_ms,
|
||||
total_cards = total_cards + excluded.total_cards,
|
||||
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
|
||||
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
|
||||
completed = MAX(completed, excluded.completed),
|
||||
first_watched_ms = CASE
|
||||
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
|
||||
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
|
||||
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
|
||||
ELSE first_watched_ms
|
||||
END,
|
||||
last_watched_ms = CASE
|
||||
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
|
||||
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
|
||||
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
|
||||
ELSE last_watched_ms
|
||||
END,
|
||||
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
|
||||
`,
|
||||
).run(
|
||||
videoId,
|
||||
activeMs,
|
||||
cardsMined,
|
||||
linesSeen,
|
||||
tokensSeen,
|
||||
completed,
|
||||
startedAtMs,
|
||||
endedAtMs,
|
||||
nowMs,
|
||||
nowMs,
|
||||
);
|
||||
}
|
||||
|
||||
function upsertLifetimeAnime(
|
||||
db: DatabaseSync,
|
||||
animeId: number,
|
||||
nowMs: number,
|
||||
activeMs: number,
|
||||
cardsMined: number,
|
||||
linesSeen: number,
|
||||
tokensSeen: number,
|
||||
episodesStartedDelta: number,
|
||||
episodesCompletedDelta: number,
|
||||
startedAtMs: number,
|
||||
endedAtMs: number,
|
||||
): void {
|
||||
db.prepare(
|
||||
`
|
||||
INSERT INTO imm_lifetime_anime(
|
||||
anime_id,
|
||||
total_sessions,
|
||||
total_active_ms,
|
||||
total_cards,
|
||||
total_lines_seen,
|
||||
total_tokens_seen,
|
||||
episodes_started,
|
||||
episodes_completed,
|
||||
first_watched_ms,
|
||||
last_watched_ms,
|
||||
CREATED_DATE,
|
||||
LAST_UPDATE_DATE
|
||||
)
|
||||
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(anime_id) DO UPDATE SET
|
||||
total_sessions = total_sessions + 1,
|
||||
total_active_ms = total_active_ms + excluded.total_active_ms,
|
||||
total_cards = total_cards + excluded.total_cards,
|
||||
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
|
||||
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
|
||||
episodes_started = episodes_started + excluded.episodes_started,
|
||||
episodes_completed = episodes_completed + excluded.episodes_completed,
|
||||
first_watched_ms = CASE
|
||||
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
|
||||
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
|
||||
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
|
||||
ELSE first_watched_ms
|
||||
END,
|
||||
last_watched_ms = CASE
|
||||
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
|
||||
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
|
||||
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
|
||||
ELSE last_watched_ms
|
||||
END,
|
||||
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
|
||||
`,
|
||||
).run(
|
||||
animeId,
|
||||
activeMs,
|
||||
cardsMined,
|
||||
linesSeen,
|
||||
tokensSeen,
|
||||
episodesStartedDelta,
|
||||
episodesCompletedDelta,
|
||||
startedAtMs,
|
||||
endedAtMs,
|
||||
nowMs,
|
||||
nowMs,
|
||||
);
|
||||
}
|
||||
|
||||
export function applySessionLifetimeSummary(
|
||||
db: DatabaseSync,
|
||||
session: SessionState,
|
||||
endedAtMs: number,
|
||||
): void {
|
||||
const applyResult = db
|
||||
.prepare(
|
||||
`
|
||||
INSERT INTO imm_lifetime_applied_sessions (
|
||||
session_id,
|
||||
applied_at_ms,
|
||||
CREATED_DATE,
|
||||
LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
?, ?, ?, ?
|
||||
)
|
||||
ON CONFLICT(session_id) DO NOTHING
|
||||
`,
|
||||
)
|
||||
.run(session.sessionId, endedAtMs, Date.now(), Date.now());
|
||||
|
||||
if ((applyResult.changes ?? 0) <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const telemetry = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
active_watched_ms,
|
||||
cards_mined,
|
||||
lines_seen,
|
||||
tokens_seen
|
||||
FROM imm_session_telemetry
|
||||
WHERE session_id = ?
|
||||
ORDER BY sample_ms DESC, telemetry_id DESC
|
||||
LIMIT 1
|
||||
`,
|
||||
)
|
||||
.get(session.sessionId) as TelemetryRow | null;
|
||||
|
||||
const video = db
|
||||
.prepare('SELECT anime_id, watched FROM imm_videos WHERE video_id = ?')
|
||||
.get(session.videoId) as VideoRow | null;
|
||||
const mediaLifetime =
|
||||
(db
|
||||
.prepare('SELECT completed FROM imm_lifetime_media WHERE video_id = ?')
|
||||
.get(session.videoId) as LifetimeMediaStateRow | null | undefined) ?? null;
|
||||
const animeLifetime = video?.anime_id
|
||||
? ((db
|
||||
.prepare('SELECT episodes_completed FROM imm_lifetime_anime WHERE anime_id = ?')
|
||||
.get(video.anime_id) as LifetimeAnimeStateRow | null | undefined) ?? null)
|
||||
: null;
|
||||
const anime = video?.anime_id
|
||||
? ((db
|
||||
.prepare('SELECT episodes_total FROM imm_anime WHERE anime_id = ?')
|
||||
.get(video.anime_id) as AnimeRow | null | undefined) ?? null)
|
||||
: null;
|
||||
|
||||
const activeMs = telemetry
|
||||
? asPositiveNumber(telemetry.active_watched_ms, session.activeWatchedMs)
|
||||
: session.activeWatchedMs;
|
||||
const cardsMined = telemetry
|
||||
? asPositiveNumber(telemetry.cards_mined, session.cardsMined)
|
||||
: session.cardsMined;
|
||||
const linesSeen = telemetry
|
||||
? asPositiveNumber(telemetry.lines_seen, session.linesSeen)
|
||||
: session.linesSeen;
|
||||
const tokensSeen = telemetry
|
||||
? asPositiveNumber(telemetry.tokens_seen, session.tokensSeen)
|
||||
: session.tokensSeen;
|
||||
const watched = video?.watched ?? 0;
|
||||
const isFirstSessionForVideoRun =
|
||||
mediaLifetime === null &&
|
||||
!hasRetainedPriorSession(db, session.videoId, session.startedAtMs, session.sessionId);
|
||||
const isFirstCompletedSessionForVideoRun =
|
||||
watched > 0 && Number(mediaLifetime?.completed ?? 0) <= 0;
|
||||
const isFirstSessionForDay = isFirstSessionForLocalDay(
|
||||
db,
|
||||
session.sessionId,
|
||||
session.startedAtMs,
|
||||
);
|
||||
const episodesCompletedBefore = Number(animeLifetime?.episodes_completed ?? 0);
|
||||
const animeEpisodesTotal = anime?.episodes_total ?? null;
|
||||
const animeCompletedDelta =
|
||||
watched > 0 &&
|
||||
isFirstCompletedSessionForVideoRun &&
|
||||
animeEpisodesTotal !== null &&
|
||||
animeEpisodesTotal > 0 &&
|
||||
episodesCompletedBefore < animeEpisodesTotal &&
|
||||
episodesCompletedBefore + 1 >= animeEpisodesTotal
|
||||
? 1
|
||||
: 0;
|
||||
|
||||
const nowMs = Date.now();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_lifetime_global
|
||||
SET
|
||||
total_sessions = total_sessions + 1,
|
||||
total_active_ms = total_active_ms + ?,
|
||||
total_cards = total_cards + ?,
|
||||
active_days = active_days + ?,
|
||||
episodes_started = episodes_started + ?,
|
||||
episodes_completed = episodes_completed + ?,
|
||||
anime_completed = anime_completed + ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE global_id = 1
|
||||
`,
|
||||
).run(
|
||||
activeMs,
|
||||
cardsMined,
|
||||
isFirstSessionForDay ? 1 : 0,
|
||||
isFirstSessionForVideoRun ? 1 : 0,
|
||||
isFirstCompletedSessionForVideoRun ? 1 : 0,
|
||||
animeCompletedDelta,
|
||||
nowMs,
|
||||
);
|
||||
|
||||
upsertLifetimeMedia(
|
||||
db,
|
||||
session.videoId,
|
||||
nowMs,
|
||||
activeMs,
|
||||
cardsMined,
|
||||
linesSeen,
|
||||
tokensSeen,
|
||||
watched > 0 ? 1 : 0,
|
||||
session.startedAtMs,
|
||||
endedAtMs,
|
||||
);
|
||||
|
||||
if (video?.anime_id) {
|
||||
upsertLifetimeAnime(
|
||||
db,
|
||||
video.anime_id,
|
||||
nowMs,
|
||||
activeMs,
|
||||
cardsMined,
|
||||
linesSeen,
|
||||
tokensSeen,
|
||||
isFirstSessionForVideoRun ? 1 : 0,
|
||||
isFirstCompletedSessionForVideoRun ? 1 : 0,
|
||||
session.startedAtMs,
|
||||
endedAtMs,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
|
||||
const rebuiltAtMs = Date.now();
|
||||
const sessions = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
session_id AS sessionId,
|
||||
video_id AS videoId,
|
||||
started_at_ms AS startedAtMs,
|
||||
ended_at_ms AS endedAtMs,
|
||||
total_watched_ms AS totalWatchedMs,
|
||||
active_watched_ms AS activeWatchedMs,
|
||||
lines_seen AS linesSeen,
|
||||
tokens_seen AS tokensSeen,
|
||||
cards_mined AS cardsMined,
|
||||
lookup_count AS lookupCount,
|
||||
lookup_hits AS lookupHits,
|
||||
yomitan_lookup_count AS yomitanLookupCount,
|
||||
pause_count AS pauseCount,
|
||||
pause_ms AS pauseMs,
|
||||
seek_forward_count AS seekForwardCount,
|
||||
seek_backward_count AS seekBackwardCount,
|
||||
media_buffer_events AS mediaBufferEvents
|
||||
FROM imm_sessions
|
||||
WHERE ended_at_ms IS NOT NULL
|
||||
ORDER BY started_at_ms ASC, session_id ASC
|
||||
`,
|
||||
)
|
||||
.all() as RetainedSessionRow[];
|
||||
|
||||
db.exec('BEGIN');
|
||||
try {
|
||||
resetLifetimeSummaries(db, rebuiltAtMs);
|
||||
for (const session of sessions) {
|
||||
applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
|
||||
}
|
||||
db.exec('COMMIT');
|
||||
} catch (error) {
|
||||
db.exec('ROLLBACK');
|
||||
throw error;
|
||||
}
|
||||
|
||||
return {
|
||||
appliedSessions: sessions.length,
|
||||
rebuiltAtMs,
|
||||
};
|
||||
}
|
||||
|
||||
export function reconcileStaleActiveSessions(db: DatabaseSync): number {
|
||||
const sessions = getRetainedStaleActiveSessions(db);
|
||||
if (sessions.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
db.exec('BEGIN');
|
||||
try {
|
||||
for (const session of sessions) {
|
||||
const state = toRebuildSessionState(session);
|
||||
finalizeSessionRecord(db, state, session.endedAtMs);
|
||||
applySessionLifetimeSummary(db, state, session.endedAtMs);
|
||||
}
|
||||
db.exec('COMMIT');
|
||||
} catch (error) {
|
||||
db.exec('ROLLBACK');
|
||||
throw error;
|
||||
}
|
||||
|
||||
return sessions.length;
|
||||
}
|
||||
|
||||
export function shouldBackfillLifetimeSummaries(db: DatabaseSync): boolean {
|
||||
const globalRow = db
|
||||
.prepare('SELECT total_sessions AS totalSessions FROM imm_lifetime_global WHERE global_id = 1')
|
||||
.get() as { totalSessions: number } | null;
|
||||
const appliedRow = db
|
||||
.prepare('SELECT COUNT(*) AS count FROM imm_lifetime_applied_sessions')
|
||||
.get() as ExistenceRow | null;
|
||||
const endedRow = db
|
||||
.prepare('SELECT COUNT(*) AS count FROM imm_sessions WHERE ended_at_ms IS NOT NULL')
|
||||
.get() as ExistenceRow | null;
|
||||
|
||||
const totalSessions = Number(globalRow?.totalSessions ?? 0);
|
||||
const appliedSessions = Number(appliedRow?.count ?? 0);
|
||||
const retainedEndedSessions = Number(endedRow?.count ?? 0);
|
||||
|
||||
return retainedEndedSessions > 0 && (appliedSessions === 0 || totalSessions === 0);
|
||||
}
|
||||
200
src/core/services/immersion-tracker/maintenance.test.ts
Normal file
200
src/core/services/immersion-tracker/maintenance.test.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { Database } from './sqlite';
|
||||
import {
|
||||
pruneRawRetention,
|
||||
pruneRollupRetention,
|
||||
runOptimizeMaintenance,
|
||||
toMonthKey,
|
||||
} from './maintenance';
|
||||
import { ensureSchema } from './storage';
|
||||
|
||||
function makeDbPath(): string {
|
||||
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-maintenance-test-'));
|
||||
return path.join(dir, 'tracker.db');
|
||||
}
|
||||
|
||||
function cleanupDbPath(dbPath: string): void {
|
||||
try {
|
||||
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
|
||||
} catch {
|
||||
// best effort
|
||||
}
|
||||
}
|
||||
|
||||
test('pruneRawRetention uses session retention separately from telemetry retention', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const nowMs = 90 * 86_400_000;
|
||||
const staleEndedAtMs = nowMs - 40 * 86_400_000;
|
||||
const keptEndedAtMs = nowMs - 5 * 86_400_000;
|
||||
|
||||
db.exec(`
|
||||
INSERT INTO imm_videos (
|
||||
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
|
||||
);
|
||||
INSERT INTO imm_sessions (
|
||||
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES
|
||||
(1, 'session-1', 1, ${staleEndedAtMs - 1_000}, ${staleEndedAtMs}, 2, ${staleEndedAtMs}, ${staleEndedAtMs}),
|
||||
(2, 'session-2', 1, ${keptEndedAtMs - 1_000}, ${keptEndedAtMs}, 2, ${keptEndedAtMs}, ${keptEndedAtMs});
|
||||
INSERT INTO imm_session_telemetry (
|
||||
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES
|
||||
(1, ${nowMs - 2 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}),
|
||||
(2, ${nowMs - 12 * 60 * 60 * 1000}, 0, 0, ${nowMs}, ${nowMs});
|
||||
`);
|
||||
|
||||
const result = pruneRawRetention(db, nowMs, {
|
||||
eventsRetentionMs: 7 * 86_400_000,
|
||||
telemetryRetentionMs: 1 * 86_400_000,
|
||||
sessionsRetentionMs: 30 * 86_400_000,
|
||||
});
|
||||
|
||||
const remainingSessions = db
|
||||
.prepare('SELECT session_id FROM imm_sessions ORDER BY session_id')
|
||||
.all() as Array<{ session_id: number }>;
|
||||
const remainingTelemetry = db
|
||||
.prepare('SELECT session_id FROM imm_session_telemetry ORDER BY session_id')
|
||||
.all() as Array<{ session_id: number }>;
|
||||
|
||||
assert.equal(result.deletedTelemetryRows, 1);
|
||||
assert.equal(result.deletedEndedSessions, 1);
|
||||
assert.deepEqual(
|
||||
remainingSessions.map((row) => row.session_id),
|
||||
[2],
|
||||
);
|
||||
assert.deepEqual(
|
||||
remainingTelemetry.map((row) => row.session_id),
|
||||
[2],
|
||||
);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('raw retention keeps rollups and rollup retention prunes them separately', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const nowMs = Date.UTC(2026, 2, 16, 12, 0, 0, 0);
|
||||
const oldDay = Math.floor((nowMs - 90 * 86_400_000) / 86_400_000);
|
||||
const oldMonth = toMonthKey(nowMs - 400 * 86_400_000);
|
||||
|
||||
db.exec(`
|
||||
INSERT INTO imm_videos (
|
||||
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
|
||||
);
|
||||
INSERT INTO imm_sessions (
|
||||
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
1, 'session-1', 1, ${nowMs - 90 * 86_400_000}, ${nowMs - 90 * 86_400_000 + 1_000}, 2, ${nowMs}, ${nowMs}
|
||||
);
|
||||
INSERT INTO imm_session_telemetry (
|
||||
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
1, ${nowMs - 90 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}
|
||||
);
|
||||
INSERT INTO imm_daily_rollups (
|
||||
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_tokens_seen, total_cards
|
||||
) VALUES (
|
||||
${oldDay}, 1, 1, 10, 1, 1, 1
|
||||
);
|
||||
INSERT INTO imm_monthly_rollups (
|
||||
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
${oldMonth}, 1, 1, 10, 1, 1, 1, ${nowMs}, ${nowMs}
|
||||
);
|
||||
`);
|
||||
|
||||
pruneRawRetention(db, nowMs, {
|
||||
eventsRetentionMs: 7 * 86_400_000,
|
||||
telemetryRetentionMs: 30 * 86_400_000,
|
||||
sessionsRetentionMs: 30 * 86_400_000,
|
||||
});
|
||||
|
||||
const rollupsAfterRawPrune = db
|
||||
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
|
||||
.get() as { total: number } | null;
|
||||
const monthlyAfterRawPrune = db
|
||||
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
|
||||
.get() as { total: number } | null;
|
||||
|
||||
assert.equal(rollupsAfterRawPrune?.total, 1);
|
||||
assert.equal(monthlyAfterRawPrune?.total, 1);
|
||||
|
||||
const rollupPrune = pruneRollupRetention(db, nowMs, {
|
||||
dailyRollupRetentionMs: 30 * 86_400_000,
|
||||
monthlyRollupRetentionMs: 365 * 86_400_000,
|
||||
});
|
||||
|
||||
const rollupsAfterRollupPrune = db
|
||||
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
|
||||
.get() as { total: number } | null;
|
||||
const monthlyAfterRollupPrune = db
|
||||
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
|
||||
.get() as { total: number } | null;
|
||||
|
||||
assert.equal(rollupPrune.deletedDailyRows, 1);
|
||||
assert.equal(rollupPrune.deletedMonthlyRows, 1);
|
||||
assert.equal(rollupsAfterRollupPrune?.total, 0);
|
||||
assert.equal(monthlyAfterRollupPrune?.total, 0);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema adds sample_ms index for telemetry rollup scans', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const indexes = db.prepare("PRAGMA index_list('imm_session_telemetry')").all() as Array<{
|
||||
name: string;
|
||||
}>;
|
||||
const hasSampleMsIndex = indexes.some((row) => row.name === 'idx_telemetry_sample_ms');
|
||||
assert.equal(hasSampleMsIndex, true);
|
||||
|
||||
const indexColumns = db.prepare("PRAGMA index_info('idx_telemetry_sample_ms')").all() as Array<{
|
||||
name: string;
|
||||
}>;
|
||||
assert.deepEqual(
|
||||
indexColumns.map((column) => column.name),
|
||||
['sample_ms'],
|
||||
);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('runOptimizeMaintenance executes PRAGMA optimize', () => {
|
||||
const executedSql: string[] = [];
|
||||
const db = {
|
||||
exec(source: string) {
|
||||
executedSql.push(source);
|
||||
return this;
|
||||
},
|
||||
} as unknown as Parameters<typeof runOptimizeMaintenance>[0];
|
||||
|
||||
runOptimizeMaintenance(db);
|
||||
|
||||
assert.deepEqual(executedSql, ['PRAGMA optimize']);
|
||||
});
|
||||
@@ -18,11 +18,9 @@ interface RollupTelemetryResult {
|
||||
maxSampleMs: number | null;
|
||||
}
|
||||
|
||||
interface RetentionResult {
|
||||
interface RawRetentionResult {
|
||||
deletedSessionEvents: number;
|
||||
deletedTelemetryRows: number;
|
||||
deletedDailyRows: number;
|
||||
deletedMonthlyRows: number;
|
||||
deletedEndedSessions: number;
|
||||
}
|
||||
|
||||
@@ -31,20 +29,18 @@ export function toMonthKey(timestampMs: number): number {
|
||||
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
|
||||
}
|
||||
|
||||
export function pruneRetention(
|
||||
export function pruneRawRetention(
|
||||
db: DatabaseSync,
|
||||
nowMs: number,
|
||||
policy: {
|
||||
eventsRetentionMs: number;
|
||||
telemetryRetentionMs: number;
|
||||
dailyRollupRetentionMs: number;
|
||||
monthlyRollupRetentionMs: number;
|
||||
sessionsRetentionMs: number;
|
||||
},
|
||||
): RetentionResult {
|
||||
): RawRetentionResult {
|
||||
const eventCutoff = nowMs - policy.eventsRetentionMs;
|
||||
const telemetryCutoff = nowMs - policy.telemetryRetentionMs;
|
||||
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
|
||||
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
|
||||
const sessionsCutoff = nowMs - policy.sessionsRetentionMs;
|
||||
|
||||
const deletedSessionEvents = (
|
||||
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
|
||||
@@ -56,28 +52,49 @@ export function pruneRetention(
|
||||
changes: number;
|
||||
}
|
||||
).changes;
|
||||
const deletedDailyRows = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
|
||||
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
|
||||
).changes;
|
||||
const deletedMonthlyRows = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
|
||||
.run(toMonthKey(monthCutoff)) as { changes: number }
|
||||
).changes;
|
||||
const deletedEndedSessions = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
|
||||
.run(telemetryCutoff) as { changes: number }
|
||||
.run(sessionsCutoff) as { changes: number }
|
||||
).changes;
|
||||
|
||||
return {
|
||||
deletedSessionEvents,
|
||||
deletedTelemetryRows,
|
||||
deletedEndedSessions,
|
||||
};
|
||||
}
|
||||
|
||||
export function pruneRollupRetention(
|
||||
db: DatabaseSync,
|
||||
nowMs: number,
|
||||
policy: {
|
||||
dailyRollupRetentionMs: number;
|
||||
monthlyRollupRetentionMs: number;
|
||||
},
|
||||
): { deletedDailyRows: number; deletedMonthlyRows: number } {
|
||||
const deletedDailyRows = Number.isFinite(policy.dailyRollupRetentionMs)
|
||||
? (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
|
||||
.run(Math.floor((nowMs - policy.dailyRollupRetentionMs) / DAILY_MS)) as {
|
||||
changes: number;
|
||||
}
|
||||
).changes
|
||||
: 0;
|
||||
const deletedMonthlyRows = Number.isFinite(policy.monthlyRollupRetentionMs)
|
||||
? (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
|
||||
.run(toMonthKey(nowMs - policy.monthlyRollupRetentionMs)) as {
|
||||
changes: number;
|
||||
}
|
||||
).changes
|
||||
: 0;
|
||||
|
||||
return {
|
||||
deletedDailyRows,
|
||||
deletedMonthlyRows,
|
||||
deletedEndedSessions,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -108,49 +125,57 @@ function upsertDailyRollupsForGroups(
|
||||
const upsertStmt = db.prepare(`
|
||||
INSERT INTO imm_daily_rollups (
|
||||
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
|
||||
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
|
||||
total_tokens_seen, total_cards, cards_per_hour,
|
||||
tokens_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
|
||||
s.video_id AS video_id,
|
||||
COUNT(DISTINCT s.session_id) AS total_sessions,
|
||||
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
|
||||
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
|
||||
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
|
||||
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
|
||||
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
|
||||
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
|
||||
ELSE NULL
|
||||
END AS cards_per_hour,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
|
||||
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
|
||||
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
|
||||
THEN COALESCE(SUM(sm.max_tokens), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
|
||||
ELSE NULL
|
||||
END AS words_per_min,
|
||||
END AS tokens_per_min,
|
||||
CASE
|
||||
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
|
||||
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
|
||||
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
|
||||
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
|
||||
ELSE NULL
|
||||
END AS lookup_hit_rate,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
|
||||
JOIN (
|
||||
SELECT
|
||||
t.session_id,
|
||||
MAX(t.active_watched_ms) AS max_active_ms,
|
||||
MAX(t.lines_seen) AS max_lines,
|
||||
MAX(t.tokens_seen) AS max_tokens,
|
||||
MAX(t.cards_mined) AS max_cards,
|
||||
MAX(t.lookup_count) AS max_lookups,
|
||||
MAX(t.lookup_hits) AS max_hits
|
||||
FROM imm_session_telemetry t
|
||||
GROUP BY t.session_id
|
||||
) sm ON s.session_id = sm.session_id
|
||||
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_day, s.video_id
|
||||
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
total_active_min = excluded.total_active_min,
|
||||
total_lines_seen = excluded.total_lines_seen,
|
||||
total_words_seen = excluded.total_words_seen,
|
||||
total_tokens_seen = excluded.total_tokens_seen,
|
||||
total_cards = excluded.total_cards,
|
||||
cards_per_hour = excluded.cards_per_hour,
|
||||
words_per_min = excluded.words_per_min,
|
||||
tokens_per_min = excluded.tokens_per_min,
|
||||
lookup_hit_rate = excluded.lookup_hit_rate,
|
||||
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
|
||||
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
|
||||
@@ -173,29 +198,35 @@ function upsertMonthlyRollupsForGroups(
|
||||
const upsertStmt = db.prepare(`
|
||||
INSERT INTO imm_monthly_rollups (
|
||||
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
|
||||
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
|
||||
s.video_id AS video_id,
|
||||
COUNT(DISTINCT s.session_id) AS total_sessions,
|
||||
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
|
||||
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
|
||||
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
|
||||
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
|
||||
JOIN (
|
||||
SELECT
|
||||
t.session_id,
|
||||
MAX(t.active_watched_ms) AS max_active_ms,
|
||||
MAX(t.lines_seen) AS max_lines,
|
||||
MAX(t.tokens_seen) AS max_tokens,
|
||||
MAX(t.cards_mined) AS max_cards
|
||||
FROM imm_session_telemetry t
|
||||
GROUP BY t.session_id
|
||||
) sm ON s.session_id = sm.session_id
|
||||
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_month, s.video_id
|
||||
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
total_active_min = excluded.total_active_min,
|
||||
total_lines_seen = excluded.total_lines_seen,
|
||||
total_words_seen = excluded.total_words_seen,
|
||||
total_tokens_seen = excluded.total_tokens_seen,
|
||||
total_cards = excluded.total_cards,
|
||||
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
|
||||
@@ -216,8 +247,8 @@ function getAffectedRollupGroups(
|
||||
.prepare(
|
||||
`
|
||||
SELECT DISTINCT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
|
||||
s.video_id AS video_id
|
||||
FROM imm_session_telemetry t
|
||||
JOIN imm_sessions s
|
||||
@@ -292,3 +323,7 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
export function runOptimizeMaintenance(db: DatabaseSync): void {
|
||||
db.exec('PRAGMA optimize');
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
|
||||
import test from 'node:test';
|
||||
import type { spawn as spawnFn } from 'node:child_process';
|
||||
import { SOURCE_TYPE_LOCAL } from './types';
|
||||
import { getLocalVideoMetadata, runFfprobe } from './metadata';
|
||||
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
|
||||
|
||||
type Spawn = typeof spawnFn;
|
||||
|
||||
@@ -146,3 +146,83 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
|
||||
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
|
||||
assert.equal(hashFallbackMetadata.hashSha256, null);
|
||||
});
|
||||
|
||||
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
|
||||
const seenTargets: string[] = [];
|
||||
const parsed = await guessAnimeVideoMetadata(
|
||||
'/tmp/Little Witch Academia S02E05.mkv',
|
||||
'Episode 5',
|
||||
{
|
||||
runGuessit: async (target) => {
|
||||
seenTargets.push(target);
|
||||
return JSON.stringify({
|
||||
title: 'Little Witch Academia',
|
||||
season: 2,
|
||||
episode: 5,
|
||||
});
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);
|
||||
assert.deepEqual(parsed, {
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'guessit',
|
||||
parserConfidence: 1,
|
||||
parseMetadataJson: JSON.stringify({
|
||||
filename: 'Little Witch Academia S02E05.mkv',
|
||||
source: 'guessit',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
|
||||
const parsed = await guessAnimeVideoMetadata(
|
||||
'/tmp/Little Witch Academia S02E05.mkv',
|
||||
'Episode 5',
|
||||
{
|
||||
runGuessit: async () => {
|
||||
throw new Error('guessit unavailable');
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
assert.deepEqual(parsed, {
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 1,
|
||||
parseMetadataJson: JSON.stringify({
|
||||
confidence: 'high',
|
||||
filename: 'Little Witch Academia S02E05.mkv',
|
||||
rawTitle: 'Little Witch Academia S02E05',
|
||||
source: 'fallback',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
|
||||
const parsed = await guessAnimeVideoMetadata('/tmp/[SubsPlease] Frieren - 03 (1080p).mkv', null, {
|
||||
runGuessit: async () => JSON.stringify({ episode: 3 }),
|
||||
});
|
||||
|
||||
assert.deepEqual(parsed, {
|
||||
parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
|
||||
parsedTitle: 'Frieren - 03 (1080p)',
|
||||
parsedSeason: null,
|
||||
parsedEpisode: null,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.2,
|
||||
parseMetadataJson: JSON.stringify({
|
||||
confidence: 'low',
|
||||
filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
|
||||
rawTitle: 'Frieren - 03 (1080p)',
|
||||
source: 'fallback',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import crypto from 'node:crypto';
|
||||
import { spawn as nodeSpawn } from 'node:child_process';
|
||||
import * as fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { parseMediaInfo } from '../../../jimaku/utils';
|
||||
import {
|
||||
guessAnilistMediaInfo,
|
||||
runGuessit,
|
||||
type GuessAnilistMediaInfoDeps,
|
||||
} from '../anilist/anilist-updater';
|
||||
import {
|
||||
deriveCanonicalTitle,
|
||||
emptyMetadata,
|
||||
@@ -8,7 +15,12 @@ import {
|
||||
parseFps,
|
||||
toNullableInt,
|
||||
} from './reducer';
|
||||
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
|
||||
import {
|
||||
SOURCE_TYPE_LOCAL,
|
||||
type ParsedAnimeVideoGuess,
|
||||
type ProbeMetadata,
|
||||
type VideoMetadata,
|
||||
} from './types';
|
||||
|
||||
type SpawnFn = typeof nodeSpawn;
|
||||
|
||||
@@ -24,6 +36,21 @@ interface MetadataDeps {
|
||||
fs?: FsDeps;
|
||||
}
|
||||
|
||||
interface GuessAnimeVideoMetadataDeps {
|
||||
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
|
||||
}
|
||||
|
||||
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
|
||||
switch (confidence) {
|
||||
case 'high':
|
||||
return 1;
|
||||
case 'medium':
|
||||
return 0.6;
|
||||
default:
|
||||
return 0.2;
|
||||
}
|
||||
}
|
||||
|
||||
export async function computeSha256(
|
||||
mediaPath: string,
|
||||
deps: MetadataDeps = {},
|
||||
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
|
||||
metadataJson: null,
|
||||
};
|
||||
}
|
||||
|
||||
export async function guessAnimeVideoMetadata(
|
||||
mediaPath: string | null,
|
||||
mediaTitle: string | null,
|
||||
deps: GuessAnimeVideoMetadataDeps = {},
|
||||
): Promise<ParsedAnimeVideoGuess | null> {
|
||||
const parsed = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
|
||||
runGuessit: deps.runGuessit ?? runGuessit,
|
||||
});
|
||||
if (!parsed) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const parsedBasename = mediaPath ? path.basename(mediaPath) : null;
|
||||
if (parsed.source === 'guessit') {
|
||||
return {
|
||||
parsedBasename,
|
||||
parsedTitle: parsed.title,
|
||||
parsedSeason: parsed.season,
|
||||
parsedEpisode: parsed.episode,
|
||||
parserSource: 'guessit',
|
||||
parserConfidence: 1,
|
||||
parseMetadataJson: JSON.stringify({
|
||||
filename: parsedBasename,
|
||||
source: 'guessit',
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
const fallbackInfo = parseMediaInfo(mediaPath ?? mediaTitle);
|
||||
return {
|
||||
parsedBasename: parsedBasename ?? fallbackInfo.filename ?? null,
|
||||
parsedTitle: parsed.title,
|
||||
parsedSeason: parsed.season,
|
||||
parsedEpisode: parsed.episode,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: mapParserConfidenceToScore(fallbackInfo.confidence),
|
||||
parseMetadataJson: JSON.stringify({
|
||||
confidence: fallbackInfo.confidence,
|
||||
filename: fallbackInfo.filename,
|
||||
rawTitle: fallbackInfo.rawTitle,
|
||||
source: 'fallback',
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,11 +15,11 @@ export function createInitialSessionState(
|
||||
totalWatchedMs: 0,
|
||||
activeWatchedMs: 0,
|
||||
linesSeen: 0,
|
||||
wordsSeen: 0,
|
||||
tokensSeen: 0,
|
||||
cardsMined: 0,
|
||||
lookupCount: 0,
|
||||
lookupHits: 0,
|
||||
yomitanLookupCount: 0,
|
||||
pauseCount: 0,
|
||||
pauseMs: 0,
|
||||
seekForwardCount: 0,
|
||||
@@ -30,6 +30,7 @@ export function createInitialSessionState(
|
||||
lastPauseStartMs: null,
|
||||
isPaused: false,
|
||||
pendingTelemetry: true,
|
||||
markedWatched: false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -50,16 +51,6 @@ export function sanitizePayload(payload: Record<string, unknown>, maxPayloadByte
|
||||
return json.length <= maxPayloadBytes ? json : JSON.stringify({ truncated: true });
|
||||
}
|
||||
|
||||
export function calculateTextMetrics(value: string): {
|
||||
words: number;
|
||||
tokens: number;
|
||||
} {
|
||||
const words = value.split(/\s+/).filter(Boolean).length;
|
||||
const cjkCount = value.match(/[\u3040-\u30ff\u4e00-\u9fff]/g)?.length ?? 0;
|
||||
const tokens = Math.max(words, cjkCount);
|
||||
return { words, tokens };
|
||||
}
|
||||
|
||||
export function secToMs(seconds: number): number {
|
||||
const coerced = Number(seconds);
|
||||
if (!Number.isFinite(coerced)) return 0;
|
||||
|
||||
@@ -39,8 +39,41 @@ export function finalizeSessionRecord(
|
||||
SET
|
||||
ended_at_ms = ?,
|
||||
status = ?,
|
||||
ended_media_ms = ?,
|
||||
total_watched_ms = ?,
|
||||
active_watched_ms = ?,
|
||||
lines_seen = ?,
|
||||
tokens_seen = ?,
|
||||
cards_mined = ?,
|
||||
lookup_count = ?,
|
||||
lookup_hits = ?,
|
||||
yomitan_lookup_count = ?,
|
||||
pause_count = ?,
|
||||
pause_ms = ?,
|
||||
seek_forward_count = ?,
|
||||
seek_backward_count = ?,
|
||||
media_buffer_events = ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE session_id = ?
|
||||
`,
|
||||
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
|
||||
).run(
|
||||
endedAtMs,
|
||||
SESSION_STATUS_ENDED,
|
||||
sessionState.lastMediaMs,
|
||||
sessionState.totalWatchedMs,
|
||||
sessionState.activeWatchedMs,
|
||||
sessionState.linesSeen,
|
||||
sessionState.tokensSeen,
|
||||
sessionState.cardsMined,
|
||||
sessionState.lookupCount,
|
||||
sessionState.lookupHits,
|
||||
sessionState.yomitanLookupCount,
|
||||
sessionState.pauseCount,
|
||||
sessionState.pauseMs,
|
||||
sessionState.seekForwardCount,
|
||||
sessionState.seekBackwardCount,
|
||||
sessionState.mediaBufferEvents,
|
||||
Date.now(),
|
||||
sessionState.sessionId,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -6,10 +6,15 @@ import test from 'node:test';
|
||||
import { Database } from './sqlite';
|
||||
import { finalizeSessionRecord, startSessionRecord } from './session';
|
||||
import {
|
||||
applyPragmas,
|
||||
createTrackerPreparedStatements,
|
||||
ensureSchema,
|
||||
executeQueuedWrite,
|
||||
normalizeCoverBlobBytes,
|
||||
parseCoverBlobReference,
|
||||
getOrCreateAnimeRecord,
|
||||
getOrCreateVideoRecord,
|
||||
linkVideoToAnimeRecord,
|
||||
} from './storage';
|
||||
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
|
||||
|
||||
@@ -46,6 +51,34 @@ function cleanupDbPath(dbPath: string): void {
|
||||
// libsql keeps Windows file handles alive after close when prepared statements were used.
|
||||
}
|
||||
|
||||
test('applyPragmas sets the SQLite tuning defaults used by immersion tracking', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
applyPragmas(db);
|
||||
|
||||
const journalModeRow = db.prepare('PRAGMA journal_mode').get() as {
|
||||
journal_mode: string;
|
||||
};
|
||||
const synchronousRow = db.prepare('PRAGMA synchronous').get() as { synchronous: number };
|
||||
const foreignKeysRow = db.prepare('PRAGMA foreign_keys').get() as { foreign_keys: number };
|
||||
const busyTimeoutRow = db.prepare('PRAGMA busy_timeout').get() as { timeout: number };
|
||||
const journalSizeLimitRow = db.prepare('PRAGMA journal_size_limit').get() as {
|
||||
journal_size_limit: number;
|
||||
};
|
||||
|
||||
assert.equal(journalModeRow.journal_mode, 'wal');
|
||||
assert.equal(synchronousRow.synchronous, 1);
|
||||
assert.equal(foreignKeysRow.foreign_keys, 1);
|
||||
assert.equal(busyTimeoutRow.timeout, 2500);
|
||||
assert.equal(journalSizeLimitRow.journal_size_limit, 67_108_864);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema creates immersion core tables', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
@@ -60,6 +93,7 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
const tableNames = new Set(rows.map((row) => row.name));
|
||||
|
||||
assert.ok(tableNames.has('imm_videos'));
|
||||
assert.ok(tableNames.has('imm_anime'));
|
||||
assert.ok(tableNames.has('imm_sessions'));
|
||||
assert.ok(tableNames.has('imm_session_telemetry'));
|
||||
assert.ok(tableNames.has('imm_session_events'));
|
||||
@@ -67,7 +101,37 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
assert.ok(tableNames.has('imm_monthly_rollups'));
|
||||
assert.ok(tableNames.has('imm_words'));
|
||||
assert.ok(tableNames.has('imm_kanji'));
|
||||
assert.ok(tableNames.has('imm_subtitle_lines'));
|
||||
assert.ok(tableNames.has('imm_word_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_rollup_state'));
|
||||
assert.ok(tableNames.has('imm_cover_art_blobs'));
|
||||
|
||||
const videoColumns = new Set(
|
||||
(
|
||||
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
|
||||
assert.ok(videoColumns.has('anime_id'));
|
||||
assert.ok(videoColumns.has('parsed_basename'));
|
||||
assert.ok(videoColumns.has('parsed_title'));
|
||||
assert.ok(videoColumns.has('parsed_season'));
|
||||
assert.ok(videoColumns.has('parsed_episode'));
|
||||
assert.ok(videoColumns.has('parser_source'));
|
||||
assert.ok(videoColumns.has('parser_confidence'));
|
||||
assert.ok(videoColumns.has('parse_metadata_json'));
|
||||
|
||||
const mediaArtColumns = new Set(
|
||||
(
|
||||
db.prepare('PRAGMA table_info(imm_media_art)').all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
assert.ok(mediaArtColumns.has('cover_blob_hash'));
|
||||
|
||||
const rollupStateRow = db
|
||||
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
|
||||
@@ -82,6 +146,566 @@ test('ensureSchema creates immersion core tables', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema creates large-history performance indexes', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const indexNames = new Set(
|
||||
(
|
||||
db
|
||||
.prepare(`SELECT name FROM sqlite_master WHERE type = 'index' AND name LIKE 'idx_%'`)
|
||||
.all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
|
||||
assert.ok(indexNames.has('idx_telemetry_sample_ms'));
|
||||
assert.ok(indexNames.has('idx_sessions_started_at'));
|
||||
assert.ok(indexNames.has('idx_sessions_ended_at'));
|
||||
assert.ok(indexNames.has('idx_words_frequency'));
|
||||
assert.ok(indexNames.has('idx_kanji_frequency'));
|
||||
assert.ok(indexNames.has('idx_media_art_anilist_id'));
|
||||
assert.ok(indexNames.has('idx_media_art_cover_url'));
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
db.exec(`
|
||||
CREATE TABLE imm_schema_version (
|
||||
schema_version INTEGER PRIMARY KEY,
|
||||
applied_at_ms INTEGER NOT NULL
|
||||
);
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
|
||||
|
||||
CREATE TABLE imm_videos(
|
||||
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
video_key TEXT NOT NULL UNIQUE,
|
||||
canonical_title TEXT NOT NULL,
|
||||
source_type INTEGER NOT NULL,
|
||||
source_path TEXT,
|
||||
source_url TEXT,
|
||||
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
|
||||
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
|
||||
codec_id INTEGER, container_id INTEGER,
|
||||
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
|
||||
bitrate_kbps INTEGER, audio_codec_id INTEGER,
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
`);
|
||||
|
||||
const insertLegacyVideo = db.prepare(`
|
||||
INSERT INTO imm_videos (
|
||||
video_key, canonical_title, source_type, source_path, source_url,
|
||||
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
|
||||
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
|
||||
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/Little Witch Academia S02E05.mkv',
|
||||
'Episode 5',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/Little Witch Academia S02E05.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/Little Witch Academia S02E06.mkv',
|
||||
'Episode 6',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/Little Witch Academia S02E06.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
insertLegacyVideo.run(
|
||||
'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
|
||||
'Episode 3',
|
||||
SOURCE_TYPE_LOCAL,
|
||||
'/library/[SubsPlease] Frieren - 03 - Departure.mkv',
|
||||
null,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
1,
|
||||
1,
|
||||
);
|
||||
|
||||
ensureSchema(db);
|
||||
|
||||
const videoColumns = new Set(
|
||||
(
|
||||
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
|
||||
name: string;
|
||||
}>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
assert.ok(videoColumns.has('anime_id'));
|
||||
assert.ok(videoColumns.has('parsed_basename'));
|
||||
assert.ok(videoColumns.has('parsed_title'));
|
||||
assert.ok(videoColumns.has('parsed_season'));
|
||||
assert.ok(videoColumns.has('parsed_episode'));
|
||||
assert.ok(videoColumns.has('parser_source'));
|
||||
assert.ok(videoColumns.has('parser_confidence'));
|
||||
assert.ok(videoColumns.has('parse_metadata_json'));
|
||||
|
||||
const animeRows = db
|
||||
.prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
|
||||
.all() as Array<{ canonical_title: string }>;
|
||||
assert.deepEqual(
|
||||
animeRows.map((row) => row.canonical_title),
|
||||
['Frieren', 'Little Witch Academia'],
|
||||
);
|
||||
|
||||
const littleWitchRows = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
a.canonical_title AS anime_title,
|
||||
v.parsed_title,
|
||||
v.parsed_basename,
|
||||
v.parsed_season,
|
||||
v.parsed_episode,
|
||||
v.parser_source,
|
||||
v.parser_confidence
|
||||
FROM imm_videos v
|
||||
JOIN imm_anime a ON a.anime_id = v.anime_id
|
||||
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
|
||||
ORDER BY v.video_key
|
||||
`,
|
||||
)
|
||||
.all() as Array<{
|
||||
anime_title: string;
|
||||
parsed_title: string | null;
|
||||
parsed_basename: string | null;
|
||||
parsed_season: number | null;
|
||||
parsed_episode: number | null;
|
||||
parser_source: string | null;
|
||||
parser_confidence: number | null;
|
||||
}>;
|
||||
|
||||
assert.equal(littleWitchRows.length, 2);
|
||||
assert.deepEqual(
|
||||
littleWitchRows.map((row) => ({
|
||||
animeTitle: row.anime_title,
|
||||
parsedTitle: row.parsed_title,
|
||||
parsedBasename: row.parsed_basename,
|
||||
parsedSeason: row.parsed_season,
|
||||
parsedEpisode: row.parsed_episode,
|
||||
parserSource: row.parser_source,
|
||||
})),
|
||||
[
|
||||
{
|
||||
animeTitle: 'Little Witch Academia',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'fallback',
|
||||
},
|
||||
{
|
||||
animeTitle: 'Little Witch Academia',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedBasename: 'Little Witch Academia S02E06.mkv',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 6,
|
||||
parserSource: 'fallback',
|
||||
},
|
||||
],
|
||||
);
|
||||
assert.ok(
|
||||
littleWitchRows.every(
|
||||
(row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
|
||||
),
|
||||
);
|
||||
|
||||
const frierenRow = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT
|
||||
a.canonical_title AS anime_title,
|
||||
v.parsed_title,
|
||||
v.parsed_episode,
|
||||
v.parser_source
|
||||
FROM imm_videos v
|
||||
JOIN imm_anime a ON a.anime_id = v.anime_id
|
||||
WHERE v.video_key = ?
|
||||
`,
|
||||
)
|
||||
.get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
|
||||
anime_title: string;
|
||||
parsed_title: string | null;
|
||||
parsed_episode: number | null;
|
||||
parser_source: string | null;
|
||||
} | null;
|
||||
|
||||
assert.ok(frierenRow);
|
||||
assert.equal(frierenRow?.anime_title, 'Frieren');
|
||||
assert.equal(frierenRow?.parsed_title, 'Frieren');
|
||||
assert.equal(frierenRow?.parsed_episode, 3);
|
||||
assert.equal(frierenRow?.parser_source, 'fallback');
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
db.exec(`
|
||||
CREATE TABLE imm_schema_version (
|
||||
schema_version INTEGER PRIMARY KEY,
|
||||
applied_at_ms INTEGER NOT NULL
|
||||
);
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
|
||||
|
||||
CREATE TABLE imm_videos(
|
||||
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
video_key TEXT NOT NULL UNIQUE,
|
||||
anime_id INTEGER,
|
||||
canonical_title TEXT NOT NULL,
|
||||
source_type INTEGER NOT NULL,
|
||||
source_path TEXT,
|
||||
source_url TEXT,
|
||||
parsed_basename TEXT,
|
||||
parsed_title TEXT,
|
||||
parsed_season INTEGER,
|
||||
parsed_episode INTEGER,
|
||||
parser_source TEXT,
|
||||
parser_confidence REAL,
|
||||
parse_metadata_json TEXT,
|
||||
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
|
||||
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
|
||||
codec_id INTEGER, container_id INTEGER,
|
||||
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
|
||||
bitrate_kbps INTEGER, audio_codec_id INTEGER,
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_sessions(
|
||||
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_uuid TEXT NOT NULL UNIQUE,
|
||||
video_id INTEGER NOT NULL,
|
||||
started_at_ms INTEGER NOT NULL,
|
||||
ended_at_ms INTEGER,
|
||||
status INTEGER NOT NULL,
|
||||
locale_id INTEGER,
|
||||
target_lang_id INTEGER,
|
||||
difficulty_tier INTEGER,
|
||||
subtitle_mode INTEGER,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_session_events(
|
||||
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id INTEGER NOT NULL,
|
||||
ts_ms INTEGER NOT NULL,
|
||||
event_type INTEGER NOT NULL,
|
||||
line_index INTEGER,
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
words_delta INTEGER NOT NULL DEFAULT 0,
|
||||
cards_delta INTEGER NOT NULL DEFAULT 0,
|
||||
payload_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
CREATE TABLE imm_words(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
headword TEXT,
|
||||
word TEXT,
|
||||
reading TEXT,
|
||||
part_of_speech TEXT,
|
||||
pos1 TEXT,
|
||||
pos2 TEXT,
|
||||
pos3 TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(headword, word, reading)
|
||||
);
|
||||
CREATE TABLE imm_kanji(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
kanji TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(kanji)
|
||||
);
|
||||
CREATE TABLE imm_rollup_state(
|
||||
state_key TEXT PRIMARY KEY,
|
||||
state_value INTEGER NOT NULL
|
||||
);
|
||||
`);
|
||||
|
||||
ensureSchema(db);
|
||||
|
||||
const tableNames = new Set(
|
||||
(
|
||||
db
|
||||
.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`)
|
||||
.all() as Array<{ name: string }>
|
||||
).map((row) => row.name),
|
||||
);
|
||||
|
||||
assert.ok(tableNames.has('imm_subtitle_lines'));
|
||||
assert.ok(tableNames.has('imm_word_line_occurrences'));
|
||||
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('ensureSchema migrates legacy cover art blobs into the shared blob store', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
db.prepare('UPDATE imm_schema_version SET schema_version = 12').run();
|
||||
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/legacy-cover-art.mkv', {
|
||||
canonicalTitle: 'Legacy Cover Art',
|
||||
sourcePath: '/tmp/legacy-cover-art.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
const legacyBlob = Uint8Array.from([0xde, 0xad, 0xbe, 0xef]);
|
||||
|
||||
db.prepare(
|
||||
`
|
||||
INSERT INTO imm_media_art (
|
||||
video_id,
|
||||
anilist_id,
|
||||
cover_url,
|
||||
cover_blob,
|
||||
cover_blob_hash,
|
||||
title_romaji,
|
||||
title_english,
|
||||
episodes_total,
|
||||
fetched_at_ms,
|
||||
CREATED_DATE,
|
||||
LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`,
|
||||
).run(videoId, null, null, legacyBlob, null, null, null, null, 1, 1, 1);
|
||||
|
||||
assert.doesNotThrow(() => ensureSchema(db));
|
||||
|
||||
const mediaArtRow = db
|
||||
.prepare(
|
||||
'SELECT cover_blob AS coverBlob, cover_blob_hash AS coverBlobHash FROM imm_media_art',
|
||||
)
|
||||
.get() as {
|
||||
coverBlob: ArrayBuffer | Uint8Array | Buffer | null;
|
||||
coverBlobHash: string | null;
|
||||
} | null;
|
||||
|
||||
assert.ok(mediaArtRow);
|
||||
assert.ok(mediaArtRow?.coverBlobHash);
|
||||
assert.equal(
|
||||
parseCoverBlobReference(normalizeCoverBlobBytes(mediaArtRow?.coverBlob)),
|
||||
mediaArtRow?.coverBlobHash,
|
||||
);
|
||||
|
||||
const sharedBlobRow = db
|
||||
.prepare('SELECT cover_blob AS coverBlob FROM imm_cover_art_blobs WHERE blob_hash = ?')
|
||||
.get(mediaArtRow?.coverBlobHash) as {
|
||||
coverBlob: ArrayBuffer | Uint8Array | Buffer;
|
||||
} | null;
|
||||
|
||||
assert.ok(sharedBlobRow);
|
||||
assert.equal(normalizeCoverBlobBytes(sharedBlobRow?.coverBlob)?.toString('hex'), 'deadbeef');
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
|
||||
const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
|
||||
canonicalTitle: 'Episode 5',
|
||||
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
|
||||
canonicalTitle: 'Episode 6',
|
||||
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
|
||||
const provisionalAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
canonicalTitle: 'Little Witch Academia',
|
||||
anilistId: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
titleNative: null,
|
||||
metadataJson: '{"source":"parsed"}',
|
||||
});
|
||||
linkVideoToAnimeRecord(db, firstVideoId, {
|
||||
animeId: provisionalAnimeId,
|
||||
parsedBasename: 'Little Witch Academia S02E05.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 5,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.6,
|
||||
parseMetadataJson: '{"source":"parsed","episode":5}',
|
||||
});
|
||||
|
||||
const reusedAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: ' little witch academia ',
|
||||
canonicalTitle: 'Little Witch Academia',
|
||||
anilistId: null,
|
||||
titleRomaji: null,
|
||||
titleEnglish: null,
|
||||
titleNative: null,
|
||||
metadataJson: '{"source":"parsed"}',
|
||||
});
|
||||
linkVideoToAnimeRecord(db, secondVideoId, {
|
||||
animeId: reusedAnimeId,
|
||||
parsedBasename: 'Little Witch Academia S02E06.mkv',
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
parsedSeason: 2,
|
||||
parsedEpisode: 6,
|
||||
parserSource: 'fallback',
|
||||
parserConfidence: 0.6,
|
||||
parseMetadataJson: '{"source":"parsed","episode":6}',
|
||||
});
|
||||
|
||||
assert.equal(reusedAnimeId, provisionalAnimeId);
|
||||
|
||||
const upgradedAnimeId = getOrCreateAnimeRecord(db, {
|
||||
parsedTitle: 'Little Witch Academia',
|
||||
canonicalTitle: 'Little Witch Academia TV',
|
||||
anilistId: 33_435,
|
||||
titleRomaji: 'Little Witch Academia',
|
||||
titleEnglish: 'Little Witch Academia',
|
||||
titleNative: 'リトルウィッチアカデミア',
|
||||
metadataJson: '{"source":"anilist"}',
|
||||
});
|
||||
|
||||
assert.equal(upgradedAnimeId, provisionalAnimeId);
|
||||
|
||||
const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
|
||||
anime_id: number;
|
||||
normalized_title_key: string;
|
||||
canonical_title: string;
|
||||
anilist_id: number | null;
|
||||
title_romaji: string | null;
|
||||
title_english: string | null;
|
||||
title_native: string | null;
|
||||
metadata_json: string | null;
|
||||
}>;
|
||||
assert.equal(animeRows.length, 1);
|
||||
assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
|
||||
assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
|
||||
assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
|
||||
assert.equal(animeRows[0]?.anilist_id, 33_435);
|
||||
assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
|
||||
assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
|
||||
assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
|
||||
assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
|
||||
|
||||
const linkedVideos = db
|
||||
.prepare(
|
||||
`
|
||||
SELECT anime_id, parsed_title, parsed_season, parsed_episode
|
||||
FROM imm_videos
|
||||
WHERE video_id IN (?, ?)
|
||||
ORDER BY video_id
|
||||
`,
|
||||
)
|
||||
.all(firstVideoId, secondVideoId) as Array<{
|
||||
anime_id: number | null;
|
||||
parsed_title: string | null;
|
||||
parsed_season: number | null;
|
||||
parsed_episode: number | null;
|
||||
}>;
|
||||
|
||||
assert.deepEqual(linkedVideos, [
|
||||
{
|
||||
anime_id: provisionalAnimeId,
|
||||
parsed_title: 'Little Witch Academia',
|
||||
parsed_season: 2,
|
||||
parsed_episode: 5,
|
||||
},
|
||||
{
|
||||
anime_id: provisionalAnimeId,
|
||||
parsed_title: 'Little Witch Academia',
|
||||
parsed_season: 2,
|
||||
parsed_episode: 6,
|
||||
},
|
||||
]);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('start/finalize session updates ended_at and status', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
@@ -116,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
|
||||
}
|
||||
});
|
||||
|
||||
test('finalize session persists ended media position', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
|
||||
canonicalTitle: 'Slice A Ended Media',
|
||||
sourcePath: '/tmp/slice-a-ended-media.mkv',
|
||||
sourceUrl: null,
|
||||
sourceType: SOURCE_TYPE_LOCAL,
|
||||
});
|
||||
const startedAtMs = 1_234_567_000;
|
||||
const endedAtMs = startedAtMs + 8_500;
|
||||
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
|
||||
state.lastMediaMs = 91_000;
|
||||
|
||||
finalizeSessionRecord(db, state, endedAtMs);
|
||||
|
||||
const row = db
|
||||
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
|
||||
.get(sessionId) as {
|
||||
ended_media_ms: number | null;
|
||||
} | null;
|
||||
|
||||
assert.ok(row);
|
||||
assert.equal(row?.ended_media_ms, 91_000);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('executeQueuedWrite inserts event and telemetry rows', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
@@ -139,11 +796,11 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
|
||||
totalWatchedMs: 1_000,
|
||||
activeWatchedMs: 900,
|
||||
linesSeen: 3,
|
||||
wordsSeen: 6,
|
||||
tokensSeen: 6,
|
||||
cardsMined: 1,
|
||||
lookupCount: 2,
|
||||
lookupHits: 1,
|
||||
yomitanLookupCount: 0,
|
||||
pauseCount: 1,
|
||||
pauseMs: 50,
|
||||
seekForwardCount: 0,
|
||||
@@ -161,7 +818,7 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
|
||||
lineIndex: 1,
|
||||
segmentStartMs: 0,
|
||||
segmentEndMs: 800,
|
||||
wordsDelta: 2,
|
||||
tokensDelta: 2,
|
||||
cardsDelta: 0,
|
||||
payloadJson: '{"event":"subtitle-line"}',
|
||||
},
|
||||
@@ -191,18 +848,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
ensureSchema(db);
|
||||
const stmts = createTrackerPreparedStatements(db);
|
||||
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
|
||||
|
||||
const wordRow = db
|
||||
.prepare(
|
||||
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
|
||||
`SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
|
||||
FROM imm_words WHERE headword = ?`,
|
||||
)
|
||||
.get('猫') as {
|
||||
headword: string;
|
||||
frequency: number;
|
||||
part_of_speech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
first_seen: number;
|
||||
last_seen: number;
|
||||
} | null;
|
||||
@@ -218,6 +879,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
assert.ok(wordRow);
|
||||
assert.ok(kanjiRow);
|
||||
assert.equal(wordRow?.frequency, 2);
|
||||
assert.equal(wordRow?.part_of_speech, 'noun');
|
||||
assert.equal(wordRow?.pos1, '名詞');
|
||||
assert.equal(wordRow?.pos2, '一般');
|
||||
assert.equal(kanjiRow?.frequency, 2);
|
||||
assert.equal(wordRow?.first_seen, 5);
|
||||
assert.equal(wordRow?.last_seen, 15);
|
||||
@@ -228,3 +892,54 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new Database(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const stmts = createTrackerPreparedStatements(db);
|
||||
|
||||
stmts.wordUpsertStmt.run(
|
||||
'知っている',
|
||||
'知っている',
|
||||
'しっている',
|
||||
'other',
|
||||
'動詞',
|
||||
'自立',
|
||||
'',
|
||||
10,
|
||||
10,
|
||||
);
|
||||
stmts.wordUpsertStmt.run(
|
||||
'知っている',
|
||||
'知っている',
|
||||
'しっている',
|
||||
'verb',
|
||||
'動詞',
|
||||
'自立',
|
||||
'',
|
||||
11,
|
||||
12,
|
||||
);
|
||||
|
||||
const row = db
|
||||
.prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
|
||||
.get('知っている') as {
|
||||
frequency: number;
|
||||
part_of_speech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
} | null;
|
||||
|
||||
assert.ok(row);
|
||||
assert.equal(row?.frequency, 2);
|
||||
assert.equal(row?.part_of_speech, 'verb');
|
||||
assert.equal(row?.pos1, '動詞');
|
||||
assert.equal(row?.pos2, '自立');
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
export const SCHEMA_VERSION = 3;
|
||||
export const SCHEMA_VERSION = 15;
|
||||
export const DEFAULT_QUEUE_CAP = 1_000;
|
||||
export const DEFAULT_BATCH_SIZE = 25;
|
||||
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
|
||||
@@ -7,6 +7,7 @@ const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
|
||||
export const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
|
||||
export const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
|
||||
export const DEFAULT_TELEMETRY_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
|
||||
export const DEFAULT_SESSIONS_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
|
||||
export const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * 24 * 60 * 60 * 1000;
|
||||
export const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * 24 * 60 * 60 * 1000;
|
||||
export const DEFAULT_MAX_PAYLOAD_BYTES = 256;
|
||||
@@ -25,10 +26,14 @@ export const EVENT_SEEK_FORWARD = 5;
|
||||
export const EVENT_SEEK_BACKWARD = 6;
|
||||
export const EVENT_PAUSE_START = 7;
|
||||
export const EVENT_PAUSE_END = 8;
|
||||
export const EVENT_YOMITAN_LOOKUP = 9;
|
||||
|
||||
export interface ImmersionTrackerOptions {
|
||||
dbPath: string;
|
||||
policy?: ImmersionTrackerPolicy;
|
||||
resolveLegacyVocabularyPos?: (
|
||||
row: LegacyVocabularyPosRow,
|
||||
) => Promise<LegacyVocabularyPosResolution | null>;
|
||||
}
|
||||
|
||||
export interface ImmersionTrackerPolicy {
|
||||
@@ -40,6 +45,7 @@ export interface ImmersionTrackerPolicy {
|
||||
retention?: {
|
||||
eventsDays?: number;
|
||||
telemetryDays?: number;
|
||||
sessionsDays?: number;
|
||||
dailyRollupsDays?: number;
|
||||
monthlyRollupsDays?: number;
|
||||
vacuumIntervalDays?: number;
|
||||
@@ -50,11 +56,11 @@ export interface TelemetryAccumulator {
|
||||
totalWatchedMs: number;
|
||||
activeWatchedMs: number;
|
||||
linesSeen: number;
|
||||
wordsSeen: number;
|
||||
tokensSeen: number;
|
||||
cardsMined: number;
|
||||
lookupCount: number;
|
||||
lookupHits: number;
|
||||
yomitanLookupCount: number;
|
||||
pauseCount: number;
|
||||
pauseMs: number;
|
||||
seekForwardCount: number;
|
||||
@@ -72,20 +78,22 @@ export interface SessionState extends TelemetryAccumulator {
|
||||
lastPauseStartMs: number | null;
|
||||
isPaused: boolean;
|
||||
pendingTelemetry: boolean;
|
||||
markedWatched: boolean;
|
||||
}
|
||||
|
||||
interface QueuedTelemetryWrite {
|
||||
kind: 'telemetry';
|
||||
sessionId: number;
|
||||
sampleMs?: number;
|
||||
lastMediaMs?: number | null;
|
||||
totalWatchedMs?: number;
|
||||
activeWatchedMs?: number;
|
||||
linesSeen?: number;
|
||||
wordsSeen?: number;
|
||||
tokensSeen?: number;
|
||||
cardsMined?: number;
|
||||
lookupCount?: number;
|
||||
lookupHits?: number;
|
||||
yomitanLookupCount?: number;
|
||||
pauseCount?: number;
|
||||
pauseMs?: number;
|
||||
seekForwardCount?: number;
|
||||
@@ -95,7 +103,7 @@ interface QueuedTelemetryWrite {
|
||||
lineIndex?: number | null;
|
||||
segmentStartMs?: number | null;
|
||||
segmentEndMs?: number | null;
|
||||
wordsDelta?: number;
|
||||
tokensDelta?: number;
|
||||
cardsDelta?: number;
|
||||
payloadJson?: string | null;
|
||||
}
|
||||
@@ -108,7 +116,7 @@ interface QueuedEventWrite {
|
||||
lineIndex?: number | null;
|
||||
segmentStartMs?: number | null;
|
||||
segmentEndMs?: number | null;
|
||||
wordsDelta?: number;
|
||||
tokensDelta?: number;
|
||||
cardsDelta?: number;
|
||||
payloadJson?: string | null;
|
||||
}
|
||||
@@ -118,8 +126,13 @@ interface QueuedWordWrite {
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
frequencyRank: number | null;
|
||||
}
|
||||
|
||||
interface QueuedKanjiWrite {
|
||||
@@ -129,11 +142,44 @@ interface QueuedKanjiWrite {
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface CountedWordOccurrence {
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
occurrenceCount: number;
|
||||
frequencyRank: number | null;
|
||||
}
|
||||
|
||||
export interface CountedKanjiOccurrence {
|
||||
kanji: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
interface QueuedSubtitleLineWrite {
|
||||
kind: 'subtitleLine';
|
||||
sessionId: number;
|
||||
videoId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
secondaryText?: string | null;
|
||||
wordOccurrences: CountedWordOccurrence[];
|
||||
kanjiOccurrences: CountedKanjiOccurrence[];
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export type QueuedWrite =
|
||||
| QueuedTelemetryWrite
|
||||
| QueuedEventWrite
|
||||
| QueuedWordWrite
|
||||
| QueuedKanjiWrite;
|
||||
| QueuedKanjiWrite
|
||||
| QueuedSubtitleLineWrite;
|
||||
|
||||
export interface VideoMetadata {
|
||||
sourceType: number;
|
||||
@@ -152,18 +198,173 @@ export interface VideoMetadata {
|
||||
metadataJson: string | null;
|
||||
}
|
||||
|
||||
export interface ParsedAnimeVideoMetadata {
|
||||
animeId: number | null;
|
||||
parsedBasename: string | null;
|
||||
parsedTitle: string | null;
|
||||
parsedSeason: number | null;
|
||||
parsedEpisode: number | null;
|
||||
parserSource: string | null;
|
||||
parserConfidence: number | null;
|
||||
parseMetadataJson: string | null;
|
||||
}
|
||||
|
||||
export interface ParsedAnimeVideoGuess {
|
||||
parsedBasename: string | null;
|
||||
parsedTitle: string;
|
||||
parsedSeason: number | null;
|
||||
parsedEpisode: number | null;
|
||||
parserSource: 'guessit' | 'fallback';
|
||||
parserConfidence: number;
|
||||
parseMetadataJson: string;
|
||||
}
|
||||
|
||||
export interface SessionSummaryQueryRow {
|
||||
sessionId: number;
|
||||
videoId: number | null;
|
||||
canonicalTitle: string | null;
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
startedAtMs: number;
|
||||
endedAtMs: number | null;
|
||||
totalWatchedMs: number;
|
||||
activeWatchedMs: number;
|
||||
linesSeen: number;
|
||||
wordsSeen: number;
|
||||
tokensSeen: number;
|
||||
cardsMined: number;
|
||||
lookupCount: number;
|
||||
lookupHits: number;
|
||||
yomitanLookupCount: number;
|
||||
knownWordsSeen?: number;
|
||||
knownWordRate?: number;
|
||||
}
|
||||
|
||||
export interface LifetimeGlobalRow {
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
activeDays: number;
|
||||
episodesStarted: number;
|
||||
episodesCompleted: number;
|
||||
animeCompleted: number;
|
||||
lastRebuiltMs: number | null;
|
||||
}
|
||||
|
||||
export interface LifetimeAnimeRow {
|
||||
animeId: number;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalLinesSeen: number;
|
||||
totalTokensSeen: number;
|
||||
episodesStarted: number;
|
||||
episodesCompleted: number;
|
||||
firstWatchedMs: number | null;
|
||||
lastWatchedMs: number | null;
|
||||
}
|
||||
|
||||
export interface LifetimeMediaRow {
|
||||
videoId: number;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalLinesSeen: number;
|
||||
totalTokensSeen: number;
|
||||
completed: number;
|
||||
firstWatchedMs: number | null;
|
||||
lastWatchedMs: number | null;
|
||||
}
|
||||
|
||||
export interface AppliedSessionRow {
|
||||
sessionId: number;
|
||||
appliedAtMs: number;
|
||||
}
|
||||
|
||||
export interface LifetimeRebuildSummary {
|
||||
appliedSessions: number;
|
||||
rebuiltAtMs: number;
|
||||
}
|
||||
|
||||
export interface VocabularyStatsRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
pos1: string | null;
|
||||
pos2: string | null;
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
frequencyRank: number | null;
|
||||
animeCount: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface VocabularyCleanupSummary {
|
||||
scanned: number;
|
||||
kept: number;
|
||||
deleted: number;
|
||||
repaired: number;
|
||||
}
|
||||
|
||||
export interface LegacyVocabularyPosRow {
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string | null;
|
||||
}
|
||||
|
||||
export interface LegacyVocabularyPosResolution {
|
||||
headword: string;
|
||||
reading: string;
|
||||
partOfSpeech: string;
|
||||
pos1: string;
|
||||
pos2: string;
|
||||
pos3: string;
|
||||
}
|
||||
|
||||
export interface KanjiStatsRow {
|
||||
kanjiId: number;
|
||||
kanji: string;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface WordOccurrenceRow {
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sourcePath: string | null;
|
||||
secondaryText: string | null;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface KanjiOccurrenceRow {
|
||||
animeId: number | null;
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sourcePath: string | null;
|
||||
secondaryText: string | null;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface SessionEventRow {
|
||||
eventType: number;
|
||||
tsMs: number;
|
||||
payload: string | null;
|
||||
}
|
||||
|
||||
export interface SessionTimelineRow {
|
||||
@@ -171,7 +372,6 @@ export interface SessionTimelineRow {
|
||||
totalWatchedMs: number;
|
||||
activeWatchedMs: number;
|
||||
linesSeen: number;
|
||||
wordsSeen: number;
|
||||
tokensSeen: number;
|
||||
cardsMined: number;
|
||||
}
|
||||
@@ -182,11 +382,10 @@ export interface ImmersionSessionRollupRow {
|
||||
totalSessions: number;
|
||||
totalActiveMin: number;
|
||||
totalLinesSeen: number;
|
||||
totalWordsSeen: number;
|
||||
totalTokensSeen: number;
|
||||
totalCards: number;
|
||||
cardsPerHour: number | null;
|
||||
wordsPerMin: number | null;
|
||||
tokensPerMin: number | null;
|
||||
lookupHitRate: number | null;
|
||||
}
|
||||
|
||||
@@ -200,3 +399,186 @@ export interface ProbeMetadata {
|
||||
bitrateKbps: number | null;
|
||||
audioCodecId: number | null;
|
||||
}
|
||||
|
||||
export interface MediaArtRow {
|
||||
videoId: number;
|
||||
anilistId: number | null;
|
||||
coverUrl: string | null;
|
||||
coverBlob: Buffer | null;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
episodesTotal: number | null;
|
||||
fetchedAtMs: number;
|
||||
}
|
||||
|
||||
export interface MediaLibraryRow {
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalTokensSeen: number;
|
||||
lastWatchedMs: number;
|
||||
hasCoverArt: number;
|
||||
}
|
||||
|
||||
export interface MediaDetailRow {
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
animeId: number | null;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalTokensSeen: number;
|
||||
totalLinesSeen: number;
|
||||
totalLookupCount: number;
|
||||
totalLookupHits: number;
|
||||
totalYomitanLookupCount: number;
|
||||
}
|
||||
|
||||
export interface AnimeLibraryRow {
|
||||
animeId: number;
|
||||
canonicalTitle: string;
|
||||
anilistId: number | null;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalTokensSeen: number;
|
||||
episodeCount: number;
|
||||
episodesTotal: number | null;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface AnimeDetailRow {
|
||||
animeId: number;
|
||||
canonicalTitle: string;
|
||||
anilistId: number | null;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
titleNative: string | null;
|
||||
description: string | null;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalTokensSeen: number;
|
||||
totalLinesSeen: number;
|
||||
totalLookupCount: number;
|
||||
totalLookupHits: number;
|
||||
totalYomitanLookupCount: number;
|
||||
episodeCount: number;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface AnimeAnilistEntryRow {
|
||||
anilistId: number;
|
||||
titleRomaji: string | null;
|
||||
titleEnglish: string | null;
|
||||
season: number | null;
|
||||
}
|
||||
|
||||
export interface AnimeEpisodeRow {
|
||||
animeId: number;
|
||||
videoId: number;
|
||||
canonicalTitle: string;
|
||||
parsedTitle: string | null;
|
||||
season: number | null;
|
||||
episode: number | null;
|
||||
durationMs: number;
|
||||
endedMediaMs: number | null;
|
||||
watched: number;
|
||||
totalSessions: number;
|
||||
totalActiveMs: number;
|
||||
totalCards: number;
|
||||
totalTokensSeen: number;
|
||||
totalYomitanLookupCount: number;
|
||||
lastWatchedMs: number;
|
||||
}
|
||||
|
||||
export interface StreakCalendarRow {
|
||||
epochDay: number;
|
||||
totalActiveMin: number;
|
||||
}
|
||||
|
||||
export interface AnimeWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface EpisodesPerDayRow {
|
||||
epochDay: number;
|
||||
episodeCount: number;
|
||||
}
|
||||
|
||||
export interface NewAnimePerDayRow {
|
||||
epochDay: number;
|
||||
newAnimeCount: number;
|
||||
}
|
||||
|
||||
export interface WatchTimePerAnimeRow {
|
||||
epochDay: number;
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
totalActiveMin: number;
|
||||
}
|
||||
|
||||
export interface WordDetailRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
partOfSpeech: string | null;
|
||||
pos1: string | null;
|
||||
pos2: string | null;
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface WordAnimeAppearanceRow {
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface SimilarWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface KanjiDetailRow {
|
||||
kanjiId: number;
|
||||
kanji: string;
|
||||
frequency: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export interface KanjiAnimeAppearanceRow {
|
||||
animeId: number;
|
||||
animeTitle: string;
|
||||
occurrenceCount: number;
|
||||
}
|
||||
|
||||
export interface KanjiWordRow {
|
||||
wordId: number;
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
export interface EpisodeCardEventRow {
|
||||
eventId: number;
|
||||
sessionId: number;
|
||||
tsMs: number;
|
||||
cardsDelta: number;
|
||||
noteIds: number[];
|
||||
}
|
||||
|
||||
@@ -29,7 +29,10 @@ export {
|
||||
} from './startup';
|
||||
export { openYomitanSettingsWindow } from './yomitan-settings';
|
||||
export { createTokenizerDepsRuntime, tokenizeSubtitle } from './tokenizer';
|
||||
export { clearYomitanParserCachesForWindow } from './tokenizer/yomitan-parser-runtime';
|
||||
export {
|
||||
addYomitanNoteViaSearch,
|
||||
clearYomitanParserCachesForWindow,
|
||||
} from './tokenizer/yomitan-parser-runtime';
|
||||
export {
|
||||
deleteYomitanDictionaryByTitle,
|
||||
getYomitanDictionaryInfo,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import { createIpcDepsRuntime, registerIpcHandlers } from './ipc';
|
||||
import { createIpcDepsRuntime, registerIpcHandlers, type IpcServiceDeps } from './ipc';
|
||||
import { IPC_CHANNELS } from '../../shared/ipc/contracts';
|
||||
|
||||
interface FakeIpcRegistrar {
|
||||
@@ -77,6 +77,90 @@ function createControllerConfigFixture() {
|
||||
};
|
||||
}
|
||||
|
||||
function createRegisterIpcDeps(overrides: Partial<IpcServiceDeps> = {}): IpcServiceDeps {
|
||||
return {
|
||||
onOverlayModalClosed: () => {},
|
||||
openYomitanSettings: () => {},
|
||||
quitApp: () => {},
|
||||
toggleDevTools: () => {},
|
||||
getVisibleOverlayVisibility: () => false,
|
||||
toggleVisibleOverlay: () => {},
|
||||
tokenizeCurrentSubtitle: async () => null,
|
||||
getCurrentSubtitleRaw: () => '',
|
||||
getCurrentSubtitleAss: () => '',
|
||||
getPlaybackPaused: () => false,
|
||||
getSubtitlePosition: () => null,
|
||||
getSubtitleStyle: () => null,
|
||||
saveSubtitlePosition: () => {},
|
||||
getMecabStatus: () => ({ available: false, enabled: false, path: null }),
|
||||
setMecabEnabled: () => {},
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: async () => {},
|
||||
saveControllerPreference: async () => {},
|
||||
getSecondarySubMode: () => 'hover',
|
||||
getCurrentSecondarySub: () => '',
|
||||
focusMainWindow: () => {},
|
||||
runSubsyncManual: async () => ({ ok: true, message: 'ok' }),
|
||||
getAnkiConnectStatus: () => false,
|
||||
getRuntimeOptions: () => [],
|
||||
setRuntimeOption: () => ({ ok: true }),
|
||||
cycleRuntimeOption: () => ({ ok: true }),
|
||||
reportOverlayContentBounds: () => {},
|
||||
getAnilistStatus: () => ({}),
|
||||
clearAnilistToken: () => {},
|
||||
openAnilistSetup: () => {},
|
||||
getAnilistQueueStatus: () => ({}),
|
||||
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
|
||||
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
|
||||
immersionTracker: null,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function createFakeImmersionTracker(
|
||||
overrides: Partial<NonNullable<IpcServiceDeps['immersionTracker']>> = {},
|
||||
): NonNullable<IpcServiceDeps['immersionTracker']> {
|
||||
return {
|
||||
recordYomitanLookup: () => {},
|
||||
getSessionSummaries: async () => [],
|
||||
getDailyRollups: async () => [],
|
||||
getMonthlyRollups: async () => [],
|
||||
getQueryHints: async () => ({
|
||||
totalSessions: 0,
|
||||
activeSessions: 0,
|
||||
episodesToday: 0,
|
||||
activeAnimeCount: 0,
|
||||
totalActiveMin: 0,
|
||||
totalCards: 0,
|
||||
activeDays: 0,
|
||||
totalEpisodesWatched: 0,
|
||||
totalAnimeCompleted: 0,
|
||||
totalTokensSeen: 0,
|
||||
totalLookupCount: 0,
|
||||
totalLookupHits: 0,
|
||||
totalYomitanLookupCount: 0,
|
||||
newWordsToday: 0,
|
||||
newWordsThisWeek: 0,
|
||||
}),
|
||||
getSessionTimeline: async () => [],
|
||||
getSessionEvents: async () => [],
|
||||
getVocabularyStats: async () => [],
|
||||
getKanjiStats: async () => [],
|
||||
getMediaLibrary: async () => [],
|
||||
getMediaDetail: async () => null,
|
||||
getMediaSessions: async () => [],
|
||||
getMediaDailyRollups: async () => [],
|
||||
getCoverArt: async () => null,
|
||||
markActiveVideoWatched: async () => false,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
test('createIpcDepsRuntime wires AniList handlers', async () => {
|
||||
const calls: string[] = [];
|
||||
const deps = createIpcDepsRuntime({
|
||||
@@ -97,6 +181,8 @@ test('createIpcDepsRuntime wires AniList handlers', async () => {
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: () => {},
|
||||
saveControllerPreference: () => {},
|
||||
@@ -164,6 +250,8 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: () => {},
|
||||
saveControllerPreference: () => {},
|
||||
@@ -232,6 +320,194 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
|
||||
);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers forwards yomitan lookup tracking commands to immersion tracker', () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
const calls: string[] = [];
|
||||
registerIpcHandlers(
|
||||
createRegisterIpcDeps({
|
||||
immersionTracker: createFakeImmersionTracker({
|
||||
recordYomitanLookup: () => {
|
||||
calls.push('lookup');
|
||||
},
|
||||
}),
|
||||
}),
|
||||
registrar,
|
||||
);
|
||||
|
||||
const handler = handlers.on.get(IPC_CHANNELS.command.recordYomitanLookup);
|
||||
assert.equal(typeof handler, 'function');
|
||||
|
||||
handler?.({}, null);
|
||||
|
||||
assert.deepEqual(calls, ['lookup']);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers returns empty stats overview shape without a tracker', async () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
registerIpcHandlers(createRegisterIpcDeps(), registrar);
|
||||
|
||||
const overviewHandler = handlers.handle.get(IPC_CHANNELS.request.statsGetOverview);
|
||||
assert.ok(overviewHandler);
|
||||
assert.deepEqual(await overviewHandler!({}), {
|
||||
sessions: [],
|
||||
rollups: [],
|
||||
hints: {
|
||||
totalSessions: 0,
|
||||
activeSessions: 0,
|
||||
episodesToday: 0,
|
||||
activeAnimeCount: 0,
|
||||
totalCards: 0,
|
||||
totalActiveMin: 0,
|
||||
activeDays: 0,
|
||||
totalEpisodesWatched: 0,
|
||||
totalAnimeCompleted: 0,
|
||||
totalTokensSeen: 0,
|
||||
totalLookupCount: 0,
|
||||
totalLookupHits: 0,
|
||||
totalYomitanLookupCount: 0,
|
||||
newWordsToday: 0,
|
||||
newWordsThisWeek: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('registerIpcHandlers validates and clamps stats request limits', async () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
const calls: Array<[string, number, number?]> = [];
|
||||
|
||||
registerIpcHandlers(
|
||||
createRegisterIpcDeps({
|
||||
immersionTracker: {
|
||||
recordYomitanLookup: () => {},
|
||||
getSessionSummaries: async (limit = 0) => {
|
||||
calls.push(['sessions', limit]);
|
||||
return [];
|
||||
},
|
||||
getDailyRollups: async (limit = 0) => {
|
||||
calls.push(['daily', limit]);
|
||||
return [];
|
||||
},
|
||||
getMonthlyRollups: async (limit = 0) => {
|
||||
calls.push(['monthly', limit]);
|
||||
return [];
|
||||
},
|
||||
getQueryHints: async () => ({
|
||||
totalSessions: 0,
|
||||
activeSessions: 0,
|
||||
episodesToday: 0,
|
||||
activeAnimeCount: 0,
|
||||
totalCards: 0,
|
||||
totalActiveMin: 0,
|
||||
activeDays: 0,
|
||||
totalEpisodesWatched: 0,
|
||||
totalAnimeCompleted: 0,
|
||||
totalTokensSeen: 0,
|
||||
totalLookupCount: 0,
|
||||
totalLookupHits: 0,
|
||||
totalYomitanLookupCount: 0,
|
||||
newWordsToday: 0,
|
||||
newWordsThisWeek: 0,
|
||||
}),
|
||||
getSessionTimeline: async (sessionId: number, limit = 0) => {
|
||||
calls.push(['timeline', limit, sessionId]);
|
||||
return [];
|
||||
},
|
||||
getSessionEvents: async (sessionId: number, limit = 0) => {
|
||||
calls.push(['events', limit, sessionId]);
|
||||
return [];
|
||||
},
|
||||
getVocabularyStats: async (limit = 0) => {
|
||||
calls.push(['vocabulary', limit]);
|
||||
return [];
|
||||
},
|
||||
getKanjiStats: async (limit = 0) => {
|
||||
calls.push(['kanji', limit]);
|
||||
return [];
|
||||
},
|
||||
getMediaLibrary: async () => [],
|
||||
getMediaDetail: async () => null,
|
||||
getMediaSessions: async () => [],
|
||||
getMediaDailyRollups: async () => [],
|
||||
getCoverArt: async () => null,
|
||||
markActiveVideoWatched: async () => false,
|
||||
},
|
||||
}),
|
||||
registrar,
|
||||
);
|
||||
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetDailyRollups)!({}, -1);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetMonthlyRollups)!(
|
||||
{},
|
||||
Number.POSITIVE_INFINITY,
|
||||
);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessions)!({}, 9999);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, 12.5);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionEvents)!({}, 7, 0);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetVocabulary)!({}, 1000);
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetKanji)!({}, NaN);
|
||||
|
||||
assert.deepEqual(calls, [
|
||||
['daily', 60],
|
||||
['monthly', 24],
|
||||
['sessions', 500],
|
||||
['timeline', 200, 7],
|
||||
['events', 500, 7],
|
||||
['vocabulary', 500],
|
||||
['kanji', 100],
|
||||
]);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers requests the full timeline when no limit is provided', async () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
const calls: Array<[string, number | undefined, number]> = [];
|
||||
|
||||
registerIpcHandlers(
|
||||
createRegisterIpcDeps({
|
||||
immersionTracker: {
|
||||
recordYomitanLookup: () => {},
|
||||
getSessionSummaries: async () => [],
|
||||
getDailyRollups: async () => [],
|
||||
getMonthlyRollups: async () => [],
|
||||
getQueryHints: async () => ({
|
||||
totalSessions: 0,
|
||||
activeSessions: 0,
|
||||
episodesToday: 0,
|
||||
activeAnimeCount: 0,
|
||||
totalCards: 0,
|
||||
totalActiveMin: 0,
|
||||
activeDays: 0,
|
||||
totalEpisodesWatched: 0,
|
||||
totalAnimeCompleted: 0,
|
||||
totalTokensSeen: 0,
|
||||
totalLookupCount: 0,
|
||||
totalLookupHits: 0,
|
||||
totalYomitanLookupCount: 0,
|
||||
newWordsToday: 0,
|
||||
newWordsThisWeek: 0,
|
||||
}),
|
||||
getSessionTimeline: async (sessionId: number, limit?: number) => {
|
||||
calls.push(['timeline', limit, sessionId]);
|
||||
return [];
|
||||
},
|
||||
getSessionEvents: async () => [],
|
||||
getVocabularyStats: async () => [],
|
||||
getKanjiStats: async () => [],
|
||||
getMediaLibrary: async () => [],
|
||||
getMediaDetail: async () => null,
|
||||
getMediaSessions: async () => [],
|
||||
getMediaDailyRollups: async () => [],
|
||||
getCoverArt: async () => null,
|
||||
markActiveVideoWatched: async () => false,
|
||||
},
|
||||
}),
|
||||
registrar,
|
||||
);
|
||||
|
||||
await handlers.handle.get(IPC_CHANNELS.request.statsGetSessionTimeline)!({}, 7, undefined);
|
||||
|
||||
assert.deepEqual(calls, [['timeline', undefined, 7]]);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
const saves: unknown[] = [];
|
||||
@@ -265,10 +541,10 @@ test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: (update) => {
|
||||
controllerSaves.push(update);
|
||||
},
|
||||
saveControllerConfig: () => {},
|
||||
saveControllerPreference: (update) => {
|
||||
controllerSaves.push(update);
|
||||
},
|
||||
@@ -329,6 +605,8 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: async () => {},
|
||||
saveControllerPreference: async (update) => {
|
||||
@@ -376,85 +654,6 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
|
||||
]);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers awaits saveControllerConfig through request-response IPC', async () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
const controllerConfigSaves: unknown[] = [];
|
||||
registerIpcHandlers(
|
||||
{
|
||||
onOverlayModalClosed: () => {},
|
||||
openYomitanSettings: () => {},
|
||||
quitApp: () => {},
|
||||
toggleDevTools: () => {},
|
||||
getVisibleOverlayVisibility: () => false,
|
||||
toggleVisibleOverlay: () => {},
|
||||
tokenizeCurrentSubtitle: async () => null,
|
||||
getCurrentSubtitleRaw: () => '',
|
||||
getCurrentSubtitleAss: () => '',
|
||||
getPlaybackPaused: () => false,
|
||||
getSubtitlePosition: () => null,
|
||||
getSubtitleStyle: () => null,
|
||||
saveSubtitlePosition: () => {},
|
||||
getMecabStatus: () => ({ available: false, enabled: false, path: null }),
|
||||
setMecabEnabled: () => {},
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: async (update) => {
|
||||
await Promise.resolve();
|
||||
controllerConfigSaves.push(update);
|
||||
},
|
||||
saveControllerPreference: async () => {},
|
||||
getSecondarySubMode: () => 'hover',
|
||||
getCurrentSecondarySub: () => '',
|
||||
focusMainWindow: () => {},
|
||||
runSubsyncManual: async () => ({ ok: true, message: 'ok' }),
|
||||
getAnkiConnectStatus: () => false,
|
||||
getRuntimeOptions: () => [],
|
||||
setRuntimeOption: () => ({ ok: true }),
|
||||
cycleRuntimeOption: () => ({ ok: true }),
|
||||
reportOverlayContentBounds: () => {},
|
||||
getAnilistStatus: () => ({}),
|
||||
clearAnilistToken: () => {},
|
||||
openAnilistSetup: () => {},
|
||||
getAnilistQueueStatus: () => ({}),
|
||||
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
|
||||
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
|
||||
},
|
||||
registrar,
|
||||
);
|
||||
|
||||
const saveHandler = handlers.handle.get(IPC_CHANNELS.command.saveControllerConfig);
|
||||
assert.ok(saveHandler);
|
||||
|
||||
await assert.rejects(
|
||||
async () => {
|
||||
await saveHandler!({}, { bindings: { toggleLookup: { kind: 'button', buttonIndex: -1 } } });
|
||||
},
|
||||
/Invalid controller config payload/,
|
||||
);
|
||||
|
||||
await saveHandler!({}, {
|
||||
preferredGamepadId: 'pad-2',
|
||||
bindings: {
|
||||
toggleLookup: { kind: 'button', buttonIndex: 11 },
|
||||
closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
|
||||
leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
|
||||
},
|
||||
});
|
||||
|
||||
assert.deepEqual(controllerConfigSaves, [
|
||||
{
|
||||
preferredGamepadId: 'pad-2',
|
||||
bindings: {
|
||||
toggleLookup: { kind: 'button', buttonIndex: 11 },
|
||||
closeLookup: { kind: 'axis', axisIndex: 4, direction: 'negative' },
|
||||
leftStickHorizontal: { kind: 'axis', axisIndex: 7, dpadFallback: 'none' },
|
||||
},
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('registerIpcHandlers rejects malformed controller preference payloads', async () => {
|
||||
const { registrar, handlers } = createFakeIpcRegistrar();
|
||||
registerIpcHandlers(
|
||||
@@ -477,6 +676,8 @@ test('registerIpcHandlers rejects malformed controller preference payloads', asy
|
||||
handleMpvCommand: () => {},
|
||||
getKeybindings: () => [],
|
||||
getConfiguredShortcuts: () => ({}),
|
||||
getStatsToggleKey: () => 'Backquote',
|
||||
getMarkWatchedKey: () => 'KeyW',
|
||||
getControllerConfig: () => createControllerConfigFixture(),
|
||||
saveControllerConfig: async () => {},
|
||||
saveControllerPreference: async () => {},
|
||||
|
||||
@@ -50,6 +50,8 @@ export interface IpcServiceDeps {
|
||||
handleMpvCommand: (command: Array<string | number>) => void;
|
||||
getKeybindings: () => unknown;
|
||||
getConfiguredShortcuts: () => unknown;
|
||||
getStatsToggleKey: () => string;
|
||||
getMarkWatchedKey: () => string;
|
||||
getControllerConfig: () => ResolvedControllerConfig;
|
||||
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
|
||||
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
|
||||
@@ -68,6 +70,39 @@ export interface IpcServiceDeps {
|
||||
getAnilistQueueStatus: () => unknown;
|
||||
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
|
||||
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
|
||||
immersionTracker?: {
|
||||
recordYomitanLookup: () => void;
|
||||
getSessionSummaries: (limit?: number) => Promise<unknown>;
|
||||
getDailyRollups: (limit?: number) => Promise<unknown>;
|
||||
getMonthlyRollups: (limit?: number) => Promise<unknown>;
|
||||
getQueryHints: () => Promise<{
|
||||
totalSessions: number;
|
||||
activeSessions: number;
|
||||
episodesToday: number;
|
||||
activeAnimeCount: number;
|
||||
totalActiveMin: number;
|
||||
totalCards: number;
|
||||
activeDays: number;
|
||||
totalEpisodesWatched: number;
|
||||
totalAnimeCompleted: number;
|
||||
totalTokensSeen: number;
|
||||
totalLookupCount: number;
|
||||
totalLookupHits: number;
|
||||
totalYomitanLookupCount: number;
|
||||
newWordsToday: number;
|
||||
newWordsThisWeek: number;
|
||||
}>;
|
||||
getSessionTimeline: (sessionId: number, limit?: number) => Promise<unknown>;
|
||||
getSessionEvents: (sessionId: number, limit?: number) => Promise<unknown>;
|
||||
getVocabularyStats: (limit?: number) => Promise<unknown>;
|
||||
getKanjiStats: (limit?: number) => Promise<unknown>;
|
||||
getMediaLibrary: () => Promise<unknown>;
|
||||
getMediaDetail: (videoId: number) => Promise<unknown>;
|
||||
getMediaSessions: (videoId: number, limit?: number) => Promise<unknown>;
|
||||
getMediaDailyRollups: (videoId: number, limit?: number) => Promise<unknown>;
|
||||
getCoverArt: (videoId: number) => Promise<unknown>;
|
||||
markActiveVideoWatched: () => Promise<boolean>;
|
||||
} | null;
|
||||
}
|
||||
|
||||
interface WindowLike {
|
||||
@@ -116,6 +151,8 @@ export interface IpcDepsRuntimeOptions {
|
||||
handleMpvCommand: (command: Array<string | number>) => void;
|
||||
getKeybindings: () => unknown;
|
||||
getConfiguredShortcuts: () => unknown;
|
||||
getStatsToggleKey: () => string;
|
||||
getMarkWatchedKey: () => string;
|
||||
getControllerConfig: () => ResolvedControllerConfig;
|
||||
saveControllerConfig: (update: ControllerConfigUpdate) => void | Promise<void>;
|
||||
saveControllerPreference: (update: ControllerPreferenceUpdate) => void | Promise<void>;
|
||||
@@ -134,6 +171,7 @@ export interface IpcDepsRuntimeOptions {
|
||||
getAnilistQueueStatus: () => unknown;
|
||||
retryAnilistQueueNow: () => Promise<{ ok: boolean; message: string }>;
|
||||
appendClipboardVideoToQueue: () => { ok: boolean; message: string };
|
||||
getImmersionTracker?: () => IpcServiceDeps['immersionTracker'];
|
||||
}
|
||||
|
||||
export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcServiceDeps {
|
||||
@@ -170,6 +208,8 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
|
||||
handleMpvCommand: options.handleMpvCommand,
|
||||
getKeybindings: options.getKeybindings,
|
||||
getConfiguredShortcuts: options.getConfiguredShortcuts,
|
||||
getStatsToggleKey: options.getStatsToggleKey,
|
||||
getMarkWatchedKey: options.getMarkWatchedKey,
|
||||
getControllerConfig: options.getControllerConfig,
|
||||
saveControllerConfig: options.saveControllerConfig,
|
||||
saveControllerPreference: options.saveControllerPreference,
|
||||
@@ -192,10 +232,31 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
|
||||
getAnilistQueueStatus: options.getAnilistQueueStatus,
|
||||
retryAnilistQueueNow: options.retryAnilistQueueNow,
|
||||
appendClipboardVideoToQueue: options.appendClipboardVideoToQueue,
|
||||
get immersionTracker() {
|
||||
return options.getImmersionTracker?.() ?? null;
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar = ipcMain): void {
|
||||
const parsePositiveIntLimit = (
|
||||
value: unknown,
|
||||
defaultValue: number,
|
||||
maxValue: number,
|
||||
): number => {
|
||||
if (!Number.isInteger(value) || (value as number) < 1) {
|
||||
return defaultValue;
|
||||
}
|
||||
return Math.min(value as number, maxValue);
|
||||
};
|
||||
|
||||
const parsePositiveInteger = (value: unknown): number | null => {
|
||||
if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) {
|
||||
return null;
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
ipc.on(
|
||||
IPC_CHANNELS.command.setIgnoreMouseEvents,
|
||||
(event: unknown, ignore: unknown, options: unknown = {}) => {
|
||||
@@ -224,6 +285,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
|
||||
deps.openYomitanSettings();
|
||||
});
|
||||
|
||||
ipc.on(IPC_CHANNELS.command.recordYomitanLookup, () => {
|
||||
deps.immersionTracker?.recordYomitanLookup();
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.command.markActiveVideoWatched, async () => {
|
||||
return (await deps.immersionTracker?.markActiveVideoWatched()) ?? false;
|
||||
});
|
||||
|
||||
ipc.on(IPC_CHANNELS.command.quitApp, () => {
|
||||
deps.quitApp();
|
||||
});
|
||||
@@ -312,6 +381,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
|
||||
return deps.getConfiguredShortcuts();
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.getStatsToggleKey, () => {
|
||||
return deps.getStatsToggleKey();
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.getMarkWatchedKey, () => {
|
||||
return deps.getMarkWatchedKey();
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.getControllerConfig, () => {
|
||||
return deps.getControllerConfig();
|
||||
});
|
||||
@@ -397,4 +474,115 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
|
||||
ipc.handle(IPC_CHANNELS.request.appendClipboardVideoToQueue, () => {
|
||||
return deps.appendClipboardVideoToQueue();
|
||||
});
|
||||
|
||||
// Stats request handlers
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetOverview, async () => {
|
||||
const tracker = deps.immersionTracker;
|
||||
if (!tracker) {
|
||||
return {
|
||||
sessions: [],
|
||||
rollups: [],
|
||||
hints: {
|
||||
totalSessions: 0,
|
||||
activeSessions: 0,
|
||||
episodesToday: 0,
|
||||
activeAnimeCount: 0,
|
||||
totalActiveMin: 0,
|
||||
totalCards: 0,
|
||||
activeDays: 0,
|
||||
totalEpisodesWatched: 0,
|
||||
totalAnimeCompleted: 0,
|
||||
totalTokensSeen: 0,
|
||||
totalLookupCount: 0,
|
||||
totalLookupHits: 0,
|
||||
totalYomitanLookupCount: 0,
|
||||
newWordsToday: 0,
|
||||
newWordsThisWeek: 0,
|
||||
},
|
||||
};
|
||||
}
|
||||
const [sessions, rollups, hints] = await Promise.all([
|
||||
tracker.getSessionSummaries(5),
|
||||
tracker.getDailyRollups(14),
|
||||
tracker.getQueryHints(),
|
||||
]);
|
||||
return { sessions, rollups, hints };
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetDailyRollups, async (_event, limit: unknown) => {
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 60, 500);
|
||||
return deps.immersionTracker?.getDailyRollups(parsedLimit) ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetMonthlyRollups, async (_event, limit: unknown) => {
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 24, 120);
|
||||
return deps.immersionTracker?.getMonthlyRollups(parsedLimit) ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetSessions, async (_event, limit: unknown) => {
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 50, 500);
|
||||
return deps.immersionTracker?.getSessionSummaries(parsedLimit) ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(
|
||||
IPC_CHANNELS.request.statsGetSessionTimeline,
|
||||
async (_event, sessionId: unknown, limit: unknown) => {
|
||||
const parsedSessionId = parsePositiveInteger(sessionId);
|
||||
if (parsedSessionId === null) return [];
|
||||
const parsedLimit = limit === undefined ? undefined : parsePositiveIntLimit(limit, 200, 1000);
|
||||
return deps.immersionTracker?.getSessionTimeline(parsedSessionId, parsedLimit) ?? [];
|
||||
},
|
||||
);
|
||||
|
||||
ipc.handle(
|
||||
IPC_CHANNELS.request.statsGetSessionEvents,
|
||||
async (_event, sessionId: unknown, limit: unknown) => {
|
||||
const parsedSessionId = parsePositiveInteger(sessionId);
|
||||
if (parsedSessionId === null) return [];
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 500, 1000);
|
||||
return deps.immersionTracker?.getSessionEvents(parsedSessionId, parsedLimit) ?? [];
|
||||
},
|
||||
);
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetVocabulary, async (_event, limit: unknown) => {
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
|
||||
return deps.immersionTracker?.getVocabularyStats(parsedLimit) ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetKanji, async (_event, limit: unknown) => {
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
|
||||
return deps.immersionTracker?.getKanjiStats(parsedLimit) ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetMediaLibrary, async () => {
|
||||
return deps.immersionTracker?.getMediaLibrary() ?? [];
|
||||
});
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetMediaDetail, async (_event, videoId: unknown) => {
|
||||
if (typeof videoId !== 'number') return null;
|
||||
return deps.immersionTracker?.getMediaDetail(videoId) ?? null;
|
||||
});
|
||||
|
||||
ipc.handle(
|
||||
IPC_CHANNELS.request.statsGetMediaSessions,
|
||||
async (_event, videoId: unknown, limit: unknown) => {
|
||||
if (typeof videoId !== 'number') return [];
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 100, 500);
|
||||
return deps.immersionTracker?.getMediaSessions(videoId, parsedLimit) ?? [];
|
||||
},
|
||||
);
|
||||
|
||||
ipc.handle(
|
||||
IPC_CHANNELS.request.statsGetMediaDailyRollups,
|
||||
async (_event, videoId: unknown, limit: unknown) => {
|
||||
if (typeof videoId !== 'number') return [];
|
||||
const parsedLimit = parsePositiveIntLimit(limit, 90, 500);
|
||||
return deps.immersionTracker?.getMediaDailyRollups(videoId, parsedLimit) ?? [];
|
||||
},
|
||||
);
|
||||
|
||||
ipc.handle(IPC_CHANNELS.request.statsGetMediaCover, async (_event, videoId: unknown) => {
|
||||
if (typeof videoId !== 'number') return null;
|
||||
return deps.immersionTracker?.getCoverArt(videoId) ?? null;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -59,9 +59,12 @@ const MPV_SUBTITLE_PROPERTY_OBSERVATIONS: string[] = [
|
||||
'sub-ass-override',
|
||||
'sub-use-margins',
|
||||
'pause',
|
||||
'duration',
|
||||
'media-title',
|
||||
'secondary-sub-visibility',
|
||||
'sub-visibility',
|
||||
'sid',
|
||||
'track-list',
|
||||
];
|
||||
|
||||
const MPV_INITIAL_PROPERTY_REQUESTS: Array<MpvProtocolCommand> = [
|
||||
|
||||
@@ -60,6 +60,8 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
|
||||
emitSubtitleAssChange: (payload) => state.events.push(payload),
|
||||
emitSubtitleTiming: (payload) => state.events.push(payload),
|
||||
emitSecondarySubtitleChange: (payload) => state.events.push(payload),
|
||||
emitSubtitleTrackChange: (payload) => state.events.push(payload),
|
||||
emitSubtitleTrackListChange: (payload) => state.events.push(payload),
|
||||
getCurrentSubText: () => state.subText,
|
||||
setCurrentSubText: (text) => {
|
||||
state.subText = text;
|
||||
@@ -87,6 +89,7 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
|
||||
getPauseAtTime: () => null,
|
||||
setPauseAtTime: () => {},
|
||||
emitTimePosChange: () => {},
|
||||
emitDurationChange: () => {},
|
||||
emitPauseChange: () => {},
|
||||
autoLoadSecondarySubTrack: () => {},
|
||||
setCurrentVideoPath: () => {},
|
||||
@@ -119,6 +122,21 @@ test('dispatchMpvProtocolMessage emits subtitle text on property change', async
|
||||
assert.deepEqual(state.events, [{ text: '字幕', isOverlayVisible: false }]);
|
||||
});
|
||||
|
||||
// Verifies that mpv 'sid' and 'track-list' property-change messages are
// forwarded through the subtitle-track emitters, and that a string sid ('3')
// is coerced to the number 3 before being emitted.
test('dispatchMpvProtocolMessage emits subtitle track changes', async () => {
  const { deps, state } = createDeps({
    emitSubtitleTrackChange: (payload) => state.events.push(payload),
    emitSubtitleTrackListChange: (payload) => state.events.push(payload),
  });

  await dispatchMpvProtocolMessage({ event: 'property-change', name: 'sid', data: '3' }, deps);
  await dispatchMpvProtocolMessage(
    { event: 'property-change', name: 'track-list', data: [{ type: 'sub', id: 3 }] },
    deps,
  );

  // One event per property change, in dispatch order.
  assert.deepEqual(state.events, [{ sid: 3 }, { trackList: [{ type: 'sub', id: 3 }] }]);
});
|
||||
|
||||
test('dispatchMpvProtocolMessage enforces sub-visibility hidden when overlay suppression is enabled', async () => {
|
||||
const { deps, state } = createDeps({
|
||||
isVisibleOverlayVisible: () => true,
|
||||
|
||||
@@ -52,6 +52,8 @@ export interface MpvProtocolHandleMessageDeps {
|
||||
emitSubtitleAssChange: (payload: { text: string }) => void;
|
||||
emitSubtitleTiming: (payload: { text: string; start: number; end: number }) => void;
|
||||
emitSecondarySubtitleChange: (payload: { text: string }) => void;
|
||||
emitSubtitleTrackChange: (payload: { sid: number | null }) => void;
|
||||
emitSubtitleTrackListChange: (payload: { trackList: unknown[] | null }) => void;
|
||||
getCurrentSubText: () => string;
|
||||
setCurrentSubText: (text: string) => void;
|
||||
setCurrentSubStart: (value: number) => void;
|
||||
@@ -61,6 +63,7 @@ export interface MpvProtocolHandleMessageDeps {
|
||||
emitMediaPathChange: (payload: { path: string }) => void;
|
||||
emitMediaTitleChange: (payload: { title: string | null }) => void;
|
||||
emitTimePosChange: (payload: { time: number }) => void;
|
||||
emitDurationChange: (payload: { duration: number }) => void;
|
||||
emitPauseChange: (payload: { paused: boolean }) => void;
|
||||
emitSubtitleMetricsChange: (payload: Partial<MpvSubtitleRenderMetrics>) => void;
|
||||
setCurrentSecondarySubText: (text: string) => void;
|
||||
@@ -159,6 +162,18 @@ export async function dispatchMpvProtocolMessage(
|
||||
const nextSubText = (msg.data as string) || '';
|
||||
deps.setCurrentSecondarySubText(nextSubText);
|
||||
deps.emitSecondarySubtitleChange({ text: nextSubText });
|
||||
} else if (msg.name === 'sid') {
|
||||
const sid =
|
||||
typeof msg.data === 'number'
|
||||
? msg.data
|
||||
: typeof msg.data === 'string'
|
||||
? Number(msg.data)
|
||||
: null;
|
||||
deps.emitSubtitleTrackChange({ sid: sid !== null && Number.isFinite(sid) ? sid : null });
|
||||
} else if (msg.name === 'track-list') {
|
||||
deps.emitSubtitleTrackListChange({
|
||||
trackList: Array.isArray(msg.data) ? (msg.data as unknown[]) : null,
|
||||
});
|
||||
} else if (msg.name === 'aid') {
|
||||
deps.setCurrentAudioTrackId(typeof msg.data === 'number' ? (msg.data as number) : null);
|
||||
deps.syncCurrentAudioStreamIndex();
|
||||
@@ -172,6 +187,11 @@ export async function dispatchMpvProtocolMessage(
|
||||
deps.setPauseAtTime(null);
|
||||
deps.sendCommand({ command: ['set_property', 'pause', true] });
|
||||
}
|
||||
} else if (msg.name === 'duration') {
|
||||
const duration = typeof msg.data === 'number' ? msg.data : 0;
|
||||
if (duration > 0) {
|
||||
deps.emitDurationChange({ duration });
|
||||
}
|
||||
} else if (msg.name === 'pause') {
|
||||
deps.emitPauseChange({ paused: asBoolean(msg.data, false) });
|
||||
} else if (msg.name === 'media-title') {
|
||||
|
||||
@@ -115,8 +115,11 @@ export interface MpvIpcClientEventMap {
|
||||
'subtitle-ass-change': { text: string };
|
||||
'subtitle-timing': { text: string; start: number; end: number };
|
||||
'time-pos-change': { time: number };
|
||||
'duration-change': { duration: number };
|
||||
'pause-change': { paused: boolean };
|
||||
'secondary-subtitle-change': { text: string };
|
||||
'subtitle-track-change': { sid: number | null };
|
||||
'subtitle-track-list-change': { trackList: unknown[] | null };
|
||||
'media-path-change': { path: string };
|
||||
'media-title-change': { title: string | null };
|
||||
'subtitle-metrics-change': { patch: Partial<MpvSubtitleRenderMetrics> };
|
||||
@@ -314,6 +317,9 @@ export class MpvIpcClient implements MpvClient {
|
||||
emitTimePosChange: (payload) => {
|
||||
this.emit('time-pos-change', payload);
|
||||
},
|
||||
emitDurationChange: (payload) => {
|
||||
this.emit('duration-change', payload);
|
||||
},
|
||||
emitPauseChange: (payload) => {
|
||||
this.playbackPaused = payload.paused;
|
||||
this.emit('pause-change', payload);
|
||||
@@ -321,6 +327,12 @@ export class MpvIpcClient implements MpvClient {
|
||||
emitSecondarySubtitleChange: (payload) => {
|
||||
this.emit('secondary-subtitle-change', payload);
|
||||
},
|
||||
emitSubtitleTrackChange: (payload) => {
|
||||
this.emit('subtitle-track-change', payload);
|
||||
},
|
||||
emitSubtitleTrackListChange: (payload) => {
|
||||
this.emit('subtitle-track-list-change', payload);
|
||||
},
|
||||
getCurrentSubText: () => this.currentSubText,
|
||||
setCurrentSubText: (text: string) => {
|
||||
this.currentSubText = text;
|
||||
|
||||
@@ -109,6 +109,60 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
|
||||
assert.equal(setIntegrationCalls, 1);
|
||||
});
|
||||
|
||||
// When shouldStartAnkiIntegration() returns false, the runtime must still
// construct the Anki integration and hand it to setAnkiIntegration, but must
// not start its transport (start() never called).
test('initializeOverlayRuntime can skip starting Anki integration transport', () => {
  let createdIntegrations = 0;
  let startedIntegrations = 0;
  let setIntegrationCalls = 0;

  initializeOverlayRuntime({
    backendOverride: null,
    createMainWindow: () => {},
    registerGlobalShortcuts: () => {},
    updateVisibleOverlayBounds: () => {},
    isVisibleOverlayVisible: () => false,
    updateVisibleOverlayVisibility: () => {},
    getOverlayWindows: () => [],
    syncOverlayShortcuts: () => {},
    setWindowTracker: () => {},
    getMpvSocketPath: () => '/tmp/mpv.sock',
    createWindowTracker: () => null,
    // Anki is enabled in config, so the integration is created…
    getResolvedConfig: () => ({
      ankiConnect: { enabled: true } as never,
    }),
    getSubtitleTimingTracker: () => ({}),
    getMpvClient: () => ({
      send: () => {},
    }),
    getRuntimeOptionsManager: () => ({
      getEffectiveAnkiConnectConfig: (config) => config as never,
    }),
    createAnkiIntegration: () => {
      createdIntegrations += 1;
      return {
        start: () => {
          startedIntegrations += 1;
        },
      };
    },
    setAnkiIntegration: () => {
      setIntegrationCalls += 1;
    },
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 7,
      deleteNoteId: 8,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
    // …but the transport start is explicitly suppressed here.
    shouldStartAnkiIntegration: () => false,
  });

  assert.equal(createdIntegrations, 1);
  assert.equal(startedIntegrations, 0); // created but never started
  assert.equal(setIntegrationCalls, 1);
});
|
||||
|
||||
test('initializeOverlayRuntime merges shared ai config with Anki overrides', () => {
|
||||
initializeOverlayRuntime({
|
||||
backendOverride: null,
|
||||
@@ -213,3 +267,49 @@ test('initializeOverlayRuntime re-syncs overlay shortcuts when tracker focus cha
|
||||
tracker.onWindowFocusChange?.(true);
|
||||
assert.equal(syncCalls, 1);
|
||||
});
|
||||
|
||||
// When the window tracker reports a target-window focus change while the
// overlay is visible, the runtime should re-run the visibility update so the
// overlay's stacking/focus state is refreshed.
test('initializeOverlayRuntime refreshes visible overlay when tracker focus changes while overlay is shown', () => {
  let visibilityRefreshCalls = 0;
  // Bare tracker stub; the runtime is expected to install the on* callbacks.
  const tracker = {
    onGeometryChange: null as ((...args: unknown[]) => void) | null,
    onWindowFound: null as ((...args: unknown[]) => void) | null,
    onWindowLost: null as (() => void) | null,
    onWindowFocusChange: null as ((focused: boolean) => void) | null,
    start: () => {},
  };

  initializeOverlayRuntime({
    backendOverride: null,
    createMainWindow: () => {},
    registerGlobalShortcuts: () => {},
    updateVisibleOverlayBounds: () => {},
    isVisibleOverlayVisible: () => true,
    updateVisibleOverlayVisibility: () => {
      visibilityRefreshCalls += 1;
    },
    getOverlayWindows: () => [],
    syncOverlayShortcuts: () => {},
    setWindowTracker: () => {},
    getMpvSocketPath: () => '/tmp/mpv.sock',
    createWindowTracker: () => tracker as never,
    getResolvedConfig: () => ({
      ankiConnect: { enabled: false } as never,
    }),
    getSubtitleTimingTracker: () => null,
    getMpvClient: () => null,
    getRuntimeOptionsManager: () => null,
    setAnkiIntegration: () => {},
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 1,
      deleteNoteId: 2,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
  });

  tracker.onWindowFocusChange?.(true);

  // Two refreshes expected: presumably one during runtime initialization and
  // one from the focus-change callback — confirm against initializeOverlayRuntime.
  assert.equal(visibilityRefreshCalls, 2);
});
|
||||
|
||||
@@ -75,6 +75,7 @@ export function initializeOverlayRuntime(options: {
|
||||
data: KikuFieldGroupingRequestData,
|
||||
) => Promise<KikuFieldGroupingChoice>;
|
||||
getKnownWordCacheStatePath: () => string;
|
||||
shouldStartAnkiIntegration?: () => boolean;
|
||||
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
|
||||
}): void {
|
||||
options.createMainWindow();
|
||||
@@ -90,9 +91,6 @@ export function initializeOverlayRuntime(options: {
|
||||
windowTracker.onGeometryChange = (geometry: WindowGeometry) => {
|
||||
options.updateVisibleOverlayBounds(geometry);
|
||||
};
|
||||
windowTracker.onTargetWindowFocusChange = () => {
|
||||
options.syncOverlayShortcuts();
|
||||
};
|
||||
windowTracker.onWindowFound = (geometry: WindowGeometry) => {
|
||||
options.updateVisibleOverlayBounds(geometry);
|
||||
if (options.isVisibleOverlayVisible()) {
|
||||
@@ -106,6 +104,9 @@ export function initializeOverlayRuntime(options: {
|
||||
options.syncOverlayShortcuts();
|
||||
};
|
||||
windowTracker.onWindowFocusChange = () => {
|
||||
if (options.isVisibleOverlayVisible()) {
|
||||
options.updateVisibleOverlayVisibility();
|
||||
}
|
||||
options.syncOverlayShortcuts();
|
||||
};
|
||||
windowTracker.start();
|
||||
@@ -135,7 +136,9 @@ export function initializeOverlayRuntime(options: {
|
||||
createFieldGroupingCallback: options.createFieldGroupingCallback,
|
||||
knownWordCacheStatePath: options.getKnownWordCacheStatePath(),
|
||||
});
|
||||
integration.start();
|
||||
if (options.shouldStartAnkiIntegration?.() !== false) {
|
||||
integration.start();
|
||||
}
|
||||
options.setAnkiIntegration(integration);
|
||||
}
|
||||
|
||||
|
||||
@@ -200,6 +200,81 @@ test('Windows visible overlay stays click-through and does not steal focus while
|
||||
assert.ok(!calls.includes('focus'));
|
||||
});
|
||||
|
||||
// On macOS with a tracked target window, showing the visible overlay must
// accept mouse events (no forwarding) and show the window, but must never
// call focus() — the overlay stays passive over the player.
test('macOS tracked visible overlay stays visible without passively stealing focus', () => {
  const { window, calls } = createMainWindowRecorder();
  const tracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  updateVisibleOverlayVisibility({
    visibleOverlayVisible: true,
    mainWindow: window as never,
    windowTracker: tracker as never,
    trackerNotReadyWarningShown: false,
    setTrackerNotReadyWarningShown: () => {},
    updateVisibleOverlayBounds: () => {
      calls.push('update-bounds');
    },
    ensureOverlayWindowLevel: () => {
      calls.push('ensure-level');
    },
    syncPrimaryOverlayWindowLayer: () => {
      calls.push('sync-layer');
    },
    enforceOverlayLayerOrder: () => {
      calls.push('enforce-order');
    },
    syncOverlayShortcuts: () => {
      calls.push('sync-shortcuts');
    },
    isMacOSPlatform: true,
    isWindowsPlatform: false,
  } as never);

  // 'mouse-ignore:false:plain' = setIgnoreMouseEvents(false) with no options.
  assert.ok(calls.includes('mouse-ignore:false:plain'));
  assert.ok(calls.includes('show'));
  assert.ok(!calls.includes('focus'));
});
|
||||
|
||||
// With forceMousePassthrough enabled, the macOS overlay must instead ignore
// mouse events with event forwarding, while still showing and never focusing.
test('forced mouse passthrough keeps macOS tracked overlay passive while visible', () => {
  const { window, calls } = createMainWindowRecorder();
  const tracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  updateVisibleOverlayVisibility({
    visibleOverlayVisible: true,
    mainWindow: window as never,
    windowTracker: tracker as never,
    trackerNotReadyWarningShown: false,
    setTrackerNotReadyWarningShown: () => {},
    updateVisibleOverlayBounds: () => {
      calls.push('update-bounds');
    },
    ensureOverlayWindowLevel: () => {
      calls.push('ensure-level');
    },
    syncPrimaryOverlayWindowLayer: () => {
      calls.push('sync-layer');
    },
    enforceOverlayLayerOrder: () => {
      calls.push('enforce-order');
    },
    syncOverlayShortcuts: () => {
      calls.push('sync-shortcuts');
    },
    isMacOSPlatform: true,
    isWindowsPlatform: false,
    forceMousePassthrough: true,
  } as never);

  // 'mouse-ignore:true:forward' = setIgnoreMouseEvents(true, { forward: true }).
  assert.ok(calls.includes('mouse-ignore:true:forward'));
  assert.ok(calls.includes('show'));
  assert.ok(!calls.includes('focus'));
});
|
||||
|
||||
test('Windows keeps visible overlay hidden while tracker is not ready', () => {
|
||||
const { window, calls } = createMainWindowRecorder();
|
||||
let trackerWarning = false;
|
||||
@@ -283,6 +358,59 @@ test('macOS keeps visible overlay hidden while tracker is not initialized yet',
|
||||
assert.ok(!calls.includes('update-bounds'));
|
||||
});
|
||||
|
||||
// The "Overlay loading..." OSD must not spam on rapid tracker flaps: after one
// OSD is shown, a second tracker-not-ready episode within the 5s cooldown is
// silent; a later episode after the cooldown shows the OSD again.
test('macOS suppresses immediate repeat loading OSD after tracker recovery until cooldown expires', () => {
  const { window } = createMainWindowRecorder();
  const osdMessages: string[] = [];
  let trackerWarning = false;
  let lastLoadingOsdAtMs: number | null = null;
  let nowMs = 1_000; // fake clock, advanced manually below
  const hiddenTracker: WindowTrackerStub = {
    isTracking: () => false,
    getGeometry: () => null,
  };
  const trackedTracker: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  // Re-runs the visibility update against the shared mutable clock/flags.
  const run = (windowTracker: WindowTrackerStub) =>
    updateVisibleOverlayVisibility({
      visibleOverlayVisible: true,
      mainWindow: window as never,
      windowTracker: windowTracker as never,
      trackerNotReadyWarningShown: trackerWarning,
      setTrackerNotReadyWarningShown: (shown: boolean) => {
        trackerWarning = shown;
      },
      updateVisibleOverlayBounds: () => {},
      ensureOverlayWindowLevel: () => {},
      syncPrimaryOverlayWindowLayer: () => {},
      enforceOverlayLayerOrder: () => {},
      syncOverlayShortcuts: () => {},
      isMacOSPlatform: true,
      showOverlayLoadingOsd: (message: string) => {
        osdMessages.push(message);
      },
      // 5-second cooldown between OSD displays.
      shouldShowOverlayLoadingOsd: () =>
        lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
      markOverlayLoadingOsdShown: () => {
        lastLoadingOsdAtMs = nowMs;
      },
    } as never);

  // First not-ready episode at t=1000 → OSD shown, then tracker recovers.
  run(hiddenTracker);
  run(trackedTracker);

  // Second episode at t=2000, inside the cooldown → no OSD.
  nowMs = 2_000;
  run(hiddenTracker);
  run(trackedTracker);

  // Third episode at t=6500, cooldown expired → OSD shown again.
  nowMs = 6_500;
  run(hiddenTracker);

  assert.deepEqual(osdMessages, ['Overlay loading...', 'Overlay loading...']);
});
|
||||
|
||||
test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly', () => {
|
||||
const calls: string[] = [];
|
||||
setVisibleOverlayVisible({
|
||||
@@ -298,10 +426,12 @@ test('setVisibleOverlayVisible does not mutate mpv subtitle visibility directly'
|
||||
assert.deepEqual(calls, ['state:true', 'update']);
|
||||
});
|
||||
|
||||
test('macOS loading OSD can show again after overlay is hidden and retried', () => {
|
||||
test('macOS explicit hide resets loading OSD suppression before retry', () => {
|
||||
const { window, calls } = createMainWindowRecorder();
|
||||
const osdMessages: string[] = [];
|
||||
let trackerWarning = false;
|
||||
let lastLoadingOsdAtMs: number | null = null;
|
||||
let nowMs = 1_000;
|
||||
|
||||
updateVisibleOverlayVisibility({
|
||||
visibleOverlayVisible: true,
|
||||
@@ -331,8 +461,17 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
|
||||
showOverlayLoadingOsd: (message: string) => {
|
||||
osdMessages.push(message);
|
||||
},
|
||||
shouldShowOverlayLoadingOsd: () =>
|
||||
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
|
||||
markOverlayLoadingOsdShown: () => {
|
||||
lastLoadingOsdAtMs = nowMs;
|
||||
},
|
||||
resetOverlayLoadingOsdSuppression: () => {
|
||||
lastLoadingOsdAtMs = null;
|
||||
},
|
||||
} as never);
|
||||
|
||||
nowMs = 1_500;
|
||||
updateVisibleOverlayVisibility({
|
||||
visibleOverlayVisible: false,
|
||||
mainWindow: window as never,
|
||||
@@ -349,6 +488,9 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
|
||||
syncOverlayShortcuts: () => {},
|
||||
isMacOSPlatform: true,
|
||||
showOverlayLoadingOsd: () => {},
|
||||
resetOverlayLoadingOsdSuppression: () => {
|
||||
lastLoadingOsdAtMs = null;
|
||||
},
|
||||
} as never);
|
||||
|
||||
updateVisibleOverlayVisibility({
|
||||
@@ -379,6 +521,14 @@ test('macOS loading OSD can show again after overlay is hidden and retried', ()
|
||||
showOverlayLoadingOsd: (message: string) => {
|
||||
osdMessages.push(message);
|
||||
},
|
||||
shouldShowOverlayLoadingOsd: () =>
|
||||
lastLoadingOsdAtMs === null || nowMs - lastLoadingOsdAtMs >= 5_000,
|
||||
markOverlayLoadingOsdShown: () => {
|
||||
lastLoadingOsdAtMs = nowMs;
|
||||
},
|
||||
resetOverlayLoadingOsdSuppression: () => {
|
||||
lastLoadingOsdAtMs = null;
|
||||
},
|
||||
} as never);
|
||||
|
||||
assert.deepEqual(osdMessages, ['Overlay loading...', 'Overlay loading...']);
|
||||
|
||||
@@ -4,6 +4,7 @@ import { WindowGeometry } from '../../types';
|
||||
|
||||
export function updateVisibleOverlayVisibility(args: {
|
||||
visibleOverlayVisible: boolean;
|
||||
forceMousePassthrough?: boolean;
|
||||
mainWindow: BrowserWindow | null;
|
||||
windowTracker: BaseWindowTracker | null;
|
||||
trackerNotReadyWarningShown: boolean;
|
||||
@@ -16,6 +17,9 @@ export function updateVisibleOverlayVisibility(args: {
|
||||
isMacOSPlatform?: boolean;
|
||||
isWindowsPlatform?: boolean;
|
||||
showOverlayLoadingOsd?: (message: string) => void;
|
||||
shouldShowOverlayLoadingOsd?: () => boolean;
|
||||
markOverlayLoadingOsdShown?: () => void;
|
||||
resetOverlayLoadingOsdSuppression?: () => void;
|
||||
resolveFallbackBounds?: () => WindowGeometry;
|
||||
}): void {
|
||||
if (!args.mainWindow || args.mainWindow.isDestroyed()) {
|
||||
@@ -25,20 +29,33 @@ export function updateVisibleOverlayVisibility(args: {
|
||||
const mainWindow = args.mainWindow;
|
||||
|
||||
const showPassiveVisibleOverlay = (): void => {
|
||||
if (args.isWindowsPlatform) {
|
||||
const forceMousePassthrough = args.forceMousePassthrough === true;
|
||||
if (args.isWindowsPlatform || forceMousePassthrough) {
|
||||
mainWindow.setIgnoreMouseEvents(true, { forward: true });
|
||||
} else {
|
||||
mainWindow.setIgnoreMouseEvents(false);
|
||||
}
|
||||
args.ensureOverlayWindowLevel(mainWindow);
|
||||
mainWindow.show();
|
||||
if (!args.isWindowsPlatform) {
|
||||
if (!args.isWindowsPlatform && !args.isMacOSPlatform && !forceMousePassthrough) {
|
||||
mainWindow.focus();
|
||||
}
|
||||
};
|
||||
|
||||
const maybeShowOverlayLoadingOsd = (): void => {
|
||||
if (!args.isMacOSPlatform || !args.showOverlayLoadingOsd) {
|
||||
return;
|
||||
}
|
||||
if (args.shouldShowOverlayLoadingOsd && !args.shouldShowOverlayLoadingOsd()) {
|
||||
return;
|
||||
}
|
||||
args.showOverlayLoadingOsd('Overlay loading...');
|
||||
args.markOverlayLoadingOsdShown?.();
|
||||
};
|
||||
|
||||
if (!args.visibleOverlayVisible) {
|
||||
args.setTrackerNotReadyWarningShown(false);
|
||||
args.resetOverlayLoadingOsdSuppression?.();
|
||||
mainWindow.hide();
|
||||
args.syncOverlayShortcuts();
|
||||
return;
|
||||
@@ -61,9 +78,7 @@ export function updateVisibleOverlayVisibility(args: {
|
||||
if (args.isMacOSPlatform || args.isWindowsPlatform) {
|
||||
if (!args.trackerNotReadyWarningShown) {
|
||||
args.setTrackerNotReadyWarningShown(true);
|
||||
if (args.isMacOSPlatform) {
|
||||
args.showOverlayLoadingOsd?.('Overlay loading...');
|
||||
}
|
||||
maybeShowOverlayLoadingOsd();
|
||||
}
|
||||
mainWindow.hide();
|
||||
args.syncOverlayShortcuts();
|
||||
@@ -79,9 +94,7 @@ export function updateVisibleOverlayVisibility(args: {
|
||||
|
||||
if (!args.trackerNotReadyWarningShown) {
|
||||
args.setTrackerNotReadyWarningShown(true);
|
||||
if (args.isMacOSPlatform) {
|
||||
args.showOverlayLoadingOsd?.('Overlay loading...');
|
||||
}
|
||||
maybeShowOverlayLoadingOsd();
|
||||
}
|
||||
|
||||
mainWindow.hide();
|
||||
|
||||
@@ -46,6 +46,7 @@ export function ensureOverlayWindowLevel(window: BrowserWindow): void {
|
||||
window.setAlwaysOnTop(true, 'screen-saver', 1);
|
||||
window.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
|
||||
window.setFullScreenable(false);
|
||||
window.moveTop();
|
||||
return;
|
||||
}
|
||||
if (process.platform === 'win32') {
|
||||
|
||||
@@ -34,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||
anilistSetup: false,
|
||||
anilistRetryQueue: false,
|
||||
dictionary: false,
|
||||
stats: false,
|
||||
jellyfin: false,
|
||||
jellyfinLogin: false,
|
||||
jellyfinLogout: false,
|
||||
|
||||
196
src/core/services/startup.test.ts
Normal file
196
src/core/services/startup.test.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { runAppReadyRuntime } from './startup';
|
||||
|
||||
// Minimal-startup mode must run only config bootstrap, config reload, and CLI
// argument handling — no mpv client, websockets, Yomitan load, first-run
// setup, or any other heavy startup work. Every dependency is a recorder stub
// so the final calls list is an exact trace of what ran.
test('runAppReadyRuntime minimal startup skips Yomitan and first-run setup while still handling CLI args', async () => {
  const calls: string[] = [];

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: () => {
      calls.push('bootstrap');
    },
    loadSubtitlePosition: () => {
      calls.push('load-subtitle-position');
    },
    resolveKeybindings: () => {
      calls.push('resolve-keybindings');
    },
    createMpvClient: () => {
      calls.push('create-mpv');
    },
    reloadConfig: () => {
      calls.push('reload-config');
    },
    getResolvedConfig: () => ({}),
    getConfigWarnings: () => [],
    logConfigWarning: () => {
      calls.push('config-warning');
    },
    setLogLevel: () => {
      calls.push('set-log-level');
    },
    initRuntimeOptionsManager: () => {
      calls.push('init-runtime-options');
    },
    setSecondarySubMode: () => {
      calls.push('set-secondary-sub-mode');
    },
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: () => {
      calls.push('subtitle-ws');
    },
    startAnnotationWebsocket: () => {
      calls.push('annotation-ws');
    },
    startTexthooker: () => {
      calls.push('texthooker');
    },
    log: () => {
      calls.push('log');
    },
    createMecabTokenizerAndCheck: async () => {
      calls.push('mecab');
    },
    createSubtitleTimingTracker: () => {
      calls.push('subtitle-timing');
    },
    createImmersionTracker: () => {
      calls.push('immersion');
    },
    startJellyfinRemoteSession: async () => {
      calls.push('jellyfin');
    },
    loadYomitanExtension: async () => {
      calls.push('load-yomitan');
    },
    handleFirstRunSetup: async () => {
      calls.push('first-run');
    },
    prewarmSubtitleDictionaries: async () => {
      calls.push('prewarm');
    },
    startBackgroundWarmups: () => {
      calls.push('warmups');
    },
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
    setVisibleOverlayVisible: () => {
      calls.push('visible-overlay');
    },
    initializeOverlayRuntime: () => {
      calls.push('init-overlay');
    },
    handleInitialArgs: () => {
      calls.push('handle-initial-args');
    },
    // The mode under test: minimal startup on, heavy-startup skip off.
    shouldUseMinimalStartup: () => true,
    shouldSkipHeavyStartup: () => false,
  });

  // Exact trace: nothing beyond bootstrap, reload, and CLI-arg handling ran.
  assert.deepEqual(calls, ['bootstrap', 'reload-config', 'handle-initial-args']);
});
|
||||
|
||||
// Headless-command mode must bootstrap config and the runtime-options manager
// and then run the supplied headless command — with no mpv client, overlay,
// websocket, or other UI startup whatsoever.
test('runAppReadyRuntime headless refresh bootstraps Anki runtime without UI startup', async () => {
  const calls: string[] = [];

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: () => {
      calls.push('bootstrap');
    },
    loadSubtitlePosition: () => {
      calls.push('load-subtitle-position');
    },
    resolveKeybindings: () => {
      calls.push('resolve-keybindings');
    },
    createMpvClient: () => {
      calls.push('create-mpv');
    },
    reloadConfig: () => {
      calls.push('reload-config');
    },
    getResolvedConfig: () => ({}),
    getConfigWarnings: () => [],
    logConfigWarning: () => {
      calls.push('config-warning');
    },
    setLogLevel: () => {
      calls.push('set-log-level');
    },
    initRuntimeOptionsManager: () => {
      calls.push('init-runtime-options');
    },
    setSecondarySubMode: () => {
      calls.push('set-secondary-sub-mode');
    },
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: () => {
      calls.push('subtitle-ws');
    },
    startAnnotationWebsocket: () => {
      calls.push('annotation-ws');
    },
    startTexthooker: () => {
      calls.push('texthooker');
    },
    log: () => {
      calls.push('log');
    },
    createMecabTokenizerAndCheck: async () => {
      calls.push('mecab');
    },
    createSubtitleTimingTracker: () => {
      calls.push('subtitle-timing');
    },
    createImmersionTracker: () => {
      calls.push('immersion');
    },
    startJellyfinRemoteSession: async () => {
      calls.push('jellyfin');
    },
    loadYomitanExtension: async () => {
      calls.push('load-yomitan');
    },
    handleFirstRunSetup: async () => {
      calls.push('first-run');
    },
    prewarmSubtitleDictionaries: async () => {
      calls.push('prewarm');
    },
    startBackgroundWarmups: () => {
      calls.push('warmups');
    },
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
    setVisibleOverlayVisible: () => {
      calls.push('visible-overlay');
    },
    initializeOverlayRuntime: () => {
      calls.push('init-overlay');
    },
    runHeadlessInitialCommand: async () => {
      calls.push('run-headless-command');
    },
    handleInitialArgs: () => {
      calls.push('handle-initial-args');
    },
    // The mode under test: headless command requested, no minimal startup.
    shouldRunHeadlessInitialCommand: () => true,
    shouldUseMinimalStartup: () => false,
    shouldSkipHeavyStartup: () => false,
  });

  // Exact trace: config + runtime options bootstrap, then the headless command.
  assert.deepEqual(calls, [
    'bootstrap',
    'reload-config',
    'init-runtime-options',
    'run-headless-command',
  ]);
});
|
||||
@@ -131,10 +131,13 @@ export interface AppReadyRuntimeDeps {
|
||||
shouldAutoInitializeOverlayRuntimeFromConfig: () => boolean;
|
||||
setVisibleOverlayVisible: (visible: boolean) => void;
|
||||
initializeOverlayRuntime: () => void;
|
||||
runHeadlessInitialCommand?: () => Promise<void>;
|
||||
handleInitialArgs: () => void;
|
||||
logDebug?: (message: string) => void;
|
||||
onCriticalConfigErrors?: (errors: string[]) => void;
|
||||
now?: () => number;
|
||||
shouldRunHeadlessInitialCommand?: () => boolean;
|
||||
shouldUseMinimalStartup?: () => boolean;
|
||||
shouldSkipHeavyStartup?: () => boolean;
|
||||
}
|
||||
|
||||
@@ -183,6 +186,32 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
|
||||
const now = deps.now ?? (() => Date.now());
|
||||
const startupStartedAtMs = now();
|
||||
deps.ensureDefaultConfigBootstrap();
|
||||
if (deps.shouldRunHeadlessInitialCommand?.()) {
|
||||
deps.reloadConfig();
|
||||
deps.initRuntimeOptionsManager();
|
||||
if (deps.runHeadlessInitialCommand) {
|
||||
await deps.runHeadlessInitialCommand();
|
||||
} else {
|
||||
deps.createMpvClient();
|
||||
deps.createSubtitleTimingTracker();
|
||||
deps.initializeOverlayRuntime();
|
||||
deps.handleInitialArgs();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (deps.texthookerOnlyMode) {
|
||||
deps.reloadConfig();
|
||||
deps.handleInitialArgs();
|
||||
return;
|
||||
}
|
||||
|
||||
if (deps.shouldUseMinimalStartup?.()) {
|
||||
deps.reloadConfig();
|
||||
deps.handleInitialArgs();
|
||||
return;
|
||||
}
|
||||
|
||||
if (deps.shouldSkipHeavyStartup?.()) {
|
||||
await deps.loadYomitanExtension();
|
||||
deps.reloadConfig();
|
||||
|
||||
1015
src/core/services/stats-server.ts
Normal file
1015
src/core/services/stats-server.ts
Normal file
File diff suppressed because it is too large
Load Diff
88
src/core/services/stats-window-runtime.ts
Normal file
88
src/core/services/stats-window-runtime.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import type { BrowserWindow, BrowserWindowConstructorOptions } from 'electron';
|
||||
import type { WindowGeometry } from '../../types';
|
||||
|
||||
// Default dashboard window size, used when no saved geometry is supplied.
const DEFAULT_STATS_WINDOW_WIDTH = 900;
const DEFAULT_STATS_WINDOW_HEIGHT = 700;

// Minimal window surface needed to adjust the stats window's stacking level.
// The workspace/full-screen setters are optional because they are not
// meaningful on every platform.
type StatsWindowLevelController = Pick<BrowserWindow, 'setAlwaysOnTop' | 'moveTop'> &
  Partial<Pick<BrowserWindow, 'setVisibleOnAllWorkspaces' | 'setFullScreenable'>>;
|
||||
|
||||
function isBareToggleKeyInput(input: Electron.Input, toggleKey: string): boolean {
|
||||
return (
|
||||
input.type === 'keyDown' &&
|
||||
input.code === toggleKey &&
|
||||
!input.control &&
|
||||
!input.alt &&
|
||||
!input.meta &&
|
||||
!input.shift &&
|
||||
!input.isAutoRepeat
|
||||
);
|
||||
}
|
||||
|
||||
export function shouldHideStatsWindowForInput(input: Electron.Input, toggleKey: string): boolean {
|
||||
return (
|
||||
(input.type === 'keyDown' && input.key === 'Escape') || isBareToggleKeyInput(input, toggleKey)
|
||||
);
|
||||
}
|
||||
|
||||
export function buildStatsWindowOptions(options: {
|
||||
preloadPath: string;
|
||||
bounds?: WindowGeometry | null;
|
||||
}): BrowserWindowConstructorOptions {
|
||||
return {
|
||||
x: options.bounds?.x,
|
||||
y: options.bounds?.y,
|
||||
width: options.bounds?.width ?? DEFAULT_STATS_WINDOW_WIDTH,
|
||||
height: options.bounds?.height ?? DEFAULT_STATS_WINDOW_HEIGHT,
|
||||
frame: false,
|
||||
transparent: true,
|
||||
alwaysOnTop: true,
|
||||
resizable: false,
|
||||
skipTaskbar: true,
|
||||
hasShadow: false,
|
||||
focusable: true,
|
||||
acceptFirstMouse: true,
|
||||
fullscreenable: false,
|
||||
backgroundColor: '#1e1e2e',
|
||||
show: false,
|
||||
webPreferences: {
|
||||
nodeIntegration: false,
|
||||
contextIsolation: true,
|
||||
preload: options.preloadPath,
|
||||
sandbox: true,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function promoteStatsWindowLevel(
|
||||
window: StatsWindowLevelController,
|
||||
platform: NodeJS.Platform = process.platform,
|
||||
): void {
|
||||
if (platform === 'darwin') {
|
||||
window.setAlwaysOnTop(true, 'screen-saver', 2);
|
||||
window.setVisibleOnAllWorkspaces?.(true, { visibleOnFullScreen: true });
|
||||
window.setFullScreenable?.(false);
|
||||
window.moveTop();
|
||||
return;
|
||||
}
|
||||
|
||||
if (platform === 'win32') {
|
||||
window.setAlwaysOnTop(true, 'screen-saver', 2);
|
||||
window.moveTop();
|
||||
return;
|
||||
}
|
||||
|
||||
window.setAlwaysOnTop(true);
|
||||
window.moveTop();
|
||||
}
|
||||
|
||||
export function buildStatsWindowLoadFileOptions(apiBaseUrl?: string): {
|
||||
query: Record<string, string>;
|
||||
} {
|
||||
return {
|
||||
query: {
|
||||
overlay: '1',
|
||||
...(apiBaseUrl ? { apiBase: apiBaseUrl } : {}),
|
||||
},
|
||||
};
|
||||
}
|
||||
202
src/core/services/stats-window.test.ts
Normal file
202
src/core/services/stats-window.test.ts
Normal file
@@ -0,0 +1,202 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import {
|
||||
buildStatsWindowLoadFileOptions,
|
||||
buildStatsWindowOptions,
|
||||
promoteStatsWindowLevel,
|
||||
shouldHideStatsWindowForInput,
|
||||
} from './stats-window-runtime';
|
||||
|
||||
test('buildStatsWindowOptions uses tracked overlay bounds and preload-friendly web preferences', () => {
|
||||
const options = buildStatsWindowOptions({
|
||||
preloadPath: '/tmp/preload-stats.js',
|
||||
bounds: {
|
||||
x: 120,
|
||||
y: 80,
|
||||
width: 1440,
|
||||
height: 900,
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(options.x, 120);
|
||||
assert.equal(options.y, 80);
|
||||
assert.equal(options.width, 1440);
|
||||
assert.equal(options.height, 900);
|
||||
assert.equal(options.frame, false);
|
||||
assert.equal(options.transparent, true);
|
||||
assert.equal(options.resizable, false);
|
||||
assert.equal(options.webPreferences?.preload, '/tmp/preload-stats.js');
|
||||
assert.equal(options.webPreferences?.contextIsolation, true);
|
||||
assert.equal(options.webPreferences?.nodeIntegration, false);
|
||||
assert.equal(options.webPreferences?.sandbox, true);
|
||||
});
|
||||
|
||||
test('shouldHideStatsWindowForInput matches Escape and configured bare toggle key', () => {
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: 'Escape',
|
||||
code: 'Escape',
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
control: true,
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
alt: true,
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
meta: true,
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
isAutoRepeat: true,
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyDown',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
shift: true,
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
shouldHideStatsWindowForInput(
|
||||
{
|
||||
type: 'keyUp',
|
||||
key: '`',
|
||||
code: 'Backquote',
|
||||
} as Electron.Input,
|
||||
'Backquote',
|
||||
),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test('buildStatsWindowLoadFileOptions enables overlay rendering mode', () => {
|
||||
assert.deepEqual(buildStatsWindowLoadFileOptions(), {
|
||||
query: {
|
||||
overlay: '1',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('buildStatsWindowLoadFileOptions includes provided stats API base URL', () => {
|
||||
assert.deepEqual(buildStatsWindowLoadFileOptions('http://127.0.0.1:6123'), {
|
||||
query: {
|
||||
overlay: '1',
|
||||
apiBase: 'http://127.0.0.1:6123',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('promoteStatsWindowLevel raises stats above overlay level on macOS', () => {
|
||||
const calls: string[] = [];
|
||||
promoteStatsWindowLevel(
|
||||
{
|
||||
setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
|
||||
calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
|
||||
},
|
||||
setVisibleOnAllWorkspaces: (
|
||||
visible: boolean,
|
||||
options?: { visibleOnFullScreen?: boolean },
|
||||
) => {
|
||||
calls.push(
|
||||
`all-workspaces:${visible}:${options?.visibleOnFullScreen === true ? 'fullscreen' : 'plain'}`,
|
||||
);
|
||||
},
|
||||
setFullScreenable: (fullscreenable: boolean) => {
|
||||
calls.push(`fullscreenable:${fullscreenable}`);
|
||||
},
|
||||
moveTop: () => {
|
||||
calls.push('move-top');
|
||||
},
|
||||
} as never,
|
||||
'darwin',
|
||||
);
|
||||
|
||||
assert.deepEqual(calls, [
|
||||
'always-on-top:true:screen-saver:2',
|
||||
'all-workspaces:true:fullscreen',
|
||||
'fullscreenable:false',
|
||||
'move-top',
|
||||
]);
|
||||
});
|
||||
|
||||
test('promoteStatsWindowLevel raises stats above overlay level on Windows', () => {
|
||||
const calls: string[] = [];
|
||||
promoteStatsWindowLevel(
|
||||
{
|
||||
setAlwaysOnTop: (flag: boolean, level?: string, relativeLevel?: number) => {
|
||||
calls.push(`always-on-top:${flag}:${level ?? 'none'}:${relativeLevel ?? 0}`);
|
||||
},
|
||||
moveTop: () => {
|
||||
calls.push('move-top');
|
||||
},
|
||||
} as never,
|
||||
'win32',
|
||||
);
|
||||
|
||||
assert.deepEqual(calls, ['always-on-top:true:screen-saver:2', 'move-top']);
|
||||
});
|
||||
118
src/core/services/stats-window.ts
Normal file
118
src/core/services/stats-window.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import { BrowserWindow, ipcMain } from 'electron';
|
||||
import * as path from 'path';
|
||||
import type { WindowGeometry } from '../../types.js';
|
||||
import { IPC_CHANNELS } from '../../shared/ipc/contracts.js';
|
||||
import {
|
||||
buildStatsWindowLoadFileOptions,
|
||||
buildStatsWindowOptions,
|
||||
promoteStatsWindowLevel,
|
||||
shouldHideStatsWindowForInput,
|
||||
} from './stats-window-runtime.js';
|
||||
|
||||
// Singleton stats overlay window; created lazily on the first toggle and
// reset to null by the window's 'closed' handler.
let statsWindow: BrowserWindow | null = null;
// Ensures the IPC toggle handler is registered at most once per process.
let toggleRegistered = false;
|
||||
|
||||
/**
 * Dependencies and callbacks needed to create and manage the stats overlay
 * window. The getters are functions (rather than plain values) so the active
 * config and tracked bounds are re-read on every toggle/show.
 */
export interface StatsWindowOptions {
  /** Absolute path to stats/dist/ directory */
  staticDir: string;
  /** Absolute path to the compiled preload-stats.js */
  preloadPath: string;
  /** Resolve the active stats API base URL */
  getApiBaseUrl?: () => string;
  /** Resolve the active stats toggle key from config */
  getToggleKey: () => string;
  /** Resolve the tracked overlay/mpv bounds */
  resolveBounds: () => WindowGeometry | null;
  /** Notify the main process when the stats overlay becomes visible/hidden */
  onVisibilityChanged?: (visible: boolean) => void;
}
|
||||
|
||||
function syncStatsWindowBounds(window: BrowserWindow, bounds: WindowGeometry | null): void {
|
||||
if (!bounds || window.isDestroyed()) return;
|
||||
window.setBounds({
|
||||
x: bounds.x,
|
||||
y: bounds.y,
|
||||
width: bounds.width,
|
||||
height: bounds.height,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Position, promote, and reveal the stats window, then notify listeners.
 *
 * promoteStatsWindowLevel runs both before and after show()/focus() —
 * presumably because showing/focusing can reset the window level on some
 * platforms (NOTE(review): confirm; the double call is deliberate here).
 */
function showStatsWindow(window: BrowserWindow, options: StatsWindowOptions): void {
  syncStatsWindowBounds(window, options.resolveBounds());
  promoteStatsWindowLevel(window);
  window.show();
  window.focus();
  options.onVisibilityChanged?.(true);
  promoteStatsWindowLevel(window);
}
|
||||
|
||||
/**
 * Toggle the stats overlay window: create on first call, then show/hide.
 * The React app stays mounted across toggles — state is preserved.
 */
export function toggleStatsOverlay(options: StatsWindowOptions): void {
  if (!statsWindow) {
    // First toggle: create the window sized/positioned over the tracked
    // player bounds (or the defaults when none are known).
    statsWindow = new BrowserWindow(
      buildStatsWindowOptions({
        preloadPath: options.preloadPath,
        bounds: options.resolveBounds(),
      }),
    );

    const indexPath = path.join(options.staticDir, 'index.html');
    // Overlay mode (and the API base, when resolvable) reach the renderer
    // via loadFile query parameters.
    statsWindow.loadFile(indexPath, buildStatsWindowLoadFileOptions(options.getApiBaseUrl?.()));

    statsWindow.on('closed', () => {
      options.onVisibilityChanged?.(false);
      // Drop the singleton so the next toggle recreates the window.
      statsWindow = null;
    });

    // Escape or a bare press of the configured toggle key hides the overlay
    // (the window survives hidden, keeping renderer state).
    statsWindow.webContents.on('before-input-event', (event, input) => {
      if (shouldHideStatsWindowForInput(input, options.getToggleKey())) {
        event.preventDefault();
        statsWindow?.hide();
        options.onVisibilityChanged?.(false);
      }
    });

    // Defer the first show until the renderer is ready (window was created
    // with show: false).
    statsWindow.once('ready-to-show', () => {
      if (!statsWindow) return;
      showStatsWindow(statsWindow, options);
    });

    // Re-promote the z-level whenever focus is lost while visible, so the
    // overlay stays above the player.
    statsWindow.on('blur', () => {
      if (!statsWindow || statsWindow.isDestroyed() || !statsWindow.isVisible()) {
        return;
      }
      promoteStatsWindowLevel(statsWindow);
    });
  } else if (statsWindow.isVisible()) {
    statsWindow.hide();
    options.onVisibilityChanged?.(false);
  } else {
    showStatsWindow(statsWindow, options);
  }
}
|
||||
|
||||
/**
|
||||
* Register the IPC command handler for toggling the overlay.
|
||||
* Call this once during app initialization.
|
||||
*/
|
||||
export function registerStatsOverlayToggle(options: StatsWindowOptions): void {
|
||||
if (toggleRegistered) return;
|
||||
toggleRegistered = true;
|
||||
ipcMain.on(IPC_CHANNELS.command.toggleStatsOverlay, () => {
|
||||
toggleStatsOverlay(options);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up — destroy the stats window if it exists.
|
||||
* Call during app quit.
|
||||
*/
|
||||
export function destroyStatsWindow(): void {
|
||||
if (statsWindow && !statsWindow.isDestroyed()) {
|
||||
statsWindow.destroy();
|
||||
statsWindow = null;
|
||||
}
|
||||
}
|
||||
245
src/core/services/subtitle-cue-parser.test.ts
Normal file
245
src/core/services/subtitle-cue-parser.test.ts
Normal file
@@ -0,0 +1,245 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { parseSrtCues, parseAssCues, parseSubtitleCues } from './subtitle-cue-parser';
|
||||
import type { SubtitleCue } from './subtitle-cue-parser';
|
||||
|
||||
// SRT parsing: timings, multi-line text, hour fields, VTT-style '.' separator,
// empty input, and malformed timing recovery.
test('parseSrtCues parses basic SRT content', () => {
  const content = [
    '1',
    '00:00:01,000 --> 00:00:04,000',
    'こんにちは',
    '',
    '2',
    '00:00:05,000 --> 00:00:08,500',
    '元気ですか',
    '',
  ].join('\n');

  const cues = parseSrtCues(content);

  assert.equal(cues.length, 2);
  assert.equal(cues[0]!.startTime, 1.0);
  assert.equal(cues[0]!.endTime, 4.0);
  assert.equal(cues[0]!.text, 'こんにちは');
  assert.equal(cues[1]!.startTime, 5.0);
  assert.equal(cues[1]!.endTime, 8.5);
  assert.equal(cues[1]!.text, '元気ですか');
});

test('parseSrtCues handles multi-line subtitle text', () => {
  const content = ['1', '00:01:00,000 --> 00:01:05,000', 'これは', 'テストです', ''].join('\n');

  const cues = parseSrtCues(content);

  assert.equal(cues.length, 1);
  // Lines of one cue are rejoined with '\n'.
  assert.equal(cues[0]!.text, 'これは\nテストです');
});

test('parseSrtCues handles hours in timestamps', () => {
  const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');

  const cues = parseSrtCues(content);

  assert.equal(cues[0]!.startTime, 5400.0);
  assert.equal(cues[0]!.endTime, 5405.0);
});

test('parseSrtCues handles VTT-style dot separator', () => {
  const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTスタイル', ''].join('\n');

  const cues = parseSrtCues(content);

  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.startTime, 1.0);
});

test('parseSrtCues returns empty array for empty content', () => {
  assert.deepEqual(parseSrtCues(''), []);
  assert.deepEqual(parseSrtCues(' \n\n '), []);
});

test('parseSrtCues skips malformed timing lines gracefully', () => {
  const content = [
    '1',
    'NOT A TIMING LINE',
    'テスト',
    '',
    '2',
    '00:00:01,000 --> 00:00:02,000',
    '有効',
    '',
  ].join('\n');

  const cues = parseSrtCues(content);

  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, '有効');
});

// ASS parsing: Dialogue rows in [Events], override-tag stripping, commas in
// text, \N markers, Comment rows, hour timestamps, and dynamic Format order.
test('parseAssCues parses basic ASS dialogue lines', () => {
  const content = [
    '[Script Info]',
    'Title: Test',
    '',
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,こんにちは',
    'Dialogue: 0,0:00:05.00,0:00:08.50,Default,,0,0,0,,元気ですか',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues.length, 2);
  assert.equal(cues[0]!.startTime, 1.0);
  assert.equal(cues[0]!.endTime, 4.0);
  assert.equal(cues[0]!.text, 'こんにちは');
  assert.equal(cues[1]!.startTime, 5.0);
  assert.equal(cues[1]!.endTime, 8.5);
  assert.equal(cues[1]!.text, '元気ですか');
});

test('parseAssCues strips override tags from text', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\b1}太字{\\b0}テスト',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues[0]!.text, '太字テスト');
});

test('parseAssCues handles text containing commas', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,はい、そうです、ね',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues[0]!.text, 'はい、そうです、ね');
});

test('parseAssCues handles \\N line breaks', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,一行目\\N二行目',
  ].join('\n');

  const cues = parseAssCues(content);

  // \N markers are preserved verbatim, not converted to newlines.
  assert.equal(cues[0]!.text, '一行目\\N二行目');
});

test('parseAssCues returns empty for content without Events section', () => {
  const content = ['[Script Info]', 'Title: Test'].join('\n');

  assert.deepEqual(parseAssCues(content), []);
});

test('parseAssCues skips Comment lines', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Comment: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,これはコメント',
    'Dialogue: 0,0:00:05.00,0:00:08.00,Default,,0,0,0,,これは字幕',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, 'これは字幕');
});

test('parseAssCues handles hour timestamps', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,1:30:00.00,1:30:05.00,Default,,0,0,0,,テスト',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues[0]!.startTime, 5400.0);
  assert.equal(cues[0]!.endTime, 5405.0);
});

test('parseAssCues respects dynamic field ordering from the Format row', () => {
  const content = [
    '[Events]',
    'Format: Layer, Style, Start, End, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,Default,0:00:01.00,0:00:04.00,,0,0,0,,順番が違う',
  ].join('\n');

  const cues = parseAssCues(content);

  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.startTime, 1.0);
  assert.equal(cues[0]!.endTime, 4.0);
  assert.equal(cues[0]!.text, '順番が違う');
});

// Format auto-detection via parseSubtitleCues: extension-based dispatch,
// sorting guarantee, and URL sources.
test('parseSubtitleCues auto-detects SRT format', () => {
  const content = ['1', '00:00:01,000 --> 00:00:04,000', 'SRTテスト', ''].join('\n');

  const cues = parseSubtitleCues(content, 'test.srt');
  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, 'SRTテスト');
});

test('parseSubtitleCues auto-detects ASS format', () => {
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,ASSテスト',
  ].join('\n');

  const cues = parseSubtitleCues(content, 'test.ass');
  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, 'ASSテスト');
});

test('parseSubtitleCues auto-detects VTT format', () => {
  const content = ['1', '00:00:01.000 --> 00:00:04.000', 'VTTテスト', ''].join('\n');

  const cues = parseSubtitleCues(content, 'test.vtt');
  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, 'VTTテスト');
});

test('parseSubtitleCues returns empty for unknown format', () => {
  assert.deepEqual(parseSubtitleCues('random content', 'test.xyz'), []);
});

test('parseSubtitleCues returns cues sorted by start time', () => {
  const content = [
    '1',
    '00:00:10,000 --> 00:00:14,000',
    '二番目',
    '',
    '2',
    '00:00:01,000 --> 00:00:04,000',
    '一番目',
    '',
  ].join('\n');

  const cues = parseSubtitleCues(content, 'test.srt');
  assert.equal(cues[0]!.text, '一番目');
  assert.equal(cues[1]!.text, '二番目');
});

test('parseSubtitleCues detects subtitle formats from remote URLs', () => {
  const assContent = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:02.00,Default,,0,0,0,,URLテスト',
  ].join('\n');

  const cues = parseSubtitleCues(assContent, 'https://host/subs.ass?lang=ja#track');

  assert.equal(cues.length, 1);
  assert.equal(cues[0]!.text, 'URLテスト');
});
|
||||
191
src/core/services/subtitle-cue-parser.ts
Normal file
191
src/core/services/subtitle-cue-parser.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
/** One subtitle line together with its display window, in seconds. */
export interface SubtitleCue {
  // Seconds from media start when the cue appears.
  startTime: number;
  // Seconds from media start when the cue disappears.
  endTime: number;
  text: string;
}
|
||||
|
||||
const SRT_TIMING_PATTERN =
|
||||
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
|
||||
|
||||
function parseTimestamp(
|
||||
hours: string | undefined,
|
||||
minutes: string,
|
||||
seconds: string,
|
||||
millis: string,
|
||||
): number {
|
||||
return (
|
||||
Number(hours || 0) * 3600 +
|
||||
Number(minutes) * 60 +
|
||||
Number(seconds) +
|
||||
Number(millis.padEnd(3, '0')) / 1000
|
||||
);
|
||||
}
|
||||
|
||||
export function parseSrtCues(content: string): SubtitleCue[] {
|
||||
const cues: SubtitleCue[] = [];
|
||||
const lines = content.split(/\r?\n/);
|
||||
let i = 0;
|
||||
|
||||
while (i < lines.length) {
|
||||
const line = lines[i]!;
|
||||
const timingMatch = SRT_TIMING_PATTERN.exec(line);
|
||||
if (!timingMatch) {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const startTime = parseTimestamp(
|
||||
timingMatch[1],
|
||||
timingMatch[2]!,
|
||||
timingMatch[3]!,
|
||||
timingMatch[4]!,
|
||||
);
|
||||
const endTime = parseTimestamp(
|
||||
timingMatch[5],
|
||||
timingMatch[6]!,
|
||||
timingMatch[7]!,
|
||||
timingMatch[8]!,
|
||||
);
|
||||
|
||||
i += 1;
|
||||
const textLines: string[] = [];
|
||||
while (i < lines.length && lines[i]!.trim() !== '') {
|
||||
textLines.push(lines[i]!);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
const text = textLines.join('\n').trim();
|
||||
if (text) {
|
||||
cues.push({ startTime, endTime, text });
|
||||
}
|
||||
}
|
||||
|
||||
return cues;
|
||||
}
|
||||
|
||||
const ASS_OVERRIDE_TAG_PATTERN = /\{[^}]*\}/g;
|
||||
|
||||
const ASS_TIMING_PATTERN = /^(\d+):(\d{2}):(\d{2})\.(\d{1,2})$/;
|
||||
const ASS_FORMAT_PREFIX = 'Format:';
|
||||
const ASS_DIALOGUE_PREFIX = 'Dialogue:';
|
||||
|
||||
function parseAssTimestamp(raw: string): number | null {
|
||||
const match = ASS_TIMING_PATTERN.exec(raw.trim());
|
||||
if (!match) {
|
||||
return null;
|
||||
}
|
||||
const hours = Number(match[1]);
|
||||
const minutes = Number(match[2]);
|
||||
const seconds = Number(match[3]);
|
||||
const centiseconds = Number(match[4]!.padEnd(2, '0'));
|
||||
return hours * 3600 + minutes * 60 + seconds + centiseconds / 100;
|
||||
}
|
||||
|
||||
export function parseAssCues(content: string): SubtitleCue[] {
|
||||
const cues: SubtitleCue[] = [];
|
||||
const lines = content.split(/\r?\n/);
|
||||
let inEventsSection = false;
|
||||
let startFieldIndex = -1;
|
||||
let endFieldIndex = -1;
|
||||
let textFieldIndex = -1;
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
|
||||
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
|
||||
inEventsSection = trimmed.toLowerCase() === '[events]';
|
||||
if (!inEventsSection) {
|
||||
startFieldIndex = -1;
|
||||
endFieldIndex = -1;
|
||||
textFieldIndex = -1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!inEventsSection) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trimmed.startsWith(ASS_FORMAT_PREFIX)) {
|
||||
const formatFields = trimmed
|
||||
.slice(ASS_FORMAT_PREFIX.length)
|
||||
.split(',')
|
||||
.map((field) => field.trim().toLowerCase());
|
||||
startFieldIndex = formatFields.indexOf('start');
|
||||
endFieldIndex = formatFields.indexOf('end');
|
||||
textFieldIndex = formatFields.indexOf('text');
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!trimmed.startsWith(ASS_DIALOGUE_PREFIX)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (startFieldIndex < 0 || endFieldIndex < 0 || textFieldIndex < 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const fields = trimmed.slice(ASS_DIALOGUE_PREFIX.length).split(',');
|
||||
if (
|
||||
startFieldIndex >= fields.length ||
|
||||
endFieldIndex >= fields.length ||
|
||||
textFieldIndex >= fields.length
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const startTime = parseAssTimestamp(fields[startFieldIndex]!);
|
||||
const endTime = parseAssTimestamp(fields[endFieldIndex]!);
|
||||
if (startTime === null || endTime === null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const rawText = fields
|
||||
.slice(textFieldIndex)
|
||||
.join(',')
|
||||
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
|
||||
.trim();
|
||||
if (rawText) {
|
||||
cues.push({ startTime, endTime, text: rawText });
|
||||
}
|
||||
}
|
||||
|
||||
return cues;
|
||||
}
|
||||
|
||||
function detectSubtitleFormat(source: string): 'srt' | 'vtt' | 'ass' | 'ssa' | null {
|
||||
const [normalizedSource = source] =
|
||||
(() => {
|
||||
try {
|
||||
return /^[a-z]+:\/\//i.test(source) ? new URL(source).pathname : source;
|
||||
} catch {
|
||||
return source;
|
||||
}
|
||||
})().split(/[?#]/, 1)[0] ?? '';
|
||||
const ext = normalizedSource.split('.').pop()?.toLowerCase() ?? '';
|
||||
if (ext === 'srt') return 'srt';
|
||||
if (ext === 'vtt') return 'vtt';
|
||||
if (ext === 'ass' || ext === 'ssa') return 'ass';
|
||||
return null;
|
||||
}
|
||||
|
||||
export function parseSubtitleCues(content: string, filename: string): SubtitleCue[] {
|
||||
const format = detectSubtitleFormat(filename);
|
||||
let cues: SubtitleCue[];
|
||||
|
||||
switch (format) {
|
||||
case 'srt':
|
||||
case 'vtt':
|
||||
cues = parseSrtCues(content);
|
||||
break;
|
||||
case 'ass':
|
||||
case 'ssa':
|
||||
cues = parseAssCues(content);
|
||||
break;
|
||||
default:
|
||||
return [];
|
||||
}
|
||||
|
||||
cues.sort((a, b) => a.startTime - b.startTime);
|
||||
return cues;
|
||||
}
|
||||
244
src/core/services/subtitle-prefetch.test.ts
Normal file
244
src/core/services/subtitle-prefetch.test.ts
Normal file
@@ -0,0 +1,244 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { computePriorityWindow, createSubtitlePrefetchService } from './subtitle-prefetch';
|
||||
import type { SubtitleCue } from './subtitle-cue-parser';
|
||||
import type { SubtitleData } from '../../types';
|
||||
|
||||
// Build `count` synthetic cues: cue i spans [startOffset + 5i, startOffset +
// 5i + 4] with text "line-<i>", so positions and texts are easy to predict.
function makeCues(count: number, startOffset = 0): SubtitleCue[] {
  return Array.from({ length: count }, (_, i) => ({
    startTime: startOffset + i * 5,
    endTime: startOffset + i * 5 + 4,
    text: `line-${i}`,
  }));
}

test('computePriorityWindow returns next N cues from current position', () => {
  const cues = makeCues(20);
  const window = computePriorityWindow(cues, 12.0, 5);

  assert.equal(window.length, 5);
  // Position 12.0 falls during cue 2, so the active cue should be warmed first.
  assert.equal(window[0]!.text, 'line-2');
  assert.equal(window[4]!.text, 'line-6');
});

test('computePriorityWindow clamps to remaining cues at end of file', () => {
  const cues = makeCues(5);
  const window = computePriorityWindow(cues, 18.0, 10);

  // Position 18.0 is during cue 3 (start=15), so cue 3 and cue 4 remain.
  assert.equal(window.length, 2);
  assert.equal(window[0]!.text, 'line-3');
  assert.equal(window[1]!.text, 'line-4');
});

test('computePriorityWindow returns empty when past all cues', () => {
  const cues = makeCues(3);
  const window = computePriorityWindow(cues, 999.0, 10);
  assert.equal(window.length, 0);
});

test('computePriorityWindow at position 0 returns first N cues', () => {
  const cues = makeCues(20);
  const window = computePriorityWindow(cues, 0, 5);

  assert.equal(window.length, 5);
  assert.equal(window[0]!.text, 'line-0');
});

test('computePriorityWindow includes the active cue when current position is mid-line', () => {
  const cues = makeCues(20);
  const window = computePriorityWindow(cues, 18.0, 3);

  assert.equal(window.length, 3);
  assert.equal(window[0]!.text, 'line-3');
  assert.equal(window[1]!.text, 'line-4');
  assert.equal(window[2]!.text, 'line-5');
});

// setTimeout(0) yields a full macrotask turn so pending promise chains in the
// service can settle between assertions.
function flushMicrotasks(): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, 0));
}

test('prefetch service tokenizes priority window cues and caches them', async () => {
  const cues = makeCues(20);
  const cached: Map<string, SubtitleData> = new Map();
  // NOTE(review): tokenizeCalls is tracked but never asserted in this test —
  // consider asserting on it or dropping it.
  let tokenizeCalls = 0;

  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: (text, data) => {
      cached.set(text, data);
    },
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });

  service.start(0);
  // Allow all async tokenization to complete
  for (let i = 0; i < 25; i += 1) {
    await flushMicrotasks();
  }
  service.stop();

  // Priority window (first 3) should be cached
  assert.ok(cached.has('line-0'));
  assert.ok(cached.has('line-1'));
  assert.ok(cached.has('line-2'));
});

test('prefetch service stops when cache is full', async () => {
  const cues = makeCues(20);
  let tokenizeCalls = 0;
  let cacheSize = 0;

  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {
      cacheSize += 1;
    },
    isCacheFull: () => cacheSize >= 5,
    priorityWindowSize: 3,
  });

  service.start(0);
  for (let i = 0; i < 30; i += 1) {
    await flushMicrotasks();
  }
  service.stop();

  // Should have stopped at 5 (cache full), not tokenized all 20
  assert.ok(tokenizeCalls <= 6, `Expected <= 6 tokenize calls, got ${tokenizeCalls}`);
});

test('prefetch service can be stopped mid-flight', async () => {
  const cues = makeCues(100);
  let tokenizeCalls = 0;

  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => {
      tokenizeCalls += 1;
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {},
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });

  service.start(0);
  await flushMicrotasks();
  await flushMicrotasks();
  service.stop();
  const callsAtStop = tokenizeCalls;

  // Wait more to confirm no further calls
  for (let i = 0; i < 10; i += 1) {
    await flushMicrotasks();
  }

  assert.equal(tokenizeCalls, callsAtStop, 'No further tokenize calls after stop');
  assert.ok(tokenizeCalls < 100, 'Should not have tokenized all cues');
});

test('prefetch service onSeek re-prioritizes from new position', async () => {
  const cues = makeCues(20);
  const cachedTexts: string[] = [];

  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
    preCacheTokenization: (text) => {
      cachedTexts.push(text);
    },
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });

  service.start(0);
  // Let a few cues process
  for (let i = 0; i < 5; i += 1) {
    await flushMicrotasks();
  }

  // Seek to near the end
  service.onSeek(80.0);
  for (let i = 0; i < 30; i += 1) {
    await flushMicrotasks();
  }
  service.stop();

  // After seek to 80.0, cues starting after 80.0 (line-17, line-18, line-19) should appear in cached
  const hasPostSeekCue = cachedTexts.some(
    (t) => t === 'line-17' || t === 'line-18' || t === 'line-19',
  );
  assert.ok(hasPostSeekCue, 'Should have cached cues after seek position');
});

test('prefetch service still warms the priority window when cache is full', async () => {
  const cues = makeCues(20);
  const cachedTexts: string[] = [];

  const service = createSubtitlePrefetchService({
    cues,
    tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
    preCacheTokenization: (text) => {
      cachedTexts.push(text);
    },
    isCacheFull: () => true,
    priorityWindowSize: 3,
  });

  service.start(0);
  for (let i = 0; i < 10; i += 1) {
    await flushMicrotasks();
  }
  service.stop();

  assert.deepEqual(cachedTexts.slice(0, 3), ['line-0', 'line-1', 'line-2']);
});
|
||||
|
||||
test('prefetch service pause/resume halts and continues tokenization', async () => {
|
||||
const cues = makeCues(20);
|
||||
let tokenizeCalls = 0;
|
||||
|
||||
const service = createSubtitlePrefetchService({
|
||||
cues,
|
||||
tokenizeSubtitle: async (text) => {
|
||||
tokenizeCalls += 1;
|
||||
return { text, tokens: [] };
|
||||
},
|
||||
preCacheTokenization: () => {},
|
||||
isCacheFull: () => false,
|
||||
priorityWindowSize: 3,
|
||||
});
|
||||
|
||||
service.start(0);
|
||||
await flushMicrotasks();
|
||||
await flushMicrotasks();
|
||||
service.pause();
|
||||
|
||||
const callsWhenPaused = tokenizeCalls;
|
||||
// Wait while paused
|
||||
for (let i = 0; i < 5; i += 1) {
|
||||
await flushMicrotasks();
|
||||
}
|
||||
// Should not have advanced much (may have 1 in-flight)
|
||||
assert.ok(tokenizeCalls <= callsWhenPaused + 1, 'Should not tokenize much while paused');
|
||||
|
||||
service.resume();
|
||||
for (let i = 0; i < 30; i += 1) {
|
||||
await flushMicrotasks();
|
||||
}
|
||||
service.stop();
|
||||
|
||||
assert.ok(tokenizeCalls > callsWhenPaused + 1, 'Should resume tokenizing after unpause');
|
||||
});
|
||||
153
src/core/services/subtitle-prefetch.ts
Normal file
153
src/core/services/subtitle-prefetch.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import type { SubtitleCue } from './subtitle-cue-parser';
|
||||
import type { SubtitleData } from '../../types';
|
||||
|
||||
export interface SubtitlePrefetchServiceDeps {
|
||||
cues: SubtitleCue[];
|
||||
tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;
|
||||
preCacheTokenization: (text: string, data: SubtitleData) => void;
|
||||
isCacheFull: () => boolean;
|
||||
priorityWindowSize?: number;
|
||||
}
|
||||
|
||||
export interface SubtitlePrefetchService {
|
||||
start: (currentTimeSeconds: number) => void;
|
||||
stop: () => void;
|
||||
onSeek: (newTimeSeconds: number) => void;
|
||||
pause: () => void;
|
||||
resume: () => void;
|
||||
}
|
||||
|
||||
const DEFAULT_PRIORITY_WINDOW_SIZE = 10;
|
||||
|
||||
export function computePriorityWindow(
|
||||
cues: SubtitleCue[],
|
||||
currentTimeSeconds: number,
|
||||
windowSize: number,
|
||||
): SubtitleCue[] {
|
||||
if (cues.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// Find the first cue whose end time is after the current position.
|
||||
// This includes the currently active cue when playback starts or seeks
|
||||
// mid-line, while still skipping cues that have already finished.
|
||||
let startIndex = -1;
|
||||
for (let i = 0; i < cues.length; i += 1) {
|
||||
if (cues[i]!.endTime > currentTimeSeconds) {
|
||||
startIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (startIndex < 0) {
|
||||
// All cues are before current time
|
||||
return [];
|
||||
}
|
||||
|
||||
return cues.slice(startIndex, startIndex + windowSize);
|
||||
}
|
||||
|
||||
export function createSubtitlePrefetchService(
|
||||
deps: SubtitlePrefetchServiceDeps,
|
||||
): SubtitlePrefetchService {
|
||||
const windowSize = deps.priorityWindowSize ?? DEFAULT_PRIORITY_WINDOW_SIZE;
|
||||
let stopped = true;
|
||||
let paused = false;
|
||||
let currentRunId = 0;
|
||||
|
||||
async function tokenizeCueList(
|
||||
cuesToProcess: SubtitleCue[],
|
||||
runId: number,
|
||||
options: { allowWhenCacheFull?: boolean } = {},
|
||||
): Promise<void> {
|
||||
for (const cue of cuesToProcess) {
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait while paused
|
||||
while (paused && !stopped && runId === currentRunId) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
}
|
||||
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!options.allowWhenCacheFull && deps.isCacheFull()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await deps.tokenizeSubtitle(cue.text);
|
||||
if (result && !stopped && runId === currentRunId) {
|
||||
deps.preCacheTokenization(cue.text, result);
|
||||
}
|
||||
} catch {
|
||||
// Skip failed cues, continue prefetching
|
||||
}
|
||||
|
||||
// Yield to allow live processing to take priority
|
||||
await new Promise((resolve) => setTimeout(resolve, 0));
|
||||
}
|
||||
}
|
||||
|
||||
async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
|
||||
const cues = deps.cues;
|
||||
|
||||
// Phase 1: Priority window
|
||||
const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
|
||||
await tokenizeCueList(priorityCues, runId, { allowWhenCacheFull: true });
|
||||
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Phase 2: Background - remaining cues forward from current position
|
||||
const priorityTexts = new Set(priorityCues.map((c) => c.text));
|
||||
const remainingCues = cues.filter(
|
||||
(cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
|
||||
);
|
||||
await tokenizeCueList(remainingCues, runId);
|
||||
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Phase 3: Background - earlier cues (for rewind support)
|
||||
const earlierCues = cues.filter(
|
||||
(cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
|
||||
);
|
||||
await tokenizeCueList(earlierCues, runId);
|
||||
}
|
||||
|
||||
return {
|
||||
start(currentTimeSeconds: number) {
|
||||
stopped = false;
|
||||
paused = false;
|
||||
currentRunId += 1;
|
||||
const runId = currentRunId;
|
||||
void startPrefetching(currentTimeSeconds, runId);
|
||||
},
|
||||
|
||||
stop() {
|
||||
stopped = true;
|
||||
currentRunId += 1;
|
||||
},
|
||||
|
||||
onSeek(newTimeSeconds: number) {
|
||||
// Cancel current run and restart from new position
|
||||
currentRunId += 1;
|
||||
const runId = currentRunId;
|
||||
void startPrefetching(newTimeSeconds, runId);
|
||||
},
|
||||
|
||||
pause() {
|
||||
paused = true;
|
||||
},
|
||||
|
||||
resume() {
|
||||
paused = false;
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -170,3 +170,87 @@ test('subtitle processing cache invalidation only affects future subtitle events
|
||||
|
||||
assert.equal(callsByText.get('same'), 2);
|
||||
});
|
||||
|
||||
test('preCacheTokenization stores entry that is returned on next subtitle change', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
let tokenizeCalls = 0;
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => {
|
||||
tokenizeCalls += 1;
|
||||
return { text, tokens: [] };
|
||||
},
|
||||
emitSubtitle: (payload) => emitted.push(payload),
|
||||
});
|
||||
|
||||
controller.preCacheTokenization('予め', { text: '予め', tokens: [] });
|
||||
controller.onSubtitleChange('予め');
|
||||
await flushMicrotasks();
|
||||
|
||||
assert.equal(tokenizeCalls, 0, 'should not call tokenize when pre-cached');
|
||||
assert.deepEqual(emitted, [{ text: '予め', tokens: [] }]);
|
||||
});
|
||||
|
||||
test('preCacheTokenization reuses normalized subtitle text across ASS linebreak variants', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
let tokenizeCalls = 0;
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => {
|
||||
tokenizeCalls += 1;
|
||||
return { text, tokens: [] };
|
||||
},
|
||||
emitSubtitle: (payload) => emitted.push(payload),
|
||||
});
|
||||
|
||||
controller.preCacheTokenization('一行目\\N二行目', { text: '一行目\n二行目', tokens: [] });
|
||||
controller.onSubtitleChange('一行目\n二行目');
|
||||
await flushMicrotasks();
|
||||
|
||||
assert.equal(tokenizeCalls, 0, 'should not call tokenize when normalized text matches');
|
||||
assert.deepEqual(emitted, [{ text: '一行目\n二行目', tokens: [] }]);
|
||||
});
|
||||
|
||||
test('consumeCachedSubtitle returns prefetched payload and prevents reprocessing same line', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
let tokenizeCalls = 0;
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => {
|
||||
tokenizeCalls += 1;
|
||||
return { text, tokens: [] };
|
||||
},
|
||||
emitSubtitle: (payload) => emitted.push(payload),
|
||||
});
|
||||
|
||||
controller.preCacheTokenization('猫\\Nです', { text: '猫\nです', tokens: [] });
|
||||
|
||||
const immediate = controller.consumeCachedSubtitle('猫\nです');
|
||||
assert.deepEqual(immediate, { text: '猫\nです', tokens: [] });
|
||||
|
||||
controller.onSubtitleChange('猫\nです');
|
||||
await flushMicrotasks();
|
||||
|
||||
assert.equal(tokenizeCalls, 0, 'same cached subtitle should not reprocess after immediate consume');
|
||||
assert.deepEqual(emitted, []);
|
||||
});
|
||||
|
||||
test('isCacheFull returns false when cache is below limit', () => {
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => ({ text, tokens: null }),
|
||||
emitSubtitle: () => {},
|
||||
});
|
||||
|
||||
assert.equal(controller.isCacheFull(), false);
|
||||
});
|
||||
|
||||
test('isCacheFull returns true when cache reaches limit', async () => {
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
|
||||
emitSubtitle: () => {},
|
||||
});
|
||||
|
||||
// Fill cache to the 256 limit
|
||||
for (let i = 0; i < 256; i += 1) {
|
||||
controller.preCacheTokenization(`line-${i}`, { text: `line-${i}`, tokens: [] });
|
||||
}
|
||||
|
||||
assert.equal(controller.isCacheFull(), true);
|
||||
});
|
||||
|
||||
@@ -11,6 +11,13 @@ export interface SubtitleProcessingController {
|
||||
onSubtitleChange: (text: string) => void;
|
||||
refreshCurrentSubtitle: (textOverride?: string) => void;
|
||||
invalidateTokenizationCache: () => void;
|
||||
preCacheTokenization: (text: string, data: SubtitleData) => void;
|
||||
consumeCachedSubtitle: (text: string) => SubtitleData | null;
|
||||
isCacheFull: () => boolean;
|
||||
}
|
||||
|
||||
function normalizeSubtitleCacheKey(text: string): string {
|
||||
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
|
||||
}
|
||||
|
||||
export function createSubtitleProcessingController(
|
||||
@@ -26,18 +33,19 @@ export function createSubtitleProcessingController(
|
||||
const now = deps.now ?? (() => Date.now());
|
||||
|
||||
const getCachedTokenization = (text: string): SubtitleData | null => {
|
||||
const cached = tokenizationCache.get(text);
|
||||
const cacheKey = normalizeSubtitleCacheKey(text);
|
||||
const cached = tokenizationCache.get(cacheKey);
|
||||
if (!cached) {
|
||||
return null;
|
||||
}
|
||||
|
||||
tokenizationCache.delete(text);
|
||||
tokenizationCache.set(text, cached);
|
||||
tokenizationCache.delete(cacheKey);
|
||||
tokenizationCache.set(cacheKey, cached);
|
||||
return cached;
|
||||
};
|
||||
|
||||
const setCachedTokenization = (text: string, payload: SubtitleData): void => {
|
||||
tokenizationCache.set(text, payload);
|
||||
tokenizationCache.set(normalizeSubtitleCacheKey(text), payload);
|
||||
while (tokenizationCache.size > SUBTITLE_TOKENIZATION_CACHE_LIMIT) {
|
||||
const firstKey = tokenizationCache.keys().next().value;
|
||||
if (firstKey !== undefined) {
|
||||
@@ -130,5 +138,22 @@ export function createSubtitleProcessingController(
|
||||
invalidateTokenizationCache: () => {
|
||||
tokenizationCache.clear();
|
||||
},
|
||||
preCacheTokenization: (text: string, data: SubtitleData) => {
|
||||
setCachedTokenization(text, data);
|
||||
},
|
||||
consumeCachedSubtitle: (text: string) => {
|
||||
const cached = getCachedTokenization(text);
|
||||
if (!cached) {
|
||||
return null;
|
||||
}
|
||||
|
||||
latestText = text;
|
||||
lastEmittedText = text;
|
||||
refreshRequested = false;
|
||||
return cached;
|
||||
},
|
||||
isCacheFull: () => {
|
||||
return tokenizationCache.size >= SUBTITLE_TOKENIZATION_CACHE_LIMIT;
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -108,8 +108,9 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
isMerged: false,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNPlusOneTarget: true,
|
||||
isNameMatch: true,
|
||||
jlptLevel: 'N5',
|
||||
frequencyRank: 12,
|
||||
},
|
||||
],
|
||||
@@ -122,9 +123,35 @@ test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence'
|
||||
);
|
||||
assert.match(
|
||||
markup,
|
||||
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア" data-frequency-rank="12">アレクシア<\/span>/,
|
||||
/<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア">アレクシア<\/span>/,
|
||||
);
|
||||
assert.doesNotMatch(markup, /word-name-match word-known|word-known word-name-match/);
|
||||
assert.doesNotMatch(markup, /word-name-match word-n-plus-one|word-n-plus-one word-name-match/);
|
||||
assert.doesNotMatch(markup, /data-frequency-rank="12"|data-jlpt-level="N5"|word-jlpt-n5/);
|
||||
});
|
||||
|
||||
test('serializeSubtitleMarkup keeps filtered tokens hoverable without annotation attrs', () => {
|
||||
const payload: SubtitleData = {
|
||||
text: 'は',
|
||||
tokens: [
|
||||
{
|
||||
surface: 'は',
|
||||
reading: 'は',
|
||||
headword: 'は',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
isMerged: false,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: false,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const markup = serializeSubtitleMarkup(payload, frequencyOptions);
|
||||
assert.equal(markup, '<span class="word" data-reading="は" data-headword="は">は</span>');
|
||||
});
|
||||
|
||||
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
|
||||
|
||||
@@ -47,10 +47,15 @@ function escapeHtml(text: string): string {
|
||||
.replaceAll("'", ''');
|
||||
}
|
||||
|
||||
function hasPrioritizedNameMatch(token: MergedToken): boolean {
|
||||
return token.isNameMatch === true;
|
||||
}
|
||||
|
||||
function computeFrequencyClass(
|
||||
token: MergedToken,
|
||||
options: SubtitleWebsocketFrequencyOptions,
|
||||
): string | null {
|
||||
if (hasPrioritizedNameMatch(token)) return null;
|
||||
if (!options.enabled) return null;
|
||||
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
|
||||
|
||||
@@ -70,6 +75,7 @@ function getFrequencyRankLabel(
|
||||
token: MergedToken,
|
||||
options: SubtitleWebsocketFrequencyOptions,
|
||||
): string | null {
|
||||
if (hasPrioritizedNameMatch(token)) return null;
|
||||
if (!options.enabled) return null;
|
||||
if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
|
||||
|
||||
@@ -79,21 +85,25 @@ function getFrequencyRankLabel(
|
||||
}
|
||||
|
||||
function getJlptLevelLabel(token: MergedToken): string | null {
|
||||
if (hasPrioritizedNameMatch(token)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return token.jlptLevel ?? null;
|
||||
}
|
||||
|
||||
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
|
||||
const classes = ['word'];
|
||||
|
||||
if (token.isNPlusOneTarget) {
|
||||
classes.push('word-n-plus-one');
|
||||
} else if (token.isNameMatch) {
|
||||
if (hasPrioritizedNameMatch(token)) {
|
||||
classes.push('word-name-match');
|
||||
} else if (token.isNPlusOneTarget) {
|
||||
classes.push('word-n-plus-one');
|
||||
} else if (token.isKnown) {
|
||||
classes.push('word-known');
|
||||
}
|
||||
|
||||
if (token.jlptLevel) {
|
||||
if (!hasPrioritizedNameMatch(token) && token.jlptLevel) {
|
||||
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||
}
|
||||
|
||||
@@ -137,6 +147,8 @@ function serializeSubtitleToken(
|
||||
token: MergedToken,
|
||||
options: SubtitleWebsocketFrequencyOptions,
|
||||
): SerializedSubtitleToken {
|
||||
const prioritizedNameMatch = hasPrioritizedNameMatch(token);
|
||||
|
||||
return {
|
||||
surface: token.surface,
|
||||
reading: token.reading,
|
||||
@@ -146,10 +158,10 @@ function serializeSubtitleToken(
|
||||
partOfSpeech: token.partOfSpeech,
|
||||
isMerged: token.isMerged,
|
||||
isKnown: token.isKnown,
|
||||
isNPlusOneTarget: token.isNPlusOneTarget,
|
||||
isNPlusOneTarget: prioritizedNameMatch ? false : token.isNPlusOneTarget,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
jlptLevel: token.jlptLevel,
|
||||
frequencyRank: token.frequencyRank,
|
||||
jlptLevel: prioritizedNameMatch ? undefined : token.jlptLevel,
|
||||
frequencyRank: prioritizedNameMatch ? undefined : token.frequencyRank,
|
||||
className: computeWordClass(token, options),
|
||||
frequencyRankLabel: getFrequencyRankLabel(token, options),
|
||||
jlptLevelLabel: getJlptLevelLabel(token),
|
||||
|
||||
@@ -1,23 +1,72 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { injectTexthookerBootstrapHtml } from './texthooker';
|
||||
import { injectTexthookerBootstrapHtml, type TexthookerBootstrapSettings } from './texthooker';
|
||||
|
||||
test('injectTexthookerBootstrapHtml injects websocket bootstrap before head close', () => {
|
||||
const html = '<html><head><title>Texthooker</title></head><body></body></html>';
|
||||
|
||||
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678');
|
||||
const settings: TexthookerBootstrapSettings = {
|
||||
enableKnownWordColoring: true,
|
||||
enableNPlusOneColoring: true,
|
||||
enableNameMatchColoring: true,
|
||||
enableFrequencyColoring: true,
|
||||
enableJlptColoring: true,
|
||||
characterDictionaryEnabled: true,
|
||||
knownWordColor: '#a6da95',
|
||||
nPlusOneColor: '#c6a0f6',
|
||||
nameMatchColor: '#f5bde6',
|
||||
hoverTokenColor: '#f4dbd6',
|
||||
hoverTokenBackgroundColor: 'rgba(54, 58, 79, 0.84)',
|
||||
jlptColors: {
|
||||
N1: '#ed8796',
|
||||
N2: '#f5a97f',
|
||||
N3: '#f9e2af',
|
||||
N4: '#a6e3a1',
|
||||
N5: '#8aadf4',
|
||||
},
|
||||
frequencyDictionary: {
|
||||
singleColor: '#f5a97f',
|
||||
bandedColors: ['#ed8796', '#f5a97f', '#f9e2af', '#8bd5ca', '#8aadf4'],
|
||||
},
|
||||
};
|
||||
const actual = injectTexthookerBootstrapHtml(html, 'ws://127.0.0.1:6678', settings);
|
||||
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-websocketUrl', "ws:\/\/127\.0\.0\.1:6678"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-enableKnownWordColoring', "1"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-enableNPlusOneColoring', "1"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-enableNameMatchColoring', "1"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-enableFrequencyColoring', "1"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-enableJlptColoring', "1"\)/,
|
||||
);
|
||||
assert.match(
|
||||
actual,
|
||||
/window\.localStorage\.setItem\('bannou-texthooker-characterDictionaryEnabled', "1"\)/,
|
||||
);
|
||||
assert.match(actual, /--subminer-known-word-color:\s*#a6da95;/);
|
||||
assert.match(actual, /--subminer-n-plus-one-color:\s*#c6a0f6;/);
|
||||
assert.match(actual, /--subminer-name-match-color:\s*#f5bde6;/);
|
||||
assert.match(actual, /--subminer-jlpt-n1-color:\s*#ed8796;/);
|
||||
assert.match(actual, /--subminer-frequency-band-4-color:\s*#8bd5ca;/);
|
||||
assert.match(actual, /--sm-token-hover-bg:\s*rgba\(54, 58, 79, 0\.84\);/);
|
||||
assert.doesNotMatch(actual, /p \.word\.word-known\s*\{/);
|
||||
assert.ok(actual.indexOf('</script></head>') !== -1);
|
||||
assert.ok(actual.includes('bannou-texthooker-websocketUrl'));
|
||||
assert.ok(!actual.includes('bannou-texthooker-enableKnownWordColoring'));
|
||||
assert.ok(!actual.includes('bannou-texthooker-enableNPlusOneColoring'));
|
||||
assert.ok(!actual.includes('bannou-texthooker-enableNameMatchColoring'));
|
||||
assert.ok(!actual.includes('bannou-texthooker-enableFrequencyColoring'));
|
||||
assert.ok(!actual.includes('bannou-texthooker-enableJlptColoring'));
|
||||
});
|
||||
|
||||
test('injectTexthookerBootstrapHtml leaves html unchanged without websocketUrl', () => {
|
||||
|
||||
@@ -5,23 +5,92 @@ import { createLogger } from '../../logger';
|
||||
|
||||
const logger = createLogger('main:texthooker');
|
||||
|
||||
export function injectTexthookerBootstrapHtml(html: string, websocketUrl?: string): string {
|
||||
if (!websocketUrl) {
|
||||
export type TexthookerBootstrapSettings = {
|
||||
enableKnownWordColoring: boolean;
|
||||
enableNPlusOneColoring: boolean;
|
||||
enableNameMatchColoring: boolean;
|
||||
enableFrequencyColoring: boolean;
|
||||
enableJlptColoring: boolean;
|
||||
characterDictionaryEnabled: boolean;
|
||||
knownWordColor: string;
|
||||
nPlusOneColor: string;
|
||||
nameMatchColor: string;
|
||||
hoverTokenColor: string;
|
||||
hoverTokenBackgroundColor: string;
|
||||
jlptColors: {
|
||||
N1: string;
|
||||
N2: string;
|
||||
N3: string;
|
||||
N4: string;
|
||||
N5: string;
|
||||
};
|
||||
frequencyDictionary: {
|
||||
singleColor: string;
|
||||
bandedColors: readonly [string, string, string, string, string];
|
||||
};
|
||||
};
|
||||
|
||||
function buildTexthookerBootstrapScript(
|
||||
websocketUrl?: string,
|
||||
settings?: TexthookerBootstrapSettings,
|
||||
): string {
|
||||
const statements: string[] = [];
|
||||
|
||||
if (websocketUrl) {
|
||||
statements.push(
|
||||
`window.localStorage.setItem('bannou-texthooker-websocketUrl', ${JSON.stringify(websocketUrl)});`,
|
||||
);
|
||||
}
|
||||
|
||||
if (settings) {
|
||||
const booleanStorageValue = (enabled: boolean): '"1"' | '"0"' => (enabled ? '"1"' : '"0"');
|
||||
statements.push(
|
||||
`window.localStorage.setItem('bannou-texthooker-enableKnownWordColoring', ${booleanStorageValue(settings.enableKnownWordColoring)});`,
|
||||
`window.localStorage.setItem('bannou-texthooker-enableNPlusOneColoring', ${booleanStorageValue(settings.enableNPlusOneColoring)});`,
|
||||
`window.localStorage.setItem('bannou-texthooker-enableNameMatchColoring', ${booleanStorageValue(settings.enableNameMatchColoring)});`,
|
||||
`window.localStorage.setItem('bannou-texthooker-enableFrequencyColoring', ${booleanStorageValue(settings.enableFrequencyColoring)});`,
|
||||
`window.localStorage.setItem('bannou-texthooker-enableJlptColoring', ${booleanStorageValue(settings.enableJlptColoring)});`,
|
||||
`window.localStorage.setItem('bannou-texthooker-characterDictionaryEnabled', ${booleanStorageValue(settings.characterDictionaryEnabled)});`,
|
||||
);
|
||||
}
|
||||
|
||||
return statements.length > 0 ? `<script>${statements.join('')}</script>` : '';
|
||||
}
|
||||
|
||||
function buildTexthookerBootstrapStyle(settings?: TexthookerBootstrapSettings): string {
|
||||
if (!settings) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const [band1, band2, band3, band4, band5] = settings.frequencyDictionary.bandedColors;
|
||||
|
||||
return `<style id="subminer-texthooker-bootstrap-style">:root{--subminer-known-word-color:${settings.knownWordColor};--subminer-n-plus-one-color:${settings.nPlusOneColor};--subminer-name-match-color:${settings.nameMatchColor};--subminer-jlpt-n1-color:${settings.jlptColors.N1};--subminer-jlpt-n2-color:${settings.jlptColors.N2};--subminer-jlpt-n3-color:${settings.jlptColors.N3};--subminer-jlpt-n4-color:${settings.jlptColors.N4};--subminer-jlpt-n5-color:${settings.jlptColors.N5};--subminer-frequency-single-color:${settings.frequencyDictionary.singleColor};--subminer-frequency-band-1-color:${band1};--subminer-frequency-band-2-color:${band2};--subminer-frequency-band-3-color:${band3};--subminer-frequency-band-4-color:${band4};--subminer-frequency-band-5-color:${band5};--sm-token-hover-bg:${settings.hoverTokenBackgroundColor};--sm-token-hover-text:${settings.hoverTokenColor};}</style>`;
|
||||
}
|
||||
|
||||
export function injectTexthookerBootstrapHtml(
|
||||
html: string,
|
||||
websocketUrl?: string,
|
||||
settings?: TexthookerBootstrapSettings,
|
||||
): string {
|
||||
const bootstrapStyle = buildTexthookerBootstrapStyle(settings);
|
||||
const bootstrapScript = buildTexthookerBootstrapScript(websocketUrl, settings);
|
||||
|
||||
if (!bootstrapStyle && !bootstrapScript) {
|
||||
return html;
|
||||
}
|
||||
|
||||
const bootstrapScript = `<script>window.localStorage.setItem('bannou-texthooker-websocketUrl', ${JSON.stringify(
|
||||
websocketUrl,
|
||||
)});</script>`;
|
||||
|
||||
if (html.includes('</head>')) {
|
||||
return html.replace('</head>', `${bootstrapScript}</head>`);
|
||||
return html.replace('</head>', `${bootstrapStyle}${bootstrapScript}</head>`);
|
||||
}
|
||||
|
||||
return `${bootstrapScript}${html}`;
|
||||
return `${bootstrapStyle}${bootstrapScript}${html}`;
|
||||
}
|
||||
|
||||
export class Texthooker {
|
||||
constructor(
|
||||
private readonly getBootstrapSettings?: () => TexthookerBootstrapSettings | undefined,
|
||||
) {}
|
||||
|
||||
private server: http.Server | null = null;
|
||||
|
||||
public isRunning(): boolean {
|
||||
@@ -62,9 +131,16 @@ export class Texthooker {
|
||||
res.end('Not found');
|
||||
return;
|
||||
}
|
||||
const bootstrapSettings = this.getBootstrapSettings?.();
|
||||
const responseData =
|
||||
urlPath === '/' || urlPath === '/index.html'
|
||||
? Buffer.from(injectTexthookerBootstrapHtml(data.toString('utf-8'), websocketUrl))
|
||||
? Buffer.from(
|
||||
injectTexthookerBootstrapHtml(
|
||||
data.toString('utf-8'),
|
||||
websocketUrl,
|
||||
bootstrapSettings,
|
||||
),
|
||||
)
|
||||
: data;
|
||||
res.writeHead(200, { 'Content-Type': mimeTypes[ext] || 'text/plain' });
|
||||
res.end(responseData);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,7 @@ import {
|
||||
requestYomitanScanTokens,
|
||||
requestYomitanTermFrequencies,
|
||||
} from './tokenizer/yomitan-parser-runtime';
|
||||
import type { YomitanTermFrequency } from './tokenizer/yomitan-parser-runtime';
|
||||
|
||||
const logger = createLogger('main:tokenizer');
|
||||
|
||||
@@ -177,6 +178,19 @@ async function applyAnnotationStage(
|
||||
);
|
||||
}
|
||||
|
||||
async function stripSubtitleAnnotationMetadata(tokens: MergedToken[]): Promise<MergedToken[]> {
|
||||
if (tokens.length === 0) {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
if (!annotationStageModulePromise) {
|
||||
annotationStageModulePromise = import('./tokenizer/annotation-stage');
|
||||
}
|
||||
|
||||
const annotationStage = await annotationStageModulePromise;
|
||||
return tokens.map((token) => annotationStage.stripSubtitleAnnotationMetadata(token));
|
||||
}
|
||||
|
||||
export function createTokenizerDepsRuntime(
|
||||
options: TokenizerDepsRuntimeOptions,
|
||||
): TokenizerServiceDeps {
|
||||
@@ -225,7 +239,13 @@ export function createTokenizerDepsRuntime(
|
||||
return null;
|
||||
}
|
||||
|
||||
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode(), false);
|
||||
return mergeTokens(
|
||||
rawTokens,
|
||||
options.isKnownWord,
|
||||
options.getKnownWordMatchMode(),
|
||||
false,
|
||||
text,
|
||||
);
|
||||
},
|
||||
enrichTokensWithMecab: async (tokens, mecabTokens) =>
|
||||
enrichTokensWithMecabAsync(tokens, mecabTokens),
|
||||
@@ -336,56 +356,162 @@ function resolveFrequencyLookupText(
|
||||
return token.surface;
|
||||
}
|
||||
|
||||
function resolveYomitanFrequencyLookupTexts(
|
||||
token: MergedToken,
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
): string[] {
|
||||
const primaryLookupText = resolveFrequencyLookupText(token, matchMode).trim();
|
||||
if (!primaryLookupText) {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (matchMode !== 'headword') {
|
||||
return [primaryLookupText];
|
||||
}
|
||||
|
||||
const normalizedHeadword = token.headword.trim();
|
||||
const normalizedSurface = token.surface.trim();
|
||||
if (
|
||||
!normalizedHeadword ||
|
||||
!normalizedSurface ||
|
||||
normalizedSurface === normalizedHeadword ||
|
||||
normalizedSurface === primaryLookupText
|
||||
) {
|
||||
return [primaryLookupText];
|
||||
}
|
||||
|
||||
return [primaryLookupText, normalizedSurface];
|
||||
}
|
||||
|
||||
function buildYomitanFrequencyTermReadingList(
|
||||
tokens: MergedToken[],
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
): Array<{ term: string; reading: string | null }> {
|
||||
const termReadingList: Array<{ term: string; reading: string | null }> = [];
|
||||
for (const token of tokens) {
|
||||
const term = resolveFrequencyLookupText(token, matchMode).trim();
|
||||
if (!term) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const readingRaw =
|
||||
token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null;
|
||||
termReadingList.push({ term, reading: readingRaw });
|
||||
for (const term of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
|
||||
termReadingList.push({ term, reading: readingRaw });
|
||||
}
|
||||
}
|
||||
|
||||
return termReadingList;
|
||||
}
|
||||
|
||||
function buildYomitanFrequencyRankMap(
|
||||
frequencies: ReadonlyArray<{ term: string; frequency: number; dictionaryPriority?: number }>,
|
||||
): Map<string, number> {
|
||||
const rankByTerm = new Map<string, { rank: number; dictionaryPriority: number }>();
|
||||
function makeYomitanFrequencyPairKey(term: string, reading: string | null): string {
|
||||
return `${term}\u0000${reading ?? ''}`;
|
||||
}
|
||||
|
||||
interface NormalizedYomitanTermFrequency extends YomitanTermFrequency {
|
||||
reading: string | null;
|
||||
frequency: number;
|
||||
}
|
||||
|
||||
interface YomitanFrequencyIndex {
|
||||
byPair: Map<string, NormalizedYomitanTermFrequency[]>;
|
||||
byTerm: Map<string, NormalizedYomitanTermFrequency[]>;
|
||||
}
|
||||
|
||||
function appendYomitanFrequencyEntry(
|
||||
map: Map<string, NormalizedYomitanTermFrequency[]>,
|
||||
key: string,
|
||||
entry: NormalizedYomitanTermFrequency,
|
||||
): void {
|
||||
const existing = map.get(key);
|
||||
if (existing) {
|
||||
existing.push(entry);
|
||||
return;
|
||||
}
|
||||
|
||||
map.set(key, [entry]);
|
||||
}
|
||||
|
||||
function buildYomitanFrequencyIndex(
|
||||
frequencies: ReadonlyArray<YomitanTermFrequency>,
|
||||
): YomitanFrequencyIndex {
|
||||
const byPair = new Map<string, NormalizedYomitanTermFrequency[]>();
|
||||
const byTerm = new Map<string, NormalizedYomitanTermFrequency[]>();
|
||||
for (const frequency of frequencies) {
|
||||
const normalizedTerm = frequency.term.trim();
|
||||
const term = frequency.term.trim();
|
||||
const rank = normalizePositiveFrequencyRank(frequency.frequency);
|
||||
if (!normalizedTerm || rank === null) {
|
||||
if (!term || rank === null) {
|
||||
continue;
|
||||
}
|
||||
const dictionaryPriority =
|
||||
typeof frequency.dictionaryPriority === 'number' &&
|
||||
Number.isFinite(frequency.dictionaryPriority)
|
||||
? Math.max(0, Math.floor(frequency.dictionaryPriority))
|
||||
: Number.MAX_SAFE_INTEGER;
|
||||
const current = rankByTerm.get(normalizedTerm);
|
||||
|
||||
const reading =
|
||||
typeof frequency.reading === 'string' && frequency.reading.trim().length > 0
|
||||
? frequency.reading.trim()
|
||||
: null;
|
||||
const normalizedEntry: NormalizedYomitanTermFrequency = {
|
||||
...frequency,
|
||||
term,
|
||||
reading,
|
||||
frequency: rank,
|
||||
};
|
||||
appendYomitanFrequencyEntry(
|
||||
byPair,
|
||||
makeYomitanFrequencyPairKey(term, reading),
|
||||
normalizedEntry,
|
||||
);
|
||||
appendYomitanFrequencyEntry(byTerm, term, normalizedEntry);
|
||||
}
|
||||
|
||||
return { byPair, byTerm };
|
||||
}
|
||||
|
||||
function selectBestYomitanFrequencyRank(
|
||||
entries: ReadonlyArray<NormalizedYomitanTermFrequency>,
|
||||
): number | null {
|
||||
let bestEntry: NormalizedYomitanTermFrequency | null = null;
|
||||
for (const entry of entries) {
|
||||
if (
|
||||
current === undefined ||
|
||||
dictionaryPriority < current.dictionaryPriority ||
|
||||
(dictionaryPriority === current.dictionaryPriority && rank < current.rank)
|
||||
bestEntry === null ||
|
||||
entry.dictionaryPriority < bestEntry.dictionaryPriority ||
|
||||
(entry.dictionaryPriority === bestEntry.dictionaryPriority &&
|
||||
entry.frequency < bestEntry.frequency)
|
||||
) {
|
||||
rankByTerm.set(normalizedTerm, { rank, dictionaryPriority });
|
||||
bestEntry = entry;
|
||||
}
|
||||
}
|
||||
|
||||
const collapsedRankByTerm = new Map<string, number>();
|
||||
for (const [term, entry] of rankByTerm.entries()) {
|
||||
collapsedRankByTerm.set(term, entry.rank);
|
||||
return bestEntry?.frequency ?? null;
|
||||
}
|
||||
|
||||
function getYomitanFrequencyRank(
|
||||
token: MergedToken,
|
||||
candidateText: string,
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
frequencyIndex: YomitanFrequencyIndex,
|
||||
): number | null {
|
||||
const normalizedCandidateText = candidateText.trim();
|
||||
if (!normalizedCandidateText) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return collapsedRankByTerm;
|
||||
const reading =
|
||||
typeof token.reading === 'string' && token.reading.trim().length > 0
|
||||
? token.reading.trim()
|
||||
: null;
|
||||
const pairEntries =
|
||||
frequencyIndex.byPair.get(makeYomitanFrequencyPairKey(normalizedCandidateText, reading)) ?? [];
|
||||
const candidateEntries =
|
||||
pairEntries.length > 0
|
||||
? pairEntries
|
||||
: (frequencyIndex.byTerm.get(normalizedCandidateText) ?? []);
|
||||
if (candidateEntries.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const normalizedHeadword = token.headword.trim();
|
||||
const normalizedSurface = token.surface.trim();
|
||||
const isInflectedHeadwordFallback =
|
||||
matchMode === 'headword' &&
|
||||
normalizedCandidateText === normalizedHeadword &&
|
||||
normalizedSurface.length > 0 &&
|
||||
normalizedSurface !== normalizedHeadword;
|
||||
|
||||
return selectBestYomitanFrequencyRank(candidateEntries);
|
||||
}
|
||||
|
||||
function getLocalFrequencyRank(
|
||||
@@ -416,7 +542,7 @@ function getLocalFrequencyRank(
|
||||
function applyFrequencyRanks(
|
||||
tokens: MergedToken[],
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
yomitanRankByTerm: Map<string, number>,
|
||||
yomitanFrequencyIndex: YomitanFrequencyIndex,
|
||||
getFrequencyRank: FrequencyDictionaryLookup | undefined,
|
||||
): MergedToken[] {
|
||||
if (tokens.length === 0) {
|
||||
@@ -441,12 +567,19 @@ function applyFrequencyRanks(
|
||||
};
|
||||
}
|
||||
|
||||
const yomitanRank = yomitanRankByTerm.get(lookupText);
|
||||
if (yomitanRank !== undefined) {
|
||||
return {
|
||||
...token,
|
||||
frequencyRank: yomitanRank,
|
||||
};
|
||||
for (const candidateText of resolveYomitanFrequencyLookupTexts(token, matchMode)) {
|
||||
const yomitanRank = getYomitanFrequencyRank(
|
||||
token,
|
||||
candidateText,
|
||||
matchMode,
|
||||
yomitanFrequencyIndex,
|
||||
);
|
||||
if (yomitanRank !== null) {
|
||||
return {
|
||||
...token,
|
||||
frequencyRank: yomitanRank,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (!getFrequencyRank) {
|
||||
@@ -501,6 +634,7 @@ async function parseWithYomitanInternalParser(
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
frequencyRank: token.frequencyRank,
|
||||
}),
|
||||
),
|
||||
);
|
||||
@@ -510,7 +644,7 @@ async function parseWithYomitanInternalParser(
|
||||
}
|
||||
deps.onTokenizationReady?.(text);
|
||||
|
||||
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
|
||||
const frequencyRankPromise: Promise<YomitanFrequencyIndex> = options.frequencyEnabled
|
||||
? (async () => {
|
||||
const frequencyMatchMode = options.frequencyMatchMode;
|
||||
const termReadingList = buildYomitanFrequencyTermReadingList(
|
||||
@@ -522,9 +656,9 @@ async function parseWithYomitanInternalParser(
|
||||
deps,
|
||||
logger,
|
||||
);
|
||||
return buildYomitanFrequencyRankMap(yomitanFrequencies);
|
||||
return buildYomitanFrequencyIndex(yomitanFrequencies);
|
||||
})()
|
||||
: Promise.resolve(new Map<string, number>());
|
||||
: Promise.resolve({ byPair: new Map(), byTerm: new Map() });
|
||||
|
||||
const mecabEnrichmentPromise: Promise<MergedToken[]> = needsMecabPosEnrichment(options)
|
||||
? (async () => {
|
||||
@@ -545,7 +679,7 @@ async function parseWithYomitanInternalParser(
|
||||
})()
|
||||
: Promise.resolve(normalizedSelectedTokens);
|
||||
|
||||
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
|
||||
const [yomitanFrequencyIndex, enrichedTokens] = await Promise.all([
|
||||
frequencyRankPromise,
|
||||
mecabEnrichmentPromise,
|
||||
]);
|
||||
@@ -554,7 +688,7 @@ async function parseWithYomitanInternalParser(
|
||||
return applyFrequencyRanks(
|
||||
enrichedTokens,
|
||||
options.frequencyMatchMode,
|
||||
yomitanRankByTerm,
|
||||
yomitanFrequencyIndex,
|
||||
deps.getFrequencyRank,
|
||||
);
|
||||
}
|
||||
@@ -585,9 +719,12 @@ export async function tokenizeSubtitle(
|
||||
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
|
||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||
const annotatedTokens = await stripSubtitleAnnotationMetadata(
|
||||
await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
|
||||
);
|
||||
return {
|
||||
text: displayText,
|
||||
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
|
||||
tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { MergedToken, PartOfSpeech } from '../../../types';
|
||||
import { annotateTokens, AnnotationStageDeps } from './annotation-stage';
|
||||
import {
|
||||
annotateTokens,
|
||||
AnnotationStageDeps,
|
||||
shouldExcludeTokenFromSubtitleAnnotations,
|
||||
stripSubtitleAnnotationMetadata,
|
||||
} from './annotation-stage';
|
||||
|
||||
function makeToken(overrides: Partial<MergedToken> = {}): MergedToken {
|
||||
return {
|
||||
@@ -50,6 +55,29 @@ test('annotateTokens known-word match mode uses headword vs surface', () => {
|
||||
assert.equal(surfaceResult[0]?.isKnown, false);
|
||||
});
|
||||
|
||||
test('annotateTokens falls back to reading for known-word matches when headword lookup misses', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '大体',
|
||||
headword: '大体',
|
||||
reading: 'だいたい',
|
||||
frequencyRank: 1895,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'だいたい',
|
||||
getJlptLevel: (text) => (text === '大体' ? 'N4' : null),
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.jlptLevel, 'N4');
|
||||
assert.equal(result[0]?.frequencyRank, 1895);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -150,6 +178,278 @@ test('annotateTokens handles JLPT disabled and eligibility exclusion paths', ()
|
||||
assert.equal(excludedLookupCalls, 0);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory ending variants', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'んです',
|
||||
headword: 'ん',
|
||||
reading: 'ンデス',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '非自立',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'のだ',
|
||||
headword: 'の',
|
||||
reading: 'ノダ',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '非自立',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'んだ',
|
||||
headword: 'ん',
|
||||
reading: 'ンダ',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '非自立',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'のです',
|
||||
headword: 'の',
|
||||
reading: 'ノデス',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '非自立',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'なんです',
|
||||
headword: 'だ',
|
||||
reading: 'ナンデス',
|
||||
pos1: '助動詞|名詞|助動詞',
|
||||
pos2: '|非自立',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'んでした',
|
||||
headword: 'ん',
|
||||
reading: 'ンデシタ',
|
||||
pos1: '助動詞|助動詞|助動詞',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'のでは',
|
||||
headword: 'の',
|
||||
reading: 'ノデハ',
|
||||
pos1: '助詞|接続詞',
|
||||
}),
|
||||
];
|
||||
|
||||
for (const token of tokens) {
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
|
||||
}
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory pondering endings', () => {
|
||||
const token = makeToken({
|
||||
surface: 'のかな',
|
||||
headword: 'の',
|
||||
reading: 'ノカナ',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '非自立',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes auxiliary-stem そうだ grammar tails', () => {
|
||||
const token = makeToken({
|
||||
surface: 'そうだ',
|
||||
headword: 'そうだ',
|
||||
reading: 'ソウダ',
|
||||
pos1: '名詞|助動詞',
|
||||
pos2: '特殊',
|
||||
pos3: '助動詞語幹',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside explanatory ending family', () => {
|
||||
const token = makeToken({
|
||||
surface: '問題',
|
||||
headword: '問題',
|
||||
reading: 'モンダイ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '一般',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
reading: 'ハ',
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'です',
|
||||
headword: 'です',
|
||||
reading: 'デス',
|
||||
partOfSpeech: PartOfSpeech.bound_auxiliary,
|
||||
pos1: '助動詞',
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'この',
|
||||
headword: 'この',
|
||||
reading: 'コノ',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '連体詞',
|
||||
}),
|
||||
];
|
||||
|
||||
for (const token of tokens) {
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true, token.surface);
|
||||
}
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations keeps mixed content tokens with trailing helpers', () => {
|
||||
const token = makeToken({
|
||||
surface: '行きます',
|
||||
headword: '行く',
|
||||
reading: 'イキマス',
|
||||
partOfSpeech: PartOfSpeech.verb,
|
||||
pos1: '動詞|助動詞',
|
||||
pos2: '自立',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes merged lexical tokens with trailing quote particles', () => {
|
||||
const token = makeToken({
|
||||
surface: 'どうしてもって',
|
||||
headword: 'どうしても',
|
||||
reading: 'ドウシテモッテ',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '副詞|助詞',
|
||||
pos2: '一般|格助詞',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes kana-only demonstrative helper merges', () => {
|
||||
const token = makeToken({
|
||||
surface: 'これで',
|
||||
headword: 'これ',
|
||||
reading: 'コレデ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞|助詞',
|
||||
pos2: '代名詞|格助詞',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
});
|
||||
|
||||
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
|
||||
const token = makeToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
reading: 'ハ',
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: true,
|
||||
isNameMatch: true,
|
||||
jlptLevel: 'N5',
|
||||
frequencyRank: 12,
|
||||
});
|
||||
|
||||
assert.deepEqual(stripSubtitleAnnotationMetadata(token), {
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: false,
|
||||
jlptLevel: undefined,
|
||||
frequencyRank: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
test('stripSubtitleAnnotationMetadata leaves content tokens unchanged', () => {
|
||||
const token = makeToken({
|
||||
surface: '猫',
|
||||
headword: '猫',
|
||||
reading: 'ネコ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
isKnown: true,
|
||||
jlptLevel: 'N5',
|
||||
frequencyRank: 42,
|
||||
});
|
||||
|
||||
assert.strictEqual(stripSubtitleAnnotationMetadata(token), token);
|
||||
});
|
||||
|
||||
test('annotateTokens prioritizes name matches over n+1, frequency, and JLPT when enabled', () => {
|
||||
let jlptLookupCalls = 0;
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'オリヴィア',
|
||||
reading: 'オリヴィア',
|
||||
headword: 'オリヴィア',
|
||||
isNameMatch: true,
|
||||
frequencyRank: 42,
|
||||
startPos: 0,
|
||||
endPos: 5,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
getJlptLevel: () => {
|
||||
jlptLookupCalls += 1;
|
||||
return 'N2';
|
||||
},
|
||||
}),
|
||||
{
|
||||
nameMatchEnabled: true,
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isNameMatch, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
assert.equal(jlptLookupCalls, 0);
|
||||
});
|
||||
|
||||
test('annotateTokens keeps other annotations for name matches when name highlighting is disabled', () => {
|
||||
let jlptLookupCalls = 0;
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'オリヴィア',
|
||||
reading: 'オリヴィア',
|
||||
headword: 'オリヴィア',
|
||||
isNameMatch: true,
|
||||
frequencyRank: 42,
|
||||
startPos: 0,
|
||||
endPos: 5,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
getJlptLevel: () => {
|
||||
jlptLookupCalls += 1;
|
||||
return 'N2';
|
||||
},
|
||||
}),
|
||||
{
|
||||
nameMatchEnabled: false,
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
},
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isNameMatch, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[0]?.frequencyRank, 42);
|
||||
assert.equal(result[0]?.jlptLevel, 'N2');
|
||||
assert.equal(jlptLookupCalls, 1);
|
||||
});
|
||||
|
||||
test('annotateTokens N+1 handoff marks expected target when threshold is satisfied', () => {
|
||||
const tokens = [
|
||||
makeToken({ surface: '私', headword: '私', startPos: 0, endPos: 1 }),
|
||||
@@ -206,8 +506,8 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[1]?.isKnown, true);
|
||||
assert.equal(result[2]?.isKnown, true);
|
||||
assert.equal(result[1]?.isKnown, false);
|
||||
assert.equal(result[2]?.isKnown, false);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
@@ -293,6 +593,32 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens clears all annotations for non-independent kanji noun tokens under unified gate', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '者',
|
||||
reading: 'もの',
|
||||
headword: '者',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '名詞',
|
||||
pos2: '非自立',
|
||||
pos3: '一般',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
frequencyRank: 475,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(tokens, makeDeps(), {
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes likely kana SFX tokens from frequency when POS tags are missing', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -444,3 +770,33 @@ test('annotateTokens excludes composite tokens when all component pos tags are e
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens applies one shared exclusion gate across known N+1 frequency and JLPT', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'これで',
|
||||
headword: 'これ',
|
||||
reading: 'コレデ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞|助詞',
|
||||
pos2: '代名詞|格助詞',
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
frequencyRank: 9,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'これ',
|
||||
getJlptLevel: (text) => (text === 'これ' ? 'N5' : null),
|
||||
}),
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
@@ -9,11 +9,65 @@ import {
|
||||
} from '../../../token-pos2-exclusions';
|
||||
import { JlptLevel, MergedToken, NPlusOneMatchMode, PartOfSpeech } from '../../../types';
|
||||
import { shouldIgnoreJlptByTerm, shouldIgnoreJlptForMecabPos1 } from '../jlpt-token-filter';
|
||||
import {
|
||||
shouldExcludeTokenFromSubtitleAnnotations as sharedShouldExcludeTokenFromSubtitleAnnotations,
|
||||
stripSubtitleAnnotationMetadata as sharedStripSubtitleAnnotationMetadata,
|
||||
} from './subtitle-annotation-filter';
|
||||
|
||||
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
|
||||
'ああ',
|
||||
'ええ',
|
||||
'うう',
|
||||
'おお',
|
||||
'はあ',
|
||||
'はは',
|
||||
'へえ',
|
||||
'ふう',
|
||||
'ほう',
|
||||
]);
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
|
||||
'だ',
|
||||
'です',
|
||||
'でした',
|
||||
'だった',
|
||||
'では',
|
||||
'じゃ',
|
||||
'でしょう',
|
||||
'だろう',
|
||||
] as const;
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
|
||||
'',
|
||||
'か',
|
||||
'ね',
|
||||
'よ',
|
||||
'な',
|
||||
'よね',
|
||||
'かな',
|
||||
'かね',
|
||||
] as const;
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
|
||||
(particle) => `${prefix}${core}${particle}`,
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
|
||||
'って',
|
||||
'ってよ',
|
||||
'ってね',
|
||||
'ってな',
|
||||
'ってさ',
|
||||
'ってか',
|
||||
'ってば',
|
||||
]);
|
||||
|
||||
const jlptLevelLookupCaches = new WeakMap<
|
||||
(text: string) => JlptLevel | null,
|
||||
@@ -28,6 +82,7 @@ export interface AnnotationStageDeps {
|
||||
|
||||
export interface AnnotationStageOptions {
|
||||
nPlusOneEnabled?: boolean;
|
||||
nameMatchEnabled?: boolean;
|
||||
jlptEnabled?: boolean;
|
||||
frequencyEnabled?: boolean;
|
||||
minSentenceWordsForNPlusOne?: number;
|
||||
@@ -43,33 +98,27 @@ function resolveKnownWordText(
|
||||
return matchMode === 'surface' ? surface : headword;
|
||||
}
|
||||
|
||||
function applyKnownWordMarking(
|
||||
tokens: MergedToken[],
|
||||
isKnownWord: (text: string) => boolean,
|
||||
knownWordMatchMode: NPlusOneMatchMode,
|
||||
): MergedToken[] {
|
||||
return tokens.map((token) => {
|
||||
const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
|
||||
|
||||
return {
|
||||
...token,
|
||||
isKnown: token.isKnown || (matchText ? isKnownWord(matchText) : false),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function normalizePos1Tag(pos1: string | undefined): string {
|
||||
return typeof pos1 === 'string' ? pos1.trim() : '';
|
||||
}
|
||||
|
||||
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_POS1 = new Set(['感動詞']);
|
||||
const SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1 = new Set(['助詞', '助動詞', '連体詞']);
|
||||
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
|
||||
|
||||
function splitNormalizedTagParts(normalizedTag: string): string[] {
|
||||
if (!normalizedTag) {
|
||||
return false;
|
||||
return [];
|
||||
}
|
||||
const parts = normalizedTag
|
||||
|
||||
return normalizedTag
|
||||
.split('|')
|
||||
.map((part) => part.trim())
|
||||
.filter((part) => part.length > 0);
|
||||
}
|
||||
|
||||
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
|
||||
const parts = splitNormalizedTagParts(normalizedTag);
|
||||
if (parts.length === 0) {
|
||||
return false;
|
||||
}
|
||||
@@ -78,6 +127,50 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
|
||||
return parts.some((part) => exclusions.has(part));
|
||||
}
|
||||
|
||||
function isExcludedFromSubtitleAnnotationsByPos1(normalizedPos1: string): boolean {
|
||||
const parts = splitNormalizedTagParts(normalizedPos1);
|
||||
if (parts.some((part) => SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return parts.length > 0 && parts.every((part) => SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(part));
|
||||
}
|
||||
|
||||
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
|
||||
const normalizedSurface = normalizeJlptTextForExclusion(token.surface);
|
||||
const normalizedHeadword = normalizeJlptTextForExclusion(token.headword);
|
||||
if (!normalizedSurface || !normalizedHeadword || !normalizedSurface.startsWith(normalizedHeadword)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const suffix = normalizedSurface.slice(normalizedHeadword.length);
|
||||
if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(suffix)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
|
||||
if (pos1Parts.length < 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const [leadingPos1, ...trailingPos1] = pos1Parts;
|
||||
if (!leadingPos1 || SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(leadingPos1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return trailingPos1.length > 0 && trailingPos1.every((part) => part === '助詞');
|
||||
}
|
||||
|
||||
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
|
||||
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
|
||||
if (pos1Parts.length === 0 || !pos1Parts.every((part) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(part))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const pos3Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos3));
|
||||
return pos3Parts.includes('助動詞語幹');
|
||||
}
|
||||
|
||||
function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
|
||||
if (options.pos1Exclusions) {
|
||||
return options.pos1Exclusions;
|
||||
@@ -98,6 +191,61 @@ function normalizePos2Tag(pos2: string | undefined): string {
|
||||
return typeof pos2 === 'string' ? pos2.trim() : '';
|
||||
}
|
||||
|
||||
function hasKanjiChar(text: string): boolean {
|
||||
for (const char of text) {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
continue;
|
||||
}
|
||||
if (
|
||||
(code >= 0x3400 && code <= 0x4dbf) ||
|
||||
(code >= 0x4e00 && code <= 0x9fff) ||
|
||||
(code >= 0xf900 && code <= 0xfaff)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function isExcludedComponent(
|
||||
pos1: string | undefined,
|
||||
pos2: string | undefined,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): boolean {
|
||||
return (
|
||||
(typeof pos1 === 'string' && pos1Exclusions.has(pos1)) ||
|
||||
(typeof pos2 === 'string' && pos2Exclusions.has(pos2))
|
||||
);
|
||||
}
|
||||
|
||||
function shouldAllowContentLedMergedTokenFrequency(
|
||||
normalizedPos1: string,
|
||||
normalizedPos2: string,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): boolean {
|
||||
const pos1Parts = splitNormalizedTagParts(normalizedPos1);
|
||||
if (pos1Parts.length < 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const pos2Parts = splitNormalizedTagParts(normalizedPos2);
|
||||
if (isExcludedComponent(pos1Parts[0], pos2Parts[0], pos1Exclusions, pos2Exclusions)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const componentCount = Math.max(pos1Parts.length, pos2Parts.length);
|
||||
for (let index = 1; index < componentCount; index += 1) {
|
||||
if (!isExcludedComponent(pos1Parts[index], pos2Parts[index], pos1Exclusions, pos2Exclusions)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function isFrequencyExcludedByPos(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
@@ -109,13 +257,20 @@ function isFrequencyExcludedByPos(
|
||||
|
||||
const normalizedPos1 = normalizePos1Tag(token.pos1);
|
||||
const hasPos1 = normalizedPos1.length > 0;
|
||||
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
|
||||
const normalizedPos2 = normalizePos2Tag(token.pos2);
|
||||
const hasPos2 = normalizedPos2.length > 0;
|
||||
const allowContentLedMergedToken = shouldAllowContentLedMergedTokenFrequency(
|
||||
normalizedPos1,
|
||||
normalizedPos2,
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
);
|
||||
|
||||
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions) && !allowContentLedMergedToken) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const normalizedPos2 = normalizePos2Tag(token.pos2);
|
||||
const hasPos2 = normalizedPos2.length > 0;
|
||||
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
|
||||
if (isExcludedByTagSet(normalizedPos2, pos2Exclusions) && !allowContentLedMergedToken) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -133,26 +288,43 @@ function isFrequencyExcludedByPos(
|
||||
);
|
||||
}
|
||||
|
||||
function applyFrequencyMarking(
|
||||
tokens: MergedToken[],
|
||||
function shouldKeepFrequencyForNonIndependentKanjiNoun(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): MergedToken[] {
|
||||
return tokens.map((token) => {
|
||||
if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) {
|
||||
return { ...token, frequencyRank: undefined };
|
||||
}
|
||||
): boolean {
|
||||
if (pos1Exclusions.has('名詞')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
|
||||
const rank = Math.max(1, Math.floor(token.frequencyRank));
|
||||
return { ...token, frequencyRank: rank };
|
||||
}
|
||||
const rank =
|
||||
typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)
|
||||
? Math.max(1, Math.floor(token.frequencyRank))
|
||||
: null;
|
||||
if (rank === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
frequencyRank: undefined,
|
||||
};
|
||||
});
|
||||
const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
|
||||
const pos2Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos2));
|
||||
if (pos1Parts.length !== 1 || pos2Parts.length !== 1) {
|
||||
return false;
|
||||
}
|
||||
if (pos1Parts[0] !== '名詞' || pos2Parts[0] !== '非自立') {
|
||||
return false;
|
||||
}
|
||||
|
||||
return hasKanjiChar(token.surface) || hasKanjiChar(token.headword);
|
||||
}
|
||||
|
||||
export function shouldExcludeTokenFromVocabularyPersistence(
|
||||
token: MergedToken,
|
||||
options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
|
||||
): boolean {
|
||||
return isFrequencyExcludedByPos(
|
||||
token,
|
||||
resolvePos1Exclusions(options),
|
||||
resolvePos2Exclusions(options),
|
||||
);
|
||||
}
|
||||
|
||||
function getCachedJlptLevel(
|
||||
@@ -312,6 +484,23 @@ function isReduplicatedKanaSfx(text: string): boolean {
|
||||
return chars.slice(0, half).join('') === chars.slice(half).join('');
|
||||
}
|
||||
|
||||
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
|
||||
const normalized = normalizeJlptTextForExclusion(text);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isReduplicatedKanaSfx(normalized)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (normalized.length <= 1 || !normalized.endsWith('と')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return isReduplicatedKanaSfx(normalized.slice(0, -1));
|
||||
}
|
||||
|
||||
function hasAdjacentKanaRepeat(text: string): boolean {
|
||||
const normalized = normalizeJlptTextForExclusion(text);
|
||||
if (!normalized) {
|
||||
@@ -386,12 +575,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
const candidates = [
|
||||
resolveJlptLookupText(token),
|
||||
token.surface,
|
||||
token.reading,
|
||||
token.headword,
|
||||
].filter(
|
||||
const candidates = [resolveJlptLookupText(token), token.surface, token.headword].filter(
|
||||
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
|
||||
);
|
||||
|
||||
@@ -414,24 +598,110 @@ function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
function applyJlptMarking(
|
||||
tokens: MergedToken[],
|
||||
getJlptLevel: (text: string) => JlptLevel | null,
|
||||
): MergedToken[] {
|
||||
return tokens.map((token) => {
|
||||
if (!isJlptEligibleToken(token)) {
|
||||
return { ...token, jlptLevel: undefined };
|
||||
function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
|
||||
const candidates = [token.surface, token.reading, resolveJlptLookupText(token)].filter(
|
||||
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
|
||||
);
|
||||
|
||||
for (const candidate of candidates) {
|
||||
const trimmedCandidate = candidate.trim();
|
||||
if (!trimmedCandidate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
|
||||
const fallbackLevel =
|
||||
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
|
||||
const normalizedCandidate = normalizeJlptTextForExclusion(trimmedCandidate);
|
||||
if (!normalizedCandidate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel,
|
||||
};
|
||||
});
|
||||
if (
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmedCandidate) ||
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate) ||
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmedCandidate) ||
|
||||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalizedCandidate)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
isTrailingSmallTsuKanaSfx(trimmedCandidate) ||
|
||||
isTrailingSmallTsuKanaSfx(normalizedCandidate) ||
|
||||
isReduplicatedKanaSfxWithOptionalTrailingTo(trimmedCandidate) ||
|
||||
isReduplicatedKanaSfxWithOptionalTrailingTo(normalizedCandidate)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
export function shouldExcludeTokenFromSubtitleAnnotations(token: MergedToken): boolean {
|
||||
return sharedShouldExcludeTokenFromSubtitleAnnotations(token);
|
||||
}
|
||||
|
||||
export function stripSubtitleAnnotationMetadata(token: MergedToken): MergedToken {
|
||||
return sharedStripSubtitleAnnotationMetadata(token);
|
||||
}
|
||||
|
||||
/**
 * Determines whether a token counts as "known" for N+1 marking.
 *
 * A token is known when it is already flagged (`token.isKnown`), when the
 * resolved match text is in the known-word store, or when its reading —
 * if it differs from the match text — is in the store.
 */
function computeTokenKnownStatus(
  token: MergedToken,
  isKnownWord: (text: string) => boolean,
  knownWordMatchMode: NPlusOneMatchMode,
): boolean {
  const matchText = resolveKnownWordText(token.surface, token.headword, knownWordMatchMode);
  // NOTE(review): matchText is guarded for falsiness here but dereferenced
  // unconditionally below (matchText.trim()) — presumably resolveKnownWordText
  // always returns a string (possibly empty); confirm it can never be
  // null/undefined, otherwise the final return line can throw.
  if (token.isKnown || (matchText ? isKnownWord(matchText) : false)) {
    return true;
  }

  const normalizedReading = token.reading.trim();
  if (!normalizedReading) {
    return false;
  }

  // Fall back to a reading lookup only when it differs from the match text,
  // so the same string is not checked against the store twice.
  return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
}
|
||||
|
||||
function filterTokenFrequencyRank(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): number | undefined {
|
||||
if (
|
||||
isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions) &&
|
||||
!shouldKeepFrequencyForNonIndependentKanjiNoun(token, pos1Exclusions)
|
||||
) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
|
||||
return Math.max(1, Math.floor(token.frequencyRank));
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function computeTokenJlptLevel(
|
||||
token: MergedToken,
|
||||
getJlptLevel: (text: string) => JlptLevel | null,
|
||||
): JlptLevel | undefined {
|
||||
if (!isJlptEligibleToken(token)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const primaryLevel = getCachedJlptLevel(resolveJlptLookupText(token), getJlptLevel);
|
||||
const fallbackLevel =
|
||||
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
|
||||
|
||||
const level = primaryLevel ?? fallbackLevel ?? token.jlptLevel;
|
||||
return level ?? undefined;
|
||||
}
|
||||
|
||||
function hasPrioritizedNameMatch(
|
||||
token: MergedToken,
|
||||
options: Pick<AnnotationStageOptions, 'nameMatchEnabled'>,
|
||||
): boolean {
|
||||
return options.nameMatchEnabled !== false && token.isNameMatch === true;
|
||||
}
|
||||
|
||||
export function annotateTokens(
|
||||
@@ -442,36 +712,50 @@ export function annotateTokens(
|
||||
const pos1Exclusions = resolvePos1Exclusions(options);
|
||||
const pos2Exclusions = resolvePos2Exclusions(options);
|
||||
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
|
||||
const knownMarkedTokens = nPlusOneEnabled
|
||||
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: tokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
|
||||
const nameMatchEnabled = options.nameMatchEnabled !== false;
|
||||
const frequencyEnabled = options.frequencyEnabled !== false;
|
||||
const frequencyMarkedTokens = frequencyEnabled
|
||||
? applyFrequencyMarking(knownMarkedTokens, pos1Exclusions, pos2Exclusions)
|
||||
: knownMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
frequencyRank: undefined,
|
||||
}));
|
||||
|
||||
const jlptEnabled = options.jlptEnabled !== false;
|
||||
const jlptMarkedTokens = jlptEnabled
|
||||
? applyJlptMarking(frequencyMarkedTokens, deps.getJlptLevel)
|
||||
: frequencyMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
jlptLevel: undefined,
|
||||
}));
|
||||
|
||||
// Single pass: compute known word status, frequency filtering, and JLPT level together
|
||||
const annotated = tokens.map((token) => {
|
||||
if (
|
||||
sharedShouldExcludeTokenFromSubtitleAnnotations(token, {
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
})
|
||||
) {
|
||||
return sharedStripSubtitleAnnotationMetadata(token, {
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
});
|
||||
}
|
||||
|
||||
const prioritizedNameMatch = nameMatchEnabled && token.isNameMatch === true;
|
||||
const isKnown = nPlusOneEnabled
|
||||
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: false;
|
||||
|
||||
const frequencyRank =
|
||||
frequencyEnabled && !prioritizedNameMatch
|
||||
? filterTokenFrequencyRank(token, pos1Exclusions, pos2Exclusions)
|
||||
: undefined;
|
||||
|
||||
const jlptLevel =
|
||||
jlptEnabled && !prioritizedNameMatch
|
||||
? computeTokenJlptLevel(token, deps.getJlptLevel)
|
||||
: undefined;
|
||||
|
||||
return {
|
||||
...token,
|
||||
isKnown,
|
||||
isNPlusOneTarget: nPlusOneEnabled && !prioritizedNameMatch ? token.isNPlusOneTarget : false,
|
||||
frequencyRank,
|
||||
jlptLevel,
|
||||
};
|
||||
});
|
||||
|
||||
if (!nPlusOneEnabled) {
|
||||
return jlptMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
return annotated;
|
||||
}
|
||||
|
||||
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
|
||||
@@ -482,10 +766,25 @@ export function annotateTokens(
|
||||
? minSentenceWordsForNPlusOne
|
||||
: 3;
|
||||
|
||||
return markNPlusOneTargets(
|
||||
jlptMarkedTokens,
|
||||
const nPlusOneMarked = markNPlusOneTargets(
|
||||
annotated,
|
||||
sanitizedMinSentenceWordsForNPlusOne,
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
);
|
||||
|
||||
if (!nameMatchEnabled) {
|
||||
return nPlusOneMarked;
|
||||
}
|
||||
|
||||
return nPlusOneMarked.map((token) =>
|
||||
hasPrioritizedNameMatch(token, options)
|
||||
? {
|
||||
...token,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
}
|
||||
: token,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -212,3 +212,57 @@ test('merges trailing katakana continuation without headword into previous token
|
||||
],
|
||||
);
|
||||
});
|
||||
|
||||
// Regression: merged content+function token candidate must not beat a multi-token split
|
||||
// candidate that preserves the content token as a standalone frequency-eligible unit.
|
||||
// Background: Yomitan scanning can produce a single-token candidate where a content word
|
||||
// is merged with trailing function particles (e.g. かかってこいよ → headword かかってくる).
|
||||
// When a competing multi-token candidate splits content and function separately, the
|
||||
// multi-token candidate should win so the content token remains frequency-highlightable.
|
||||
test('multi-token candidate beats single merged content+function token candidate (frequency regression)', () => {
|
||||
// Candidate A: single merged token — content verb fused with trailing sentence-final particle
|
||||
// This is the "bad" candidate: downstream annotation would exclude frequency for the whole
|
||||
// token because the merged pos1 would contain a function-word component.
|
||||
const mergedCandidate = makeParseItem('scanning-parser', [
|
||||
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
|
||||
]);
|
||||
|
||||
// Candidate B: two tokens — content verb surface + particle separately.
|
||||
// The content token is frequency-eligible on its own.
|
||||
const splitCandidate = makeParseItem('scanning-parser', [
|
||||
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
|
||||
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
|
||||
]);
|
||||
|
||||
// When merged candidate comes first in the array, multi-token split still wins.
|
||||
const tokens = selectYomitanParseTokens(
|
||||
[mergedCandidate, splitCandidate],
|
||||
() => false,
|
||||
'headword',
|
||||
);
|
||||
assert.equal(tokens?.length, 2);
|
||||
assert.equal(tokens?.[0]?.surface, 'かかってこい');
|
||||
assert.equal(tokens?.[0]?.headword, 'かかってくる');
|
||||
assert.equal(tokens?.[1]?.surface, 'よ');
|
||||
});
|
||||
|
||||
test('multi-token candidate beats single merged content+function token regardless of input order', () => {
|
||||
const mergedCandidate = makeParseItem('scanning-parser', [
|
||||
[{ text: 'かかってこいよ', reading: 'かかってこいよ', headword: 'かかってくる' }],
|
||||
]);
|
||||
|
||||
const splitCandidate = makeParseItem('scanning-parser', [
|
||||
[{ text: 'かかってこい', reading: 'かかってこい', headword: 'かかってくる' }],
|
||||
[{ text: 'よ', reading: 'よ', headword: 'よ' }],
|
||||
]);
|
||||
|
||||
// Split candidate comes first — should still win over merged.
|
||||
const tokens = selectYomitanParseTokens(
|
||||
[splitCandidate, mergedCandidate],
|
||||
() => false,
|
||||
'headword',
|
||||
);
|
||||
assert.equal(tokens?.length, 2);
|
||||
assert.equal(tokens?.[0]?.surface, 'かかってこい');
|
||||
assert.equal(tokens?.[1]?.surface, 'よ');
|
||||
});
|
||||
|
||||
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { PartOfSpeech } from '../../../types';
|
||||
|
||||
function normalizePosTag(value: string | null | undefined): string {
|
||||
return typeof value === 'string' ? value.trim() : '';
|
||||
}
|
||||
|
||||
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
|
||||
return typeof value === 'string' && Object.values(PartOfSpeech).includes(value as PartOfSpeech);
|
||||
}
|
||||
|
||||
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
|
||||
switch (normalizePosTag(pos1)) {
|
||||
case '名詞':
|
||||
return PartOfSpeech.noun;
|
||||
case '動詞':
|
||||
return PartOfSpeech.verb;
|
||||
case '形容詞':
|
||||
return PartOfSpeech.i_adjective;
|
||||
case '形状詞':
|
||||
case '形容動詞':
|
||||
return PartOfSpeech.na_adjective;
|
||||
case '助詞':
|
||||
return PartOfSpeech.particle;
|
||||
case '助動詞':
|
||||
return PartOfSpeech.bound_auxiliary;
|
||||
case '記号':
|
||||
case '補助記号':
|
||||
return PartOfSpeech.symbol;
|
||||
default:
|
||||
return PartOfSpeech.other;
|
||||
}
|
||||
}
|
||||
|
||||
export function deriveStoredPartOfSpeech(input: {
|
||||
partOfSpeech?: string | null;
|
||||
pos1?: string | null;
|
||||
}): PartOfSpeech {
|
||||
const pos1Parts = normalizePosTag(input.pos1)
|
||||
.split('|')
|
||||
.map((part) => part.trim())
|
||||
.filter((part) => part.length > 0);
|
||||
|
||||
if (pos1Parts.length > 0) {
|
||||
const derivedParts = [...new Set(pos1Parts.map((part) => mapMecabPos1ToPartOfSpeech(part)))];
|
||||
if (derivedParts.length === 1) {
|
||||
return derivedParts[0]!;
|
||||
}
|
||||
return PartOfSpeech.other;
|
||||
}
|
||||
|
||||
if (isPartOfSpeechValue(input.partOfSpeech)) {
|
||||
return input.partOfSpeech;
|
||||
}
|
||||
|
||||
return PartOfSpeech.other;
|
||||
}
|
||||
352
src/core/services/tokenizer/subtitle-annotation-filter.ts
Normal file
352
src/core/services/tokenizer/subtitle-annotation-filter.ts
Normal file
@@ -0,0 +1,352 @@
|
||||
import {
|
||||
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
|
||||
resolveAnnotationPos1ExclusionSet,
|
||||
} from '../../../token-pos1-exclusions';
|
||||
import {
|
||||
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
|
||||
resolveAnnotationPos2ExclusionSet,
|
||||
} from '../../../token-pos2-exclusions';
|
||||
import { MergedToken, PartOfSpeech } from '../../../types';
|
||||
import { shouldIgnoreJlptByTerm } from '../jlpt-token-filter';
|
||||
|
||||
// Katakana → hiragana conversion: the two blocks are a fixed codepoint offset
// apart; only U+30A1..U+30F6 have hiragana counterparts (ー etc. are kept as-is).
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;

// Interjection / filler sounds excluded from subtitle annotations verbatim.
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
  'ああ',
  'ええ',
  'うう',
  'おお',
  'はあ',
  'はは',
  'へえ',
  'ふう',
  'ほう',
]);
// Explanatory sentence endings are generated combinatorially as
// prefix + core + trailing particle (e.g. ん + だ + よ → んだよ).
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
  'だ',
  'です',
  'でした',
  'だった',
  'では',
  'じゃ',
  'でしょう',
  'だろう',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
  '',
  'か',
  'ね',
  'よ',
  'な',
  'よね',
  'かな',
  'かね',
] as const;
// Question-like tails matched directly against prefix + suffix (e.g. んか, のかな).
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES = ['か', 'かな', 'かね'] as const;
// Full cartesian product of prefix × core × particle, precomputed once.
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set(
  SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.flatMap((prefix) =>
    SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES.flatMap((core) =>
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES.map(
        (particle) => `${prefix}${core}${particle}`,
      ),
    ),
  ),
);
// Quotative/emphatic particle tails that may get fused onto a content word
// by Yomitan scanning (surface = headword + suffix).
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
  'って',
  'ってよ',
  'ってね',
  'ってな',
  'ってさ',
  'ってか',
  'ってば',
]);
// pos1 tags a token may consist of to qualify as an auxiliary-stem grammar tail.
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
|
||||
|
||||
export interface SubtitleAnnotationFilterOptions {
|
||||
pos1Exclusions?: ReadonlySet<string>;
|
||||
pos2Exclusions?: ReadonlySet<string>;
|
||||
}
|
||||
|
||||
function normalizePosTag(pos: string | undefined): string {
|
||||
return typeof pos === 'string' ? pos.trim() : '';
|
||||
}
|
||||
|
||||
function splitNormalizedTagParts(normalizedTag: string): string[] {
|
||||
if (!normalizedTag) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return normalizedTag
|
||||
.split('|')
|
||||
.map((part) => part.trim())
|
||||
.filter((part) => part.length > 0);
|
||||
}
|
||||
|
||||
function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<string>): boolean {
|
||||
const parts = splitNormalizedTagParts(normalizedTag);
|
||||
if (parts.length === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return parts.every((part) => exclusions.has(part));
|
||||
}
|
||||
|
||||
function resolvePos1Exclusions(
|
||||
options: SubtitleAnnotationFilterOptions = {},
|
||||
): ReadonlySet<string> {
|
||||
if (options.pos1Exclusions) {
|
||||
return options.pos1Exclusions;
|
||||
}
|
||||
|
||||
return resolveAnnotationPos1ExclusionSet(DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG);
|
||||
}
|
||||
|
||||
function resolvePos2Exclusions(
|
||||
options: SubtitleAnnotationFilterOptions = {},
|
||||
): ReadonlySet<string> {
|
||||
if (options.pos2Exclusions) {
|
||||
return options.pos2Exclusions;
|
||||
}
|
||||
|
||||
return resolveAnnotationPos2ExclusionSet(DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG);
|
||||
}
|
||||
|
||||
function normalizeKana(text: string): string {
|
||||
const raw = text.trim();
|
||||
if (!raw) {
|
||||
return '';
|
||||
}
|
||||
|
||||
let normalized = '';
|
||||
for (const char of raw) {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (code >= KATAKANA_CODEPOINT_START && code <= KATAKANA_CODEPOINT_END) {
|
||||
normalized += String.fromCodePoint(code - KATAKANA_TO_HIRAGANA_OFFSET);
|
||||
continue;
|
||||
}
|
||||
|
||||
normalized += char;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function isKanaChar(char: string): boolean {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
}
|
||||
|
||||
function isTrailingSmallTsuKanaSfx(text: string): boolean {
|
||||
const normalized = normalizeKana(text);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const chars = [...normalized];
|
||||
if (chars.length < 2 || chars.length > 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!chars.every(isKanaChar)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return chars[chars.length - 1] === 'っ';
|
||||
}
|
||||
|
||||
function isReduplicatedKanaSfx(text: string): boolean {
|
||||
const normalized = normalizeKana(text);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const chars = [...normalized];
|
||||
if (chars.length < 4 || chars.length % 2 !== 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!chars.every(isKanaChar)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const half = chars.length / 2;
|
||||
return chars.slice(0, half).join('') === chars.slice(half).join('');
|
||||
}
|
||||
|
||||
function isReduplicatedKanaSfxWithOptionalTrailingTo(text: string): boolean {
|
||||
const normalized = normalizeKana(text);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isReduplicatedKanaSfx(normalized)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (normalized.length <= 1 || !normalized.endsWith('と')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return isReduplicatedKanaSfx(normalized.slice(0, -1));
|
||||
}
|
||||
|
||||
// Detects Yomitan merged tokens where a content word absorbed trailing
// quotative/emphatic particles (surface = headword + って/ってよ/...), so the
// merged token can be excluded and the content word stays annotatable.
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
  const normalizedSurface = normalizeKana(token.surface);
  const normalizedHeadword = normalizeKana(token.headword);
  if (!normalizedSurface || !normalizedHeadword || !normalizedSurface.startsWith(normalizedHeadword)) {
    return false;
  }

  // The part after the headword must be one of the known particle tails.
  const suffix = normalizedSurface.slice(normalizedHeadword.length);
  if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(suffix)) {
    return false;
  }

  // A merged token carries one pos1 per component; a single-part pos1 means
  // nothing was merged, so this pattern does not apply.
  const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
  if (pos1Parts.length < 2) {
    return false;
  }

  // Leading component must be a content word, and every trailing component
  // must be a particle (助詞).
  // NOTE(review): this always uses the DEFAULT pos1 exclusions
  // (resolvePos1Exclusions() with no argument), even when the caller passed
  // custom exclusions through SubtitleAnnotationFilterOptions — confirm this
  // is intentional.
  const [leadingPos1, ...trailingPos1] = pos1Parts;
  if (!leadingPos1 || resolvePos1Exclusions().has(leadingPos1)) {
    return false;
  }

  return trailingPos1.length > 0 && trailingPos1.every((part) => part === '助詞');
}
|
||||
|
||||
function isAuxiliaryStemGrammarTailToken(token: MergedToken): boolean {
|
||||
const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
|
||||
if (pos1Parts.length === 0 || !pos1Parts.every((part) => AUXILIARY_STEM_GRAMMAR_TAIL_POS1.has(part))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const pos3Parts = splitNormalizedTagParts(normalizePosTag(token.pos3));
|
||||
return pos3Parts.includes('助動詞語幹');
|
||||
}
|
||||
|
||||
// Term-based exclusion: checks the token's surface, reading, and headword
// (both raw-trimmed and kana-normalized) against the interjection list, the
// explanatory-ending list, the shared JLPT term filter, and the kana-SFX
// heuristics. Any single hit excludes the token.
function isExcludedByTerm(token: MergedToken): boolean {
  const candidates = [token.surface, token.reading, token.headword].filter(
    (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
  );

  for (const candidate of candidates) {
    const trimmed = candidate.trim();
    if (!trimmed) {
      continue;
    }

    const normalized = normalizeKana(trimmed);
    if (!normalized) {
      continue;
    }

    // Question-like explanatory tails built as prefix + thought suffix
    // (e.g. んか, のかな) are not in the precomputed endings set, so they
    // are matched combinatorially here.
    if (
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES.some((prefix) =>
        SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES.some(
          (suffix) => normalized === `${prefix}${suffix}`,
        ),
      )
    ) {
      return true;
    }

    // Direct set membership: interjections, explanatory endings, and the
    // shared JLPT term filter — checked on both raw and normalized forms.
    if (
      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmed) ||
      SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalized) ||
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmed) ||
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalized) ||
      shouldIgnoreJlptByTerm(trimmed) ||
      shouldIgnoreJlptByTerm(normalized)
    ) {
      return true;
    }

    // Sound-effect heuristics: trailing small っ (はっ) and reduplicated
    // kana, optionally with a quotative と (どきどき(と)).
    if (
      isTrailingSmallTsuKanaSfx(trimmed) ||
      isTrailingSmallTsuKanaSfx(normalized) ||
      isReduplicatedKanaSfxWithOptionalTrailingTo(trimmed) ||
      isReduplicatedKanaSfxWithOptionalTrailingTo(normalized)
    ) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Decides whether a token should be excluded from subtitle annotations
 * (known/N+1/name/frequency/JLPT markers).
 *
 * Exclusion cascade, first hit wins:
 *   1. every pos1 component is in the pos1 exclusion set;
 *   2. every pos2 component is in the pos2 exclusion set;
 *   3. no pos tags at all, but the stored partOfSpeech is a particle,
 *      bound auxiliary, or symbol (fallback for tag-less tokens);
 *   4. auxiliary-stem grammar tail (pos3 助動詞語幹);
 *   5. content word merged with trailing particles (って-family suffix);
 *   6. term-based exclusion (interjections, explanatory endings, kana SFX).
 *
 * @param options optional caller-supplied pos1/pos2 exclusion sets; defaults
 *   come from the annotation exclusion configs.
 */
export function shouldExcludeTokenFromSubtitleAnnotations(
  token: MergedToken,
  options: SubtitleAnnotationFilterOptions = {},
): boolean {
  const pos1Exclusions = resolvePos1Exclusions(options);
  const pos2Exclusions = resolvePos2Exclusions(options);
  const normalizedPos1 = normalizePosTag(token.pos1);
  const normalizedPos2 = normalizePosTag(token.pos2);
  const hasPos1 = normalizedPos1.length > 0;
  const hasPos2 = normalizedPos2.length > 0;

  if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
    return true;
  }

  if (isExcludedByTagSet(normalizedPos2, pos2Exclusions)) {
    return true;
  }

  // Tag-less tokens fall back to the coarse stored partOfSpeech.
  if (
    !hasPos1 &&
    !hasPos2 &&
    (token.partOfSpeech === PartOfSpeech.particle ||
      token.partOfSpeech === PartOfSpeech.bound_auxiliary ||
      token.partOfSpeech === PartOfSpeech.symbol)
  ) {
    return true;
  }

  if (isAuxiliaryStemGrammarTailToken(token)) {
    return true;
  }

  if (isExcludedTrailingParticleMergedToken(token)) {
    return true;
  }

  return isExcludedByTerm(token);
}
|
||||
|
||||
export function stripSubtitleAnnotationMetadata(
|
||||
token: MergedToken,
|
||||
options: SubtitleAnnotationFilterOptions = {},
|
||||
): MergedToken {
|
||||
if (!shouldExcludeTokenFromSubtitleAnnotations(token, options)) {
|
||||
return token;
|
||||
}
|
||||
|
||||
return {
|
||||
...token,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: false,
|
||||
jlptLevel: undefined,
|
||||
frequencyRank: undefined,
|
||||
};
|
||||
}
|
||||
@@ -188,6 +188,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
|
||||
{
|
||||
term: '猫',
|
||||
reading: 'ねこ',
|
||||
hasReading: true,
|
||||
dictionary: 'freq-dict',
|
||||
dictionaryPriority: 0,
|
||||
frequency: 77,
|
||||
@@ -197,6 +198,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
|
||||
{
|
||||
term: '鍛える',
|
||||
reading: 'きたえる',
|
||||
hasReading: false,
|
||||
dictionary: 'freq-dict',
|
||||
dictionaryPriority: 1,
|
||||
frequency: 46961,
|
||||
@@ -217,9 +219,11 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async
|
||||
|
||||
assert.equal(result.length, 2);
|
||||
assert.equal(result[0]?.term, '猫');
|
||||
assert.equal(result[0]?.hasReading, true);
|
||||
assert.equal(result[0]?.frequency, 77);
|
||||
assert.equal(result[0]?.dictionaryPriority, 0);
|
||||
assert.equal(result[1]?.term, '鍛える');
|
||||
assert.equal(result[1]?.hasReading, false);
|
||||
assert.equal(result[1]?.frequency, 2847);
|
||||
assert.match(scriptValue, /getTermFrequencies/);
|
||||
assert.match(scriptValue, /optionsGetFull/);
|
||||
@@ -247,6 +251,96 @@ test('requestYomitanTermFrequencies prefers primary rank from displayValue array
|
||||
assert.equal(result[0]?.frequency, 7141);
|
||||
});
|
||||
|
||||
test('requestYomitanTermFrequencies prefers primary rank from displayValue string pair when raw frequency matches trailing count', async () => {
|
||||
const deps = createDeps(async () => [
|
||||
{
|
||||
term: '潜む',
|
||||
reading: 'ひそむ',
|
||||
dictionary: 'freq-dict',
|
||||
dictionaryPriority: 0,
|
||||
frequency: 121,
|
||||
displayValue: '118,121',
|
||||
displayValueParsed: false,
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0]?.term, '潜む');
|
||||
assert.equal(result[0]?.frequency, 118);
|
||||
});
|
||||
|
||||
test('requestYomitanTermFrequencies uses leading display digits for displayValue strings', async () => {
|
||||
const deps = createDeps(async () => [
|
||||
{
|
||||
term: '例',
|
||||
reading: 'れい',
|
||||
dictionary: 'freq-dict',
|
||||
dictionaryPriority: 0,
|
||||
frequency: 1234,
|
||||
displayValue: '1,234',
|
||||
displayValueParsed: false,
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await requestYomitanTermFrequencies([{ term: '例', reading: 'れい' }], deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
assert.equal(result.length, 1);
|
||||
assert.equal(result[0]?.term, '例');
|
||||
assert.equal(result[0]?.frequency, 1);
|
||||
});
|
||||
|
||||
test('requestYomitanTermFrequencies ignores occurrence-based dictionaries for rank tagging', async () => {
|
||||
let metadataScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('getTermFrequencies')) {
|
||||
return [
|
||||
{
|
||||
term: '潜む',
|
||||
reading: 'ひそむ',
|
||||
dictionary: 'CC100',
|
||||
frequency: 118121,
|
||||
displayValue: null,
|
||||
displayValueParsed: false,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
if (script.includes('optionsGetFull')) {
|
||||
metadataScript = script;
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profileIndex: 0,
|
||||
scanLength: 40,
|
||||
dictionaries: ['CC100'],
|
||||
dictionaryPriorityByName: { CC100: 0 },
|
||||
dictionaryFrequencyModeByName: { CC100: 'occurrence-based' },
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
dictionaries: [{ name: 'CC100', enabled: true, id: 0 }],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return [];
|
||||
});
|
||||
|
||||
const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
assert.deepEqual(result, []);
|
||||
assert.match(metadataScript, /getDictionaryInfo/);
|
||||
});
|
||||
|
||||
test('requestYomitanTermFrequencies requests term-only fallback only after reading miss', async () => {
|
||||
const frequencyScripts: string[] = [];
|
||||
const deps = createDeps(async (script) => {
|
||||
@@ -485,6 +579,317 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of
|
||||
assert.match(scannerScript ?? '', /deinflect:\s*true/);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens extracts best frequency rank from selected termsFind entry', async () => {
|
||||
let scannerScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('termsFind')) {
|
||||
scannerScript = script;
|
||||
return [];
|
||||
}
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profileIndex: 0,
|
||||
scanLength: 40,
|
||||
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
|
||||
dictionaryPriorityByName: {
|
||||
'JPDBv2㋕': 0,
|
||||
Jiten: 1,
|
||||
CC100: 2,
|
||||
},
|
||||
dictionaryFrequencyModeByName: {
|
||||
'JPDBv2㋕': 'rank-based',
|
||||
Jiten: 'rank-based',
|
||||
CC100: 'rank-based',
|
||||
},
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
dictionaries: [
|
||||
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
|
||||
{ name: 'Jiten', enabled: true, id: 1 },
|
||||
{ name: 'CC100', enabled: true, id: 2 },
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
await requestYomitanScanTokens('潜み', deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
|
||||
if (action !== 'termsFind') {
|
||||
throw new Error(`unexpected action: ${action}`);
|
||||
}
|
||||
|
||||
const text = (params as { text?: string } | undefined)?.text ?? '';
|
||||
if (!text.startsWith('潜み')) {
|
||||
return { originalTextLength: 0, dictionaryEntries: [] };
|
||||
}
|
||||
|
||||
return {
|
||||
originalTextLength: 2,
|
||||
dictionaryEntries: [
|
||||
{
|
||||
headwords: [
|
||||
{
|
||||
term: '潜む',
|
||||
reading: 'ひそむ',
|
||||
sources: [{ originalText: '潜み', isPrimary: true, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
frequencies: [
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'JPDBv2㋕',
|
||||
frequency: 20181,
|
||||
displayValue: '4073,20181句',
|
||||
},
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'Jiten',
|
||||
frequency: 28594,
|
||||
displayValue: '4592,28594句',
|
||||
},
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'CC100',
|
||||
frequency: 118121,
|
||||
displayValue: null,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
});
|
||||
|
||||
assert.deepEqual(result, [
|
||||
{
|
||||
surface: '潜み',
|
||||
reading: 'ひそ',
|
||||
headword: '潜む',
|
||||
startPos: 0,
|
||||
endPos: 2,
|
||||
isNameMatch: false,
|
||||
frequencyRank: 4073,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens uses frequency from later exact-match entry when first exact entry has none', async () => {
|
||||
let scannerScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('termsFind')) {
|
||||
scannerScript = script;
|
||||
return [];
|
||||
}
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profileIndex: 0,
|
||||
scanLength: 40,
|
||||
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
|
||||
dictionaryPriorityByName: {
|
||||
'JPDBv2㋕': 0,
|
||||
Jiten: 1,
|
||||
CC100: 2,
|
||||
},
|
||||
dictionaryFrequencyModeByName: {
|
||||
'JPDBv2㋕': 'rank-based',
|
||||
Jiten: 'rank-based',
|
||||
CC100: 'rank-based',
|
||||
},
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
dictionaries: [
|
||||
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
|
||||
{ name: 'Jiten', enabled: true, id: 1 },
|
||||
{ name: 'CC100', enabled: true, id: 2 },
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
await requestYomitanScanTokens('者', deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
|
||||
if (action !== 'termsFind') {
|
||||
throw new Error(`unexpected action: ${action}`);
|
||||
}
|
||||
|
||||
const text = (params as { text?: string } | undefined)?.text ?? '';
|
||||
if (!text.startsWith('者')) {
|
||||
return { originalTextLength: 0, dictionaryEntries: [] };
|
||||
}
|
||||
|
||||
return {
|
||||
originalTextLength: 1,
|
||||
dictionaryEntries: [
|
||||
{
|
||||
headwords: [
|
||||
{
|
||||
term: '者',
|
||||
reading: 'もの',
|
||||
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
frequencies: [],
|
||||
},
|
||||
{
|
||||
headwords: [
|
||||
{
|
||||
term: '者',
|
||||
reading: 'もの',
|
||||
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
frequencies: [
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'JPDBv2㋕',
|
||||
frequency: 79601,
|
||||
displayValue: '475,79601句',
|
||||
},
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'Jiten',
|
||||
frequency: 338,
|
||||
displayValue: '338',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
});
|
||||
|
||||
assert.deepEqual(result, [
|
||||
{
|
||||
surface: '者',
|
||||
reading: 'もの',
|
||||
headword: '者',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
isNameMatch: false,
|
||||
frequencyRank: 475,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens can use frequency from later exact secondary-match entry', async () => {
|
||||
let scannerScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('termsFind')) {
|
||||
scannerScript = script;
|
||||
return [];
|
||||
}
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profileIndex: 0,
|
||||
scanLength: 40,
|
||||
dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'],
|
||||
dictionaryPriorityByName: {
|
||||
'JPDBv2㋕': 0,
|
||||
Jiten: 1,
|
||||
CC100: 2,
|
||||
},
|
||||
dictionaryFrequencyModeByName: {
|
||||
'JPDBv2㋕': 'rank-based',
|
||||
Jiten: 'rank-based',
|
||||
CC100: 'rank-based',
|
||||
},
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
dictionaries: [
|
||||
{ name: 'JPDBv2㋕', enabled: true, id: 0 },
|
||||
{ name: 'Jiten', enabled: true, id: 1 },
|
||||
{ name: 'CC100', enabled: true, id: 2 },
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
await requestYomitanScanTokens('者', deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
|
||||
if (action !== 'termsFind') {
|
||||
throw new Error(`unexpected action: ${action}`);
|
||||
}
|
||||
|
||||
const text = (params as { text?: string } | undefined)?.text ?? '';
|
||||
if (!text.startsWith('者')) {
|
||||
return { originalTextLength: 0, dictionaryEntries: [] };
|
||||
}
|
||||
|
||||
return {
|
||||
originalTextLength: 1,
|
||||
dictionaryEntries: [
|
||||
{
|
||||
headwords: [
|
||||
{
|
||||
term: '者',
|
||||
reading: 'もの',
|
||||
sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
frequencies: [],
|
||||
},
|
||||
{
|
||||
headwords: [
|
||||
{
|
||||
term: '者',
|
||||
reading: 'もの',
|
||||
sources: [{ originalText: '者', isPrimary: false, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
frequencies: [
|
||||
{
|
||||
headwordIndex: 0,
|
||||
dictionary: 'JPDBv2㋕',
|
||||
frequency: 79601,
|
||||
displayValue: '475,79601句',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
});
|
||||
|
||||
assert.deepEqual(result, [
|
||||
{
|
||||
surface: '者',
|
||||
reading: 'もの',
|
||||
headword: '者',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
isNameMatch: false,
|
||||
frequencyRank: 475,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('optionsGetFull')) {
|
||||
|
||||
@@ -20,19 +20,24 @@ interface YomitanParserRuntimeDeps {
|
||||
createYomitanExtensionWindow?: (pageName: string) => Promise<BrowserWindow | null>;
|
||||
}
|
||||
|
||||
type YomitanFrequencyMode = 'occurrence-based' | 'rank-based';
|
||||
|
||||
export interface YomitanDictionaryInfo {
|
||||
title: string;
|
||||
revision?: string | number;
|
||||
frequencyMode?: YomitanFrequencyMode;
|
||||
}
|
||||
|
||||
export interface YomitanTermFrequency {
|
||||
term: string;
|
||||
reading: string | null;
|
||||
hasReading: boolean;
|
||||
dictionary: string;
|
||||
dictionaryPriority: number;
|
||||
frequency: number;
|
||||
displayValue: string | null;
|
||||
displayValueParsed: boolean;
|
||||
frequencyDerivedFromDisplayValue: boolean;
|
||||
}
|
||||
|
||||
export interface YomitanTermReadingPair {
|
||||
@@ -47,6 +52,7 @@ export interface YomitanScanToken {
|
||||
startPos: number;
|
||||
endPos: number;
|
||||
isNameMatch?: boolean;
|
||||
frequencyRank?: number;
|
||||
}
|
||||
|
||||
interface YomitanProfileMetadata {
|
||||
@@ -54,6 +60,7 @@ interface YomitanProfileMetadata {
|
||||
scanLength: number;
|
||||
dictionaries: string[];
|
||||
dictionaryPriorityByName: Record<string, number>;
|
||||
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>;
|
||||
}
|
||||
|
||||
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
|
||||
@@ -78,7 +85,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
|
||||
typeof entry.headword === 'string' &&
|
||||
typeof entry.startPos === 'number' &&
|
||||
typeof entry.endPos === 'number' &&
|
||||
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'),
|
||||
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean') &&
|
||||
(entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number'),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -117,24 +125,22 @@ function parsePositiveFrequencyString(value: string): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0];
|
||||
if (!numericPrefix) {
|
||||
const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
|
||||
if (!numericMatch) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const chunks = numericPrefix.split(',');
|
||||
const normalizedNumber =
|
||||
chunks.length <= 1
|
||||
? (chunks[0] ?? '')
|
||||
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
|
||||
? chunks.join('')
|
||||
: (chunks[0] ?? '');
|
||||
const parsed = Number.parseInt(normalizedNumber, 10);
|
||||
const parsed = Number.parseFloat(numericMatch);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return parsed;
|
||||
const normalized = Math.floor(parsed);
|
||||
if (!Number.isFinite(normalized) || normalized <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function parsePositiveFrequencyValue(value: unknown): number | null {
|
||||
@@ -159,6 +165,19 @@ function parsePositiveFrequencyValue(value: unknown): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseDisplayFrequencyValue(value: unknown): number | null {
|
||||
if (typeof value === 'string') {
|
||||
const leadingDigits = value.trim().match(/^\d+/)?.[0];
|
||||
if (!leadingDigits) {
|
||||
return null;
|
||||
}
|
||||
const parsed = Number.parseInt(leadingDigits, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
||||
}
|
||||
|
||||
return parsePositiveFrequencyValue(value);
|
||||
}
|
||||
|
||||
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
|
||||
if (!isObject(value)) {
|
||||
return null;
|
||||
@@ -170,7 +189,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
|
||||
const displayValueRaw = value.displayValue;
|
||||
const parsedDisplayFrequency =
|
||||
displayValueRaw !== null && displayValueRaw !== undefined
|
||||
? parsePositiveFrequencyValue(displayValueRaw)
|
||||
? parseDisplayFrequencyValue(displayValueRaw)
|
||||
: null;
|
||||
const frequency = parsedDisplayFrequency ?? rawFrequency;
|
||||
if (!term || !dictionary || frequency === null) {
|
||||
@@ -184,17 +203,20 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
|
||||
|
||||
const reading =
|
||||
value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
|
||||
const hasReading = value.hasReading === false ? false : reading !== null;
|
||||
const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
|
||||
const displayValueParsed = value.displayValueParsed === true;
|
||||
|
||||
return {
|
||||
term,
|
||||
reading,
|
||||
hasReading,
|
||||
dictionary,
|
||||
dictionaryPriority,
|
||||
frequency,
|
||||
displayValue,
|
||||
displayValueParsed,
|
||||
frequencyDerivedFromDisplayValue: parsedDisplayFrequency !== null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -300,17 +322,34 @@ function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null
|
||||
}
|
||||
}
|
||||
|
||||
const dictionaryFrequencyModeByNameRaw = value.dictionaryFrequencyModeByName;
|
||||
const dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>> = {};
|
||||
if (isObject(dictionaryFrequencyModeByNameRaw)) {
|
||||
for (const [name, frequencyModeRaw] of Object.entries(dictionaryFrequencyModeByNameRaw)) {
|
||||
const normalizedName = name.trim();
|
||||
if (!normalizedName) {
|
||||
continue;
|
||||
}
|
||||
if (frequencyModeRaw !== 'occurrence-based' && frequencyModeRaw !== 'rank-based') {
|
||||
continue;
|
||||
}
|
||||
dictionaryFrequencyModeByName[normalizedName] = frequencyModeRaw;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
profileIndex,
|
||||
scanLength,
|
||||
dictionaries,
|
||||
dictionaryPriorityByName,
|
||||
dictionaryFrequencyModeByName,
|
||||
};
|
||||
}
|
||||
|
||||
function normalizeFrequencyEntriesWithPriority(
|
||||
rawResult: unknown[],
|
||||
dictionaryPriorityByName: Record<string, number>,
|
||||
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
|
||||
): YomitanTermFrequency[] {
|
||||
const normalized: YomitanTermFrequency[] = [];
|
||||
for (const entry of rawResult) {
|
||||
@@ -319,6 +358,10 @@ function normalizeFrequencyEntriesWithPriority(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (dictionaryFrequencyModeByName[frequency.dictionary] === 'occurrence-based') {
|
||||
continue;
|
||||
}
|
||||
|
||||
const dictionaryPriority = dictionaryPriorityByName[frequency.dictionary];
|
||||
normalized.push({
|
||||
...frequency,
|
||||
@@ -425,8 +468,34 @@ async function requestYomitanProfileMetadata(
|
||||
acc[entry.name] = index;
|
||||
return acc;
|
||||
}, {});
|
||||
let dictionaryFrequencyModeByName = {};
|
||||
try {
|
||||
const dictionaryInfo = await invoke("getDictionaryInfo", undefined);
|
||||
dictionaryFrequencyModeByName = Array.isArray(dictionaryInfo)
|
||||
? dictionaryInfo.reduce((acc, entry) => {
|
||||
if (!entry || typeof entry !== "object" || typeof entry.title !== "string") {
|
||||
return acc;
|
||||
}
|
||||
if (
|
||||
entry.frequencyMode === "occurrence-based" ||
|
||||
entry.frequencyMode === "rank-based"
|
||||
) {
|
||||
acc[entry.title] = entry.frequencyMode;
|
||||
}
|
||||
return acc;
|
||||
}, {})
|
||||
: {};
|
||||
} catch {
|
||||
dictionaryFrequencyModeByName = {};
|
||||
}
|
||||
|
||||
return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName };
|
||||
return {
|
||||
profileIndex,
|
||||
scanLength,
|
||||
dictionaries,
|
||||
dictionaryPriorityByName,
|
||||
dictionaryFrequencyModeByName
|
||||
};
|
||||
})();
|
||||
`;
|
||||
|
||||
@@ -774,7 +843,133 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
function getPreferredHeadword(dictionaryEntries, token) {
|
||||
function parsePositiveFrequencyNumber(value) {
|
||||
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
||||
return Math.max(1, Math.floor(value));
|
||||
}
|
||||
if (typeof value === 'string') {
|
||||
const numericMatch = value.trim().match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0];
|
||||
if (!numericMatch) { return null; }
|
||||
const parsed = Number.parseFloat(numericMatch);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) { return null; }
|
||||
return Math.max(1, Math.floor(parsed));
|
||||
}
|
||||
if (Array.isArray(value)) {
|
||||
for (const item of value) {
|
||||
const parsed = parsePositiveFrequencyNumber(item);
|
||||
if (parsed !== null) { return parsed; }
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
function parseDisplayFrequencyNumber(value) {
|
||||
if (typeof value === 'string') {
|
||||
const leadingDigits = value.trim().match(/^\d+/)?.[0];
|
||||
if (!leadingDigits) { return null; }
|
||||
const parsed = Number.parseInt(leadingDigits, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
||||
}
|
||||
return parsePositiveFrequencyNumber(value);
|
||||
}
|
||||
function getFrequencyDictionaryName(frequency) {
|
||||
const candidates = [
|
||||
frequency?.dictionary,
|
||||
frequency?.dictionaryName,
|
||||
frequency?.name,
|
||||
frequency?.title,
|
||||
frequency?.dictionaryTitle,
|
||||
frequency?.dictionaryAlias
|
||||
];
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === 'string' && candidate.trim().length > 0) {
|
||||
return candidate.trim();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
function getBestFrequencyRank(dictionaryEntry, headwordIndex, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
|
||||
let best = null;
|
||||
const headwordCount = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords.length : 0;
|
||||
for (const frequency of dictionaryEntry?.frequencies || []) {
|
||||
if (!frequency || typeof frequency !== 'object') { continue; }
|
||||
const frequencyHeadwordIndex = frequency.headwordIndex;
|
||||
if (typeof frequencyHeadwordIndex === 'number') {
|
||||
if (frequencyHeadwordIndex !== headwordIndex) { continue; }
|
||||
} else if (headwordCount > 1) {
|
||||
continue;
|
||||
}
|
||||
const dictionary = getFrequencyDictionaryName(frequency);
|
||||
if (!dictionary) { continue; }
|
||||
if (dictionaryFrequencyModeByName[dictionary] === 'occurrence-based') { continue; }
|
||||
const rank =
|
||||
parseDisplayFrequencyNumber(frequency.displayValue) ??
|
||||
parsePositiveFrequencyNumber(frequency.frequency);
|
||||
if (rank === null) { continue; }
|
||||
const priorityRaw = dictionaryPriorityByName[dictionary];
|
||||
const fallbackPriority =
|
||||
typeof frequency.dictionaryIndex === 'number' && Number.isFinite(frequency.dictionaryIndex)
|
||||
? Math.max(0, Math.floor(frequency.dictionaryIndex))
|
||||
: Number.MAX_SAFE_INTEGER;
|
||||
const priority =
|
||||
typeof priorityRaw === 'number' && Number.isFinite(priorityRaw)
|
||||
? Math.max(0, Math.floor(priorityRaw))
|
||||
: fallbackPriority;
|
||||
if (best === null || priority < best.priority || (priority === best.priority && rank < best.rank)) {
|
||||
best = { priority, rank };
|
||||
}
|
||||
}
|
||||
return best?.rank ?? null;
|
||||
}
|
||||
function hasExactSource(headword, token, requirePrimary) {
|
||||
for (const src of headword.sources || []) {
|
||||
if (src.originalText !== token) { continue; }
|
||||
if (requirePrimary && !src.isPrimary) { continue; }
|
||||
if (src.matchType !== 'exact') { continue; }
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
function collectExactHeadwordMatches(dictionaryEntries, token, requirePrimary) {
|
||||
const matches = [];
|
||||
for (const dictionaryEntry of dictionaryEntries || []) {
|
||||
const headwords = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords : [];
|
||||
for (let headwordIndex = 0; headwordIndex < headwords.length; headwordIndex += 1) {
|
||||
const headword = headwords[headwordIndex];
|
||||
if (!hasExactSource(headword, token, requirePrimary)) { continue; }
|
||||
matches.push({ dictionaryEntry, headword, headwordIndex });
|
||||
}
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
function sameHeadword(match, preferredMatch) {
|
||||
if (!match || !preferredMatch) {
|
||||
return false;
|
||||
}
|
||||
if (match.headword?.term !== preferredMatch.headword?.term) {
|
||||
return false;
|
||||
}
|
||||
const matchReading = typeof match.headword?.reading === 'string' ? match.headword.reading : '';
|
||||
const preferredReading =
|
||||
typeof preferredMatch.headword?.reading === 'string' ? preferredMatch.headword.reading : '';
|
||||
return matchReading === preferredReading;
|
||||
}
|
||||
function getBestFrequencyRankForMatches(matches, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
|
||||
let best = null;
|
||||
for (const match of matches) {
|
||||
const rank = getBestFrequencyRank(
|
||||
match.dictionaryEntry,
|
||||
match.headwordIndex,
|
||||
dictionaryPriorityByName,
|
||||
dictionaryFrequencyModeByName
|
||||
);
|
||||
if (rank === null) { continue; }
|
||||
if (best === null || rank < best) {
|
||||
best = rank;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
function getPreferredHeadword(dictionaryEntries, token, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
|
||||
function appendDictionaryNames(target, value) {
|
||||
if (!value || typeof value !== 'object') {
|
||||
return;
|
||||
@@ -813,36 +1008,33 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
|
||||
}
|
||||
return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
|
||||
}
|
||||
function hasExactPrimarySource(headword, token) {
|
||||
for (const src of headword.sources || []) {
|
||||
if (src.originalText !== token) { continue; }
|
||||
if (!src.isPrimary) { continue; }
|
||||
if (src.matchType !== 'exact') { continue; }
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const exactPrimaryMatches = collectExactHeadwordMatches(dictionaryEntries, token, true);
|
||||
let matchedNameDictionary = false;
|
||||
if (includeNameMatchMetadata) {
|
||||
for (const dictionaryEntry of dictionaryEntries || []) {
|
||||
if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
|
||||
for (const headword of dictionaryEntry.headwords || []) {
|
||||
if (!hasExactPrimarySource(headword, token)) { continue; }
|
||||
for (const match of exactPrimaryMatches) {
|
||||
if (match.dictionaryEntry !== dictionaryEntry) { continue; }
|
||||
matchedNameDictionary = true;
|
||||
break;
|
||||
}
|
||||
if (matchedNameDictionary) { break; }
|
||||
}
|
||||
}
|
||||
for (const dictionaryEntry of dictionaryEntries || []) {
|
||||
for (const headword of dictionaryEntry.headwords || []) {
|
||||
if (!hasExactPrimarySource(headword, token)) { continue; }
|
||||
return {
|
||||
term: headword.term,
|
||||
reading: headword.reading,
|
||||
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
|
||||
};
|
||||
}
|
||||
const preferredMatch = exactPrimaryMatches[0];
|
||||
if (preferredMatch) {
|
||||
const exactFrequencyMatches = collectExactHeadwordMatches(dictionaryEntries, token, false)
|
||||
.filter((match) => sameHeadword(match, preferredMatch));
|
||||
return {
|
||||
term: preferredMatch.headword.term,
|
||||
reading: preferredMatch.headword.reading,
|
||||
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(preferredMatch.dictionaryEntry),
|
||||
frequencyRank: getBestFrequencyRankForMatches(
|
||||
exactFrequencyMatches.length > 0 ? exactFrequencyMatches : exactPrimaryMatches,
|
||||
dictionaryPriorityByName,
|
||||
dictionaryFrequencyModeByName
|
||||
)
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -853,6 +1045,8 @@ function buildYomitanScanningScript(
|
||||
profileIndex: number,
|
||||
scanLength: number,
|
||||
includeNameMatchMetadata: boolean,
|
||||
dictionaryPriorityByName: Record<string, number>,
|
||||
dictionaryFrequencyModeByName: Partial<Record<string, YomitanFrequencyMode>>,
|
||||
): string {
|
||||
return `
|
||||
(async () => {
|
||||
@@ -876,6 +1070,8 @@ function buildYomitanScanningScript(
|
||||
});
|
||||
${YOMITAN_SCANNING_HELPERS}
|
||||
const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'};
|
||||
const dictionaryPriorityByName = ${JSON.stringify(dictionaryPriorityByName)};
|
||||
const dictionaryFrequencyModeByName = ${JSON.stringify(dictionaryFrequencyModeByName)};
|
||||
const text = ${JSON.stringify(text)};
|
||||
const details = {matchType: "exact", deinflect: true};
|
||||
const tokens = [];
|
||||
@@ -889,7 +1085,12 @@ ${YOMITAN_SCANNING_HELPERS}
|
||||
const originalTextLength = typeof result?.originalTextLength === "number" ? result.originalTextLength : 0;
|
||||
if (dictionaryEntries.length > 0 && originalTextLength > 0 && (originalTextLength !== character.length || isCodePointJapanese(codePoint))) {
|
||||
const source = substring.substring(0, originalTextLength);
|
||||
const preferredHeadword = getPreferredHeadword(dictionaryEntries, source);
|
||||
const preferredHeadword = getPreferredHeadword(
|
||||
dictionaryEntries,
|
||||
source,
|
||||
dictionaryPriorityByName,
|
||||
dictionaryFrequencyModeByName
|
||||
);
|
||||
if (preferredHeadword && typeof preferredHeadword.term === "string") {
|
||||
const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : "";
|
||||
const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source);
|
||||
@@ -900,6 +1101,10 @@ ${YOMITAN_SCANNING_HELPERS}
|
||||
startPos: i,
|
||||
endPos: i + originalTextLength,
|
||||
isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
|
||||
frequencyRank:
|
||||
typeof preferredHeadword.frequencyRank === "number" && Number.isFinite(preferredHeadword.frequencyRank)
|
||||
? Math.max(1, Math.floor(preferredHeadword.frequencyRank))
|
||||
: undefined,
|
||||
});
|
||||
i += originalTextLength;
|
||||
continue;
|
||||
@@ -1036,6 +1241,8 @@ export async function requestYomitanScanTokens(
|
||||
profileIndex,
|
||||
scanLength,
|
||||
options?.includeNameMatchMetadata === true,
|
||||
metadata?.dictionaryPriorityByName ?? {},
|
||||
metadata?.dictionaryFrequencyModeByName ?? {},
|
||||
),
|
||||
true,
|
||||
);
|
||||
@@ -1099,7 +1306,11 @@ async function fetchYomitanTermFrequencies(
|
||||
try {
|
||||
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
|
||||
return Array.isArray(rawResult)
|
||||
? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName)
|
||||
? normalizeFrequencyEntriesWithPriority(
|
||||
rawResult,
|
||||
metadata.dictionaryPriorityByName,
|
||||
metadata.dictionaryFrequencyModeByName,
|
||||
)
|
||||
: [];
|
||||
} catch (err) {
|
||||
logger.error('Yomitan term frequency request failed:', (err as Error).message);
|
||||
@@ -1541,10 +1752,15 @@ export async function getYomitanDictionaryInfo(
|
||||
.map((entry) => {
|
||||
const title = typeof entry.title === 'string' ? entry.title.trim() : '';
|
||||
const revision = entry.revision;
|
||||
const frequencyMode: YomitanFrequencyMode | undefined =
|
||||
entry.frequencyMode === 'occurrence-based' || entry.frequencyMode === 'rank-based'
|
||||
? entry.frequencyMode
|
||||
: undefined;
|
||||
return {
|
||||
title,
|
||||
revision:
|
||||
typeof revision === 'string' || typeof revision === 'number' ? revision : undefined,
|
||||
frequencyMode,
|
||||
};
|
||||
})
|
||||
.filter((entry) => entry.title.length > 0);
|
||||
@@ -1763,3 +1979,34 @@ export async function removeYomitanDictionarySettings(
|
||||
|
||||
return await setYomitanSettingsFull(optionsFull, deps, logger);
|
||||
}
|
||||
|
||||
export async function addYomitanNoteViaSearch(
|
||||
word: string,
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
logger: LoggerLike,
|
||||
): Promise<number | null> {
|
||||
const isReady = await ensureYomitanParserWindow(deps, logger);
|
||||
const parserWindow = deps.getYomitanParserWindow();
|
||||
if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const escapedWord = JSON.stringify(word);
|
||||
|
||||
const script = `
|
||||
(async () => {
|
||||
if (typeof window.__subminerAddNote !== 'function') {
|
||||
throw new Error('Yomitan search page bridge not initialized');
|
||||
}
|
||||
return await window.__subminerAddNote(${escapedWord});
|
||||
})();
|
||||
`;
|
||||
|
||||
try {
|
||||
const noteId = await parserWindow.webContents.executeJavaScript(script, true);
|
||||
return typeof noteId === 'number' ? noteId : null;
|
||||
} catch (err) {
|
||||
logger.error('Yomitan addNoteFromWord failed:', (err as Error).message);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user