make pretty

This commit is contained in:
2026-03-02 02:45:51 -08:00
parent 83d21c4b6d
commit be4db24861
42 changed files with 395 additions and 336 deletions

View File

@@ -46,23 +46,31 @@ export function pruneRetention(
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
const deletedSessionEvents = (db
.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`)
.run(eventCutoff) as { changes: number }).changes;
const deletedTelemetryRows = (db
.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`)
.run(telemetryCutoff) as { changes: number }).changes;
const deletedDailyRows = (db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }).changes;
const deletedMonthlyRows = (db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(monthCutoff)) as { changes: number }).changes;
const deletedEndedSessions = (db
.prepare(
`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`,
)
.run(telemetryCutoff) as { changes: number }).changes;
const deletedSessionEvents = (
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
changes: number;
}
).changes;
const deletedTelemetryRows = (
db.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`).run(telemetryCutoff) as {
changes: number;
}
).changes;
const deletedDailyRows = (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
).changes;
const deletedMonthlyRows = (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(monthCutoff)) as { changes: number }
).changes;
const deletedEndedSessions = (
db
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
.run(telemetryCutoff) as { changes: number }
).changes;
return {
deletedSessionEvents,

View File

@@ -17,6 +17,9 @@ test('extractLineVocabulary returns words and unique kanji', () => {
new Set(result.words.map((entry) => `${entry.headword}/${entry.word}`)),
new Set(['hello/hello', '你好/你好', '猫/猫']),
);
assert.equal(result.words.every((entry) => entry.reading === ''), true);
assert.equal(
result.words.every((entry) => entry.reading === ''),
true,
);
assert.deepEqual(new Set(result.kanji), new Set(['你', '好', '猫']));
});

View File

@@ -97,7 +97,8 @@ export function extractLineVocabulary(value: string): ExtractedLineVocabulary {
if (!cleaned) return { words: [], kanji: [] };
const wordSet = new Set<string>();
const tokenPattern = /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
// Token alternatives: ASCII word characters, kana runs, and CJK ideograph runs.
// The supplementary-plane range (U+20000–U+2A6DF) must be written with the `\u{...}`
// code-point escape under the `u` flag. A bare `\u20000` parses as U+2000 followed by the
// digit "0", which turned the intended range into `0-\u2a6d` and let the ideograph class
// swallow ASCII punctuation and letters while never matching the astral characters.
const tokenPattern =
  /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u{20000}-\u{2a6df}]+/gu;
const rawWords = cleaned.match(tokenPattern) ?? [];
for (const rawWord of rawWords) {
const normalizedWord = normalizeText(rawWord.toLowerCase());

View File

@@ -19,15 +19,8 @@ export function startSessionRecord(
CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?)
`,
)
.run(
sessionUuid,
videoId,
startedAtMs,
SESSION_STATUS_ACTIVE,
startedAtMs,
nowMs,
);
)
.run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, nowMs);
const sessionId = Number(result.lastInsertRowid);
return {
sessionId,

View File

@@ -59,9 +59,7 @@ testIfSqlite('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_rollup_state'));
const rollupStateRow = db
.prepare(
'SELECT state_value FROM imm_rollup_state WHERE state_key = ?',
)
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
.get('last_rollup_sample_ms') as {
state_value: number;
} | null;
@@ -188,7 +186,9 @@ testIfSqlite('executeQueuedWrite inserts and upserts word and kanji rows', () =>
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
const wordRow = db
.prepare('SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?')
.prepare(
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
)
.get('猫') as {
headword: string;
frequency: number;

View File

@@ -426,11 +426,7 @@ export function getOrCreateVideoRecord(
LAST_UPDATE_DATE = ?
WHERE video_id = ?
`,
).run(
details.canonicalTitle || 'unknown',
Date.now(),
existing.video_id,
);
).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id);
return existing.video_id;
}

View File

@@ -129,7 +129,11 @@ interface QueuedKanjiWrite {
lastSeen: number;
}
export type QueuedWrite = QueuedTelemetryWrite | QueuedEventWrite | QueuedWordWrite | QueuedKanjiWrite;
export type QueuedWrite =
| QueuedTelemetryWrite
| QueuedEventWrite
| QueuedWordWrite
| QueuedKanjiWrite;
export interface VideoMetadata {
sourceType: number;

View File

@@ -31,7 +31,10 @@ test('createJlptVocabularyLookup loads JLPT bank entries and resolves known leve
assert.equal(lookup('猫'), 'N5');
assert.equal(lookup('犬'), 'N5');
assert.equal(lookup('鳥'), null);
assert.equal(logs.some((entry) => entry.includes('JLPT dictionary loaded from')), true);
assert.equal(
logs.some((entry) => entry.includes('JLPT dictionary loaded from')),
true,
);
});
test('createJlptVocabularyLookup does not require synchronous fs APIs', async () => {

View File

@@ -53,7 +53,9 @@ function parseAssStartTimes(content: string): number[] {
const starts: number[] = [];
const lines = content.split(/\r?\n/);
for (const line of lines) {
const match = line.match(/^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/);
const match = line.match(
/^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/,
);
if (!match) continue;
const [hoursRaw, minutesRaw, secondsRaw] = match[1]!.split(':');
if (secondsRaw === undefined) continue;

View File

@@ -2370,7 +2370,6 @@ test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async
assert.equal(frequencyCalls, 1);
});
test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and frequency annotations', async () => {
const result = await tokenizeSubtitle(
'になれば',

View File

@@ -92,13 +92,14 @@ interface TokenizerAnnotationOptions {
pos2Exclusions: ReadonlySet<string>;
}
let parserEnrichmentWorkerRuntimeModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-worker-runtime')>
| null = null;
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null = null;
let parserEnrichmentFallbackModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-stage')>
| null = null;
let parserEnrichmentWorkerRuntimeModulePromise: Promise<
typeof import('./tokenizer/parser-enrichment-worker-runtime')
> | null = null;
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null =
null;
let parserEnrichmentFallbackModulePromise: Promise<
typeof import('./tokenizer/parser-enrichment-stage')
> | null = null;
const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
);
@@ -106,7 +107,10 @@ const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
);
function getKnownWordLookup(deps: TokenizerServiceDeps, options: TokenizerAnnotationOptions): (text: string) => boolean {
function getKnownWordLookup(
deps: TokenizerServiceDeps,
options: TokenizerAnnotationOptions,
): (text: string) => boolean {
if (!options.nPlusOneEnabled) {
return () => false;
}
@@ -126,7 +130,8 @@ async function enrichTokensWithMecabAsync(
mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
if (!parserEnrichmentWorkerRuntimeModulePromise) {
parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime');
parserEnrichmentWorkerRuntimeModulePromise =
import('./tokenizer/parser-enrichment-worker-runtime');
}
try {
@@ -185,8 +190,7 @@ export function createTokenizerDepsRuntime(
getNPlusOneEnabled: options.getNPlusOneEnabled,
getJlptEnabled: options.getJlptEnabled,
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
getFrequencyDictionaryMatchMode:
options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
getFrequencyRank: options.getFrequencyRank,
getMinSentenceWordsForNPlusOne: options.getMinSentenceWordsForNPlusOne ?? (() => 3),
getYomitanGroupDebugEnabled: options.getYomitanGroupDebugEnabled ?? (() => false),
@@ -348,7 +352,8 @@ function buildYomitanFrequencyRankMap(
continue;
}
const dictionaryPriority =
typeof frequency.dictionaryPriority === 'number' && Number.isFinite(frequency.dictionaryPriority)
typeof frequency.dictionaryPriority === 'number' &&
Number.isFinite(frequency.dictionaryPriority)
? Math.max(0, Math.floor(frequency.dictionaryPriority))
: Number.MAX_SAFE_INTEGER;
const current = rankByTerm.get(normalizedTerm);
@@ -489,7 +494,11 @@ async function parseWithYomitanInternalParser(
normalizedSelectedTokens,
frequencyMatchMode,
);
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
const yomitanFrequencies = await requestYomitanTermFrequencies(
termReadingList,
deps,
logger,
);
return buildYomitanFrequencyRankMap(yomitanFrequencies);
})()
: Promise.resolve(new Map<string, number>());

View File

@@ -101,7 +101,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',
let sentinelFilterCalls = 0;
const originalFilter = Array.prototype.filter;
Array.prototype.filter = (function filterWithSentinelCheck(
Array.prototype.filter = function filterWithSentinelCheck(
this: unknown[],
...args: any[]
): any[] {
@@ -113,7 +113,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',
}
}
return (originalFilter as (...params: any[]) => any[]).apply(this, args);
}) as typeof Array.prototype.filter;
} as typeof Array.prototype.filter;
try {
const enriched = enrichTokensWithMecabPos1(tokens, mecabTokens);

View File

@@ -182,7 +182,8 @@ function pickClosestMecabPosMetadataBySurface(
startDistance < bestSurfaceMatchDistance ||
(startDistance === bestSurfaceMatchDistance &&
(endDistance < bestSurfaceMatchEndDistance ||
(endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
(endDistance === bestSurfaceMatchEndDistance &&
candidate.index < bestSurfaceMatchIndex)))
) {
bestSurfaceMatchDistance = startDistance;
bestSurfaceMatchEndDistance = endDistance;
@@ -199,7 +200,8 @@ function pickClosestMecabPosMetadataBySurface(
startDistance < bestSurfaceMatchDistance ||
(startDistance === bestSurfaceMatchDistance &&
(endDistance < bestSurfaceMatchEndDistance ||
(endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
(endDistance === bestSurfaceMatchEndDistance &&
candidate.index < bestSurfaceMatchIndex)))
) {
bestSurfaceMatchDistance = startDistance;
bestSurfaceMatchEndDistance = endDistance;
@@ -274,9 +276,15 @@ function pickClosestMecabPosMetadataByOverlap(
const overlappingTokensByMecabOrder = overlappingTokens
.slice()
.sort((left, right) => left.index - right.index);
const overlapPos1 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos1));
const overlapPos2 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos2));
const overlapPos3 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos3));
const overlapPos1 = joinUniqueTags(
overlappingTokensByMecabOrder.map((candidate) => candidate.pos1),
);
const overlapPos2 = joinUniqueTags(
overlappingTokensByMecabOrder.map((candidate) => candidate.pos2),
);
const overlapPos3 = joinUniqueTags(
overlappingTokensByMecabOrder.map((candidate) => candidate.pos3),
);
return {
pos1: overlapPos1 ?? bestToken.pos1,

View File

@@ -39,7 +39,10 @@ interface YomitanProfileMetadata {
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();
const yomitanFrequencyCacheByWindow = new WeakMap<BrowserWindow, Map<string, YomitanTermFrequency[]>>();
const yomitanFrequencyCacheByWindow = new WeakMap<
BrowserWindow,
Map<string, YomitanTermFrequency[]>
>();
function isObject(value: unknown): value is Record<string, unknown> {
return Boolean(value && typeof value === 'object');
@@ -87,7 +90,7 @@ function parsePositiveFrequencyString(value: string): number | null {
const chunks = numericPrefix.split(',');
const normalizedNumber =
chunks.length <= 1
? chunks[0] ?? ''
? (chunks[0] ?? '')
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
? chunks.join('')
: (chunks[0] ?? '');
@@ -145,11 +148,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
: Number.MAX_SAFE_INTEGER;
const reading =
value.reading === null
? null
: typeof value.reading === 'string'
? value.reading
: null;
value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
const displayValueParsed = value.displayValueParsed === true;
@@ -164,7 +163,9 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
};
}
function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
function normalizeTermReadingList(
termReadingList: YomitanTermReadingPair[],
): YomitanTermReadingPair[] {
const normalized: YomitanTermReadingPair[] = [];
const seen = new Set<string>();
@@ -174,7 +175,9 @@ function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): Yo
continue;
}
const reading =
typeof pair.reading === 'string' && pair.reading.trim().length > 0 ? pair.reading.trim() : null;
typeof pair.reading === 'string' && pair.reading.trim().length > 0
? pair.reading.trim()
: null;
const key = `${term}\u0000${reading ?? ''}`;
if (seen.has(key)) {
continue;
@@ -298,7 +301,9 @@ function groupFrequencyEntriesByPair(
const grouped = new Map<string, YomitanTermFrequency[]>();
for (const entry of entries) {
const reading =
typeof entry.reading === 'string' && entry.reading.trim().length > 0 ? entry.reading.trim() : null;
typeof entry.reading === 'string' && entry.reading.trim().length > 0
? entry.reading.trim()
: null;
const key = makeTermReadingCacheKey(entry.term.trim(), reading);
const existing = grouped.get(key);
if (existing) {
@@ -805,7 +810,11 @@ export async function requestYomitanTermFrequencies(
);
if (fallbackFetchResult !== null) {
fallbackFetchedEntries = fallbackFetchResult;
cacheFrequencyEntriesForPairs(frequencyCache, fallbackTermReadingList, fallbackFetchedEntries);
cacheFrequencyEntriesForPairs(
frequencyCache,
fallbackTermReadingList,
fallbackFetchedEntries,
);
}
for (const pair of missingTermReadingList) {
@@ -829,7 +838,9 @@ export async function requestYomitanTermFrequencies(
[...missingTermReadingList, ...fallbackTermReadingList].map((pair) => pair.term),
);
const cachedResult = buildCachedResult();
const unmatchedEntries = allFetchedEntries.filter((entry) => !queriedTerms.has(entry.term.trim()));
const unmatchedEntries = allFetchedEntries.filter(
(entry) => !queriedTerms.has(entry.term.trim()),
);
return [...cachedResult, ...unmatchedEntries];
}