make pretty (code formatting pass)

2026-06-09 15:13:32 -07:00 · 2026-03-02 02:45:51 -08:00
parent 83d21c4b6d
commit be4db24861
42 changed files with 395 additions and 336 deletions
@@ -46,23 +46,31 @@ export function pruneRetention(
  const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
  const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;

-  const deletedSessionEvents = (db
-    .prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`)
-    .run(eventCutoff) as { changes: number }).changes;
-  const deletedTelemetryRows = (db
-    .prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`)
-    .run(telemetryCutoff) as { changes: number }).changes;
-  const deletedDailyRows = (db
-    .prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
-    .run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }).changes;
-  const deletedMonthlyRows = (db
-    .prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
-    .run(toMonthKey(monthCutoff)) as { changes: number }).changes;
-  const deletedEndedSessions = (db
-    .prepare(
-      `DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`,
-    )
-    .run(telemetryCutoff) as { changes: number }).changes;
+  const deletedSessionEvents = (
+    db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
+      changes: number;
+    }
+  ).changes;
+  const deletedTelemetryRows = (
+    db.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`).run(telemetryCutoff) as {
+      changes: number;
+    }
+  ).changes;
+  const deletedDailyRows = (
+    db
+      .prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
+      .run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
+  ).changes;
+  const deletedMonthlyRows = (
+    db
+      .prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
+      .run(toMonthKey(monthCutoff)) as { changes: number }
+  ).changes;
+  const deletedEndedSessions = (
+    db
+      .prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
+      .run(telemetryCutoff) as { changes: number }
+  ).changes;

  return {
    deletedSessionEvents,
@@ -17,6 +17,9 @@ test('extractLineVocabulary returns words and unique kanji', () => {
    new Set(result.words.map((entry) => `${entry.headword}/${entry.word}`)),
    new Set(['hello/hello', '你好/你好', '猫/猫']),
  );
-  assert.equal(result.words.every((entry) => entry.reading === ''), true);
+  assert.equal(
+    result.words.every((entry) => entry.reading === ''),
+    true,
+  );
  assert.deepEqual(new Set(result.kanji), new Set(['你', '好', '猫']));
 });
@@ -97,7 +97,8 @@ export function extractLineVocabulary(value: string): ExtractedLineVocabulary {
  if (!cleaned) return { words: [], kanji: [] };

  const wordSet = new Set<string>();
-  const tokenPattern = /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
+  const tokenPattern =
+    /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
  const rawWords = cleaned.match(tokenPattern) ?? [];
  for (const rawWord of rawWords) {
    const normalizedWord = normalizeText(rawWord.toLowerCase());
@@ -19,15 +19,8 @@ export function startSessionRecord(
          CREATED_DATE, LAST_UPDATE_DATE
      ) VALUES (?, ?, ?, ?, ?, ?)
    `,
-  )
-    .run(
-      sessionUuid,
-      videoId,
-      startedAtMs,
-      SESSION_STATUS_ACTIVE,
-      startedAtMs,
-      nowMs,
-    );
+    )
+    .run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, nowMs);
  const sessionId = Number(result.lastInsertRowid);
  return {
    sessionId,
@@ -59,9 +59,7 @@ testIfSqlite('ensureSchema creates immersion core tables', () => {
    assert.ok(tableNames.has('imm_rollup_state'));

    const rollupStateRow = db
-      .prepare(
-        'SELECT state_value FROM imm_rollup_state WHERE state_key = ?',
-      )
+      .prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
      .get('last_rollup_sample_ms') as {
      state_value: number;
    } | null;
@@ -188,7 +186,9 @@ testIfSqlite('executeQueuedWrite inserts and upserts word and kanji rows', () =>
    stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);

    const wordRow = db
-      .prepare('SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?')
+      .prepare(
+        'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
+      )
      .get('猫') as {
      headword: string;
      frequency: number;
@@ -426,11 +426,7 @@ export function getOrCreateVideoRecord(
          LAST_UPDATE_DATE = ?
        WHERE video_id = ?
      `,
-    ).run(
-      details.canonicalTitle || 'unknown',
-      Date.now(),
-      existing.video_id,
-    );
+    ).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id);
    return existing.video_id;
  }

@@ -129,7 +129,11 @@ interface QueuedKanjiWrite {
  lastSeen: number;
 }

-export type QueuedWrite = QueuedTelemetryWrite | QueuedEventWrite | QueuedWordWrite | QueuedKanjiWrite;
+export type QueuedWrite =
+  | QueuedTelemetryWrite
+  | QueuedEventWrite
+  | QueuedWordWrite
+  | QueuedKanjiWrite;

 export interface VideoMetadata {
  sourceType: number;
@@ -31,7 +31,10 @@ test('createJlptVocabularyLookup loads JLPT bank entries and resolves known leve
  assert.equal(lookup('猫'), 'N5');
  assert.equal(lookup('犬'), 'N5');
  assert.equal(lookup('鳥'), null);
-  assert.equal(logs.some((entry) => entry.includes('JLPT dictionary loaded from')), true);
+  assert.equal(
+    logs.some((entry) => entry.includes('JLPT dictionary loaded from')),
+    true,
+  );
 });

 test('createJlptVocabularyLookup does not require synchronous fs APIs', async () => {
@@ -53,7 +53,9 @@ function parseAssStartTimes(content: string): number[] {
  const starts: number[] = [];
  const lines = content.split(/\r?\n/);
  for (const line of lines) {
-    const match = line.match(/^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/);
+    const match = line.match(
+      /^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/,
+    );
    if (!match) continue;
    const [hoursRaw, minutesRaw, secondsRaw] = match[1]!.split(':');
    if (secondsRaw === undefined) continue;
@@ -2370,7 +2370,6 @@ test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async
  assert.equal(frequencyCalls, 1);
 });

-
 test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and frequency annotations', async () => {
  const result = await tokenizeSubtitle(
    'になれば',
@@ -92,13 +92,14 @@ interface TokenizerAnnotationOptions {
  pos2Exclusions: ReadonlySet<string>;
 }

-let parserEnrichmentWorkerRuntimeModulePromise:
-  | Promise<typeof import('./tokenizer/parser-enrichment-worker-runtime')>
-  | null = null;
-let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null = null;
-let parserEnrichmentFallbackModulePromise:
-  | Promise<typeof import('./tokenizer/parser-enrichment-stage')>
-  | null = null;
+let parserEnrichmentWorkerRuntimeModulePromise: Promise<
+  typeof import('./tokenizer/parser-enrichment-worker-runtime')
+> | null = null;
+let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null =
+  null;
+let parserEnrichmentFallbackModulePromise: Promise<
+  typeof import('./tokenizer/parser-enrichment-stage')
+> | null = null;
 const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
  DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
 );
@@ -106,7 +107,10 @@ const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
  DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
 );

-function getKnownWordLookup(deps: TokenizerServiceDeps, options: TokenizerAnnotationOptions): (text: string) => boolean {
+function getKnownWordLookup(
+  deps: TokenizerServiceDeps,
+  options: TokenizerAnnotationOptions,
+): (text: string) => boolean {
  if (!options.nPlusOneEnabled) {
    return () => false;
  }
@@ -126,7 +130,8 @@ async function enrichTokensWithMecabAsync(
  mecabTokens: MergedToken[] | null,
 ): Promise<MergedToken[]> {
  if (!parserEnrichmentWorkerRuntimeModulePromise) {
-    parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime');
+    parserEnrichmentWorkerRuntimeModulePromise =
+      import('./tokenizer/parser-enrichment-worker-runtime');
  }

  try {
@@ -185,8 +190,7 @@ export function createTokenizerDepsRuntime(
    getNPlusOneEnabled: options.getNPlusOneEnabled,
    getJlptEnabled: options.getJlptEnabled,
    getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
-    getFrequencyDictionaryMatchMode:
-      options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
+    getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
    getFrequencyRank: options.getFrequencyRank,
    getMinSentenceWordsForNPlusOne: options.getMinSentenceWordsForNPlusOne ?? (() => 3),
    getYomitanGroupDebugEnabled: options.getYomitanGroupDebugEnabled ?? (() => false),
@@ -348,7 +352,8 @@ function buildYomitanFrequencyRankMap(
      continue;
    }
    const dictionaryPriority =
-      typeof frequency.dictionaryPriority === 'number' && Number.isFinite(frequency.dictionaryPriority)
+      typeof frequency.dictionaryPriority === 'number' &&
+      Number.isFinite(frequency.dictionaryPriority)
        ? Math.max(0, Math.floor(frequency.dictionaryPriority))
        : Number.MAX_SAFE_INTEGER;
    const current = rankByTerm.get(normalizedTerm);
@@ -489,7 +494,11 @@ async function parseWithYomitanInternalParser(
          normalizedSelectedTokens,
          frequencyMatchMode,
        );
-        const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
+        const yomitanFrequencies = await requestYomitanTermFrequencies(
+          termReadingList,
+          deps,
+          logger,
+        );
        return buildYomitanFrequencyRankMap(yomitanFrequencies);
      })()
    : Promise.resolve(new Map<string, number>());
@@ -101,7 +101,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',

  let sentinelFilterCalls = 0;
  const originalFilter = Array.prototype.filter;
-  Array.prototype.filter = (function filterWithSentinelCheck(
+  Array.prototype.filter = function filterWithSentinelCheck(
    this: unknown[],
    ...args: any[]
  ): any[] {
@@ -113,7 +113,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',
      }
    }
    return (originalFilter as (...params: any[]) => any[]).apply(this, args);
-  }) as typeof Array.prototype.filter;
+  } as typeof Array.prototype.filter;

  try {
    const enriched = enrichTokensWithMecabPos1(tokens, mecabTokens);
@@ -182,7 +182,8 @@ function pickClosestMecabPosMetadataBySurface(
        startDistance < bestSurfaceMatchDistance ||
        (startDistance === bestSurfaceMatchDistance &&
          (endDistance < bestSurfaceMatchEndDistance ||
-            (endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
+            (endDistance === bestSurfaceMatchEndDistance &&
+              candidate.index < bestSurfaceMatchIndex)))
      ) {
        bestSurfaceMatchDistance = startDistance;
        bestSurfaceMatchEndDistance = endDistance;
@@ -199,7 +200,8 @@ function pickClosestMecabPosMetadataBySurface(
        startDistance < bestSurfaceMatchDistance ||
        (startDistance === bestSurfaceMatchDistance &&
          (endDistance < bestSurfaceMatchEndDistance ||
-            (endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
+            (endDistance === bestSurfaceMatchEndDistance &&
+              candidate.index < bestSurfaceMatchIndex)))
      ) {
        bestSurfaceMatchDistance = startDistance;
        bestSurfaceMatchEndDistance = endDistance;
@@ -274,9 +276,15 @@ function pickClosestMecabPosMetadataByOverlap(
  const overlappingTokensByMecabOrder = overlappingTokens
    .slice()
    .sort((left, right) => left.index - right.index);
-  const overlapPos1 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos1));
-  const overlapPos2 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos2));
-  const overlapPos3 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos3));
+  const overlapPos1 = joinUniqueTags(
+    overlappingTokensByMecabOrder.map((candidate) => candidate.pos1),
+  );
+  const overlapPos2 = joinUniqueTags(
+    overlappingTokensByMecabOrder.map((candidate) => candidate.pos2),
+  );
+  const overlapPos3 = joinUniqueTags(
+    overlappingTokensByMecabOrder.map((candidate) => candidate.pos3),
+  );

  return {
    pos1: overlapPos1 ?? bestToken.pos1,
@@ -39,7 +39,10 @@ interface YomitanProfileMetadata {

 const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
 const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();
-const yomitanFrequencyCacheByWindow = new WeakMap<BrowserWindow, Map<string, YomitanTermFrequency[]>>();
+const yomitanFrequencyCacheByWindow = new WeakMap<
+  BrowserWindow,
+  Map<string, YomitanTermFrequency[]>
+>();

 function isObject(value: unknown): value is Record<string, unknown> {
  return Boolean(value && typeof value === 'object');
@@ -87,7 +90,7 @@ function parsePositiveFrequencyString(value: string): number | null {
  const chunks = numericPrefix.split(',');
  const normalizedNumber =
    chunks.length <= 1
-      ? chunks[0] ?? ''
+      ? (chunks[0] ?? '')
      : chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
        ? chunks.join('')
        : (chunks[0] ?? '');
@@ -145,11 +148,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
      : Number.MAX_SAFE_INTEGER;

  const reading =
-    value.reading === null
-      ? null
-      : typeof value.reading === 'string'
-        ? value.reading
-        : null;
+    value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
  const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
  const displayValueParsed = value.displayValueParsed === true;

@@ -164,7 +163,9 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
  };
 }

-function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
+function normalizeTermReadingList(
+  termReadingList: YomitanTermReadingPair[],
+): YomitanTermReadingPair[] {
  const normalized: YomitanTermReadingPair[] = [];
  const seen = new Set<string>();

@@ -174,7 +175,9 @@ function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): Yo
      continue;
    }
    const reading =
-      typeof pair.reading === 'string' && pair.reading.trim().length > 0 ? pair.reading.trim() : null;
+      typeof pair.reading === 'string' && pair.reading.trim().length > 0
+        ? pair.reading.trim()
+        : null;
    const key = `${term}\u0000${reading ?? ''}`;
    if (seen.has(key)) {
      continue;
@@ -298,7 +301,9 @@ function groupFrequencyEntriesByPair(
  const grouped = new Map<string, YomitanTermFrequency[]>();
  for (const entry of entries) {
    const reading =
-      typeof entry.reading === 'string' && entry.reading.trim().length > 0 ? entry.reading.trim() : null;
+      typeof entry.reading === 'string' && entry.reading.trim().length > 0
+        ? entry.reading.trim()
+        : null;
    const key = makeTermReadingCacheKey(entry.term.trim(), reading);
    const existing = grouped.get(key);
    if (existing) {
@@ -805,7 +810,11 @@ export async function requestYomitanTermFrequencies(
    );
    if (fallbackFetchResult !== null) {
      fallbackFetchedEntries = fallbackFetchResult;
-      cacheFrequencyEntriesForPairs(frequencyCache, fallbackTermReadingList, fallbackFetchedEntries);
+      cacheFrequencyEntriesForPairs(
+        frequencyCache,
+        fallbackTermReadingList,
+        fallbackFetchedEntries,
+      );
    }

    for (const pair of missingTermReadingList) {
@@ -829,7 +838,9 @@ export async function requestYomitanTermFrequencies(
    [...missingTermReadingList, ...fallbackTermReadingList].map((pair) => pair.term),
  );
  const cachedResult = buildCachedResult();
-  const unmatchedEntries = allFetchedEntries.filter((entry) => !queriedTerms.has(entry.term.trim()));
+  const unmatchedEntries = allFetchedEntries.filter(
+    (entry) => !queriedTerms.has(entry.term.trim()),
+  );
  return [...cachedResult, ...unmatchedEntries];
 }