mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-02 18:22:42 -08:00
make pretty
This commit is contained in:
@@ -46,23 +46,31 @@ export function pruneRetention(
|
||||
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
|
||||
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
|
||||
|
||||
const deletedSessionEvents = (db
|
||||
.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`)
|
||||
.run(eventCutoff) as { changes: number }).changes;
|
||||
const deletedTelemetryRows = (db
|
||||
.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`)
|
||||
.run(telemetryCutoff) as { changes: number }).changes;
|
||||
const deletedDailyRows = (db
|
||||
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
|
||||
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }).changes;
|
||||
const deletedMonthlyRows = (db
|
||||
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
|
||||
.run(toMonthKey(monthCutoff)) as { changes: number }).changes;
|
||||
const deletedEndedSessions = (db
|
||||
.prepare(
|
||||
`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`,
|
||||
)
|
||||
.run(telemetryCutoff) as { changes: number }).changes;
|
||||
const deletedSessionEvents = (
|
||||
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
|
||||
changes: number;
|
||||
}
|
||||
).changes;
|
||||
const deletedTelemetryRows = (
|
||||
db.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`).run(telemetryCutoff) as {
|
||||
changes: number;
|
||||
}
|
||||
).changes;
|
||||
const deletedDailyRows = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
|
||||
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
|
||||
).changes;
|
||||
const deletedMonthlyRows = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
|
||||
.run(toMonthKey(monthCutoff)) as { changes: number }
|
||||
).changes;
|
||||
const deletedEndedSessions = (
|
||||
db
|
||||
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
|
||||
.run(telemetryCutoff) as { changes: number }
|
||||
).changes;
|
||||
|
||||
return {
|
||||
deletedSessionEvents,
|
||||
|
||||
@@ -17,6 +17,9 @@ test('extractLineVocabulary returns words and unique kanji', () => {
|
||||
new Set(result.words.map((entry) => `${entry.headword}/${entry.word}`)),
|
||||
new Set(['hello/hello', '你好/你好', '猫/猫']),
|
||||
);
|
||||
assert.equal(result.words.every((entry) => entry.reading === ''), true);
|
||||
assert.equal(
|
||||
result.words.every((entry) => entry.reading === ''),
|
||||
true,
|
||||
);
|
||||
assert.deepEqual(new Set(result.kanji), new Set(['你', '好', '猫']));
|
||||
});
|
||||
|
||||
@@ -97,7 +97,8 @@ export function extractLineVocabulary(value: string): ExtractedLineVocabulary {
|
||||
if (!cleaned) return { words: [], kanji: [] };
|
||||
|
||||
const wordSet = new Set<string>();
|
||||
const tokenPattern = /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
|
||||
const tokenPattern =
|
||||
/[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
|
||||
const rawWords = cleaned.match(tokenPattern) ?? [];
|
||||
for (const rawWord of rawWords) {
|
||||
const normalizedWord = normalizeText(rawWord.toLowerCase());
|
||||
|
||||
@@ -19,15 +19,8 @@ export function startSessionRecord(
|
||||
CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
`,
|
||||
)
|
||||
.run(
|
||||
sessionUuid,
|
||||
videoId,
|
||||
startedAtMs,
|
||||
SESSION_STATUS_ACTIVE,
|
||||
startedAtMs,
|
||||
nowMs,
|
||||
);
|
||||
)
|
||||
.run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, nowMs);
|
||||
const sessionId = Number(result.lastInsertRowid);
|
||||
return {
|
||||
sessionId,
|
||||
|
||||
@@ -59,9 +59,7 @@ testIfSqlite('ensureSchema creates immersion core tables', () => {
|
||||
assert.ok(tableNames.has('imm_rollup_state'));
|
||||
|
||||
const rollupStateRow = db
|
||||
.prepare(
|
||||
'SELECT state_value FROM imm_rollup_state WHERE state_key = ?',
|
||||
)
|
||||
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
|
||||
.get('last_rollup_sample_ms') as {
|
||||
state_value: number;
|
||||
} | null;
|
||||
@@ -188,7 +186,9 @@ testIfSqlite('executeQueuedWrite inserts and upserts word and kanji rows', () =>
|
||||
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
|
||||
|
||||
const wordRow = db
|
||||
.prepare('SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?')
|
||||
.prepare(
|
||||
'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
|
||||
)
|
||||
.get('猫') as {
|
||||
headword: string;
|
||||
frequency: number;
|
||||
|
||||
@@ -426,11 +426,7 @@ export function getOrCreateVideoRecord(
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE video_id = ?
|
||||
`,
|
||||
).run(
|
||||
details.canonicalTitle || 'unknown',
|
||||
Date.now(),
|
||||
existing.video_id,
|
||||
);
|
||||
).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id);
|
||||
return existing.video_id;
|
||||
}
|
||||
|
||||
|
||||
@@ -129,7 +129,11 @@ interface QueuedKanjiWrite {
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export type QueuedWrite = QueuedTelemetryWrite | QueuedEventWrite | QueuedWordWrite | QueuedKanjiWrite;
|
||||
export type QueuedWrite =
|
||||
| QueuedTelemetryWrite
|
||||
| QueuedEventWrite
|
||||
| QueuedWordWrite
|
||||
| QueuedKanjiWrite;
|
||||
|
||||
export interface VideoMetadata {
|
||||
sourceType: number;
|
||||
|
||||
@@ -31,7 +31,10 @@ test('createJlptVocabularyLookup loads JLPT bank entries and resolves known leve
|
||||
assert.equal(lookup('猫'), 'N5');
|
||||
assert.equal(lookup('犬'), 'N5');
|
||||
assert.equal(lookup('鳥'), null);
|
||||
assert.equal(logs.some((entry) => entry.includes('JLPT dictionary loaded from')), true);
|
||||
assert.equal(
|
||||
logs.some((entry) => entry.includes('JLPT dictionary loaded from')),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test('createJlptVocabularyLookup does not require synchronous fs APIs', async () => {
|
||||
|
||||
@@ -53,7 +53,9 @@ function parseAssStartTimes(content: string): number[] {
|
||||
const starts: number[] = [];
|
||||
const lines = content.split(/\r?\n/);
|
||||
for (const line of lines) {
|
||||
const match = line.match(/^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/);
|
||||
const match = line.match(
|
||||
/^Dialogue:[^,]*,(\d+:\d{2}:\d{2}\.\d{1,2}),\d+:\d{2}:\d{2}\.\d{1,2},/,
|
||||
);
|
||||
if (!match) continue;
|
||||
const [hoursRaw, minutesRaw, secondsRaw] = match[1]!.split(':');
|
||||
if (secondsRaw === undefined) continue;
|
||||
|
||||
@@ -2370,7 +2370,6 @@ test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async
|
||||
assert.equal(frequencyCalls, 1);
|
||||
});
|
||||
|
||||
|
||||
test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and frequency annotations', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'になれば',
|
||||
|
||||
@@ -92,13 +92,14 @@ interface TokenizerAnnotationOptions {
|
||||
pos2Exclusions: ReadonlySet<string>;
|
||||
}
|
||||
|
||||
let parserEnrichmentWorkerRuntimeModulePromise:
|
||||
| Promise<typeof import('./tokenizer/parser-enrichment-worker-runtime')>
|
||||
| null = null;
|
||||
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null = null;
|
||||
let parserEnrichmentFallbackModulePromise:
|
||||
| Promise<typeof import('./tokenizer/parser-enrichment-stage')>
|
||||
| null = null;
|
||||
let parserEnrichmentWorkerRuntimeModulePromise: Promise<
|
||||
typeof import('./tokenizer/parser-enrichment-worker-runtime')
|
||||
> | null = null;
|
||||
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null =
|
||||
null;
|
||||
let parserEnrichmentFallbackModulePromise: Promise<
|
||||
typeof import('./tokenizer/parser-enrichment-stage')
|
||||
> | null = null;
|
||||
const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
|
||||
DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG,
|
||||
);
|
||||
@@ -106,7 +107,10 @@ const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
|
||||
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
|
||||
);
|
||||
|
||||
function getKnownWordLookup(deps: TokenizerServiceDeps, options: TokenizerAnnotationOptions): (text: string) => boolean {
|
||||
function getKnownWordLookup(
|
||||
deps: TokenizerServiceDeps,
|
||||
options: TokenizerAnnotationOptions,
|
||||
): (text: string) => boolean {
|
||||
if (!options.nPlusOneEnabled) {
|
||||
return () => false;
|
||||
}
|
||||
@@ -126,7 +130,8 @@ async function enrichTokensWithMecabAsync(
|
||||
mecabTokens: MergedToken[] | null,
|
||||
): Promise<MergedToken[]> {
|
||||
if (!parserEnrichmentWorkerRuntimeModulePromise) {
|
||||
parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime');
|
||||
parserEnrichmentWorkerRuntimeModulePromise =
|
||||
import('./tokenizer/parser-enrichment-worker-runtime');
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -185,8 +190,7 @@ export function createTokenizerDepsRuntime(
|
||||
getNPlusOneEnabled: options.getNPlusOneEnabled,
|
||||
getJlptEnabled: options.getJlptEnabled,
|
||||
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
|
||||
getFrequencyDictionaryMatchMode:
|
||||
options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
|
||||
getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
|
||||
getFrequencyRank: options.getFrequencyRank,
|
||||
getMinSentenceWordsForNPlusOne: options.getMinSentenceWordsForNPlusOne ?? (() => 3),
|
||||
getYomitanGroupDebugEnabled: options.getYomitanGroupDebugEnabled ?? (() => false),
|
||||
@@ -348,7 +352,8 @@ function buildYomitanFrequencyRankMap(
|
||||
continue;
|
||||
}
|
||||
const dictionaryPriority =
|
||||
typeof frequency.dictionaryPriority === 'number' && Number.isFinite(frequency.dictionaryPriority)
|
||||
typeof frequency.dictionaryPriority === 'number' &&
|
||||
Number.isFinite(frequency.dictionaryPriority)
|
||||
? Math.max(0, Math.floor(frequency.dictionaryPriority))
|
||||
: Number.MAX_SAFE_INTEGER;
|
||||
const current = rankByTerm.get(normalizedTerm);
|
||||
@@ -489,7 +494,11 @@ async function parseWithYomitanInternalParser(
|
||||
normalizedSelectedTokens,
|
||||
frequencyMatchMode,
|
||||
);
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(
|
||||
termReadingList,
|
||||
deps,
|
||||
logger,
|
||||
);
|
||||
return buildYomitanFrequencyRankMap(yomitanFrequencies);
|
||||
})()
|
||||
: Promise.resolve(new Map<string, number>());
|
||||
|
||||
@@ -101,7 +101,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',
|
||||
|
||||
let sentinelFilterCalls = 0;
|
||||
const originalFilter = Array.prototype.filter;
|
||||
Array.prototype.filter = (function filterWithSentinelCheck(
|
||||
Array.prototype.filter = function filterWithSentinelCheck(
|
||||
this: unknown[],
|
||||
...args: any[]
|
||||
): any[] {
|
||||
@@ -113,7 +113,7 @@ test('enrichTokensWithMecabPos1 avoids repeated active-candidate filter scans',
|
||||
}
|
||||
}
|
||||
return (originalFilter as (...params: any[]) => any[]).apply(this, args);
|
||||
}) as typeof Array.prototype.filter;
|
||||
} as typeof Array.prototype.filter;
|
||||
|
||||
try {
|
||||
const enriched = enrichTokensWithMecabPos1(tokens, mecabTokens);
|
||||
|
||||
@@ -182,7 +182,8 @@ function pickClosestMecabPosMetadataBySurface(
|
||||
startDistance < bestSurfaceMatchDistance ||
|
||||
(startDistance === bestSurfaceMatchDistance &&
|
||||
(endDistance < bestSurfaceMatchEndDistance ||
|
||||
(endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
|
||||
(endDistance === bestSurfaceMatchEndDistance &&
|
||||
candidate.index < bestSurfaceMatchIndex)))
|
||||
) {
|
||||
bestSurfaceMatchDistance = startDistance;
|
||||
bestSurfaceMatchEndDistance = endDistance;
|
||||
@@ -199,7 +200,8 @@ function pickClosestMecabPosMetadataBySurface(
|
||||
startDistance < bestSurfaceMatchDistance ||
|
||||
(startDistance === bestSurfaceMatchDistance &&
|
||||
(endDistance < bestSurfaceMatchEndDistance ||
|
||||
(endDistance === bestSurfaceMatchEndDistance && candidate.index < bestSurfaceMatchIndex)))
|
||||
(endDistance === bestSurfaceMatchEndDistance &&
|
||||
candidate.index < bestSurfaceMatchIndex)))
|
||||
) {
|
||||
bestSurfaceMatchDistance = startDistance;
|
||||
bestSurfaceMatchEndDistance = endDistance;
|
||||
@@ -274,9 +276,15 @@ function pickClosestMecabPosMetadataByOverlap(
|
||||
const overlappingTokensByMecabOrder = overlappingTokens
|
||||
.slice()
|
||||
.sort((left, right) => left.index - right.index);
|
||||
const overlapPos1 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos1));
|
||||
const overlapPos2 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos2));
|
||||
const overlapPos3 = joinUniqueTags(overlappingTokensByMecabOrder.map((candidate) => candidate.pos3));
|
||||
const overlapPos1 = joinUniqueTags(
|
||||
overlappingTokensByMecabOrder.map((candidate) => candidate.pos1),
|
||||
);
|
||||
const overlapPos2 = joinUniqueTags(
|
||||
overlappingTokensByMecabOrder.map((candidate) => candidate.pos2),
|
||||
);
|
||||
const overlapPos3 = joinUniqueTags(
|
||||
overlappingTokensByMecabOrder.map((candidate) => candidate.pos3),
|
||||
);
|
||||
|
||||
return {
|
||||
pos1: overlapPos1 ?? bestToken.pos1,
|
||||
|
||||
@@ -39,7 +39,10 @@ interface YomitanProfileMetadata {
|
||||
|
||||
const DEFAULT_YOMITAN_SCAN_LENGTH = 40;
|
||||
const yomitanProfileMetadataByWindow = new WeakMap<BrowserWindow, YomitanProfileMetadata>();
|
||||
const yomitanFrequencyCacheByWindow = new WeakMap<BrowserWindow, Map<string, YomitanTermFrequency[]>>();
|
||||
const yomitanFrequencyCacheByWindow = new WeakMap<
|
||||
BrowserWindow,
|
||||
Map<string, YomitanTermFrequency[]>
|
||||
>();
|
||||
|
||||
function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return Boolean(value && typeof value === 'object');
|
||||
@@ -87,7 +90,7 @@ function parsePositiveFrequencyString(value: string): number | null {
|
||||
const chunks = numericPrefix.split(',');
|
||||
const normalizedNumber =
|
||||
chunks.length <= 1
|
||||
? chunks[0] ?? ''
|
||||
? (chunks[0] ?? '')
|
||||
: chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk))
|
||||
? chunks.join('')
|
||||
: (chunks[0] ?? '');
|
||||
@@ -145,11 +148,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
|
||||
: Number.MAX_SAFE_INTEGER;
|
||||
|
||||
const reading =
|
||||
value.reading === null
|
||||
? null
|
||||
: typeof value.reading === 'string'
|
||||
? value.reading
|
||||
: null;
|
||||
value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null;
|
||||
const displayValue = typeof displayValueRaw === 'string' ? displayValueRaw : null;
|
||||
const displayValueParsed = value.displayValueParsed === true;
|
||||
|
||||
@@ -164,7 +163,9 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
|
||||
};
|
||||
}
|
||||
|
||||
function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
|
||||
function normalizeTermReadingList(
|
||||
termReadingList: YomitanTermReadingPair[],
|
||||
): YomitanTermReadingPair[] {
|
||||
const normalized: YomitanTermReadingPair[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
@@ -174,7 +175,9 @@ function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): Yo
|
||||
continue;
|
||||
}
|
||||
const reading =
|
||||
typeof pair.reading === 'string' && pair.reading.trim().length > 0 ? pair.reading.trim() : null;
|
||||
typeof pair.reading === 'string' && pair.reading.trim().length > 0
|
||||
? pair.reading.trim()
|
||||
: null;
|
||||
const key = `${term}\u0000${reading ?? ''}`;
|
||||
if (seen.has(key)) {
|
||||
continue;
|
||||
@@ -298,7 +301,9 @@ function groupFrequencyEntriesByPair(
|
||||
const grouped = new Map<string, YomitanTermFrequency[]>();
|
||||
for (const entry of entries) {
|
||||
const reading =
|
||||
typeof entry.reading === 'string' && entry.reading.trim().length > 0 ? entry.reading.trim() : null;
|
||||
typeof entry.reading === 'string' && entry.reading.trim().length > 0
|
||||
? entry.reading.trim()
|
||||
: null;
|
||||
const key = makeTermReadingCacheKey(entry.term.trim(), reading);
|
||||
const existing = grouped.get(key);
|
||||
if (existing) {
|
||||
@@ -805,7 +810,11 @@ export async function requestYomitanTermFrequencies(
|
||||
);
|
||||
if (fallbackFetchResult !== null) {
|
||||
fallbackFetchedEntries = fallbackFetchResult;
|
||||
cacheFrequencyEntriesForPairs(frequencyCache, fallbackTermReadingList, fallbackFetchedEntries);
|
||||
cacheFrequencyEntriesForPairs(
|
||||
frequencyCache,
|
||||
fallbackTermReadingList,
|
||||
fallbackFetchedEntries,
|
||||
);
|
||||
}
|
||||
|
||||
for (const pair of missingTermReadingList) {
|
||||
@@ -829,7 +838,9 @@ export async function requestYomitanTermFrequencies(
|
||||
[...missingTermReadingList, ...fallbackTermReadingList].map((pair) => pair.term),
|
||||
);
|
||||
const cachedResult = buildCachedResult();
|
||||
const unmatchedEntries = allFetchedEntries.filter((entry) => !queriedTerms.has(entry.term.trim()));
|
||||
const unmatchedEntries = allFetchedEntries.filter(
|
||||
(entry) => !queriedTerms.has(entry.term.trim()),
|
||||
);
|
||||
return [...cachedResult, ...unmatchedEntries];
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user