mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-03 06:22:41 -08:00
fix(startup): async dictionary loading and unblock first tokenization
- move JLPT/frequency dictionary init off sync fs APIs and add cooperative yielding during entry processing
- decouple first tokenization from full warmup by gating only on Yomitan readiness while MeCab/dictionary warmups continue in parallel
- update mpv pause-until-ready OSD copy to tokenization-focused wording and refresh gate regression assertions
This commit is contained in:
@@ -129,3 +129,39 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r
|
||||
assert.equal(lookup('鍛える'), 3272);
|
||||
assert.equal(lookup('高み'), 9933);
|
||||
});
|
||||
|
||||
/**
 * Regression test: loading a frequency dictionary must succeed while the
 * synchronous `fs` entry points (`readFileSync`, `readdirSync`, `statSync`,
 * `existsSync`) are replaced with stubs that throw.
 *
 * The fixture is written before the stubs are installed, and the stubs are
 * always restored in `finally` so a failure here cannot poison later tests.
 */
test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
  const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
  fs.writeFileSync(bankPath, JSON.stringify([['猫', 1, { frequency: { displayValue: 42 } }]]));

  // Keep the real implementations so they can be put back afterwards.
  const readFileSync = fs.readFileSync;
  const readdirSync = fs.readdirSync;
  const statSync = fs.statSync;
  const existsSync = fs.existsSync;
  (fs as unknown as Record<string, unknown>).readFileSync = () => {
    throw new Error('sync read disabled');
  };
  (fs as unknown as Record<string, unknown>).readdirSync = () => {
    throw new Error('sync readdir disabled');
  };
  (fs as unknown as Record<string, unknown>).statSync = () => {
    throw new Error('sync stat disabled');
  };
  (fs as unknown as Record<string, unknown>).existsSync = () => {
    throw new Error('sync exists disabled');
  };

  try {
    const lookup = await createFrequencyDictionaryLookup({
      searchPaths: [tempDir],
      log: () => undefined,
    });
    assert.equal(lookup('猫'), 42);
  } finally {
    (fs as unknown as Record<string, unknown>).readFileSync = readFileSync;
    (fs as unknown as Record<string, unknown>).readdirSync = readdirSync;
    (fs as unknown as Record<string, unknown>).statSync = statSync;
    (fs as unknown as Record<string, unknown>).existsSync = existsSync;
    // Remove the fixture only after the real sync APIs are back in place.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import * as fs from 'node:fs';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
|
||||
export interface FrequencyDictionaryLookupOptions {
|
||||
@@ -13,6 +13,17 @@ interface FrequencyDictionaryEntry {
|
||||
|
||||
const FREQUENCY_BANK_FILE_GLOB = /^term_meta_bank_.*\.json$/;
|
||||
const NOOP_LOOKUP = (): null => null;
|
||||
const ENTRY_YIELD_INTERVAL = 5000;
|
||||
|
||||
/**
 * Reports whether `error` is an object whose `code` property equals `code`.
 *
 * @param error - The caught value; may be anything, including `null` or a primitive.
 * @param code - The error code to compare against (for example `'ENOENT'`).
 * @returns `true` only when `error` is a non-null object with a strictly equal `code`.
 */
function isErrorCode(error: unknown, code: string): boolean {
  return Boolean(error && typeof error === 'object' && (error as { code?: unknown }).code === code);
}
|
||||
|
||||
/**
 * Suspends the caller until a `setImmediate` callback fires, giving the event
 * loop a chance to run other queued work before the caller resumes.
 *
 * @returns A promise that resolves with no value once the immediate has run.
 */
async function yieldToEventLoop(): Promise<void> {
  await new Promise<void>((resolve) => {
    setImmediate(resolve);
  });
}
|
||||
|
||||
function normalizeFrequencyTerm(value: string): string {
|
||||
return value.trim().toLowerCase();
|
||||
@@ -93,16 +104,22 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
|
||||
};
|
||||
}
|
||||
|
||||
function addEntriesToMap(
|
||||
async function addEntriesToMap(
|
||||
rawEntries: unknown,
|
||||
terms: Map<string, number>,
|
||||
): { duplicateCount: number } {
|
||||
): Promise<{ duplicateCount: number }> {
|
||||
if (!Array.isArray(rawEntries)) {
|
||||
return { duplicateCount: 0 };
|
||||
}
|
||||
|
||||
let duplicateCount = 0;
|
||||
let processedCount = 0;
|
||||
for (const rawEntry of rawEntries) {
|
||||
processedCount += 1;
|
||||
if (processedCount % ENTRY_YIELD_INTERVAL === 0) {
|
||||
await yieldToEventLoop();
|
||||
}
|
||||
|
||||
const entry = asFrequencyDictionaryEntry(rawEntry);
|
||||
if (!entry) {
|
||||
continue;
|
||||
@@ -119,15 +136,15 @@ function addEntriesToMap(
|
||||
return { duplicateCount };
|
||||
}
|
||||
|
||||
function collectDictionaryFromPath(
|
||||
async function collectDictionaryFromPath(
|
||||
dictionaryPath: string,
|
||||
log: (message: string) => void,
|
||||
): Map<string, number> {
|
||||
): Promise<Map<string, number>> {
|
||||
const terms = new Map<string, number>();
|
||||
|
||||
let fileNames: string[];
|
||||
try {
|
||||
fileNames = fs.readdirSync(dictionaryPath);
|
||||
fileNames = await fs.readdir(dictionaryPath);
|
||||
} catch (error) {
|
||||
log(`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`);
|
||||
return terms;
|
||||
@@ -143,7 +160,7 @@ function collectDictionaryFromPath(
|
||||
const bankPath = path.join(dictionaryPath, bankFile);
|
||||
let rawText: string;
|
||||
try {
|
||||
rawText = fs.readFileSync(bankPath, 'utf-8');
|
||||
rawText = await fs.readFile(bankPath, 'utf-8');
|
||||
} catch {
|
||||
log(`Failed to read frequency dictionary file ${bankPath}`);
|
||||
continue;
|
||||
@@ -151,6 +168,7 @@ function collectDictionaryFromPath(
|
||||
|
||||
let rawEntries: unknown;
|
||||
try {
|
||||
await yieldToEventLoop();
|
||||
rawEntries = JSON.parse(rawText) as unknown;
|
||||
} catch {
|
||||
log(`Failed to parse frequency dictionary file as JSON: ${bankPath}`);
|
||||
@@ -158,7 +176,7 @@ function collectDictionaryFromPath(
|
||||
}
|
||||
|
||||
const beforeSize = terms.size;
|
||||
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
|
||||
const { duplicateCount } = await addEntriesToMap(rawEntries, terms);
|
||||
if (duplicateCount > 0) {
|
||||
log(
|
||||
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
|
||||
@@ -185,11 +203,11 @@ export async function createFrequencyDictionaryLookup(
|
||||
let isDirectory = false;
|
||||
|
||||
try {
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
isDirectory = (await fs.stat(dictionaryPath)).isDirectory();
|
||||
} catch (error) {
|
||||
if (isErrorCode(error, 'ENOENT')) {
|
||||
continue;
|
||||
}
|
||||
isDirectory = fs.statSync(dictionaryPath).isDirectory();
|
||||
} catch (error) {
|
||||
options.log(
|
||||
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
|
||||
);
|
||||
@@ -201,7 +219,7 @@ export async function createFrequencyDictionaryLookup(
|
||||
}
|
||||
|
||||
foundDictionaryPathCount += 1;
|
||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
const terms = await collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
if (terms.size > 0) {
|
||||
options.log(`Frequency dictionary loaded from ${dictionaryPath} (${terms.size} entries)`);
|
||||
return (term: string): number | null => {
|
||||
|
||||
72
src/core/services/jlpt-vocab.test.ts
Normal file
72
src/core/services/jlpt-vocab.test.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
|
||||
import { createJlptVocabularyLookup } from './jlpt-vocab';
|
||||
|
||||
/**
 * Loads a fixture dictionary where only `term_meta_bank_5.json` has entries
 * and checks that those terms resolve to `N5`, that an unknown term resolves
 * to `null`, and that a "JLPT dictionary loaded from" message is logged.
 */
test('createJlptVocabularyLookup loads JLPT bank entries and resolves known levels', async () => {
  const logs: string[] = [];
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-jlpt-dict-'));

  try {
    fs.writeFileSync(
      path.join(tempDir, 'term_meta_bank_5.json'),
      JSON.stringify([
        ['猫', 1, { frequency: { displayValue: 1 } }],
        ['犬', 2, { frequency: { displayValue: 2 } }],
      ]),
    );
    // The remaining banks exist but are empty.
    fs.writeFileSync(path.join(tempDir, 'term_meta_bank_1.json'), JSON.stringify([]));
    fs.writeFileSync(path.join(tempDir, 'term_meta_bank_2.json'), JSON.stringify([]));
    fs.writeFileSync(path.join(tempDir, 'term_meta_bank_3.json'), JSON.stringify([]));
    fs.writeFileSync(path.join(tempDir, 'term_meta_bank_4.json'), JSON.stringify([]));

    const lookup = await createJlptVocabularyLookup({
      searchPaths: [tempDir],
      log: (message) => {
        logs.push(message);
      },
    });

    assert.equal(lookup('猫'), 'N5');
    assert.equal(lookup('犬'), 'N5');
    assert.equal(lookup('鳥'), null);
    assert.equal(logs.some((entry) => entry.includes('JLPT dictionary loaded from')), true);
  } finally {
    // Do not leave the fixture directory behind in the OS temp folder.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Regression test: loading the JLPT dictionary must succeed while
 * `fs.readFileSync`, `fs.statSync` and `fs.existsSync` are replaced with stubs
 * that throw. The single entry in `term_meta_bank_4.json` is expected to
 * resolve to `N4`.
 *
 * The fixture is written before the stubs are installed, and the stubs are
 * always restored in `finally`.
 */
test('createJlptVocabularyLookup does not require synchronous fs APIs', async () => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-jlpt-dict-'));
  fs.writeFileSync(
    path.join(tempDir, 'term_meta_bank_4.json'),
    JSON.stringify([['見る', 1, { frequency: { displayValue: 3 } }]]),
  );
  fs.writeFileSync(path.join(tempDir, 'term_meta_bank_1.json'), JSON.stringify([]));
  fs.writeFileSync(path.join(tempDir, 'term_meta_bank_2.json'), JSON.stringify([]));
  fs.writeFileSync(path.join(tempDir, 'term_meta_bank_3.json'), JSON.stringify([]));
  fs.writeFileSync(path.join(tempDir, 'term_meta_bank_5.json'), JSON.stringify([]));

  // Keep the real implementations so they can be put back afterwards.
  const readFileSync = fs.readFileSync;
  const statSync = fs.statSync;
  const existsSync = fs.existsSync;
  (fs as unknown as Record<string, unknown>).readFileSync = () => {
    throw new Error('sync read disabled');
  };
  (fs as unknown as Record<string, unknown>).statSync = () => {
    throw new Error('sync stat disabled');
  };
  (fs as unknown as Record<string, unknown>).existsSync = () => {
    throw new Error('sync exists disabled');
  };

  try {
    const lookup = await createJlptVocabularyLookup({
      searchPaths: [tempDir],
      log: () => undefined,
    });
    assert.equal(lookup('見る'), 'N4');
  } finally {
    (fs as unknown as Record<string, unknown>).readFileSync = readFileSync;
    (fs as unknown as Record<string, unknown>).statSync = statSync;
    (fs as unknown as Record<string, unknown>).existsSync = existsSync;
    // Remove the fixture only after the real sync APIs are back in place.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
@@ -1,4 +1,4 @@
|
||||
import * as fs from 'fs';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'path';
|
||||
|
||||
import type { JlptLevel } from '../../types';
|
||||
@@ -24,6 +24,17 @@ const JLPT_LEVEL_PRECEDENCE: Record<JlptLevel, number> = {
|
||||
};
|
||||
|
||||
const NOOP_LOOKUP = (): null => null;
|
||||
const ENTRY_YIELD_INTERVAL = 5000;
|
||||
|
||||
/**
 * Reports whether `error` is an object whose `code` property equals `code`.
 *
 * @param error - The caught value; may be anything, including `null` or a primitive.
 * @param code - The error code to compare against (for example `'ENOENT'`).
 * @returns `true` only when `error` is a non-null object with a strictly equal `code`.
 */
function isErrorCode(error: unknown, code: string): boolean {
  return Boolean(error && typeof error === 'object' && (error as { code?: unknown }).code === code);
}
|
||||
|
||||
/**
 * Suspends the caller until a `setImmediate` callback fires, giving the event
 * loop a chance to run other queued work before the caller resumes.
 *
 * @returns A promise that resolves with no value once the immediate has run.
 */
async function yieldToEventLoop(): Promise<void> {
  await new Promise<void>((resolve) => {
    setImmediate(resolve);
  });
}
|
||||
|
||||
function normalizeJlptTerm(value: string): string {
|
||||
return value.trim();
|
||||
@@ -36,12 +47,12 @@ function hasFrequencyDisplayValue(meta: unknown): boolean {
|
||||
return Object.prototype.hasOwnProperty.call(frequency as Record<string, unknown>, 'displayValue');
|
||||
}
|
||||
|
||||
function addEntriesToMap(
|
||||
async function addEntriesToMap(
|
||||
rawEntries: unknown,
|
||||
level: JlptLevel,
|
||||
terms: Map<string, JlptLevel>,
|
||||
log: (message: string) => void,
|
||||
): void {
|
||||
): Promise<void> {
|
||||
const shouldUpdateLevel = (
|
||||
existingLevel: JlptLevel | undefined,
|
||||
incomingLevel: JlptLevel,
|
||||
@@ -53,7 +64,13 @@ function addEntriesToMap(
|
||||
return;
|
||||
}
|
||||
|
||||
let processedCount = 0;
|
||||
for (const rawEntry of rawEntries) {
|
||||
processedCount += 1;
|
||||
if (processedCount % ENTRY_YIELD_INTERVAL === 0) {
|
||||
await yieldToEventLoop();
|
||||
}
|
||||
|
||||
if (!Array.isArray(rawEntry)) {
|
||||
continue;
|
||||
}
|
||||
@@ -84,22 +101,31 @@ function addEntriesToMap(
|
||||
}
|
||||
}
|
||||
|
||||
function collectDictionaryFromPath(
|
||||
async function collectDictionaryFromPath(
|
||||
dictionaryPath: string,
|
||||
log: (message: string) => void,
|
||||
): Map<string, JlptLevel> {
|
||||
): Promise<Map<string, JlptLevel>> {
|
||||
const terms = new Map<string, JlptLevel>();
|
||||
|
||||
for (const bank of JLPT_BANK_FILES) {
|
||||
const bankPath = path.join(dictionaryPath, bank.filename);
|
||||
if (!fs.existsSync(bankPath)) {
|
||||
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||
try {
|
||||
if (!(await fs.stat(bankPath)).isFile()) {
|
||||
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||
continue;
|
||||
}
|
||||
} catch (error) {
|
||||
if (isErrorCode(error, 'ENOENT')) {
|
||||
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||
continue;
|
||||
}
|
||||
log(`Failed to inspect JLPT bank file ${bankPath}: ${String(error)}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
let rawText: string;
|
||||
try {
|
||||
rawText = fs.readFileSync(bankPath, 'utf-8');
|
||||
rawText = await fs.readFile(bankPath, 'utf-8');
|
||||
} catch {
|
||||
log(`Failed to read JLPT bank file ${bankPath}`);
|
||||
continue;
|
||||
@@ -107,6 +133,7 @@ function collectDictionaryFromPath(
|
||||
|
||||
let rawEntries: unknown;
|
||||
try {
|
||||
await yieldToEventLoop();
|
||||
rawEntries = JSON.parse(rawText) as unknown;
|
||||
} catch {
|
||||
log(`Failed to parse JLPT bank file as JSON: ${bankPath}`);
|
||||
@@ -119,7 +146,7 @@ function collectDictionaryFromPath(
|
||||
}
|
||||
|
||||
const beforeSize = terms.size;
|
||||
addEntriesToMap(rawEntries, bank.level, terms, log);
|
||||
await addEntriesToMap(rawEntries, bank.level, terms, log);
|
||||
if (terms.size === beforeSize) {
|
||||
log(`JLPT bank file contained no extractable entries: ${bankPath}`);
|
||||
}
|
||||
@@ -137,17 +164,21 @@ export async function createJlptVocabularyLookup(
|
||||
const resolvedBanks: string[] = [];
|
||||
for (const dictionaryPath of options.searchPaths) {
|
||||
attemptedPaths.push(dictionaryPath);
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!fs.statSync(dictionaryPath).isDirectory()) {
|
||||
let isDirectory = false;
|
||||
try {
|
||||
isDirectory = (await fs.stat(dictionaryPath)).isDirectory();
|
||||
} catch (error) {
|
||||
if (isErrorCode(error, 'ENOENT')) {
|
||||
continue;
|
||||
}
|
||||
options.log(`Failed to inspect JLPT dictionary path ${dictionaryPath}: ${String(error)}`);
|
||||
continue;
|
||||
}
|
||||
if (!isDirectory) continue;
|
||||
|
||||
foundDictionaryPathCount += 1;
|
||||
|
||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
const terms = await collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
if (terms.size > 0) {
|
||||
resolvedBanks.push(dictionaryPath);
|
||||
foundBankCount += 1;
|
||||
|
||||
Reference in New Issue
Block a user