mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-02 06:22:42 -08:00
fix(startup): async dictionary loading and unblock first tokenization
- Move JLPT/frequency dictionary init off sync fs APIs and add cooperative yielding during entry processing.
- Decouple first tokenization from full warmup by gating only on Yomitan readiness while MeCab/dictionary warmups continue in parallel.
- Update the mpv pause-until-ready OSD copy to tokenization-focused wording and refresh the gate regression assertions.
This commit is contained in:
@@ -3,6 +3,8 @@ local M = {}
|
|||||||
local OVERLAY_START_RETRY_DELAY_SECONDS = 0.2
|
local OVERLAY_START_RETRY_DELAY_SECONDS = 0.2
|
||||||
local OVERLAY_START_MAX_ATTEMPTS = 6
|
local OVERLAY_START_MAX_ATTEMPTS = 6
|
||||||
local AUTO_PLAY_READY_TIMEOUT_SECONDS = 15
|
local AUTO_PLAY_READY_TIMEOUT_SECONDS = 15
|
||||||
|
local AUTO_PLAY_READY_LOADING_OSD = "Loading subtitle tokenization..."
|
||||||
|
local AUTO_PLAY_READY_READY_OSD = "Subtitle tokenization ready"
|
||||||
|
|
||||||
function M.create(ctx)
|
function M.create(ctx)
|
||||||
local mp = ctx.mp
|
local mp = ctx.mp
|
||||||
@@ -90,7 +92,7 @@ function M.create(ctx)
|
|||||||
end
|
end
|
||||||
disarm_auto_play_ready_gate()
|
disarm_auto_play_ready_gate()
|
||||||
mp.set_property_native("pause", false)
|
mp.set_property_native("pause", false)
|
||||||
show_osd("Subtitle annotations loaded")
|
show_osd(AUTO_PLAY_READY_READY_OSD)
|
||||||
subminer_log("info", "process", "Resuming playback after startup gate: " .. tostring(reason or "ready"))
|
subminer_log("info", "process", "Resuming playback after startup gate: " .. tostring(reason or "ready"))
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -101,11 +103,11 @@ function M.create(ctx)
|
|||||||
end
|
end
|
||||||
state.auto_play_ready_gate_armed = true
|
state.auto_play_ready_gate_armed = true
|
||||||
mp.set_property_native("pause", true)
|
mp.set_property_native("pause", true)
|
||||||
show_osd("Loading subtitle annotations...")
|
show_osd(AUTO_PLAY_READY_LOADING_OSD)
|
||||||
if type(mp.add_periodic_timer) == "function" then
|
if type(mp.add_periodic_timer) == "function" then
|
||||||
state.auto_play_ready_osd_timer = mp.add_periodic_timer(2.5, function()
|
state.auto_play_ready_osd_timer = mp.add_periodic_timer(2.5, function()
|
||||||
if state.auto_play_ready_gate_armed then
|
if state.auto_play_ready_gate_armed then
|
||||||
show_osd("Loading subtitle annotations...")
|
show_osd(AUTO_PLAY_READY_LOADING_OSD)
|
||||||
end
|
end
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -551,7 +551,7 @@ do
|
|||||||
"autoplay-ready script message should resume mpv playback"
|
"autoplay-ready script message should resume mpv playback"
|
||||||
)
|
)
|
||||||
assert_true(
|
assert_true(
|
||||||
has_osd_message(recorded.osd, "SubMiner: Loading subtitle annotations..."),
|
has_osd_message(recorded.osd, "SubMiner: Loading subtitle tokenization..."),
|
||||||
"pause-until-ready auto-start should show loading OSD message"
|
"pause-until-ready auto-start should show loading OSD message"
|
||||||
)
|
)
|
||||||
assert_true(
|
assert_true(
|
||||||
@@ -559,7 +559,7 @@ do
|
|||||||
"pause-until-ready auto-start should avoid replacing loading OSD with generic starting OSD"
|
"pause-until-ready auto-start should avoid replacing loading OSD with generic starting OSD"
|
||||||
)
|
)
|
||||||
assert_true(
|
assert_true(
|
||||||
has_osd_message(recorded.osd, "SubMiner: Subtitle annotations loaded"),
|
has_osd_message(recorded.osd, "SubMiner: Subtitle tokenization ready"),
|
||||||
"autoplay-ready should show loaded OSD message"
|
"autoplay-ready should show loaded OSD message"
|
||||||
)
|
)
|
||||||
assert_true(
|
assert_true(
|
||||||
|
|||||||
@@ -129,3 +129,39 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r
|
|||||||
assert.equal(lookup('鍛える'), 3272);
|
assert.equal(lookup('鍛える'), 3272);
|
||||||
assert.equal(lookup('高み'), 9933);
|
assert.equal(lookup('高み'), 9933);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => {
|
||||||
|
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));
|
||||||
|
const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
|
||||||
|
fs.writeFileSync(bankPath, JSON.stringify([['猫', 1, { frequency: { displayValue: 42 } }]]));
|
||||||
|
|
||||||
|
const readFileSync = fs.readFileSync;
|
||||||
|
const readdirSync = fs.readdirSync;
|
||||||
|
const statSync = fs.statSync;
|
||||||
|
const existsSync = fs.existsSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).readFileSync = () => {
|
||||||
|
throw new Error('sync read disabled');
|
||||||
|
};
|
||||||
|
(fs as unknown as Record<string, unknown>).readdirSync = () => {
|
||||||
|
throw new Error('sync readdir disabled');
|
||||||
|
};
|
||||||
|
(fs as unknown as Record<string, unknown>).statSync = () => {
|
||||||
|
throw new Error('sync stat disabled');
|
||||||
|
};
|
||||||
|
(fs as unknown as Record<string, unknown>).existsSync = () => {
|
||||||
|
throw new Error('sync exists disabled');
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const lookup = await createFrequencyDictionaryLookup({
|
||||||
|
searchPaths: [tempDir],
|
||||||
|
log: () => undefined,
|
||||||
|
});
|
||||||
|
assert.equal(lookup('猫'), 42);
|
||||||
|
} finally {
|
||||||
|
(fs as unknown as Record<string, unknown>).readFileSync = readFileSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).readdirSync = readdirSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).statSync = statSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).existsSync = existsSync;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import * as fs from 'node:fs';
|
import * as fs from 'node:fs/promises';
|
||||||
import * as path from 'node:path';
|
import * as path from 'node:path';
|
||||||
|
|
||||||
export interface FrequencyDictionaryLookupOptions {
|
export interface FrequencyDictionaryLookupOptions {
|
||||||
@@ -13,6 +13,17 @@ interface FrequencyDictionaryEntry {
|
|||||||
|
|
||||||
const FREQUENCY_BANK_FILE_GLOB = /^term_meta_bank_.*\.json$/;
|
const FREQUENCY_BANK_FILE_GLOB = /^term_meta_bank_.*\.json$/;
|
||||||
const NOOP_LOOKUP = (): null => null;
|
const NOOP_LOOKUP = (): null => null;
|
||||||
|
const ENTRY_YIELD_INTERVAL = 5000;
|
||||||
|
|
||||||
|
function isErrorCode(error: unknown, code: string): boolean {
|
||||||
|
return Boolean(error && typeof error === 'object' && (error as { code?: unknown }).code === code);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function yieldToEventLoop(): Promise<void> {
|
||||||
|
await new Promise<void>((resolve) => {
|
||||||
|
setImmediate(resolve);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function normalizeFrequencyTerm(value: string): string {
|
function normalizeFrequencyTerm(value: string): string {
|
||||||
return value.trim().toLowerCase();
|
return value.trim().toLowerCase();
|
||||||
@@ -93,16 +104,22 @@ function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry |
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function addEntriesToMap(
|
async function addEntriesToMap(
|
||||||
rawEntries: unknown,
|
rawEntries: unknown,
|
||||||
terms: Map<string, number>,
|
terms: Map<string, number>,
|
||||||
): { duplicateCount: number } {
|
): Promise<{ duplicateCount: number }> {
|
||||||
if (!Array.isArray(rawEntries)) {
|
if (!Array.isArray(rawEntries)) {
|
||||||
return { duplicateCount: 0 };
|
return { duplicateCount: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
let duplicateCount = 0;
|
let duplicateCount = 0;
|
||||||
|
let processedCount = 0;
|
||||||
for (const rawEntry of rawEntries) {
|
for (const rawEntry of rawEntries) {
|
||||||
|
processedCount += 1;
|
||||||
|
if (processedCount % ENTRY_YIELD_INTERVAL === 0) {
|
||||||
|
await yieldToEventLoop();
|
||||||
|
}
|
||||||
|
|
||||||
const entry = asFrequencyDictionaryEntry(rawEntry);
|
const entry = asFrequencyDictionaryEntry(rawEntry);
|
||||||
if (!entry) {
|
if (!entry) {
|
||||||
continue;
|
continue;
|
||||||
@@ -119,15 +136,15 @@ function addEntriesToMap(
|
|||||||
return { duplicateCount };
|
return { duplicateCount };
|
||||||
}
|
}
|
||||||
|
|
||||||
function collectDictionaryFromPath(
|
async function collectDictionaryFromPath(
|
||||||
dictionaryPath: string,
|
dictionaryPath: string,
|
||||||
log: (message: string) => void,
|
log: (message: string) => void,
|
||||||
): Map<string, number> {
|
): Promise<Map<string, number>> {
|
||||||
const terms = new Map<string, number>();
|
const terms = new Map<string, number>();
|
||||||
|
|
||||||
let fileNames: string[];
|
let fileNames: string[];
|
||||||
try {
|
try {
|
||||||
fileNames = fs.readdirSync(dictionaryPath);
|
fileNames = await fs.readdir(dictionaryPath);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
log(`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`);
|
log(`Failed to read frequency dictionary directory ${dictionaryPath}: ${String(error)}`);
|
||||||
return terms;
|
return terms;
|
||||||
@@ -143,7 +160,7 @@ function collectDictionaryFromPath(
|
|||||||
const bankPath = path.join(dictionaryPath, bankFile);
|
const bankPath = path.join(dictionaryPath, bankFile);
|
||||||
let rawText: string;
|
let rawText: string;
|
||||||
try {
|
try {
|
||||||
rawText = fs.readFileSync(bankPath, 'utf-8');
|
rawText = await fs.readFile(bankPath, 'utf-8');
|
||||||
} catch {
|
} catch {
|
||||||
log(`Failed to read frequency dictionary file ${bankPath}`);
|
log(`Failed to read frequency dictionary file ${bankPath}`);
|
||||||
continue;
|
continue;
|
||||||
@@ -151,6 +168,7 @@ function collectDictionaryFromPath(
|
|||||||
|
|
||||||
let rawEntries: unknown;
|
let rawEntries: unknown;
|
||||||
try {
|
try {
|
||||||
|
await yieldToEventLoop();
|
||||||
rawEntries = JSON.parse(rawText) as unknown;
|
rawEntries = JSON.parse(rawText) as unknown;
|
||||||
} catch {
|
} catch {
|
||||||
log(`Failed to parse frequency dictionary file as JSON: ${bankPath}`);
|
log(`Failed to parse frequency dictionary file as JSON: ${bankPath}`);
|
||||||
@@ -158,7 +176,7 @@ function collectDictionaryFromPath(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const beforeSize = terms.size;
|
const beforeSize = terms.size;
|
||||||
const { duplicateCount } = addEntriesToMap(rawEntries, terms);
|
const { duplicateCount } = await addEntriesToMap(rawEntries, terms);
|
||||||
if (duplicateCount > 0) {
|
if (duplicateCount > 0) {
|
||||||
log(
|
log(
|
||||||
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
|
`Frequency dictionary ignored ${duplicateCount} duplicate term entr${
|
||||||
@@ -185,11 +203,11 @@ export async function createFrequencyDictionaryLookup(
|
|||||||
let isDirectory = false;
|
let isDirectory = false;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (!fs.existsSync(dictionaryPath)) {
|
isDirectory = (await fs.stat(dictionaryPath)).isDirectory();
|
||||||
|
} catch (error) {
|
||||||
|
if (isErrorCode(error, 'ENOENT')) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
isDirectory = fs.statSync(dictionaryPath).isDirectory();
|
|
||||||
} catch (error) {
|
|
||||||
options.log(
|
options.log(
|
||||||
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
|
`Failed to inspect frequency dictionary path ${dictionaryPath}: ${String(error)}`,
|
||||||
);
|
);
|
||||||
@@ -201,7 +219,7 @@ export async function createFrequencyDictionaryLookup(
|
|||||||
}
|
}
|
||||||
|
|
||||||
foundDictionaryPathCount += 1;
|
foundDictionaryPathCount += 1;
|
||||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
const terms = await collectDictionaryFromPath(dictionaryPath, options.log);
|
||||||
if (terms.size > 0) {
|
if (terms.size > 0) {
|
||||||
options.log(`Frequency dictionary loaded from ${dictionaryPath} (${terms.size} entries)`);
|
options.log(`Frequency dictionary loaded from ${dictionaryPath} (${terms.size} entries)`);
|
||||||
return (term: string): number | null => {
|
return (term: string): number | null => {
|
||||||
|
|||||||
72
src/core/services/jlpt-vocab.test.ts
Normal file
72
src/core/services/jlpt-vocab.test.ts
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import os from 'node:os';
|
||||||
|
import path from 'node:path';
|
||||||
|
import test from 'node:test';
|
||||||
|
|
||||||
|
import { createJlptVocabularyLookup } from './jlpt-vocab';
|
||||||
|
|
||||||
|
test('createJlptVocabularyLookup loads JLPT bank entries and resolves known levels', async () => {
|
||||||
|
const logs: string[] = [];
|
||||||
|
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-jlpt-dict-'));
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(tempDir, 'term_meta_bank_5.json'),
|
||||||
|
JSON.stringify([
|
||||||
|
['猫', 1, { frequency: { displayValue: 1 } }],
|
||||||
|
['犬', 2, { frequency: { displayValue: 2 } }],
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_1.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_2.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_3.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_4.json'), JSON.stringify([]));
|
||||||
|
|
||||||
|
const lookup = await createJlptVocabularyLookup({
|
||||||
|
searchPaths: [tempDir],
|
||||||
|
log: (message) => {
|
||||||
|
logs.push(message);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(lookup('猫'), 'N5');
|
||||||
|
assert.equal(lookup('犬'), 'N5');
|
||||||
|
assert.equal(lookup('鳥'), null);
|
||||||
|
assert.equal(logs.some((entry) => entry.includes('JLPT dictionary loaded from')), true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('createJlptVocabularyLookup does not require synchronous fs APIs', async () => {
|
||||||
|
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-jlpt-dict-'));
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(tempDir, 'term_meta_bank_4.json'),
|
||||||
|
JSON.stringify([['見る', 1, { frequency: { displayValue: 3 } }]]),
|
||||||
|
);
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_1.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_2.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_3.json'), JSON.stringify([]));
|
||||||
|
fs.writeFileSync(path.join(tempDir, 'term_meta_bank_5.json'), JSON.stringify([]));
|
||||||
|
|
||||||
|
const readFileSync = fs.readFileSync;
|
||||||
|
const statSync = fs.statSync;
|
||||||
|
const existsSync = fs.existsSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).readFileSync = () => {
|
||||||
|
throw new Error('sync read disabled');
|
||||||
|
};
|
||||||
|
(fs as unknown as Record<string, unknown>).statSync = () => {
|
||||||
|
throw new Error('sync stat disabled');
|
||||||
|
};
|
||||||
|
(fs as unknown as Record<string, unknown>).existsSync = () => {
|
||||||
|
throw new Error('sync exists disabled');
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const lookup = await createJlptVocabularyLookup({
|
||||||
|
searchPaths: [tempDir],
|
||||||
|
log: () => undefined,
|
||||||
|
});
|
||||||
|
assert.equal(lookup('見る'), 'N4');
|
||||||
|
} finally {
|
||||||
|
(fs as unknown as Record<string, unknown>).readFileSync = readFileSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).statSync = statSync;
|
||||||
|
(fs as unknown as Record<string, unknown>).existsSync = existsSync;
|
||||||
|
}
|
||||||
|
});
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import * as fs from 'fs';
|
import * as fs from 'node:fs/promises';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
|
||||||
import type { JlptLevel } from '../../types';
|
import type { JlptLevel } from '../../types';
|
||||||
@@ -24,6 +24,17 @@ const JLPT_LEVEL_PRECEDENCE: Record<JlptLevel, number> = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const NOOP_LOOKUP = (): null => null;
|
const NOOP_LOOKUP = (): null => null;
|
||||||
|
const ENTRY_YIELD_INTERVAL = 5000;
|
||||||
|
|
||||||
|
function isErrorCode(error: unknown, code: string): boolean {
|
||||||
|
return Boolean(error && typeof error === 'object' && (error as { code?: unknown }).code === code);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function yieldToEventLoop(): Promise<void> {
|
||||||
|
await new Promise<void>((resolve) => {
|
||||||
|
setImmediate(resolve);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function normalizeJlptTerm(value: string): string {
|
function normalizeJlptTerm(value: string): string {
|
||||||
return value.trim();
|
return value.trim();
|
||||||
@@ -36,12 +47,12 @@ function hasFrequencyDisplayValue(meta: unknown): boolean {
|
|||||||
return Object.prototype.hasOwnProperty.call(frequency as Record<string, unknown>, 'displayValue');
|
return Object.prototype.hasOwnProperty.call(frequency as Record<string, unknown>, 'displayValue');
|
||||||
}
|
}
|
||||||
|
|
||||||
function addEntriesToMap(
|
async function addEntriesToMap(
|
||||||
rawEntries: unknown,
|
rawEntries: unknown,
|
||||||
level: JlptLevel,
|
level: JlptLevel,
|
||||||
terms: Map<string, JlptLevel>,
|
terms: Map<string, JlptLevel>,
|
||||||
log: (message: string) => void,
|
log: (message: string) => void,
|
||||||
): void {
|
): Promise<void> {
|
||||||
const shouldUpdateLevel = (
|
const shouldUpdateLevel = (
|
||||||
existingLevel: JlptLevel | undefined,
|
existingLevel: JlptLevel | undefined,
|
||||||
incomingLevel: JlptLevel,
|
incomingLevel: JlptLevel,
|
||||||
@@ -53,7 +64,13 @@ function addEntriesToMap(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let processedCount = 0;
|
||||||
for (const rawEntry of rawEntries) {
|
for (const rawEntry of rawEntries) {
|
||||||
|
processedCount += 1;
|
||||||
|
if (processedCount % ENTRY_YIELD_INTERVAL === 0) {
|
||||||
|
await yieldToEventLoop();
|
||||||
|
}
|
||||||
|
|
||||||
if (!Array.isArray(rawEntry)) {
|
if (!Array.isArray(rawEntry)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -84,22 +101,31 @@ function addEntriesToMap(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function collectDictionaryFromPath(
|
async function collectDictionaryFromPath(
|
||||||
dictionaryPath: string,
|
dictionaryPath: string,
|
||||||
log: (message: string) => void,
|
log: (message: string) => void,
|
||||||
): Map<string, JlptLevel> {
|
): Promise<Map<string, JlptLevel>> {
|
||||||
const terms = new Map<string, JlptLevel>();
|
const terms = new Map<string, JlptLevel>();
|
||||||
|
|
||||||
for (const bank of JLPT_BANK_FILES) {
|
for (const bank of JLPT_BANK_FILES) {
|
||||||
const bankPath = path.join(dictionaryPath, bank.filename);
|
const bankPath = path.join(dictionaryPath, bank.filename);
|
||||||
if (!fs.existsSync(bankPath)) {
|
try {
|
||||||
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
if (!(await fs.stat(bankPath)).isFile()) {
|
||||||
|
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
if (isErrorCode(error, 'ENOENT')) {
|
||||||
|
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
log(`Failed to inspect JLPT bank file ${bankPath}: ${String(error)}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let rawText: string;
|
let rawText: string;
|
||||||
try {
|
try {
|
||||||
rawText = fs.readFileSync(bankPath, 'utf-8');
|
rawText = await fs.readFile(bankPath, 'utf-8');
|
||||||
} catch {
|
} catch {
|
||||||
log(`Failed to read JLPT bank file ${bankPath}`);
|
log(`Failed to read JLPT bank file ${bankPath}`);
|
||||||
continue;
|
continue;
|
||||||
@@ -107,6 +133,7 @@ function collectDictionaryFromPath(
|
|||||||
|
|
||||||
let rawEntries: unknown;
|
let rawEntries: unknown;
|
||||||
try {
|
try {
|
||||||
|
await yieldToEventLoop();
|
||||||
rawEntries = JSON.parse(rawText) as unknown;
|
rawEntries = JSON.parse(rawText) as unknown;
|
||||||
} catch {
|
} catch {
|
||||||
log(`Failed to parse JLPT bank file as JSON: ${bankPath}`);
|
log(`Failed to parse JLPT bank file as JSON: ${bankPath}`);
|
||||||
@@ -119,7 +146,7 @@ function collectDictionaryFromPath(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const beforeSize = terms.size;
|
const beforeSize = terms.size;
|
||||||
addEntriesToMap(rawEntries, bank.level, terms, log);
|
await addEntriesToMap(rawEntries, bank.level, terms, log);
|
||||||
if (terms.size === beforeSize) {
|
if (terms.size === beforeSize) {
|
||||||
log(`JLPT bank file contained no extractable entries: ${bankPath}`);
|
log(`JLPT bank file contained no extractable entries: ${bankPath}`);
|
||||||
}
|
}
|
||||||
@@ -137,17 +164,21 @@ export async function createJlptVocabularyLookup(
|
|||||||
const resolvedBanks: string[] = [];
|
const resolvedBanks: string[] = [];
|
||||||
for (const dictionaryPath of options.searchPaths) {
|
for (const dictionaryPath of options.searchPaths) {
|
||||||
attemptedPaths.push(dictionaryPath);
|
attemptedPaths.push(dictionaryPath);
|
||||||
if (!fs.existsSync(dictionaryPath)) {
|
let isDirectory = false;
|
||||||
continue;
|
try {
|
||||||
}
|
isDirectory = (await fs.stat(dictionaryPath)).isDirectory();
|
||||||
|
} catch (error) {
|
||||||
if (!fs.statSync(dictionaryPath).isDirectory()) {
|
if (isErrorCode(error, 'ENOENT')) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
options.log(`Failed to inspect JLPT dictionary path ${dictionaryPath}: ${String(error)}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (!isDirectory) continue;
|
||||||
|
|
||||||
foundDictionaryPathCount += 1;
|
foundDictionaryPathCount += 1;
|
||||||
|
|
||||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
const terms = await collectDictionaryFromPath(dictionaryPath, options.log);
|
||||||
if (terms.size > 0) {
|
if (terms.size > 0) {
|
||||||
resolvedBanks.push(dictionaryPath);
|
resolvedBanks.push(dictionaryPath);
|
||||||
foundBankCount += 1;
|
foundBankCount += 1;
|
||||||
|
|||||||
@@ -22,6 +22,14 @@ const BASE_METRICS: MpvSubtitleRenderMetrics = {
|
|||||||
osdDimensions: null,
|
osdDimensions: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function createDeferred(): { promise: Promise<void>; resolve: () => void } {
|
||||||
|
let resolve!: () => void;
|
||||||
|
const promise = new Promise<void>((nextResolve) => {
|
||||||
|
resolve = nextResolve;
|
||||||
|
});
|
||||||
|
return { promise, resolve };
|
||||||
|
}
|
||||||
|
|
||||||
test('composeMpvRuntimeHandlers returns callable handlers and forwards to injected deps', async () => {
|
test('composeMpvRuntimeHandlers returns callable handlers and forwards to injected deps', async () => {
|
||||||
const calls: string[] = [];
|
const calls: string[] = [];
|
||||||
let started = false;
|
let started = false;
|
||||||
@@ -516,3 +524,137 @@ test('composeMpvRuntimeHandlers runs tokenization warmup once across sequential
|
|||||||
assert.equal(prewarmJlptCalls, 1);
|
assert.equal(prewarmJlptCalls, 1);
|
||||||
assert.equal(prewarmFrequencyCalls, 1);
|
assert.equal(prewarmFrequencyCalls, 1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('composeMpvRuntimeHandlers does not block first tokenization on dictionary or MeCab warmup', async () => {
|
||||||
|
const jlptDeferred = createDeferred();
|
||||||
|
const frequencyDeferred = createDeferred();
|
||||||
|
const mecabDeferred = createDeferred();
|
||||||
|
let tokenizeResolved = false;
|
||||||
|
|
||||||
|
const composed = composeMpvRuntimeHandlers<
|
||||||
|
{ connect: () => void; on: () => void },
|
||||||
|
{ isKnownWord: () => boolean },
|
||||||
|
{ text: string }
|
||||||
|
>({
|
||||||
|
bindMpvMainEventHandlersMainDeps: {
|
||||||
|
appState: {
|
||||||
|
initialArgs: null,
|
||||||
|
overlayRuntimeInitialized: true,
|
||||||
|
mpvClient: null,
|
||||||
|
immersionTracker: null,
|
||||||
|
subtitleTimingTracker: null,
|
||||||
|
currentSubText: '',
|
||||||
|
currentSubAssText: '',
|
||||||
|
playbackPaused: null,
|
||||||
|
previousSecondarySubVisibility: null,
|
||||||
|
},
|
||||||
|
getQuitOnDisconnectArmed: () => false,
|
||||||
|
scheduleQuitCheck: () => {},
|
||||||
|
quitApp: () => {},
|
||||||
|
reportJellyfinRemoteStopped: () => {},
|
||||||
|
syncOverlayMpvSubtitleSuppression: () => {},
|
||||||
|
maybeRunAnilistPostWatchUpdate: async () => {},
|
||||||
|
logSubtitleTimingError: () => {},
|
||||||
|
broadcastToOverlayWindows: () => {},
|
||||||
|
onSubtitleChange: () => {},
|
||||||
|
refreshDiscordPresence: () => {},
|
||||||
|
ensureImmersionTrackerInitialized: () => {},
|
||||||
|
updateCurrentMediaPath: () => {},
|
||||||
|
restoreMpvSubVisibility: () => {},
|
||||||
|
getCurrentAnilistMediaKey: () => null,
|
||||||
|
resetAnilistMediaTracking: () => {},
|
||||||
|
maybeProbeAnilistDuration: () => {},
|
||||||
|
ensureAnilistMediaGuess: () => {},
|
||||||
|
syncImmersionMediaState: () => {},
|
||||||
|
updateCurrentMediaTitle: () => {},
|
||||||
|
resetAnilistMediaGuessState: () => {},
|
||||||
|
reportJellyfinRemoteProgress: () => {},
|
||||||
|
updateSubtitleRenderMetrics: () => {},
|
||||||
|
},
|
||||||
|
mpvClientRuntimeServiceFactoryMainDeps: {
|
||||||
|
createClient: class {
|
||||||
|
connect(): void {}
|
||||||
|
on(): void {}
|
||||||
|
},
|
||||||
|
getSocketPath: () => '/tmp/mpv.sock',
|
||||||
|
getResolvedConfig: () => ({ auto_start_overlay: false }),
|
||||||
|
isAutoStartOverlayEnabled: () => false,
|
||||||
|
setOverlayVisible: () => {},
|
||||||
|
isVisibleOverlayVisible: () => false,
|
||||||
|
getReconnectTimer: () => null,
|
||||||
|
setReconnectTimer: () => {},
|
||||||
|
},
|
||||||
|
updateMpvSubtitleRenderMetricsMainDeps: {
|
||||||
|
getCurrentMetrics: () => BASE_METRICS,
|
||||||
|
setCurrentMetrics: () => {},
|
||||||
|
applyPatch: (current, patch) => ({ next: { ...current, ...patch }, changed: true }),
|
||||||
|
broadcastMetrics: () => {},
|
||||||
|
},
|
||||||
|
tokenizer: {
|
||||||
|
buildTokenizerDepsMainDeps: {
|
||||||
|
getYomitanExt: () => null,
|
||||||
|
getYomitanParserWindow: () => null,
|
||||||
|
setYomitanParserWindow: () => {},
|
||||||
|
getYomitanParserReadyPromise: () => null,
|
||||||
|
setYomitanParserReadyPromise: () => {},
|
||||||
|
getYomitanParserInitPromise: () => null,
|
||||||
|
setYomitanParserInitPromise: () => {},
|
||||||
|
isKnownWord: () => false,
|
||||||
|
recordLookup: () => {},
|
||||||
|
getKnownWordMatchMode: () => 'headword',
|
||||||
|
getNPlusOneEnabled: () => true,
|
||||||
|
getMinSentenceWordsForNPlusOne: () => 3,
|
||||||
|
getJlptLevel: () => null,
|
||||||
|
getJlptEnabled: () => true,
|
||||||
|
getFrequencyDictionaryEnabled: () => true,
|
||||||
|
getFrequencyDictionaryMatchMode: () => 'headword',
|
||||||
|
getFrequencyRank: () => null,
|
||||||
|
getYomitanGroupDebugEnabled: () => false,
|
||||||
|
getMecabTokenizer: () => null,
|
||||||
|
},
|
||||||
|
createTokenizerRuntimeDeps: () => ({ isKnownWord: () => false }),
|
||||||
|
tokenizeSubtitle: async (text) => ({ text }),
|
||||||
|
createMecabTokenizerAndCheckMainDeps: {
|
||||||
|
getMecabTokenizer: () => null,
|
||||||
|
setMecabTokenizer: () => {},
|
||||||
|
createMecabTokenizer: () => ({ id: 'mecab' }),
|
||||||
|
checkAvailability: async () => mecabDeferred.promise,
|
||||||
|
},
|
||||||
|
prewarmSubtitleDictionariesMainDeps: {
|
||||||
|
ensureJlptDictionaryLookup: async () => jlptDeferred.promise,
|
||||||
|
ensureFrequencyDictionaryLookup: async () => frequencyDeferred.promise,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
warmups: {
|
||||||
|
launchBackgroundWarmupTaskMainDeps: {
|
||||||
|
now: () => 0,
|
||||||
|
logDebug: () => {},
|
||||||
|
logWarn: () => {},
|
||||||
|
},
|
||||||
|
startBackgroundWarmupsMainDeps: {
|
||||||
|
getStarted: () => false,
|
||||||
|
setStarted: () => {},
|
||||||
|
isTexthookerOnlyMode: () => false,
|
||||||
|
ensureYomitanExtensionLoaded: async () => undefined,
|
||||||
|
shouldWarmupMecab: () => false,
|
||||||
|
shouldWarmupYomitanExtension: () => false,
|
||||||
|
shouldWarmupSubtitleDictionaries: () => false,
|
||||||
|
shouldWarmupJellyfinRemoteSession: () => false,
|
||||||
|
shouldAutoConnectJellyfinRemote: () => false,
|
||||||
|
startJellyfinRemoteSession: async () => {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const tokenizePromise = composed.tokenizeSubtitle('first line').then(() => {
|
||||||
|
tokenizeResolved = true;
|
||||||
|
});
|
||||||
|
await new Promise<void>((resolve) => setImmediate(resolve));
|
||||||
|
assert.equal(tokenizeResolved, true);
|
||||||
|
|
||||||
|
jlptDeferred.resolve();
|
||||||
|
frequencyDeferred.resolve();
|
||||||
|
mecabDeferred.resolve();
|
||||||
|
await tokenizePromise;
|
||||||
|
await composed.startTokenizationWarmups();
|
||||||
|
});
|
||||||
|
|||||||
@@ -142,21 +142,40 @@ export function composeMpvRuntimeHandlers<
|
|||||||
return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
|
return nPlusOneEnabled || jlptEnabled || frequencyEnabled;
|
||||||
};
|
};
|
||||||
let tokenizationWarmupInFlight: Promise<void> | null = null;
|
let tokenizationWarmupInFlight: Promise<void> | null = null;
|
||||||
|
let tokenizationPrerequisiteWarmupInFlight: Promise<void> | null = null;
|
||||||
|
let tokenizationPrerequisiteWarmupCompleted = false;
|
||||||
let tokenizationWarmupCompleted = false;
|
let tokenizationWarmupCompleted = false;
|
||||||
|
const ensureTokenizationPrerequisites = (): Promise<void> => {
|
||||||
|
if (tokenizationPrerequisiteWarmupCompleted) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
if (!tokenizationPrerequisiteWarmupInFlight) {
|
||||||
|
tokenizationPrerequisiteWarmupInFlight = options.warmups.startBackgroundWarmupsMainDeps
|
||||||
|
.ensureYomitanExtensionLoaded()
|
||||||
|
.then(() => {
|
||||||
|
tokenizationPrerequisiteWarmupCompleted = true;
|
||||||
|
})
|
||||||
|
.finally(() => {
|
||||||
|
tokenizationPrerequisiteWarmupInFlight = null;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return tokenizationPrerequisiteWarmupInFlight;
|
||||||
|
};
|
||||||
const startTokenizationWarmups = (): Promise<void> => {
|
const startTokenizationWarmups = (): Promise<void> => {
|
||||||
if (tokenizationWarmupCompleted) {
|
if (tokenizationWarmupCompleted) {
|
||||||
return Promise.resolve();
|
return Promise.resolve();
|
||||||
}
|
}
|
||||||
if (!tokenizationWarmupInFlight) {
|
if (!tokenizationWarmupInFlight) {
|
||||||
tokenizationWarmupInFlight = (async () => {
|
tokenizationWarmupInFlight = (async () => {
|
||||||
await options.warmups.startBackgroundWarmupsMainDeps.ensureYomitanExtensionLoaded();
|
const warmupTasks: Promise<unknown>[] = [ensureTokenizationPrerequisites()];
|
||||||
if (
|
if (
|
||||||
shouldInitializeMecabForAnnotations() &&
|
shouldInitializeMecabForAnnotations() &&
|
||||||
!options.tokenizer.createMecabTokenizerAndCheckMainDeps.getMecabTokenizer()
|
!options.tokenizer.createMecabTokenizerAndCheckMainDeps.getMecabTokenizer()
|
||||||
) {
|
) {
|
||||||
await createMecabTokenizerAndCheck().catch(() => {});
|
warmupTasks.push(createMecabTokenizerAndCheck().catch(() => {}));
|
||||||
}
|
}
|
||||||
await prewarmSubtitleDictionaries({ showLoadingOsd: true });
|
warmupTasks.push(prewarmSubtitleDictionaries({ showLoadingOsd: true }).catch(() => {}));
|
||||||
|
await Promise.all(warmupTasks);
|
||||||
tokenizationWarmupCompleted = true;
|
tokenizationWarmupCompleted = true;
|
||||||
})().finally(() => {
|
})().finally(() => {
|
||||||
tokenizationWarmupInFlight = null;
|
tokenizationWarmupInFlight = null;
|
||||||
@@ -165,9 +184,8 @@ export function composeMpvRuntimeHandlers<
|
|||||||
return tokenizationWarmupInFlight;
|
return tokenizationWarmupInFlight;
|
||||||
};
|
};
|
||||||
const tokenizeSubtitle = async (text: string): Promise<TTokenizedSubtitle> => {
|
const tokenizeSubtitle = async (text: string): Promise<TTokenizedSubtitle> => {
|
||||||
if (!tokenizationWarmupCompleted) {
|
if (!tokenizationWarmupCompleted) void startTokenizationWarmups();
|
||||||
await startTokenizationWarmups();
|
await ensureTokenizationPrerequisites();
|
||||||
}
|
|
||||||
return options.tokenizer.tokenizeSubtitle(
|
return options.tokenizer.tokenizeSubtitle(
|
||||||
text,
|
text,
|
||||||
options.tokenizer.createTokenizerRuntimeDeps(buildTokenizerDepsHandler()),
|
options.tokenizer.createTokenizerRuntimeDeps(buildTokenizerDepsHandler()),
|
||||||
|
|||||||
Reference in New Issue
Block a user