feat(anki): add proxy transport and tokenizer annotation controls

This commit is contained in:
2026-02-27 21:25:26 -08:00
parent 34a0feae71
commit f8e961d105
26 changed files with 1453 additions and 60 deletions

View File

@@ -0,0 +1,133 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { AnkiConnectProxyServer } from './anki-connect-proxy';
/**
 * Polls `condition` until it returns true or `timeoutMs` elapses.
 *
 * The condition is evaluated at least once (even when `timeoutMs` is 0)
 * and re-checked after every sleep, so a condition that becomes true
 * during the final polling interval no longer produces a spurious
 * timeout (the original only checked before sleeping).
 *
 * @param condition - Synchronous predicate to poll.
 * @param timeoutMs - Maximum time to wait before failing.
 * @param intervalMs - Delay between polls.
 * @throws Error when the condition never becomes true within the timeout.
 */
async function waitForCondition(
  condition: () => boolean,
  timeoutMs = 2000,
  intervalMs = 10,
): Promise<void> {
  const startedAt = Date.now();
  // Immediate first check covers timeoutMs <= 0 with an already-true condition.
  if (condition()) return;
  while (Date.now() - startedAt < timeoutMs) {
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
    // Check after the sleep so the last interval is never wasted.
    if (condition()) return;
  }
  throw new Error('Timed out waiting for condition');
}
test('proxy enqueues addNote result for enrichment', async () => {
  // Collect every note ID handed to the enrichment callback.
  const enrichedIds: number[] = [];
  const noop = (): undefined => undefined;
  const proxy = new AnkiConnectProxyServer({
    shouldAutoUpdateNewCards: () => true,
    processNewCard: async (noteId) => {
      enrichedIds.push(noteId);
    },
    logInfo: noop,
    logWarn: noop,
    logError: noop,
  });
  const internals = proxy as unknown as {
    maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
  };
  const upstreamResponse = Buffer.from(JSON.stringify({ result: 42, error: null }), 'utf8');
  internals.maybeEnqueueFromRequest({ action: 'addNote' }, upstreamResponse);
  await waitForCondition(() => enrichedIds.length === 1);
  assert.deepEqual(enrichedIds, [42]);
});
test('proxy de-duplicates addNotes IDs within the same response', async () => {
  const enrichedIds: number[] = [];
  const noop = (): undefined => undefined;
  const proxy = new AnkiConnectProxyServer({
    shouldAutoUpdateNewCards: () => true,
    processNewCard: async (noteId) => {
      enrichedIds.push(noteId);
      // Simulate slow enrichment so queue processing overlaps enqueueing.
      await new Promise((resolve) => setTimeout(resolve, 5));
    },
    logInfo: noop,
    logWarn: noop,
    logError: noop,
  });
  const internals = proxy as unknown as {
    maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
  };
  // 101 repeats and one entry is null; only the unique valid IDs survive.
  const responseBody = JSON.stringify({ result: [101, 102, 101, null], error: null });
  internals.maybeEnqueueFromRequest({ action: 'addNotes' }, Buffer.from(responseBody, 'utf8'));
  await waitForCondition(() => enrichedIds.length === 2);
  assert.deepEqual(enrichedIds, [101, 102]);
});
test('proxy skips auto-enrichment when auto-update is disabled', async () => {
  const enrichedIds: number[] = [];
  const noop = (): undefined => undefined;
  const proxy = new AnkiConnectProxyServer({
    shouldAutoUpdateNewCards: () => false,
    processNewCard: async (noteId) => {
      enrichedIds.push(noteId);
    },
    logInfo: noop,
    logWarn: noop,
    logError: noop,
  });
  const internals = proxy as unknown as {
    maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
  };
  internals.maybeEnqueueFromRequest(
    { action: 'addNote' },
    Buffer.from(JSON.stringify({ result: 303, error: null }), 'utf8'),
  );
  // Give the queue a chance to (incorrectly) run before asserting nothing happened.
  await new Promise((resolve) => setTimeout(resolve, 30));
  assert.deepEqual(enrichedIds, []);
});
test('proxy ignores addNote when upstream response reports error', async () => {
  const enrichedIds: number[] = [];
  const noop = (): undefined => undefined;
  const proxy = new AnkiConnectProxyServer({
    shouldAutoUpdateNewCards: () => true,
    processNewCard: async (noteId) => {
      enrichedIds.push(noteId);
    },
    logInfo: noop,
    logWarn: noop,
    logError: noop,
  });
  const internals = proxy as unknown as {
    maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
  };
  // A non-null error field means the note was not actually created upstream.
  internals.maybeEnqueueFromRequest(
    { action: 'addNote' },
    Buffer.from(JSON.stringify({ result: 123, error: 'duplicate' }), 'utf8'),
  );
  await new Promise((resolve) => setTimeout(resolve, 30));
  assert.deepEqual(enrichedIds, []);
});
test('proxy detects self-referential loop configuration', () => {
  const noop = (): undefined => undefined;
  const proxy = new AnkiConnectProxyServer({
    shouldAutoUpdateNewCards: () => true,
    processNewCard: async () => undefined,
    logInfo: noop,
    logWarn: noop,
    logError: noop,
  });
  const internals = proxy as unknown as {
    isSelfReferentialProxy: (options: {
      host: string;
      port: number;
      upstreamUrl: string;
    }) => boolean;
  };
  // localhost and 127.0.0.1 must be treated as the same host.
  const selfReferential = internals.isSelfReferentialProxy({
    host: '127.0.0.1',
    port: 8766,
    upstreamUrl: 'http://localhost:8766',
  });
  assert.equal(selfReferential, true);
});

View File

@@ -0,0 +1,314 @@
import http, { IncomingMessage, ServerResponse } from 'node:http';
import axios, { AxiosInstance } from 'axios';
// Bind address and upstream target used when starting the proxy.
interface StartProxyOptions {
host: string;
port: number;
upstreamUrl: string;
}
// Shape of a standard AnkiConnect JSON response body ({ result, error }).
interface AnkiConnectEnvelope {
result: unknown;
error: unknown;
}
// Injected dependencies, so the proxy stays testable and free of globals.
export interface AnkiConnectProxyServerDeps {
// Feature flag: when false, successful addNote(s) responses are not enqueued.
shouldAutoUpdateNewCards: () => boolean;
// Invoked once per newly created note ID; rejections are logged, not rethrown.
processNewCard: (noteId: number) => Promise<void>;
logInfo: (message: string, ...args: unknown[]) => void;
logWarn: (message: string, ...args: unknown[]) => void;
logError: (message: string, ...args: unknown[]) => void;
}
/**
 * AnkiConnect-compatible HTTP proxy.
 *
 * Sits between AnkiConnect clients (e.g. Yomitan) and the real AnkiConnect
 * endpoint: every request is forwarded to the upstream URL and the upstream
 * response is relayed back. Successful `addNote`/`addNotes` responses are
 * additionally inspected so freshly created note IDs can be queued for
 * automatic enrichment via `deps.processNewCard`.
 */
export class AnkiConnectProxyServer {
  private server: http.Server | null = null;
  private client: AxiosInstance;
  /** FIFO queue of note IDs awaiting enrichment. */
  private pendingNoteIds: number[] = [];
  /** Mirrors pendingNoteIds for O(1) duplicate checks. */
  private pendingNoteIdSet = new Set<number>();
  /** Notes currently being enriched; blocks re-enqueueing while in flight. */
  private inFlightNoteIds = new Set<number>();
  /** True while the async queue drain loop is running. */
  private processingQueue = false;

  /**
   * Headers that must not be copied from the upstream response. Axios has
   * already consumed the transfer framing and (by default) decompressed the
   * body, so replaying `transfer-encoding`/`content-encoding` would describe
   * a payload we are not actually sending; `content-length` is recomputed by
   * Node for the relayed body, and `connection`/`keep-alive` are hop-by-hop
   * headers that apply only to the upstream connection.
   */
  private static readonly NON_FORWARDABLE_HEADERS = new Set([
    'content-length',
    'transfer-encoding',
    'content-encoding',
    'connection',
    'keep-alive',
  ]);

  constructor(private readonly deps: AnkiConnectProxyServerDeps) {
    this.client = axios.create({
      timeout: 15000,
      // Relay upstream error statuses verbatim instead of throwing.
      validateStatus: () => true,
      responseType: 'arraybuffer',
    });
  }

  /** Whether the proxy server is currently listening. */
  get isRunning(): boolean {
    return this.server !== null;
  }

  /**
   * Starts the proxy (restarting cleanly if already running). Refuses to
   * start when the upstream URL resolves to the proxy's own bind address,
   * which would create an infinite request loop.
   */
  start(options: StartProxyOptions): void {
    this.stop();
    if (this.isSelfReferentialProxy(options)) {
      this.deps.logError(
        '[anki-proxy] Proxy upstream points to proxy host/port; refusing to start to avoid loop.',
      );
      return;
    }
    this.server = http.createServer((req, res) => {
      void this.handleRequest(req, res, options.upstreamUrl);
    });
    this.server.on('error', (error) => {
      this.deps.logError('[anki-proxy] Server error:', (error as Error).message);
    });
    this.server.listen(options.port, options.host, () => {
      this.deps.logInfo(
        `[anki-proxy] Listening on http://${options.host}:${options.port} -> ${options.upstreamUrl}`,
      );
    });
  }

  /** Stops the server (if running) and discards all queued/in-flight state. */
  stop(): void {
    if (this.server) {
      this.server.close();
      this.server = null;
      this.deps.logInfo('[anki-proxy] Stopped');
    }
    this.pendingNoteIds = [];
    this.pendingNoteIdSet.clear();
    this.inFlightNoteIds.clear();
    this.processingQueue = false;
  }

  /**
   * Detects the misconfiguration where the upstream URL points back at the
   * proxy's own bind host/port. `localhost` and `127.0.0.1` are treated as
   * equivalent. Returns false for unparsable upstream URLs — those fail
   * later with a clearer forwarding error.
   */
  private isSelfReferentialProxy(options: StartProxyOptions): boolean {
    try {
      const upstream = new URL(options.upstreamUrl);
      const normalizedUpstreamHost = upstream.hostname.toLowerCase();
      const normalizedBindHost = options.host.toLowerCase();
      // URL.port is '' for scheme-default ports, so fall back explicitly.
      const upstreamPort =
        upstream.port.length > 0
          ? Number(upstream.port)
          : upstream.protocol === 'https:'
            ? 443
            : 80;
      const hostMatches =
        normalizedUpstreamHost === normalizedBindHost ||
        (normalizedUpstreamHost === 'localhost' && normalizedBindHost === '127.0.0.1') ||
        (normalizedUpstreamHost === '127.0.0.1' && normalizedBindHost === 'localhost');
      return hostMatches && upstreamPort === options.port;
    } catch {
      return false;
    }
  }

  /**
   * Forwards one client request to the upstream endpoint and relays the
   * response. Answers CORS preflights locally and rejects methods other
   * than GET/POST. Successful POST responses are inspected for newly added
   * note IDs.
   */
  private async handleRequest(
    req: IncomingMessage,
    res: ServerResponse<IncomingMessage>,
    upstreamUrl: string,
  ): Promise<void> {
    this.setCorsHeaders(res);
    if (req.method === 'OPTIONS') {
      // CORS preflight: answered locally, never forwarded upstream.
      res.statusCode = 204;
      res.end();
      return;
    }
    if (!req.method || (req.method !== 'GET' && req.method !== 'POST')) {
      res.statusCode = 405;
      res.end('Method Not Allowed');
      return;
    }
    let rawBody: Buffer = Buffer.alloc(0);
    if (req.method === 'POST') {
      rawBody = await this.readRequestBody(req);
    }
    // Parse the client request up front so enrichment can inspect the
    // action even after the raw buffer has been handed to axios.
    let requestJson: Record<string, unknown> | null = null;
    if (req.method === 'POST' && rawBody.length > 0) {
      requestJson = this.tryParseJson(rawBody);
    }
    try {
      const targetUrl = new URL(req.url || '/', upstreamUrl).toString();
      const contentType =
        typeof req.headers['content-type'] === 'string'
          ? req.headers['content-type']
          : 'application/json';
      const upstreamResponse = await this.client.request<ArrayBuffer>({
        url: targetUrl,
        method: req.method,
        data: req.method === 'POST' ? rawBody : undefined,
        headers: {
          'content-type': contentType,
        },
      });
      const responseBody: Buffer = Buffer.isBuffer(upstreamResponse.data)
        ? upstreamResponse.data
        : Buffer.from(new Uint8Array(upstreamResponse.data));
      this.copyUpstreamHeaders(res, upstreamResponse.headers as Record<string, unknown>);
      res.statusCode = upstreamResponse.status;
      res.end(responseBody);
      if (req.method === 'POST') {
        this.maybeEnqueueFromRequest(requestJson, responseBody);
      }
    } catch (error) {
      this.deps.logWarn('[anki-proxy] Failed to forward request:', (error as Error).message);
      res.statusCode = 502;
      res.end('Bad Gateway');
    }
  }

  /**
   * Inspects a forwarded addNote/addNotes exchange and enqueues any newly
   * created note IDs for enrichment. No-ops when auto-update is disabled,
   * the request is not an add action, or the upstream reported an error.
   */
  private maybeEnqueueFromRequest(
    requestJson: Record<string, unknown> | null,
    responseBody: Buffer,
  ): void {
    if (!requestJson || !this.deps.shouldAutoUpdateNewCards()) {
      return;
    }
    const action =
      typeof requestJson.action === 'string' ? requestJson.action : String(requestJson.action ?? '');
    if (action !== 'addNote' && action !== 'addNotes') {
      return;
    }
    const responseJson = this.tryParseJson(responseBody) as AnkiConnectEnvelope | null;
    // AnkiConnect signals success with error === null; anything else means
    // the note(s) were not created.
    if (!responseJson || responseJson.error !== null) {
      return;
    }
    const noteIds =
      action === 'addNote'
        ? this.collectSingleResultId(responseJson.result)
        : this.collectBatchResultIds(responseJson.result);
    if (noteIds.length === 0) {
      return;
    }
    this.enqueueNotes(noteIds);
  }

  /** Returns [id] for a valid positive-integer addNote result, else []. */
  private collectSingleResultId(value: unknown): number[] {
    if (typeof value === 'number' && Number.isInteger(value) && value > 0) {
      return [value];
    }
    return [];
  }

  /**
   * Filters an addNotes result array down to valid positive-integer IDs
   * (entries are null for notes Anki refused to create).
   */
  private collectBatchResultIds(value: unknown): number[] {
    if (!Array.isArray(value)) {
      return [];
    }
    return value.filter((entry): entry is number => {
      return typeof entry === 'number' && Number.isInteger(entry) && entry > 0;
    });
  }

  /**
   * Adds note IDs to the enrichment queue, skipping any that are already
   * queued or in flight, then kicks the drain loop.
   */
  private enqueueNotes(noteIds: number[]): void {
    let enqueuedCount = 0;
    for (const noteId of noteIds) {
      if (this.pendingNoteIdSet.has(noteId) || this.inFlightNoteIds.has(noteId)) {
        continue;
      }
      this.pendingNoteIds.push(noteId);
      this.pendingNoteIdSet.add(noteId);
      enqueuedCount += 1;
    }
    if (enqueuedCount === 0) {
      return;
    }
    this.deps.logInfo(`[anki-proxy] Enqueued ${enqueuedCount} note(s) for enrichment`);
    this.processQueue();
  }

  /**
   * Drains the queue one note at a time. Re-entrant safe: only one drain
   * loop runs at a time, and a trailing re-check catches notes enqueued
   * while the loop was shutting down.
   */
  private processQueue(): void {
    if (this.processingQueue) {
      return;
    }
    this.processingQueue = true;
    void (async () => {
      try {
        while (this.pendingNoteIds.length > 0) {
          const noteId = this.pendingNoteIds.shift();
          if (noteId === undefined) {
            continue;
          }
          this.pendingNoteIdSet.delete(noteId);
          // The flag may have been turned off after enqueueing; re-check.
          if (!this.deps.shouldAutoUpdateNewCards()) {
            continue;
          }
          this.inFlightNoteIds.add(noteId);
          try {
            await this.deps.processNewCard(noteId);
          } catch (error) {
            this.deps.logWarn(
              `[anki-proxy] Failed to auto-enrich note ${noteId}:`,
              (error as Error).message,
            );
          } finally {
            this.inFlightNoteIds.delete(noteId);
          }
        }
      } finally {
        this.processingQueue = false;
        if (this.pendingNoteIds.length > 0) {
          this.processQueue();
        }
      }
    })();
  }

  /** Buffers the full request body. NOTE(review): unbounded — assumes trusted local clients. */
  private async readRequestBody(req: IncomingMessage): Promise<Buffer> {
    const chunks: Buffer[] = [];
    for await (const chunk of req) {
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    }
    return Buffer.concat(chunks);
  }

  /** Parses a buffer as JSON, returning null for empty/invalid/non-object payloads. */
  private tryParseJson(rawBody: Buffer): Record<string, unknown> | null {
    if (rawBody.length === 0) {
      return null;
    }
    try {
      const parsed = JSON.parse(rawBody.toString('utf8'));
      return parsed && typeof parsed === 'object' ? (parsed as Record<string, unknown>) : null;
    } catch {
      return null;
    }
  }

  /** Permissive CORS so browser-based AnkiConnect clients can reach the proxy. */
  private setCorsHeaders(res: ServerResponse<IncomingMessage>): void {
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
    res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS');
  }

  /**
   * Copies upstream response headers to the client response, skipping
   * framing/hop-by-hop headers that no longer match the relayed body
   * (see NON_FORWARDABLE_HEADERS). Previously only `content-length` was
   * skipped, which could forward a stale `transfer-encoding: chunked` or
   * `content-encoding: gzip` for an already-decoded body.
   */
  private copyUpstreamHeaders(
    res: ServerResponse<IncomingMessage>,
    headers: Record<string, unknown>,
  ): void {
    for (const [key, value] of Object.entries(headers)) {
      if (value === undefined) {
        continue;
      }
      if (AnkiConnectProxyServer.NON_FORWARDABLE_HEADERS.has(key.toLowerCase())) {
        continue;
      }
      if (Array.isArray(value)) {
        res.setHeader(
          key,
          value.map((entry) => String(entry)),
        );
      } else {
        res.setHeader(key, String(value));
      }
    }
  }
}

View File

@@ -8,6 +8,12 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick<
enabled: false,
url: 'http://127.0.0.1:8765',
pollingRate: 3000,
proxy: {
enabled: false,
host: '127.0.0.1',
port: 8766,
upstreamUrl: 'http://127.0.0.1:8765',
},
tags: ['SubMiner'],
fields: {
audio: 'ExpressionAudio',

View File

@@ -18,6 +18,30 @@ export function buildIntegrationConfigOptionRegistry(
defaultValue: defaultConfig.ankiConnect.pollingRate,
description: 'Polling interval in milliseconds.',
},
{
path: 'ankiConnect.proxy.enabled',
kind: 'boolean',
defaultValue: defaultConfig.ankiConnect.proxy.enabled,
description: 'Enable local AnkiConnect-compatible proxy for push-based auto-enrichment.',
},
{
path: 'ankiConnect.proxy.host',
kind: 'string',
defaultValue: defaultConfig.ankiConnect.proxy.host,
description: 'Bind host for local AnkiConnect proxy.',
},
{
path: 'ankiConnect.proxy.port',
kind: 'number',
defaultValue: defaultConfig.ankiConnect.proxy.port,
description: 'Bind port for local AnkiConnect proxy.',
},
{
path: 'ankiConnect.proxy.upstreamUrl',
kind: 'string',
defaultValue: defaultConfig.ankiConnect.proxy.upstreamUrl,
description: 'Upstream AnkiConnect URL proxied by local AnkiConnect proxy.',
},
{
path: 'ankiConnect.tags',
kind: 'array',

View File

@@ -66,3 +66,44 @@ test('warns and falls back for invalid nPlusOne.decks entries', () => {
);
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.decks'));
});
test('accepts valid proxy settings', () => {
  const { context, warnings } = makeContext({
    proxy: {
      enabled: true,
      host: '127.0.0.1',
      port: 9999,
      upstreamUrl: 'http://127.0.0.1:8765',
    },
  });
  applyAnkiConnectResolution(context);
  const resolvedProxy = context.resolved.ankiConnect.proxy;
  assert.equal(resolvedProxy.enabled, true);
  assert.equal(resolvedProxy.host, '127.0.0.1');
  assert.equal(resolvedProxy.port, 9999);
  assert.equal(resolvedProxy.upstreamUrl, 'http://127.0.0.1:8765');
  // Valid input must not produce any proxy-related warning.
  const hasProxyWarning = warnings.some((warning) => warning.path.startsWith('ankiConnect.proxy'));
  assert.equal(hasProxyWarning, false);
});
test('warns and falls back for invalid proxy settings', () => {
  // Every field is the wrong type/shape on purpose.
  const { context, warnings } = makeContext({
    proxy: { enabled: 'yes', host: '', port: -1, upstreamUrl: '' },
  });
  applyAnkiConnectResolution(context);
  assert.deepEqual(context.resolved.ankiConnect.proxy, DEFAULT_CONFIG.ankiConnect.proxy);
  const warnedPaths = new Set(warnings.map((warning) => warning.path));
  for (const key of ['enabled', 'host', 'port', 'upstreamUrl']) {
    assert.ok(warnedPaths.has(`ankiConnect.proxy.${key}`));
  }
});

View File

@@ -12,6 +12,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
const fields = isObject(ac.fields) ? (ac.fields as Record<string, unknown>) : {};
const media = isObject(ac.media) ? (ac.media as Record<string, unknown>) : {};
const metadata = isObject(ac.metadata) ? (ac.metadata as Record<string, unknown>) : {};
const proxy = isObject(ac.proxy) ? (ac.proxy as Record<string, unknown>) : {};
const aiSource = isObject(ac.ai) ? ac.ai : isObject(ac.openRouter) ? ac.openRouter : {};
const legacyKeys = new Set([
'audioField',
@@ -85,6 +86,9 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
? (ac.behavior as (typeof context.resolved)['ankiConnect']['behavior'])
: {}),
},
proxy: {
...context.resolved.ankiConnect.proxy,
},
metadata: {
...context.resolved.ankiConnect.metadata,
...(isObject(ac.metadata)
@@ -153,6 +157,68 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
);
}
if (isObject(ac.proxy)) {
const proxyEnabled = asBoolean(proxy.enabled);
if (proxyEnabled !== undefined) {
context.resolved.ankiConnect.proxy.enabled = proxyEnabled;
} else if (proxy.enabled !== undefined) {
context.warn(
'ankiConnect.proxy.enabled',
proxy.enabled,
context.resolved.ankiConnect.proxy.enabled,
'Expected boolean.',
);
}
const proxyHost = asString(proxy.host);
if (proxyHost !== undefined && proxyHost.trim().length > 0) {
context.resolved.ankiConnect.proxy.host = proxyHost.trim();
} else if (proxy.host !== undefined) {
context.warn(
'ankiConnect.proxy.host',
proxy.host,
context.resolved.ankiConnect.proxy.host,
'Expected non-empty string.',
);
}
const proxyUpstreamUrl = asString(proxy.upstreamUrl);
if (proxyUpstreamUrl !== undefined && proxyUpstreamUrl.trim().length > 0) {
context.resolved.ankiConnect.proxy.upstreamUrl = proxyUpstreamUrl.trim();
} else if (proxy.upstreamUrl !== undefined) {
context.warn(
'ankiConnect.proxy.upstreamUrl',
proxy.upstreamUrl,
context.resolved.ankiConnect.proxy.upstreamUrl,
'Expected non-empty string.',
);
}
const proxyPort = asNumber(proxy.port);
if (
proxyPort !== undefined &&
Number.isInteger(proxyPort) &&
proxyPort >= 1 &&
proxyPort <= 65535
) {
context.resolved.ankiConnect.proxy.port = proxyPort;
} else if (proxy.port !== undefined) {
context.warn(
'ankiConnect.proxy.port',
proxy.port,
context.resolved.ankiConnect.proxy.port,
'Expected integer between 1 and 65535.',
);
}
} else if (ac.proxy !== undefined) {
context.warn(
'ankiConnect.proxy',
ac.proxy,
context.resolved.ankiConnect.proxy,
'Expected object.',
);
}
if (Array.isArray(ac.tags)) {
const normalizedTags = ac.tags
.filter((entry): entry is string => typeof entry === 'string')

View File

@@ -41,7 +41,13 @@ test('initializeOverlayRuntime skips Anki integration when ankiConnect.enabled i
setIntegrationCalls += 1;
},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => 'auto',
createFieldGroupingCallback: () =>
async () => ({
keepNoteId: 1,
deleteNoteId: 2,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
});
@@ -90,7 +96,13 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
setIntegrationCalls += 1;
},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => 'manual',
createFieldGroupingCallback: () =>
async () => ({
keepNoteId: 3,
deleteNoteId: 4,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
});

View File

@@ -1696,3 +1696,169 @@ test('createTokenizerDepsRuntime checks MeCab availability before first tokenize
assert.equal(first?.[0]?.surface, '仮面');
assert.equal(second?.[0]?.surface, '仮面');
});
test('tokenizeSubtitle uses async MeCab enrichment override when provided', async () => {
  const mecabToken = {
    headword: '猫',
    surface: '猫',
    reading: 'ネコ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  const deps = makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
    tokenizeWithMecab: async () => [mecabToken],
    // The override should win over the built-in enrichment path.
    enrichTokensWithMecab: async (tokens) =>
      tokens.map((token) => ({ ...token, pos1: 'override-pos' })),
  });
  const result = await tokenizeSubtitle('猫', deps);
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.pos1, 'override-pos');
});
test('createTokenizerDepsRuntime exposes async MeCab enrichment helper', async () => {
  const runtimeDeps = createTokenizerDepsRuntime({
    getYomitanExt: () => null,
    getYomitanParserWindow: () => null,
    setYomitanParserWindow: () => {},
    getYomitanParserReadyPromise: () => null,
    setYomitanParserReadyPromise: () => {},
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: () => {},
    isKnownWord: () => false,
    getKnownWordMatchMode: () => 'headword',
    getJlptLevel: () => null,
    getMecabTokenizer: () => null,
  });
  // Yomitan-derived token: no pos1 yet.
  const baseToken = {
    headword: 'は',
    surface: 'は',
    reading: 'は',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.other,
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  // MeCab token covering the same span, carrying the pos1 to merge in.
  const mecabToken = {
    headword: 'は',
    surface: 'は',
    reading: 'ハ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.particle,
    pos1: '助詞',
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  const enriched = await runtimeDeps.enrichTokensWithMecab?.([baseToken], [mecabToken]);
  assert.equal(enriched?.[0]?.pos1, '助詞');
});
test('tokenizeSubtitle skips all enrichment stages when disabled', async () => {
  // Track every enrichment hook; none should ever fire.
  const callCounts = { known: 0, mecab: 0, jlpt: 0, frequency: 0 };
  const result = await tokenizeSubtitle(
    '猫',
    makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
      isKnownWord: () => {
        callCounts.known += 1;
        return true;
      },
      getNPlusOneEnabled: () => false,
      getJlptEnabled: () => false,
      getFrequencyDictionaryEnabled: () => false,
      getJlptLevel: () => {
        callCounts.jlpt += 1;
        return 'N5';
      },
      getFrequencyRank: () => {
        callCounts.frequency += 1;
        return 10;
      },
      tokenizeWithMecab: async () => {
        callCounts.mecab += 1;
        return null;
      },
    }),
  );
  const firstToken = result.tokens?.[0];
  assert.equal(result.tokens?.length, 1);
  assert.equal(firstToken?.isKnown, false);
  assert.equal(firstToken?.isNPlusOneTarget, false);
  assert.equal(firstToken?.jlptLevel, undefined);
  assert.equal(firstToken?.frequencyRank, undefined);
  assert.deepEqual(callCounts, { known: 0, mecab: 0, jlpt: 0, frequency: 0 });
});
test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async () => {
  let knownLookups = 0;
  let mecabRuns = 0;
  let frequencyLookups = 0;
  const mecabToken = {
    headword: '猫',
    surface: '猫',
    reading: 'ネコ',
    startPos: 0,
    endPos: 1,
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  };
  const result = await tokenizeSubtitle(
    '猫',
    makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
      isKnownWord: () => {
        knownLookups += 1;
        return true;
      },
      getNPlusOneEnabled: () => false,
      getJlptEnabled: () => false,
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: () => {
        frequencyLookups += 1;
        return 7;
      },
      // Frequency enrichment still needs MeCab pos1 data, so MeCab must run.
      tokenizeWithMecab: async () => {
        mecabRuns += 1;
        return [mecabToken];
      },
    }),
  );
  const firstToken = result.tokens?.[0];
  assert.equal(firstToken?.frequencyRank, 7);
  assert.equal(firstToken?.isKnown, false);
  assert.equal(knownLookups, 0);
  assert.equal(mecabRuns, 1);
  assert.equal(frequencyLookups, 1);
});

View File

@@ -9,13 +9,16 @@ import {
FrequencyDictionaryLookup,
JlptLevel,
} from '../../types';
import { annotateTokens } from './tokenizer/annotation-stage';
import { enrichTokensWithMecabPos1 } from './tokenizer/parser-enrichment-stage';
import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage';
import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime';
const logger = createLogger('main:tokenizer');
type MecabTokenEnrichmentFn = (
tokens: MergedToken[],
mecabTokens: MergedToken[] | null,
) => Promise<MergedToken[]>;
export interface TokenizerServiceDeps {
getYomitanExt: () => Extension | null;
getYomitanParserWindow: () => BrowserWindow | null;
@@ -27,12 +30,14 @@ export interface TokenizerServiceDeps {
isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyRank?: FrequencyDictionaryLookup;
getMinSentenceWordsForNPlusOne?: () => number;
getYomitanGroupDebugEnabled?: () => boolean;
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
enrichTokensWithMecab?: MecabTokenEnrichmentFn;
}
interface MecabTokenizerLike {
@@ -52,6 +57,7 @@ export interface TokenizerDepsRuntimeOptions {
isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyRank?: FrequencyDictionaryLookup;
@@ -60,6 +66,82 @@ export interface TokenizerDepsRuntimeOptions {
getMecabTokenizer: () => MecabTokenizerLike | null;
}
// Effective annotation switches for a single tokenize call, resolved from the
// optional getter hooks on TokenizerServiceDeps (an absent hook means enabled).
interface TokenizerAnnotationOptions {
nPlusOneEnabled: boolean;
jlptEnabled: boolean;
frequencyEnabled: boolean;
// Forwarded to the annotation stage; undefined when the hook is not provided.
minSentenceWordsForNPlusOne: number | undefined;
}
let parserEnrichmentWorkerRuntimeModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-worker-runtime')>
| null = null;
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null = null;
let parserEnrichmentFallbackModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-stage')>
| null = null;
/**
 * Returns the known-word predicate to use for annotation. When the n+1
 * feature is disabled every token is treated as unknown, so no known-word
 * lookups run at all.
 */
function getKnownWordLookup(
  deps: TokenizerServiceDeps,
  options: TokenizerAnnotationOptions,
): (text: string) => boolean {
  return options.nPlusOneEnabled ? deps.isKnownWord : () => false;
}
/** MeCab pos1 enrichment is only needed when a consumer of pos1 data is active. */
function needsMecabPosEnrichment(options: TokenizerAnnotationOptions): boolean {
  const { jlptEnabled, frequencyEnabled } = options;
  return jlptEnabled || frequencyEnabled;
}
/** True when at least one annotation feature (n+1, JLPT, frequency) is on. */
function hasAnyAnnotationEnabled(options: TokenizerAnnotationOptions): boolean {
  const { nPlusOneEnabled, jlptEnabled, frequencyEnabled } = options;
  return [nPlusOneEnabled, jlptEnabled, frequencyEnabled].some(Boolean);
}
/**
 * Enriches tokens with MeCab pos1 data, preferring the worker-thread runtime
 * and transparently falling back to the in-process implementation when the
 * worker runtime cannot be loaded or fails. Both module loads are cached in
 * module-level promises so the dynamic import happens at most once.
 */
async function enrichTokensWithMecabAsync(
  tokens: MergedToken[],
  mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
  if (parserEnrichmentWorkerRuntimeModulePromise === null) {
    parserEnrichmentWorkerRuntimeModulePromise = import(
      './tokenizer/parser-enrichment-worker-runtime'
    );
  }
  try {
    const workerRuntime = await parserEnrichmentWorkerRuntimeModulePromise;
    return await workerRuntime.enrichTokensWithMecabPos1Async(tokens, mecabTokens);
  } catch {
    if (parserEnrichmentFallbackModulePromise === null) {
      parserEnrichmentFallbackModulePromise = import('./tokenizer/parser-enrichment-stage');
    }
    const fallbackModule = await parserEnrichmentFallbackModulePromise;
    return fallbackModule.enrichTokensWithMecabPos1(tokens, mecabTokens);
  }
}
/**
 * Runs the annotation stage (known-word/n+1, JLPT, frequency marking) over
 * the given tokens. When every annotation feature is disabled the tokens are
 * returned untouched and the annotation module is never loaded.
 */
async function applyAnnotationStage(
  tokens: MergedToken[],
  deps: TokenizerServiceDeps,
  options: TokenizerAnnotationOptions,
): Promise<MergedToken[]> {
  if (!hasAnyAnnotationEnabled(options)) {
    return tokens;
  }
  if (annotationStageModulePromise === null) {
    annotationStageModulePromise = import('./tokenizer/annotation-stage');
  }
  const annotationStage = await annotationStageModulePromise;
  const stageDeps = {
    isKnownWord: getKnownWordLookup(deps, options),
    knownWordMatchMode: deps.getKnownWordMatchMode(),
    getJlptLevel: deps.getJlptLevel,
    getFrequencyRank: deps.getFrequencyRank,
  };
  return annotationStage.annotateTokens(tokens, stageDeps, options);
}
export function createTokenizerDepsRuntime(
options: TokenizerDepsRuntimeOptions,
): TokenizerServiceDeps {
@@ -76,6 +158,7 @@ export function createTokenizerDepsRuntime(
isKnownWord: options.isKnownWord,
getKnownWordMatchMode: options.getKnownWordMatchMode,
getJlptLevel: options.getJlptLevel,
getNPlusOneEnabled: options.getNPlusOneEnabled,
getJlptEnabled: options.getJlptEnabled,
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
getFrequencyRank: options.getFrequencyRank,
@@ -104,8 +187,11 @@ export function createTokenizerDepsRuntime(
return null;
}
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode());
const isKnownWordLookup = options.getNPlusOneEnabled?.() === false ? () => false : options.isKnownWord;
return mergeTokens(rawTokens, isKnownWordLookup, options.getKnownWordMatchMode());
},
enrichTokensWithMecab: async (tokens, mecabTokens) =>
enrichTokensWithMecabAsync(tokens, mecabTokens),
};
}
@@ -128,36 +214,19 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void {
});
}
function getAnnotationOptions(deps: TokenizerServiceDeps): {
jlptEnabled: boolean;
frequencyEnabled: boolean;
minSentenceWordsForNPlusOne: number | undefined;
} {
function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions {
return {
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
jlptEnabled: deps.getJlptEnabled?.() !== false,
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
};
}
function applyAnnotationStage(tokens: MergedToken[], deps: TokenizerServiceDeps): MergedToken[] {
const options = getAnnotationOptions(deps);
return annotateTokens(
tokens,
{
isKnownWord: deps.isKnownWord,
knownWordMatchMode: deps.getKnownWordMatchMode(),
getJlptLevel: deps.getJlptLevel,
getFrequencyRank: deps.getFrequencyRank,
},
options,
);
}
async function parseWithYomitanInternalParser(
text: string,
deps: TokenizerServiceDeps,
options: TokenizerAnnotationOptions,
): Promise<MergedToken[] | null> {
const parseResults = await requestYomitanParseResults(text, deps, logger);
if (!parseResults) {
@@ -166,7 +235,7 @@ async function parseWithYomitanInternalParser(
const selectedTokens = selectYomitanParseTokens(
parseResults,
deps.isKnownWord,
getKnownWordLookup(deps, options),
deps.getKnownWordMatchMode(),
);
if (!selectedTokens || selectedTokens.length === 0) {
@@ -177,9 +246,14 @@ async function parseWithYomitanInternalParser(
logSelectedYomitanGroups(text, selectedTokens);
}
if (!needsMecabPosEnrichment(options)) {
return selectedTokens;
}
try {
const mecabTokens = await deps.tokenizeWithMecab(text);
return enrichTokensWithMecabPos1(selectedTokens, mecabTokens);
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
} catch (err) {
const error = err as Error;
logger.warn(
@@ -207,12 +281,13 @@ export async function tokenizeSubtitle(
}
const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
const annotationOptions = getAnnotationOptions(deps);
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
if (yomitanTokens && yomitanTokens.length > 0) {
return {
text: displayText,
tokens: applyAnnotationStage(yomitanTokens, deps),
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
};
}

View File

@@ -31,6 +31,7 @@ export interface AnnotationStageDeps {
}
export interface AnnotationStageOptions {
nPlusOneEnabled?: boolean;
jlptEnabled?: boolean;
frequencyEnabled?: boolean;
minSentenceWordsForNPlusOne?: number;
@@ -340,11 +341,14 @@ export function annotateTokens(
deps: AnnotationStageDeps,
options: AnnotationStageOptions = {},
): MergedToken[] {
const knownMarkedTokens = applyKnownWordMarking(
tokens,
deps.isKnownWord,
deps.knownWordMatchMode,
);
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
const knownMarkedTokens = nPlusOneEnabled
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
: tokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
const frequencyEnabled = options.frequencyEnabled !== false;
const frequencyMarkedTokens =
@@ -363,6 +367,14 @@ export function annotateTokens(
jlptLevel: undefined,
}));
if (!nPlusOneEnabled) {
return jlptMarkedTokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
}
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
const sanitizedMinSentenceWordsForNPlusOne =
minSentenceWordsForNPlusOne !== undefined &&

View File

@@ -0,0 +1,147 @@
import type { MergedToken } from '../../../types';
import { createLogger } from '../../../logger';
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
const logger = createLogger('main:tokenizer');
const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER';
// One enrichment request posted to the worker thread; `id` is echoed back.
interface WorkerRequest {
id: number;
tokens: MergedToken[];
mecabTokens: MergedToken[] | null;
}
// Message received from the worker; fields are untrusted until validated.
interface WorkerResponse {
id?: unknown;
result?: unknown;
error?: unknown;
}
// Settlement callbacks for an outstanding worker request.
type PendingRequest = {
resolve: (value: MergedToken[]) => void;
reject: (reason?: unknown) => void;
};
/**
 * Lazily spawns a worker thread that performs MeCab pos1 enrichment off the
 * main thread. When the worker cannot be created (or is disabled via the
 * SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER env flag) enrichment runs the
 * synchronous in-process implementation instead.
 */
class ParserEnrichmentWorkerRuntime {
  private worker: import('node:worker_threads').Worker | null = null;
  private nextRequestId = 1;
  /** Outstanding requests keyed by the id echoed back by the worker. */
  private pending = new Map<number, PendingRequest>();
  /** Guards against repeatedly retrying a worker that failed to start. */
  private initAttempted = false;

  /**
   * Enriches tokens via the worker thread, or synchronously on the main
   * thread when no worker is available. Rejects (rather than hangs) when the
   * worker dies mid-request; callers are expected to fall back on rejection.
   */
  async enrichTokens(
    tokens: MergedToken[],
    mecabTokens: MergedToken[] | null,
  ): Promise<MergedToken[]> {
    const worker = await this.getWorker();
    if (!worker) {
      return enrichTokensWithMecabPos1(tokens, mecabTokens);
    }
    return new Promise<MergedToken[]>((resolve, reject) => {
      const id = this.nextRequestId++;
      this.pending.set(id, { resolve, reject });
      const request: WorkerRequest = { id, tokens, mecabTokens };
      worker.postMessage(request);
    });
  }

  /** Returns the (lazily created) worker, or null when unavailable/disabled. */
  private async getWorker(): Promise<import('node:worker_threads').Worker | null> {
    if (process.env[DISABLE_WORKER_ENV] === '1') {
      return null;
    }
    if (this.worker) {
      return this.worker;
    }
    if (this.initAttempted) {
      // A previous spawn failed or the worker exited; stay on the fallback.
      return null;
    }
    this.initAttempted = true;
    let workerThreads: typeof import('node:worker_threads');
    try {
      workerThreads = await import('node:worker_threads');
    } catch {
      return null;
    }
    let workerPath = '';
    try {
      workerPath = require.resolve('./parser-enrichment-worker-thread.js');
    } catch {
      return null;
    }
    try {
      const worker = new workerThreads.Worker(workerPath);
      worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message));
      worker.on('error', (error: Error) => this.handleWorkerFailure(error));
      worker.on('exit', (code: number) => {
        // Any exit — clean or not — must fail outstanding requests; the
        // previous code only did so for non-zero codes, leaving requests
        // pending forever after a clean (code 0) exit.
        this.handleWorkerFailure(
          new Error(`parser enrichment worker exited with code ${code}`),
        );
      });
      this.worker = worker;
      return worker;
    } catch (error) {
      logger.debug(`Failed to start parser enrichment worker: ${(error as Error).message}`);
      return null;
    }
  }

  /** Resolves or rejects the pending request matching the worker's reply. */
  private handleWorkerMessage(message: WorkerResponse): void {
    if (typeof message.id !== 'number') {
      return;
    }
    const request = this.pending.get(message.id);
    if (!request) {
      return;
    }
    this.pending.delete(message.id);
    if (typeof message.error === 'string' && message.error.length > 0) {
      request.reject(new Error(message.error));
      return;
    }
    if (!Array.isArray(message.result)) {
      request.reject(new Error('Parser enrichment worker returned invalid payload'));
      return;
    }
    request.resolve(message.result as MergedToken[]);
  }

  /**
   * Tears down the worker and rejects every outstanding request so callers
   * can fall back to main-thread enrichment instead of hanging.
   */
  private handleWorkerFailure(error: Error): void {
    logger.debug(`Parser enrichment worker unavailable, falling back to main thread: ${error.message}`);
    for (const pending of this.pending.values()) {
      pending.reject(error);
    }
    this.pending.clear();
    if (this.worker) {
      this.worker.removeAllListeners();
      this.worker = null;
    }
  }
}
// Lazily-created singleton runtime shared by all callers in this process.
let runtime: ParserEnrichmentWorkerRuntime | null = null;

/**
 * Async entry point for MeCab pos1 enrichment. Delegates to the worker
 * runtime and transparently falls back to the synchronous implementation
 * whenever the worker path rejects for any reason.
 */
export async function enrichTokensWithMecabPos1Async(
  tokens: MergedToken[],
  mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
  runtime ??= new ParserEnrichmentWorkerRuntime();
  try {
    return await runtime.enrichTokens(tokens, mecabTokens);
  } catch {
    return enrichTokensWithMecabPos1(tokens, mecabTokens);
  }
}

View File

@@ -0,0 +1,25 @@
import { parentPort } from 'node:worker_threads';
import type { MergedToken } from '../../../types';
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
// Shape of the request posted by the runtime on the main thread.
interface WorkerRequest {
  id: number;
  tokens: MergedToken[];
  mecabTokens: MergedToken[] | null;
}

// This module only makes sense when loaded as a worker thread.
if (parentPort === null) {
  throw new Error('parser-enrichment worker missing parent port');
}
const port = parentPort;

// For each request, run the synchronous enrichment and echo back either the
// result or the error text, tagged with the originating request id.
port.on('message', (incoming: WorkerRequest) => {
  try {
    const enriched = enrichTokensWithMecabPos1(incoming.tokens, incoming.mecabTokens);
    port.postMessage({ id: incoming.id, result: enriched });
  } catch (error) {
    const errorText = error instanceof Error ? error.message : String(error);
    port.postMessage({ id: incoming.id, error: errorText });
  }
});

View File

@@ -0,0 +1,83 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime';
// Builds a minimal YomitanParserRuntimeDeps stub whose fake parser window
// routes executeJavaScript calls through the supplied handler.
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
  const fakeWindow = {
    isDestroyed: () => false,
    webContents: {
      // async wrapper so a synchronously-throwing handler still rejects
      executeJavaScript: async (script: string) => executeJavaScript(script),
    },
  };
  return {
    getYomitanExt: () => ({ id: 'ext-id' }) as never,
    getYomitanParserWindow: () => fakeWindow as never,
    setYomitanParserWindow: () => undefined,
    getYomitanParserReadyPromise: () => null,
    setYomitanParserReadyPromise: () => undefined,
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: () => undefined,
  };
}
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
  let capturedScript = '';
  const deps = createDeps(async (script) => {
    capturedScript = script;
    return { updated: true };
  });
  const infoMessages: string[] = [];
  const result = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
    error: () => undefined,
    info: (message) => {
      infoMessages.push(message);
    },
  });
  // A successful update returns true, logs once, and the injected script
  // must both read and write Yomitan settings.
  assert.equal(result, true);
  assert.match(capturedScript, /optionsGetFull/);
  assert.match(capturedScript, /setAllSettings/);
  assert.equal(infoMessages.length, 1);
});
test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
  // Script runs fine but reports the server was already correct.
  const deps = createDeps(async () => ({ updated: false }));
  const result = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
    error: () => undefined,
    info: () => undefined,
  });
  assert.equal(result, false);
});
test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
  const deps = createDeps(async () => {
    throw new Error('execute failed');
  });
  const errorMessages: string[] = [];
  const result = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
    error: (message) => {
      errorMessages.push(message);
    },
    info: () => undefined,
  });
  // Failures must be swallowed (false result) but reported exactly once.
  assert.equal(result, false);
  assert.equal(errorMessages.length, 1);
});
test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
  let invocationCount = 0;
  const deps = createDeps(async () => {
    invocationCount += 1;
    return { updated: true };
  });
  // A whitespace-only URL must bail out before touching the parser window.
  const result = await syncYomitanDefaultAnkiServer(' ', deps, {
    error: () => undefined,
    info: () => undefined,
  });
  assert.equal(result, false);
  assert.equal(invocationCount, 0);
});

View File

@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';
interface LoggerLike {
error: (message: string, ...args: unknown[]) => void;
info?: (message: string, ...args: unknown[]) => void;
}
interface YomitanParserRuntimeDeps {
@@ -152,3 +153,90 @@ export async function requestYomitanParseResults(
return null;
}
}
/**
 * Points the Yomitan default profile's AnkiConnect server at `serverUrl`.
 *
 * The change is made inside the extension context: a script is executed in
 * the hidden parser window that reads the full options via Yomitan's
 * message bus ("optionsGetFull"), rewrites `profiles[0].options.anki.server`,
 * and writes everything back with "setAllSettings".
 *
 * The default server is only replaced when it is empty or still the stock
 * AnkiConnect default (http://127.0.0.1:8765) — a user-customized server is
 * never overwritten.
 *
 * @param serverUrl Target server URL; blank/whitespace input is a no-op.
 * @param deps      Accessors for the parser window / extension state.
 * @param logger    `error` is required, `info` is optional.
 * @returns true only when the profile was actually updated.
 */
export async function syncYomitanDefaultAnkiServer(
  serverUrl: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTargetServer = serverUrl.trim();
  if (!normalizedTargetServer) {
    // Nothing to sync toward; treat as "not updated".
    return false;
  }
  // The parser window hosts the extension; bail if it can't be brought up.
  const isReady = await ensureYomitanParserWindow(deps, logger);
  const parserWindow = deps.getYomitanParserWindow();
  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
    return false;
  }
  // Runs inside the extension page. `invoke` wraps chrome.runtime.sendMessage
  // in a promise, surfacing both chrome.runtime.lastError and Yomitan-level
  // `response.error` payloads as rejections.
  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });
      const targetServer = ${JSON.stringify(normalizedTargetServer)};
      const optionsFull = await invoke("optionsGetFull", undefined);
      const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
      if (profiles.length === 0) {
        return { updated: false, reason: "no-profiles" };
      }
      const defaultProfile = profiles[0];
      if (!defaultProfile || typeof defaultProfile !== "object") {
        return { updated: false, reason: "invalid-default-profile" };
      }
      defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
        ? defaultProfile.options
        : {};
      defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
        ? defaultProfile.options.anki
        : {};
      const currentServerRaw = defaultProfile.options.anki.server;
      const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
      const canReplaceDefault =
        currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
      if (!canReplaceDefault || currentServer === targetServer) {
        return { updated: false, reason: "no-change", currentServer, targetServer };
      }
      defaultProfile.options.anki.server = targetServer;
      await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
      return { updated: true, currentServer, targetServer };
    })();
  `;
  try {
    // Second argument enables user-gesture semantics for the injected script.
    const result = await parserWindow.webContents.executeJavaScript(script, true);
    // Defensive shape check: anything other than { updated: true } counts as
    // "no update" (including the structured no-change/reason results above).
    const updated =
      typeof result === 'object' &&
      result !== null &&
      (result as { updated?: unknown }).updated === true;
    if (updated) {
      logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
      return true;
    }
    return false;
  } catch (err) {
    // Swallow errors into a false result so callers can treat sync as
    // best-effort; the failure is still surfaced via the logger.
    logger.error('Failed to sync Yomitan default profile Anki server:', (err as Error).message);
    return false;
  }
}

View File

@@ -19,6 +19,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
isKnownWord: (text) => text === 'known',
recordLookup: (hit) => calls.push(`lookup:${hit}`),
getKnownWordMatchMode: () => 'surface',
getNPlusOneEnabled: () => true,
getMinSentenceWordsForNPlusOne: () => 3,
getJlptLevel: () => 'N2',
getJlptEnabled: () => true,
@@ -33,6 +34,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
deps.setYomitanParserWindow(null);
deps.setYomitanParserReadyPromise(null);
deps.setYomitanParserInitPromise(null);
assert.equal(deps.getNPlusOneEnabled?.(), true);
assert.equal(deps.getMinSentenceWordsForNPlusOne?.(), 3);
assert.deepEqual(calls, ['lookup:true', 'lookup:false', 'set-window', 'set-ready', 'set-init']);
});

View File

@@ -32,6 +32,11 @@ export function createBuildTokenizerDepsMainHandler(deps: TokenizerMainDeps) {
return hit;
},
getKnownWordMatchMode: () => deps.getKnownWordMatchMode(),
...(deps.getNPlusOneEnabled
? {
getNPlusOneEnabled: () => deps.getNPlusOneEnabled!(),
}
: {}),
getMinSentenceWordsForNPlusOne: () => deps.getMinSentenceWordsForNPlusOne(),
getJlptLevel: (text: string) => deps.getJlptLevel(text),
getJlptEnabled: () => deps.getJlptEnabled(),