mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-12 04:19:25 -07:00
Add annotation websocket and texthooker startup config
- Add `texthooker.launchAtStartup` (default `true`) and wire startup behavior
- Add dedicated `annotationWebsocket` config/service path (default port `6678`) for texthooker annotations
- Regenerate config example/tests and update Yomitan patching/vendor assets
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { serializeSubtitleMarkup, serializeSubtitleWebsocketMessage } from './subtitle-ws';
|
||||
import {
|
||||
serializeInitialSubtitleWebsocketMessage,
|
||||
serializeSubtitleMarkup,
|
||||
serializeSubtitleWebsocketMessage,
|
||||
} from './subtitle-ws';
|
||||
import { PartOfSpeech, type SubtitleData } from '../../types';
|
||||
|
||||
const frequencyOptions = {
|
||||
@@ -78,6 +82,51 @@ test('serializeSubtitleMarkup includes known, n+1, jlpt, and frequency classes',
|
||||
assert.match(markup, /word word-frequency-band-1/);
|
||||
});
|
||||
|
||||
test('serializeSubtitleMarkup preserves tooltip attrs and name-match precedence', () => {
  // First token: a known word carrying JLPT and frequency metadata.
  // Second token: a name match that is not known.
  const tokens: SubtitleData['tokens'] = [
    {
      surface: '無事',
      reading: 'ぶじ',
      headword: '無事',
      startPos: 0,
      endPos: 2,
      partOfSpeech: PartOfSpeech.other,
      isMerged: false,
      isKnown: true,
      isNPlusOneTarget: false,
      jlptLevel: 'N2',
      frequencyRank: 745,
    },
    {
      surface: 'アレクシア',
      reading: 'あれくしあ',
      headword: 'アレクシア',
      startPos: 2,
      endPos: 7,
      partOfSpeech: PartOfSpeech.other,
      isMerged: false,
      isKnown: false,
      isNPlusOneTarget: false,
      isNameMatch: true,
      frequencyRank: 12,
    },
  ];

  const rendered = serializeSubtitleMarkup({ text: 'ignored', tokens }, frequencyOptions);

  const knownWordSpan =
    /<span class="word word-known word-jlpt-n2" data-reading="ぶじ" data-headword="無事" data-frequency-rank="745" data-jlpt-level="N2">無事<\/span>/;
  const nameMatchSpan =
    /<span class="word word-name-match" data-reading="あれくしあ" data-headword="アレクシア" data-frequency-rank="12">アレクシア<\/span>/;
  // The name-match and known classes must never appear together on one span.
  const nameAndKnownTogether = /word-name-match word-known|word-known word-name-match/;

  assert.match(rendered, knownWordSpan);
  assert.match(rendered, nameMatchSpan);
  assert.doesNotMatch(rendered, nameAndKnownTogether);
});
|
||||
|
||||
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
|
||||
const payload: SubtitleData = {
|
||||
text: '字幕',
|
||||
@@ -85,5 +134,101 @@ test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
|
||||
};
|
||||
|
||||
const raw = serializeSubtitleWebsocketMessage(payload, frequencyOptions);
|
||||
assert.deepEqual(JSON.parse(raw), { sentence: '字幕' });
|
||||
assert.deepEqual(JSON.parse(raw), {
|
||||
version: 1,
|
||||
text: '字幕',
|
||||
sentence: '字幕',
|
||||
tokens: [],
|
||||
});
|
||||
});
|
||||
|
||||
test('serializeSubtitleWebsocketMessage emits structured token api payload', () => {
  type Token = NonNullable<SubtitleData['tokens']>[number];

  // Single known token with JLPT and frequency metadata.
  const knownToken: Token = {
    surface: '無事',
    reading: 'ぶじ',
    headword: '無事',
    startPos: 0,
    endPos: 2,
    partOfSpeech: PartOfSpeech.other,
    isMerged: false,
    isKnown: true,
    isNPlusOneTarget: false,
    jlptLevel: 'N2',
    frequencyRank: 745,
  };

  const message = JSON.parse(
    serializeSubtitleWebsocketMessage({ text: '無事', tokens: [knownToken] }, frequencyOptions),
  );

  // The serialized token echoes the source fields and adds the derived ones.
  const expectedToken = {
    ...knownToken,
    isNameMatch: false,
    className: 'word word-known word-jlpt-n2',
    frequencyRankLabel: '745',
    jlptLevelLabel: 'N2',
  };

  assert.deepEqual(message, {
    version: 1,
    text: '無事',
    sentence:
      '<span class="word word-known word-jlpt-n2" data-reading="ぶじ" data-headword="無事" data-frequency-rank="745" data-jlpt-level="N2">無事</span>',
    tokens: [expectedToken],
  });
});
|
||||
|
||||
test('serializeInitialSubtitleWebsocketMessage keeps annotated current subtitle content', () => {
  type Token = NonNullable<SubtitleData['tokens']>[number];

  // Known token with no reading, headword, JLPT level or frequency rank.
  const bareKnownToken: Token = {
    surface: '既知',
    reading: '',
    headword: '',
    startPos: 0,
    endPos: 2,
    partOfSpeech: PartOfSpeech.other,
    isMerged: false,
    isKnown: true,
    isNPlusOneTarget: false,
  };

  const raw = serializeInitialSubtitleWebsocketMessage(
    { text: 'ignored fallback', tokens: [bareKnownToken] },
    frequencyOptions,
  );

  const expectedToken = {
    ...bareKnownToken,
    isNameMatch: false,
    className: 'word word-known',
    frequencyRankLabel: null,
    jlptLevelLabel: null,
  };

  assert.deepEqual(JSON.parse(raw ?? ''), {
    version: 1,
    text: 'ignored fallback',
    sentence: '<span class="word word-known">既知</span>',
    tokens: [expectedToken],
  });
});
|
||||
|
||||
@@ -18,6 +18,26 @@ export type SubtitleWebsocketFrequencyOptions = {
|
||||
mode: 'single' | 'banded';
|
||||
};
|
||||
|
||||
/** `MergedToken` fields that are copied unchanged into the serialized token. */
type SerializedSubtitleTokenSourceKey =
  | 'surface'
  | 'reading'
  | 'headword'
  | 'startPos'
  | 'endPos'
  | 'partOfSpeech'
  | 'isMerged'
  | 'isKnown'
  | 'isNPlusOneTarget'
  | 'frequencyRank'
  | 'jlptLevel';

/**
 * Shape of one token in the structured websocket payload: the picked
 * `MergedToken` fields plus the fields added at serialization time.
 */
type SerializedSubtitleToken = Pick<MergedToken, SerializedSubtitleTokenSourceKey> & {
  isNameMatch: boolean;
  className: string;
  frequencyRankLabel: string | null;
  jlptLevelLabel: string | null;
};
|
||||
|
||||
function escapeHtml(text: string): string {
|
||||
return text
|
||||
.replaceAll('&', '&')
|
||||
@@ -46,11 +66,29 @@ function computeFrequencyClass(
|
||||
return 'word-frequency-single';
|
||||
}
|
||||
|
||||
/**
 * Produces the frequency-rank label for a token.
 *
 * @param token - Token whose `frequencyRank` is inspected.
 * @param options - Frequency options; `enabled` and `topX` are read.
 * @returns The rank (floored, minimum 1) as a string when it is within
 *   `topX` (floored, minimum 1); otherwise `null`. Also `null` when the
 *   options are disabled or the rank is not a finite number.
 */
function getFrequencyRankLabel(
  token: MergedToken,
  options: SubtitleWebsocketFrequencyOptions,
): string | null {
  if (options.enabled) {
    const rawRank = token.frequencyRank;
    if (typeof rawRank === 'number' && Number.isFinite(rawRank)) {
      const cutoff = Math.max(1, Math.floor(options.topX));
      const position = Math.max(1, Math.floor(rawRank));
      if (position <= cutoff) {
        return String(position);
      }
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Produces the JLPT label for a token.
 *
 * @param token - Token whose `jlptLevel` is read.
 * @returns The token's `jlptLevel`, or `null` when it is `null` or `undefined`.
 */
function getJlptLevelLabel(token: MergedToken): string | null {
  const level = token.jlptLevel;
  if (level == null) {
    return null;
  }
  return level;
}
|
||||
|
||||
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
|
||||
const classes = ['word'];
|
||||
|
||||
if (token.isNPlusOneTarget) {
|
||||
classes.push('word-n-plus-one');
|
||||
} else if (token.isNameMatch) {
|
||||
classes.push('word-name-match');
|
||||
} else if (token.isKnown) {
|
||||
classes.push('word-known');
|
||||
}
|
||||
@@ -59,7 +97,7 @@ function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequenc
|
||||
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||
}
|
||||
|
||||
if (!token.isKnown && !token.isNPlusOneTarget) {
|
||||
if (!token.isKnown && !token.isNPlusOneTarget && !token.isNameMatch) {
|
||||
const frequencyClass = computeFrequencyClass(token, options);
|
||||
if (frequencyClass) {
|
||||
classes.push(frequencyClass);
|
||||
@@ -69,6 +107,55 @@ function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequenc
|
||||
return classes.join(' ');
|
||||
}
|
||||
|
||||
/**
 * Builds the `data-*` attribute string for a word span.
 *
 * Emits, in this order and only for truthy values: `data-reading`,
 * `data-headword`, `data-frequency-rank`, `data-jlpt-level`. Every value is
 * passed through `escapeHtml`.
 *
 * @param token - Token supplying the reading, headword and label inputs.
 * @param options - Frequency options forwarded to `getFrequencyRankLabel`.
 * @returns The attributes, each preceded by a single space, or `''` when
 *   there are none.
 */
function serializeWordDataAttributes(
  token: MergedToken,
  options: SubtitleWebsocketFrequencyOptions,
): string {
  const candidates: ReadonlyArray<readonly [string, string | null | undefined]> = [
    ['data-reading', token.reading],
    ['data-headword', token.headword],
    ['data-frequency-rank', getFrequencyRankLabel(token, options)],
    ['data-jlpt-level', getJlptLevelLabel(token)],
  ];

  let serialized = '';
  for (const [attributeName, attributeValue] of candidates) {
    if (attributeValue) {
      serialized += ` ${attributeName}="${escapeHtml(attributeValue)}"`;
    }
  }
  return serialized;
}
|
||||
|
||||
/**
 * Converts a token into its structured websocket representation.
 *
 * Copies the token's own fields, defaults `isNameMatch` to `false` when it is
 * nullish, and adds `className`, `frequencyRankLabel` and `jlptLevelLabel`.
 *
 * @param token - Source token.
 * @param options - Frequency options used for the class name and rank label.
 * @returns The serialized token; property order matches the emitted JSON.
 */
function serializeSubtitleToken(
  token: MergedToken,
  options: SubtitleWebsocketFrequencyOptions,
): SerializedSubtitleToken {
  const {
    surface,
    reading,
    headword,
    startPos,
    endPos,
    partOfSpeech,
    isMerged,
    isKnown,
    isNPlusOneTarget,
    jlptLevel,
    frequencyRank,
  } = token;
  const isNameMatch = token.isNameMatch ?? false;

  const className = computeWordClass(token, options);
  const frequencyRankLabel = getFrequencyRankLabel(token, options);
  const jlptLevelLabel = getJlptLevelLabel(token);

  // Keep this key order: it determines the order of keys in the JSON output.
  return {
    surface,
    reading,
    headword,
    startPos,
    endPos,
    partOfSpeech,
    isMerged,
    isKnown,
    isNPlusOneTarget,
    isNameMatch,
    jlptLevel,
    frequencyRank,
    className,
    frequencyRankLabel,
    jlptLevelLabel,
  };
}
|
||||
|
||||
export function serializeSubtitleMarkup(
|
||||
payload: SubtitleData,
|
||||
options: SubtitleWebsocketFrequencyOptions,
|
||||
@@ -80,11 +167,12 @@ export function serializeSubtitleMarkup(
|
||||
const chunks: string[] = [];
|
||||
for (const token of payload.tokens) {
|
||||
const klass = computeWordClass(token, options);
|
||||
const attrs = serializeWordDataAttributes(token, options);
|
||||
const parts = token.surface.split('\n');
|
||||
for (let index = 0; index < parts.length; index += 1) {
|
||||
const part = parts[index];
|
||||
if (part) {
|
||||
chunks.push(`<span class="${klass}">${escapeHtml(part)}</span>`);
|
||||
chunks.push(`<span class="${klass}"${attrs}>${escapeHtml(part)}</span>`);
|
||||
}
|
||||
if (index < parts.length - 1) {
|
||||
chunks.push('<br>');
|
||||
@@ -99,7 +187,23 @@ export function serializeSubtitleWebsocketMessage(
|
||||
payload: SubtitleData,
|
||||
options: SubtitleWebsocketFrequencyOptions,
|
||||
): string {
|
||||
return JSON.stringify({ sentence: serializeSubtitleMarkup(payload, options) });
|
||||
return JSON.stringify({
|
||||
version: 1,
|
||||
text: payload.text,
|
||||
sentence: serializeSubtitleMarkup(payload, options),
|
||||
tokens: payload.tokens?.map((token) => serializeSubtitleToken(token, options)) ?? [],
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Serializes the message sent for the current subtitle, if there is one.
 *
 * @param payload - Current subtitle data, or `null` when there is none.
 * @param options - Frequency options forwarded to the serializer.
 * @returns The result of `serializeSubtitleWebsocketMessage`, or `null` when
 *   `payload` is missing or its `text` is empty after trimming.
 */
export function serializeInitialSubtitleWebsocketMessage(
  payload: SubtitleData | null,
  options: SubtitleWebsocketFrequencyOptions,
): string | null {
  const visibleText = payload?.text.trim();
  if (payload && visibleText) {
    return serializeSubtitleWebsocketMessage(payload, options);
  }
  return null;
}
|
||||
|
||||
export class SubtitleWebSocket {
|
||||
@@ -114,7 +218,11 @@ export class SubtitleWebSocket {
|
||||
return (this.server?.clients.size ?? 0) > 0;
|
||||
}
|
||||
|
||||
public start(port: number, getCurrentSubtitleText: () => string): void {
|
||||
public start(
|
||||
port: number,
|
||||
getCurrentSubtitleData: () => SubtitleData | null,
|
||||
getFrequencyOptions: () => SubtitleWebsocketFrequencyOptions,
|
||||
): void {
|
||||
this.server = new WebSocket.Server({ port, host: '127.0.0.1' });
|
||||
|
||||
this.server.on('connection', (ws: WebSocket) => {
|
||||
@@ -124,9 +232,12 @@ export class SubtitleWebSocket {
|
||||
return;
|
||||
}
|
||||
|
||||
const currentText = getCurrentSubtitleText();
|
||||
if (currentText) {
|
||||
ws.send(JSON.stringify({ sentence: currentText }));
|
||||
const currentMessage = serializeInitialSubtitleWebsocketMessage(
|
||||
getCurrentSubtitleData(),
|
||||
getFrequencyOptions(),
|
||||
);
|
||||
if (currentMessage) {
|
||||
ws.send(currentMessage);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
27
src/core/services/texthooker.test.ts
Normal file
27
src/core/services/texthooker.test.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { injectTexthookerBootstrapHtml } from './texthooker';
|
||||
|
||||
test('injectTexthookerBootstrapHtml injects websocket bootstrap before head close', () => {
  const injected = injectTexthookerBootstrapHtml(
    '<html><head><title>Texthooker</title></head><body></body></html>',
    'ws://127.0.0.1:6678',
  );

  assert.match(
    injected,
    /window\.localStorage\.setItem\('bannou-texthooker-websocketUrl', "ws:\/\/127\.0\.0\.1:6678"\)/,
  );
  assert.ok(injected.includes('</script></head>'));
  assert.ok(injected.includes('bannou-texthooker-websocketUrl'));

  // None of the colouring preference keys may appear in the injected script.
  const colouringKeys = [
    'bannou-texthooker-enableKnownWordColoring',
    'bannou-texthooker-enableNPlusOneColoring',
    'bannou-texthooker-enableNameMatchColoring',
    'bannou-texthooker-enableFrequencyColoring',
    'bannou-texthooker-enableJlptColoring',
  ];
  for (const colouringKey of colouringKeys) {
    assert.ok(!injected.includes(colouringKey));
  }
});
|
||||
|
||||
test('injectTexthookerBootstrapHtml leaves html unchanged without websocketUrl', () => {
  const untouched = '<html><head></head><body></body></html>';

  // With no URL argument the input must be returned unchanged.
  const result = injectTexthookerBootstrapHtml(untouched);

  assert.equal(result, untouched);
});
|
||||
@@ -5,6 +5,22 @@ import { createLogger } from '../../logger';
|
||||
|
||||
const logger = createLogger('main:texthooker');
|
||||
|
||||
/**
 * Injects a bootstrap `<script>` into the texthooker HTML that stores the
 * websocket URL in `localStorage` under `bannou-texthooker-websocketUrl`.
 *
 * The script goes immediately before the first closing `</head>` tag (matched
 * case-insensitively). If there is no such tag it is prepended to the document.
 *
 * @param html - The texthooker page markup.
 * @param websocketUrl - URL to store; when omitted or empty, `html` is
 *   returned unchanged.
 * @returns The markup with the bootstrap script inserted.
 */
export function injectTexthookerBootstrapHtml(html: string, websocketUrl?: string): string {
  if (!websocketUrl) {
    return html;
  }

  // JSON.stringify yields a valid JS string literal but leaves `<` untouched,
  // so a URL containing `</script>` would end the inline script early.
  // `\u003c` is the same character inside a JS string and is inert to the
  // HTML parser.
  const urlLiteral = JSON.stringify(websocketUrl).replace(/</g, '\\u003c');
  const bootstrapScript = `<script>window.localStorage.setItem('bannou-texthooker-websocketUrl', ${urlLiteral});</script>`;

  // Splice by index rather than `String.prototype.replace` with a string
  // replacement, which would interpret `$&`, `$$`, `` $` `` and `$'` sequences
  // inside the URL and corrupt the injected script.
  const headCloseIndex = html.search(/<\/head>/i);
  if (headCloseIndex === -1) {
    return `${bootstrapScript}${html}`;
  }

  return `${html.slice(0, headCloseIndex)}${bootstrapScript}${html.slice(headCloseIndex)}`;
}
|
||||
|
||||
export class Texthooker {
|
||||
private server: http.Server | null = null;
|
||||
|
||||
@@ -12,7 +28,11 @@ export class Texthooker {
|
||||
return this.server !== null;
|
||||
}
|
||||
|
||||
public start(port: number): http.Server | null {
|
||||
public start(port: number, websocketUrl?: string): http.Server | null {
|
||||
if (this.server) {
|
||||
return this.server;
|
||||
}
|
||||
|
||||
const texthookerPath = this.getTexthookerPath();
|
||||
if (!texthookerPath) {
|
||||
logger.error('texthooker-ui not found');
|
||||
@@ -42,8 +62,12 @@ export class Texthooker {
|
||||
res.end('Not found');
|
||||
return;
|
||||
}
|
||||
const responseData =
|
||||
urlPath === '/' || urlPath === '/index.html'
|
||||
? Buffer.from(injectTexthookerBootstrapHtml(data.toString('utf-8'), websocketUrl))
|
||||
: data;
|
||||
res.writeHead(200, { 'Content-Type': mimeTypes[ext] || 'text/plain' });
|
||||
res.end(data);
|
||||
res.end(responseData);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user