mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
feat(subtitles): add line-break display toggle and narrow-space normalization
This commit is contained in:
@@ -24,6 +24,7 @@ test('loads defaults when config is missing', () => {
|
||||
assert.equal(config.jellyfin.autoAnnounce, false);
|
||||
assert.equal(config.jellyfin.remoteControlDeviceName, 'SubMiner');
|
||||
assert.equal(config.subtitleStyle.backgroundColor, 'rgb(30, 32, 48, 0.88)');
|
||||
assert.equal(config.subtitleStyle.preserveLineBreaks, false);
|
||||
assert.equal(config.immersionTracking.enabled, true);
|
||||
assert.equal(config.immersionTracking.dbPath, '');
|
||||
assert.equal(config.immersionTracking.batchSize, 25);
|
||||
@@ -38,6 +39,44 @@ test('loads defaults when config is missing', () => {
|
||||
assert.equal(config.immersionTracking.retention.vacuumIntervalDays, 7);
|
||||
});
|
||||
|
||||
test('parses subtitleStyle.preserveLineBreaks and warns on invalid values', () => {
  // Case 1: a real boolean in the config file is taken as-is.
  const booleanDir = makeTempDir();
  const booleanConfigPath = path.join(booleanDir, 'config.jsonc');
  fs.writeFileSync(
    booleanConfigPath,
    `{
      "subtitleStyle": {
        "preserveLineBreaks": true
      }
    }`,
    'utf-8',
  );

  const { subtitleStyle: parsedStyle } = new ConfigService(booleanDir).getConfig();
  assert.equal(parsedStyle.preserveLineBreaks, true);

  // Case 2: a non-boolean value falls back to the default and records a warning.
  const stringDir = makeTempDir();
  const stringConfigPath = path.join(stringDir, 'config.jsonc');
  fs.writeFileSync(
    stringConfigPath,
    `{
      "subtitleStyle": {
        "preserveLineBreaks": "yes"
      }
    }`,
    'utf-8',
  );

  const rejectingService = new ConfigService(stringDir);
  const resolvedValue = rejectingService.getConfig().subtitleStyle.preserveLineBreaks;
  assert.equal(resolvedValue, DEFAULT_CONFIG.subtitleStyle.preserveLineBreaks);

  // The warning is identified by the dotted config path of the offending key.
  const warnedPaths = rejectingService.getWarnings().map((warning) => warning.path);
  assert.ok(warnedPaths.includes('subtitleStyle.preserveLineBreaks'));
});
|
||||
|
||||
test('parses anilist.enabled and warns for invalid value', () => {
|
||||
const dir = makeTempDir();
|
||||
fs.writeFileSync(
|
||||
@@ -885,6 +924,7 @@ test('template generator includes known keys', () => {
|
||||
assert.match(output, /"logging":/);
|
||||
assert.match(output, /"websocket":/);
|
||||
assert.match(output, /"youtubeSubgen":/);
|
||||
assert.match(output, /"preserveLineBreaks": false/);
|
||||
assert.match(output, /"nPlusOne"\s*:\s*\{/);
|
||||
assert.match(output, /"nPlusOne": "#c6a0f6"/);
|
||||
assert.match(output, /"knownWord": "#a6da95"/);
|
||||
|
||||
@@ -172,6 +172,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
||||
},
|
||||
subtitleStyle: {
|
||||
enableJlpt: false,
|
||||
preserveLineBreaks: false,
|
||||
fontFamily: 'Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif',
|
||||
fontSize: 35,
|
||||
fontColor: '#cad3f5',
|
||||
@@ -343,6 +344,14 @@ export const CONFIG_OPTION_REGISTRY: ConfigOptionRegistryEntry[] = [
|
||||
'Enable JLPT vocabulary level underlines. ' +
|
||||
'When disabled, JLPT tagging lookup and underlines are skipped.',
|
||||
},
|
||||
{
|
||||
path: 'subtitleStyle.preserveLineBreaks',
|
||||
kind: 'boolean',
|
||||
defaultValue: DEFAULT_CONFIG.subtitleStyle.preserveLineBreaks,
|
||||
description:
|
||||
'Preserve line breaks in visible overlay subtitle rendering. ' +
|
||||
'When false, line breaks are flattened to spaces for a single-line flow.',
|
||||
},
|
||||
{
|
||||
path: 'subtitleStyle.frequencyDictionary.enabled',
|
||||
kind: 'boolean',
|
||||
|
||||
@@ -746,6 +746,8 @@ export class ConfigService {
|
||||
}
|
||||
|
||||
if (isObject(src.subtitleStyle)) {
|
||||
const fallbackSubtitleStyleEnableJlpt = resolved.subtitleStyle.enableJlpt;
|
||||
const fallbackSubtitleStylePreserveLineBreaks = resolved.subtitleStyle.preserveLineBreaks;
|
||||
resolved.subtitleStyle = {
|
||||
...resolved.subtitleStyle,
|
||||
...(src.subtitleStyle as ResolvedConfig['subtitleStyle']),
|
||||
@@ -761,6 +763,7 @@ export class ConfigService {
|
||||
if (enableJlpt !== undefined) {
|
||||
resolved.subtitleStyle.enableJlpt = enableJlpt;
|
||||
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
|
||||
resolved.subtitleStyle.enableJlpt = fallbackSubtitleStyleEnableJlpt;
|
||||
warn(
|
||||
'subtitleStyle.enableJlpt',
|
||||
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
|
||||
@@ -769,6 +772,23 @@ export class ConfigService {
|
||||
);
|
||||
}
|
||||
|
||||
const preserveLineBreaks = asBoolean(
|
||||
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks,
|
||||
);
|
||||
if (preserveLineBreaks !== undefined) {
|
||||
resolved.subtitleStyle.preserveLineBreaks = preserveLineBreaks;
|
||||
} else if (
|
||||
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks !== undefined
|
||||
) {
|
||||
resolved.subtitleStyle.preserveLineBreaks = fallbackSubtitleStylePreserveLineBreaks;
|
||||
warn(
|
||||
'subtitleStyle.preserveLineBreaks',
|
||||
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks,
|
||||
resolved.subtitleStyle.preserveLineBreaks,
|
||||
'Expected boolean.',
|
||||
);
|
||||
}
|
||||
|
||||
const frequencyDictionary = isObject(
|
||||
(src.subtitleStyle as { frequencyDictionary?: unknown }).frequencyDictionary,
|
||||
)
|
||||
|
||||
@@ -79,6 +79,7 @@ export type RendererState = {
|
||||
jlptN3Color: string;
|
||||
jlptN4Color: string;
|
||||
jlptN5Color: string;
|
||||
preserveSubtitleLineBreaks: boolean;
|
||||
frequencyDictionaryEnabled: boolean;
|
||||
frequencyDictionaryTopX: number;
|
||||
frequencyDictionaryMode: 'single' | 'banded';
|
||||
@@ -155,6 +156,7 @@ export function createRendererState(): RendererState {
|
||||
jlptN3Color: '#f9e2af',
|
||||
jlptN4Color: '#a6e3a1',
|
||||
jlptN5Color: '#8aadf4',
|
||||
preserveSubtitleLineBreaks: false,
|
||||
frequencyDictionaryEnabled: false,
|
||||
frequencyDictionaryTopX: 1000,
|
||||
frequencyDictionaryMode: 'single',
|
||||
|
||||
@@ -5,7 +5,7 @@ import path from 'node:path';
|
||||
|
||||
import type { MergedToken } from '../types';
|
||||
import { PartOfSpeech } from '../types.js';
|
||||
import { computeWordClass } from './subtitle-render.js';
|
||||
import { alignTokensToSourceText, computeWordClass } from './subtitle-render.js';
|
||||
|
||||
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
||||
return {
|
||||
@@ -203,6 +203,19 @@ test('computeWordClass skips frequency class when rank is out of topX', () => {
|
||||
assert.equal(actual, 'word');
|
||||
});
|
||||
|
||||
test('alignTokensToSourceText preserves newline separators between adjacent token surfaces', () => {
  // Two tokens whose surfaces sit on either side of a line break in the source text.
  const beforeBreak = createToken({
    surface: 'キリキリと',
    reading: 'きりきりと',
    headword: 'キリキリと',
  });
  const afterBreak = createToken({
    surface: 'かかってこい',
    reading: 'かかってこい',
    headword: 'かかってこい',
  });

  const segments = alignTokensToSourceText(
    [beforeBreak, afterBreak],
    'キリキリと\nかかってこい',
  );

  // Summarise each segment: text segments keep their content, tokens are just tagged.
  const summary: string[] = [];
  for (const segment of segments) {
    summary.push(segment.kind === 'text' ? `text:${segment.text}` : 'token');
  }

  assert.deepEqual(summary, ['token', 'text:\n', 'token']);
});
|
||||
|
||||
test('JLPT CSS rules use underline-only styling in renderer stylesheet', () => {
|
||||
const distCssPath = path.join(process.cwd(), 'dist', 'renderer', 'style.css');
|
||||
const srcCssPath = path.join(process.cwd(), 'src', 'renderer', 'style.css');
|
||||
|
||||
@@ -9,11 +9,15 @@ type FrequencyRenderSettings = {
|
||||
bandedColors: [string, string, string, string, string];
|
||||
};
|
||||
|
||||
/**
 * Cleans raw subtitle text for display.
 *
 * - Literal `\N` / `\n` escape sequences become real newline characters.
 * - Brace-delimited spans (`{...}`) are removed.
 * - When `collapseLineBreaks` is set, every run of whitespace (newlines
 *   included) is folded into a single space.
 *
 * @param text - Raw subtitle text; an empty value yields `''`.
 * @param trim - Strip leading/trailing whitespace from the result (default `true`).
 * @param collapseLineBreaks - Flatten line breaks and whitespace runs to single spaces (default `false`).
 * @returns The normalized subtitle text.
 */
function normalizeSubtitle(text: string, trim = true, collapseLineBreaks = false): string {
  if (!text) {
    return '';
  }

  // Escaped line breaks (backslash + N or n) -> real newlines.
  const withNewlines = text.replace(/\\[Nn]/g, '\n');
  // Drop brace-delimited spans; runs after the newline conversion, as before.
  const withoutTags = withNewlines.replace(/\{[^}]*\}/g, '');
  // `\s` already matches `\n`, so one pass flattens both newlines and space runs.
  const result = collapseLineBreaks ? withoutTags.replace(/\s+/g, ' ') : withoutTags;

  if (trim) {
    return result.trim();
  }
  return result;
}
|
||||
@@ -90,6 +94,8 @@ function renderWithTokens(
|
||||
root: HTMLElement,
|
||||
tokens: MergedToken[],
|
||||
frequencyRenderSettings?: Partial<FrequencyRenderSettings>,
|
||||
sourceText?: string,
|
||||
preserveLineBreaks = false,
|
||||
): void {
|
||||
const resolvedFrequencyRenderSettings = {
|
||||
...DEFAULT_FREQUENCY_RENDER_SETTINGS,
|
||||
@@ -110,6 +116,29 @@ function renderWithTokens(
|
||||
|
||||
const fragment = document.createDocumentFragment();
|
||||
|
||||
if (preserveLineBreaks && sourceText) {
|
||||
const normalizedSource = normalizeSubtitle(sourceText, true, false);
|
||||
const segments = alignTokensToSourceText(tokens, normalizedSource);
|
||||
|
||||
for (const segment of segments) {
|
||||
if (segment.kind === 'text') {
|
||||
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
||||
continue;
|
||||
}
|
||||
|
||||
const token = segment.token;
|
||||
const span = document.createElement('span');
|
||||
span.className = computeWordClass(token, resolvedFrequencyRenderSettings);
|
||||
span.textContent = token.surface;
|
||||
if (token.reading) span.dataset.reading = token.reading;
|
||||
if (token.headword) span.dataset.headword = token.headword;
|
||||
fragment.appendChild(span);
|
||||
}
|
||||
|
||||
root.appendChild(fragment);
|
||||
return;
|
||||
}
|
||||
|
||||
for (const token of tokens) {
|
||||
const surface = token.surface;
|
||||
|
||||
@@ -142,6 +171,50 @@ function renderWithTokens(
|
||||
root.appendChild(fragment);
|
||||
}
|
||||
|
||||
/** One renderable piece of a subtitle line: either literal text or a token. */
type SubtitleRenderSegment = { kind: 'text'; text: string } | { kind: 'token'; token: MergedToken };

/**
 * Interleaves `tokens` with the stretches of `sourceText` that lie between
 * their surfaces, so that separators present only in the source (for example
 * newlines) survive as `text` segments.
 *
 * Tokens are matched left to right with `indexOf`, starting from the end of
 * the previous match. Tokens with an empty surface are skipped.
 *
 * A token whose surface cannot be found at or after the current position is
 * still emitted (so it is never lost), but the search position is left
 * untouched. This lets later tokens keep aligning against the source instead
 * of the rest of the source being emitted as text *and* again as tokens.
 *
 * @param tokens - Tokens in display order.
 * @param sourceText - Text the tokens are aligned against.
 * @returns Segments in render order; empty when there are no tokens and no text.
 */
export function alignTokensToSourceText(
  tokens: MergedToken[],
  sourceText: string,
): SubtitleRenderSegment[] {
  if (tokens.length === 0) {
    return sourceText ? [{ kind: 'text', text: sourceText }] : [];
  }

  const segments: SubtitleRenderSegment[] = [];
  let cursor = 0;

  for (const token of tokens) {
    const surface = token.surface;
    if (!surface) {
      continue;
    }

    const foundIndex = sourceText.indexOf(surface, cursor);
    if (foundIndex < 0) {
      // Unmatched surface (e.g. the tokenizer saw a space where the source
      // has a newline): keep the token, but do not consume any source text so
      // the following tokens can still be located.
      segments.push({ kind: 'token', token });
      continue;
    }

    // Source text between the previous match and this one is kept verbatim.
    if (foundIndex > cursor) {
      segments.push({ kind: 'text', text: sourceText.slice(cursor, foundIndex) });
    }

    segments.push({ kind: 'token', token });
    cursor = foundIndex + surface.length;
  }

  // Whatever follows the last matched token is trailing text.
  if (cursor < sourceText.length) {
    segments.push({ kind: 'text', text: sourceText.slice(cursor) });
  }

  return segments;
}
|
||||
|
||||
export function computeWordClass(
|
||||
token: MergedToken,
|
||||
frequencySettings?: Partial<FrequencyRenderSettings>,
|
||||
@@ -199,7 +272,7 @@ function renderCharacterLevel(root: HTMLElement, text: string): void {
|
||||
root.appendChild(fragment);
|
||||
}
|
||||
|
||||
function renderPlainTextPreserveLineBreaks(root: HTMLElement, text: string): void {
|
||||
function renderPlainTextPreserveLineBreaks(root: ParentNode, text: string): void {
|
||||
const lines = text.split('\n');
|
||||
const fragment = document.createDocumentFragment();
|
||||
|
||||
@@ -246,7 +319,13 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
||||
|
||||
const normalized = normalizeSubtitle(text);
|
||||
if (tokens && tokens.length > 0) {
|
||||
renderWithTokens(ctx.dom.subtitleRoot, tokens, getFrequencyRenderSettings());
|
||||
renderWithTokens(
|
||||
ctx.dom.subtitleRoot,
|
||||
tokens,
|
||||
getFrequencyRenderSettings(),
|
||||
text,
|
||||
ctx.state.preserveSubtitleLineBreaks,
|
||||
);
|
||||
return;
|
||||
}
|
||||
renderCharacterLevel(ctx.dom.subtitleRoot, normalized);
|
||||
@@ -346,6 +425,7 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
||||
ctx.state.jlptN3Color = jlptColors.N3;
|
||||
ctx.state.jlptN4Color = jlptColors.N4;
|
||||
ctx.state.jlptN5Color = jlptColors.N5;
|
||||
ctx.state.preserveSubtitleLineBreaks = style.preserveLineBreaks ?? false;
|
||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n1-color', jlptColors.N1);
|
||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n2-color', jlptColors.N2);
|
||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n3-color', jlptColors.N3);
|
||||
|
||||
10
src/subtitle/stages/normalize.test.ts
Normal file
10
src/subtitle/stages/normalize.test.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { normalizeTokenizerInput } from './normalize';
|
||||
|
||||
test('normalizeTokenizerInput collapses zero-width separators between Japanese segments', () => {
  // Input mixes a zero-width space (U+200B) and a newline as segment separators.
  const actual = normalizeTokenizerInput(
    'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!',
  );
  // Both separators are expected to come out as a single ordinary space.
  const expected = 'キリキリと かかってこい このヘナチョコ冒険者どもめが!';

  assert.equal(actual, expected);
});
|
||||
@@ -2,6 +2,12 @@ export function normalizeDisplayText(text: string): string {
|
||||
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
|
||||
}
|
||||
|
||||
// Matches U+200B, U+2060 and U+FEFF; each occurrence is turned into a space below.
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;

/**
 * Flattens display text into a single line for the tokenizer.
 *
 * Newlines and the characters matched by `INVISIBLE_SEPARATOR_PATTERN` are
 * replaced with spaces, runs of whitespace are collapsed to one space, and
 * the result is trimmed.
 *
 * @param displayText - Subtitle text as shown on screen.
 * @returns Single-line, single-spaced text.
 */
export function normalizeTokenizerInput(displayText: string): string {
  const singleLine = displayText.split('\n').join(' ');
  const separatorsAsSpaces = singleLine.replace(INVISIBLE_SEPARATOR_PATTERN, ' ');
  const singleSpaced = separatorsAsSpaces.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
||||
|
||||
@@ -270,6 +270,7 @@ export interface AnkiConnectConfig {
|
||||
|
||||
export interface SubtitleStyleConfig {
|
||||
enableJlpt?: boolean;
|
||||
preserveLineBreaks?: boolean;
|
||||
fontFamily?: string;
|
||||
fontSize?: number;
|
||||
fontColor?: string;
|
||||
|
||||
Reference in New Issue
Block a user