refactor: remove dead registry and pipeline modules

2026-03-06 11:04:17 -08:00
parent e659b5d8f4
commit 39976c03f9
13 changed files with 113 additions and 340 deletions

View File

@@ -3,10 +3,10 @@ id: TASK-87.5
title: >-
Dead architecture cleanup: delete unused registry and pipeline modules that
are off the live path
status: To Do
status: Done
assignee: []
created_date: '2026-03-06 03:20'
updated_date: '2026-03-06 03:21'
updated_date: '2026-03-06 11:05'
labels:
- tech-debt
- dead-code
@@ -40,10 +40,10 @@ The review found several modules that appear self-contained but unused from the
<!-- AC:BEGIN -->
- [ ] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
- [ ] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
- [ ] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
- [ ] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
- [x] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
- [x] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
- [x] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
- [x] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
<!-- AC:END -->
## Implementation Plan
@@ -55,3 +55,10 @@ The review found several modules that appear self-contained but unused from the
3. Pay special attention to subtitle sync and tokenization surfaces, since duplicate architecture exists near active code.
4. Verify the relevant tokenization and subsync commands/tests still pass and update any stale docs or notes.
<!-- SECTION:PLAN:END -->
## Implementation Notes
- Traced imports from `src/main.ts`, `src/main/runtime/**`, `src/core/services/subsync-runner.ts`, and `src/core/services/tokenizer.ts`; confirmed the candidate registry/pipeline modules were isolated from the maintained runtime path.
- Deleted dead modules: `src/translators/index.ts`, `src/subsync/engines.ts`, `src/subtitle/pipeline.ts`, `src/subtitle/stages/{merge,normalize,tokenize}.ts`, `src/subtitle/stages/normalize.test.ts`, `src/tokenizers/index.ts`, and `src/token-mergers/index.ts`.
- Moved the still-useful zero-width separator normalization into the live tokenizer path in `src/core/services/tokenizer.ts` (see the sketch after these notes) and added regression coverage plus a repository-level dead-architecture guard in `src/dead-architecture-cleanup.test.ts`.
- Verified with `bun test src/core/services/tokenizer.test.ts`, `bun test src/dead-architecture-cleanup.test.ts`, `bun test src/core/services/subsync.test.ts src/subsync/utils.test.ts`, `bun run tsc`, and `bun run test:src`.
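
A minimal sketch of that separator handling, with a hypothetical `toTokenizeText` helper standing in for the inline normalization inside `tokenizeSubtitle` (the committed change is in the `src/core/services/tokenizer.ts` diff below):

// Hypothetical helper mirroring the inline normalization now in
// tokenizeSubtitle (src/core/services/tokenizer.ts).
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g; // ZWSP, word joiner, BOM
function toTokenizeText(displayText: string): string {
return displayText
.replace(INVISIBLE_SEPARATOR_PATTERN, ' ') // collapse invisible separators to spaces
.replace(/\n/g, ' ') // flatten line breaks before the parse request
.replace(/\s+/g, ' ') // squeeze whitespace runs
.trim();
}
// toTokenizeText('キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが')
//   === 'キリキリと かかってこい このヘナチョコ冒険者どもめが'

Only the string sent to the Yomitan parser is normalized; the display text keeps its original characters, which is what the new regression test below asserts.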

View File: src/core/services/tokenizer.test.ts

@@ -1235,6 +1235,30 @@ test('tokenizeSubtitle normalizes newlines before Yomitan parse request', async
assert.equal(result.tokens, null);
});
test('tokenizeSubtitle collapses zero-width separators before Yomitan parse request', async () => {
let parseInput = '';
const result = await tokenizeSubtitle(
'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが',
makeDeps({
getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
getYomitanParserWindow: () =>
({
isDestroyed: () => false,
webContents: {
executeJavaScript: async (script: string) => {
parseInput = script;
return null;
},
},
}) as unknown as Electron.BrowserWindow,
}),
);
assert.match(parseInput, /キリキリと かかってこい このヘナチョコ冒険者どもめが/);
assert.equal(result.text, 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが');
assert.equal(result.tokens, null);
});
test('tokenizeSubtitle returns null tokens when Yomitan parsing is unavailable', async () => {
const result = await tokenizeSubtitle('猫です', makeDeps());

View File: src/core/services/tokenizer.ts

@@ -106,6 +106,7 @@ const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
);
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
function getKnownWordLookup(
deps: TokenizerServiceDeps,
@@ -563,7 +564,11 @@ export async function tokenizeSubtitle(
return { text, tokens: null };
}
const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
const tokenizeText = displayText
.replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
.replace(/\n/g, ' ')
.replace(/\s+/g, ' ')
.trim();
const annotationOptions = getAnnotationOptions(deps);
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);

View File: src/dead-architecture-cleanup.test.ts

@@ -0,0 +1,70 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
const DEAD_MODULE_PATHS = [
'src/translators/index.ts',
'src/subsync/engines.ts',
'src/subtitle/pipeline.ts',
'src/subtitle/stages/merge.ts',
'src/subtitle/stages/normalize.ts',
'src/subtitle/stages/normalize.test.ts',
'src/subtitle/stages/tokenize.ts',
'src/tokenizers/index.ts',
'src/token-mergers/index.ts',
] as const;
const FORBIDDEN_IMPORT_PATTERNS = [
// (?:\.\.?\/)+ accepts any relative depth (./, ../, ../../, ...) so imports
// from nested directories cannot slip past the guard.
/from ['"](?:\.\.?\/)+tokenizers['"]/,
/from ['"](?:\.\.?\/)+token-mergers['"]/,
/from ['"](?:\.\.?\/)+subtitle\/pipeline['"]/,
/from ['"](?:\.\.?\/)+subsync\/engines['"]/,
/from ['"](?:\.\.?\/)+translators['"]/,
] as const;
function readWorkspaceFile(relativePath: string): string {
return fs.readFileSync(path.join(process.cwd(), relativePath), 'utf8');
}
function collectSourceFiles(rootDir: string): string[] {
const absoluteRoot = path.join(process.cwd(), rootDir);
const out: string[] = [];
const visit = (currentDir: string) => {
for (const entry of fs.readdirSync(currentDir, { withFileTypes: true })) {
const fullPath = path.join(currentDir, entry.name);
if (entry.isDirectory()) {
visit(fullPath);
continue;
}
if (!fullPath.endsWith('.ts') && !fullPath.endsWith('.tsx')) {
continue;
}
out.push(path.relative(process.cwd(), fullPath).replaceAll('\\', '/'));
}
};
visit(absoluteRoot);
out.sort();
return out;
}
test('dead registry and pipeline modules stay removed from the repository', () => {
for (const relativePath of DEAD_MODULE_PATHS) {
assert.equal(
fs.existsSync(path.join(process.cwd(), relativePath)),
false,
`${relativePath} should stay deleted`,
);
}
});
test('live source tree no longer imports dead registry and pipeline modules', () => {
for (const relativePath of collectSourceFiles('src')) {
const source = readWorkspaceFile(relativePath);
for (const pattern of FORBIDDEN_IMPORT_PATTERNS) {
assert.doesNotMatch(source, pattern, `${relativePath} should not import ${pattern.source}`);
}
}
});

View File: src/subsync/engines.ts

@@ -1,79 +0,0 @@
export type SubsyncEngine = 'alass' | 'ffsubsync';
export interface SubsyncCommandResult {
ok: boolean;
code: number | null;
stderr: string;
stdout: string;
error?: string;
}
export interface SubsyncEngineExecutionContext {
referenceFilePath: string;
videoPath: string;
inputSubtitlePath: string;
outputPath: string;
audioStreamIndex: number | null;
resolveExecutablePath: (configuredPath: string, commandName: string) => string;
resolvedPaths: {
alassPath: string;
ffsubsyncPath: string;
};
runCommand: (command: string, args: string[]) => Promise<SubsyncCommandResult>;
}
export interface SubsyncEngineProvider {
engine: SubsyncEngine;
execute: (context: SubsyncEngineExecutionContext) => Promise<SubsyncCommandResult>;
}
type SubsyncEngineProviderFactory = () => SubsyncEngineProvider;
const subsyncEngineProviderFactories = new Map<SubsyncEngine, SubsyncEngineProviderFactory>();
export function registerSubsyncEngineProvider(
engine: SubsyncEngine,
factory: SubsyncEngineProviderFactory,
): void {
if (subsyncEngineProviderFactories.has(engine)) {
return;
}
subsyncEngineProviderFactories.set(engine, factory);
}
export function createSubsyncEngineProvider(engine: SubsyncEngine): SubsyncEngineProvider | null {
const factory = subsyncEngineProviderFactories.get(engine);
if (!factory) return null;
return factory();
}
function registerDefaultSubsyncEngineProviders(): void {
registerSubsyncEngineProvider('alass', () => ({
engine: 'alass',
execute: async (context: SubsyncEngineExecutionContext) => {
const alassPath = context.resolveExecutablePath(context.resolvedPaths.alassPath, 'alass');
return context.runCommand(alassPath, [
context.referenceFilePath,
context.inputSubtitlePath,
context.outputPath,
]);
},
}));
registerSubsyncEngineProvider('ffsubsync', () => ({
engine: 'ffsubsync',
execute: async (context: SubsyncEngineExecutionContext) => {
const ffsubsyncPath = context.resolveExecutablePath(
context.resolvedPaths.ffsubsyncPath,
'ffsubsync',
);
const args = [context.videoPath, '-i', context.inputSubtitlePath, '-o', context.outputPath];
if (context.audioStreamIndex !== null) {
args.push('--reference-stream', `0:${context.audioStreamIndex}`);
}
return context.runCommand(ffsubsyncPath, args);
},
}));
}
registerDefaultSubsyncEngineProviders();

View File: src/subtitle/pipeline.ts

@@ -1,43 +0,0 @@
import { TokenMergerProvider } from '../token-mergers';
import { TokenizerProvider } from '../tokenizers';
import { SubtitleData } from '../types';
import { normalizeDisplayText, normalizeTokenizerInput } from './stages/normalize';
import { tokenizeStage } from './stages/tokenize';
import { mergeStage } from './stages/merge';
export interface SubtitlePipelineDeps {
getTokenizer: () => TokenizerProvider | null;
getTokenMerger: () => TokenMergerProvider | null;
}
export class SubtitlePipeline {
private readonly deps: SubtitlePipelineDeps;
constructor(deps: SubtitlePipelineDeps) {
this.deps = deps;
}
async process(text: string): Promise<SubtitleData> {
if (!text) {
return { text, tokens: null };
}
const displayText = normalizeDisplayText(text);
if (!displayText) {
return { text, tokens: null };
}
const tokenizeText = normalizeTokenizerInput(displayText);
try {
const tokens = await tokenizeStage(this.deps.getTokenizer(), tokenizeText);
const mergedTokens = mergeStage(this.deps.getTokenMerger(), tokens);
if (!mergedTokens || mergedTokens.length === 0) {
return { text: displayText, tokens: null };
}
return { text: displayText, tokens: mergedTokens };
} catch {
return { text: displayText, tokens: null };
}
}
}

View File: src/subtitle/stages/merge.ts

@@ -1,12 +0,0 @@
import { TokenMergerProvider } from '../../token-mergers';
import { MergedToken, Token } from '../../types';
export function mergeStage(
mergerProvider: TokenMergerProvider | null,
tokens: Token[] | null,
): MergedToken[] | null {
if (!mergerProvider || !tokens || tokens.length === 0) {
return null;
}
return mergerProvider.merge(tokens);
}

View File: src/subtitle/stages/normalize.test.ts

@@ -1,10 +0,0 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { normalizeTokenizerInput } from './normalize';
test('normalizeTokenizerInput collapses zero-width separators between Japanese segments', () => {
const input = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが';
const normalized = normalizeTokenizerInput(input);
assert.equal(normalized, 'キリキリと かかってこい このヘナチョコ冒険者どもめが');
});

View File: src/subtitle/stages/normalize.ts

@@ -1,13 +0,0 @@
export function normalizeDisplayText(text: string): string {
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
}
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
export function normalizeTokenizerInput(displayText: string): string {
return displayText
.replace(/\n/g, ' ')
.replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
.replace(/\s+/g, ' ')
.trim();
}

View File: src/subtitle/stages/tokenize.ts

@@ -1,12 +0,0 @@
import { TokenizerProvider } from '../../tokenizers';
import { Token } from '../../types';
export async function tokenizeStage(
tokenizerProvider: TokenizerProvider | null,
input: string,
): Promise<Token[] | null> {
if (!tokenizerProvider || !input) {
return null;
}
return tokenizerProvider.tokenize(input);
}

View File: src/token-mergers/index.ts

@@ -1,27 +0,0 @@
import { mergeTokens as defaultMergeTokens } from '../token-merger';
import { MergedToken, Token } from '../types';
export interface TokenMergerProvider {
id: string;
merge: (tokens: Token[]) => MergedToken[];
}
type TokenMergerProviderFactory = () => TokenMergerProvider;
const tokenMergerProviderFactories = new Map<string, TokenMergerProviderFactory>();
export function registerTokenMergerProvider(id: string, factory: TokenMergerProviderFactory): void {
if (tokenMergerProviderFactories.has(id)) {
return;
}
tokenMergerProviderFactories.set(id, factory);
}
function registerDefaultTokenMergerProviders(): void {
registerTokenMergerProvider('default', () => ({
id: 'default',
merge: (tokens: Token[]) => defaultMergeTokens(tokens),
}));
}
registerDefaultTokenMergerProviders();

View File: src/tokenizers/index.ts

@@ -1,36 +0,0 @@
import { MecabTokenizer } from '../mecab-tokenizer';
import { MecabStatus, Token } from '../types';
export interface TokenizerProvider {
id: string;
checkAvailability: () => Promise<boolean>;
tokenize: (text: string) => Promise<Token[] | null>;
getStatus: () => MecabStatus;
setEnabled: (enabled: boolean) => void;
}
type TokenizerProviderFactory = () => TokenizerProvider;
const tokenizerProviderFactories = new Map<string, TokenizerProviderFactory>();
export function registerTokenizerProvider(id: string, factory: TokenizerProviderFactory): void {
if (tokenizerProviderFactories.has(id)) {
return;
}
tokenizerProviderFactories.set(id, factory);
}
function registerDefaultTokenizerProviders(): void {
registerTokenizerProvider('mecab', () => {
const mecab = new MecabTokenizer();
return {
id: 'mecab',
checkAvailability: () => mecab.checkAvailability(),
tokenize: (text: string) => mecab.tokenize(text),
getStatus: () => mecab.getStatus(),
setEnabled: (enabled: boolean) => mecab.setEnabled(enabled),
};
});
}
registerDefaultTokenizerProviders();

View File: src/translators/index.ts

@@ -1,101 +0,0 @@
import axios from 'axios';
export interface TranslationRequest {
sentence: string;
apiKey: string;
baseUrl: string;
model: string;
targetLanguage: string;
systemPrompt: string;
timeoutMs?: number;
}
export interface TranslationProvider {
id: string;
translate: (request: TranslationRequest) => Promise<string | null>;
}
type TranslationProviderFactory = () => TranslationProvider;
const translationProviderFactories = new Map<string, TranslationProviderFactory>();
export function registerTranslationProvider(id: string, factory: TranslationProviderFactory): void {
if (translationProviderFactories.has(id)) {
return;
}
translationProviderFactories.set(id, factory);
}
export function createTranslationProvider(id = 'openai-compatible'): TranslationProvider | null {
const factory = translationProviderFactories.get(id);
if (!factory) return null;
return factory();
}
function extractAiText(content: unknown): string {
if (typeof content === 'string') {
return content.trim();
}
if (!Array.isArray(content)) {
return '';
}
const parts: string[] = [];
for (const item of content) {
if (
item &&
typeof item === 'object' &&
'type' in item &&
(item as { type?: unknown }).type === 'text' &&
'text' in item &&
typeof (item as { text?: unknown }).text === 'string'
) {
parts.push((item as { text: string }).text);
}
}
return parts.join('').trim();
}
function normalizeOpenAiBaseUrl(baseUrl: string): string {
const trimmed = baseUrl.trim().replace(/\/+$/, '');
if (/\/v1$/i.test(trimmed)) {
return trimmed;
}
return `${trimmed}/v1`;
}
function registerDefaultTranslationProviders(): void {
registerTranslationProvider('openai-compatible', () => ({
id: 'openai-compatible',
translate: async (request: TranslationRequest): Promise<string | null> => {
const response = await axios.post(
`${normalizeOpenAiBaseUrl(request.baseUrl)}/chat/completions`,
{
model: request.model,
temperature: 0,
messages: [
{ role: 'system', content: request.systemPrompt },
{
role: 'user',
content: `Translate this text to ${request.targetLanguage}:\n\n${request.sentence}`,
},
],
},
{
headers: {
Authorization: `Bearer ${request.apiKey}`,
'Content-Type': 'application/json',
},
timeout: request.timeoutMs ?? 15000,
},
);
const content = (response.data as { choices?: unknown[] })?.choices?.[0] as
| { message?: { content?: unknown } }
| undefined;
const translated = extractAiText(content?.message?.content);
return translated || null;
},
}));
}
registerDefaultTranslationProviders();