refactor: remove dead registry and pipeline modules

2026-03-06 11:04:17 -08:00
parent e659b5d8f4
commit 39976c03f9
13 changed files with 113 additions and 340 deletions

View File

@@ -3,10 +3,10 @@ id: TASK-87.5
 title: >-
   Dead architecture cleanup: delete unused registry and pipeline modules that
   are off the live path
-status: To Do
+status: Done
 assignee: []
 created_date: '2026-03-06 03:20'
-updated_date: '2026-03-06 03:21'
+updated_date: '2026-03-06 11:05'
 labels:
   - tech-debt
   - dead-code
@@ -40,10 +40,10 @@ The review found several modules that appear self-contained but unused from the
 <!-- AC:BEGIN -->
-- [ ] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
+- [x] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
-- [ ] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
+- [x] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
-- [ ] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
+- [x] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
-- [ ] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
+- [x] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
 <!-- AC:END -->
 ## Implementation Plan
@@ -55,3 +55,10 @@ The review found several modules that appear self-contained but unused from the
 3. Pay special attention to subtitle sync and tokenization surfaces, since duplicate architecture exists near active code.
 4. Verify the relevant tokenization and subsync commands/tests still pass and update any stale docs or notes.
 <!-- SECTION:PLAN:END -->
## Implementation Notes
- Traced imports from `src/main.ts`, `src/main/runtime/**`, `src/core/services/subsync-runner.ts`, and `src/core/services/tokenizer.ts`; confirmed the candidate registry/pipeline modules were isolated from the maintained runtime path.
- Deleted dead modules: `src/translators/index.ts`, `src/subsync/engines.ts`, `src/subtitle/pipeline.ts`, `src/subtitle/stages/{merge,normalize,tokenize}.ts`, `src/subtitle/stages/normalize.test.ts`, `src/tokenizers/index.ts`, and `src/token-mergers/index.ts`.
- Moved the useful zero-width separator normalization into the live tokenizer path in `src/core/services/tokenizer.ts` and added regression coverage plus a repository-level dead-architecture guard in `src/dead-architecture-cleanup.test.ts`.
- Verified with `bun test src/core/services/tokenizer.test.ts`, `bun test src/dead-architecture-cleanup.test.ts`, `bun test src/core/services/subsync.test.ts src/subsync/utils.test.ts`, `bun run tsc`, and `bun run test:src`.
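
For reference, the zero-width separator handling described in these notes reduces to a few chained replacements. A minimal standalone sketch: the pattern and chain mirror the `tokenizer.ts` diff further down, and the sample string is the test fixture from the new regression test.

// Minimal sketch of the normalization now on the live path: zero-width
// characters (U+200B, U+2060, U+FEFF) become spaces, newlines become spaces,
// runs of whitespace collapse, and the result is trimmed.
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;

const raw = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが';
const tokenizeText = raw
  .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
  .replace(/\n/g, ' ')
  .replace(/\s+/g, ' ')
  .trim();

console.log(tokenizeText); // キリキリと かかってこい このヘナチョコ冒険者どもめが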

View File: src/core/services/tokenizer.test.ts

@@ -1235,6 +1235,30 @@ test('tokenizeSubtitle normalizes newlines before Yomitan parse request', async
  assert.equal(result.tokens, null);
});
test('tokenizeSubtitle collapses zero-width separators before Yomitan parse request', async () => {
  let parseInput = '';
  const result = await tokenizeSubtitle(
    'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが',
    makeDeps({
      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
      getYomitanParserWindow: () =>
        ({
          isDestroyed: () => false,
          webContents: {
            executeJavaScript: async (script: string) => {
              parseInput = script;
              return null;
            },
          },
        }) as unknown as Electron.BrowserWindow,
    }),
  );

  assert.match(parseInput, /キリキリと かかってこい このヘナチョコ冒険者どもめが/);
  assert.equal(result.text, 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが');
  assert.equal(result.tokens, null);
});
test('tokenizeSubtitle returns null tokens when Yomitan parsing is unavailable', async () => {
  const result = await tokenizeSubtitle('猫です', makeDeps());

View File: src/core/services/tokenizer.ts

@@ -106,6 +106,7 @@ const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
  DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
);
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
function getKnownWordLookup(
  deps: TokenizerServiceDeps,
@@ -563,7 +564,11 @@ export async function tokenizeSubtitle(
    return { text, tokens: null };
  }
-  const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
+  const tokenizeText = displayText
+    .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
+    .replace(/\n/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
  const annotationOptions = getAnnotationOptions(deps);
  const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);

View File: src/dead-architecture-cleanup.test.ts

@@ -0,0 +1,70 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
const DEAD_MODULE_PATHS = [
  'src/translators/index.ts',
  'src/subsync/engines.ts',
  'src/subtitle/pipeline.ts',
  'src/subtitle/stages/merge.ts',
  'src/subtitle/stages/normalize.ts',
  'src/subtitle/stages/normalize.test.ts',
  'src/subtitle/stages/tokenize.ts',
  'src/tokenizers/index.ts',
  'src/token-mergers/index.ts',
] as const;

// Match relative imports of the dead modules at any depth ('./', '../', '../../', ...).
const FORBIDDEN_IMPORT_PATTERNS = [
  /from ['"](?:\.\.?\/)+tokenizers['"]/,
  /from ['"](?:\.\.?\/)+token-mergers['"]/,
  /from ['"](?:\.\.?\/)+subtitle\/pipeline['"]/,
  /from ['"](?:\.\.?\/)+subsync\/engines['"]/,
  /from ['"](?:\.\.?\/)+translators['"]/,
] as const;

function readWorkspaceFile(relativePath: string): string {
  return fs.readFileSync(path.join(process.cwd(), relativePath), 'utf8');
}

function collectSourceFiles(rootDir: string): string[] {
  const absoluteRoot = path.join(process.cwd(), rootDir);
  const out: string[] = [];

  const visit = (currentDir: string) => {
    for (const entry of fs.readdirSync(currentDir, { withFileTypes: true })) {
      const fullPath = path.join(currentDir, entry.name);
      if (entry.isDirectory()) {
        visit(fullPath);
        continue;
      }
      if (!fullPath.endsWith('.ts') && !fullPath.endsWith('.tsx')) {
        continue;
      }
      out.push(path.relative(process.cwd(), fullPath).replaceAll('\\', '/'));
    }
  };

  visit(absoluteRoot);
  out.sort();
  return out;
}

test('dead registry and pipeline modules stay removed from the repository', () => {
  for (const relativePath of DEAD_MODULE_PATHS) {
    assert.equal(
      fs.existsSync(path.join(process.cwd(), relativePath)),
      false,
      `${relativePath} should stay deleted`,
    );
  }
});

test('live source tree no longer imports dead registry and pipeline modules', () => {
  for (const relativePath of collectSourceFiles('src')) {
    const source = readWorkspaceFile(relativePath);
    for (const pattern of FORBIDDEN_IMPORT_PATTERNS) {
      assert.doesNotMatch(source, pattern, `${relativePath} should not import ${pattern.source}`);
    }
  }
});
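
To see what the import guard actually rejects, here is a minimal sketch; the `sample` line is hypothetical, and the pattern is the tokenizers entry from the list above.

// Sketch: a relative import of a deleted module, at any depth, trips the guard.
const sample = "import { TokenizerProvider } from '../../tokenizers';";
const forbidden = /from ['"](?:\.\.?\/)+tokenizers['"]/;
console.log(forbidden.test(sample)); // true, so assert.doesNotMatch would fail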

View File: src/subsync/engines.ts

@@ -1,79 +0,0 @@
export type SubsyncEngine = 'alass' | 'ffsubsync';

export interface SubsyncCommandResult {
  ok: boolean;
  code: number | null;
  stderr: string;
  stdout: string;
  error?: string;
}

export interface SubsyncEngineExecutionContext {
  referenceFilePath: string;
  videoPath: string;
  inputSubtitlePath: string;
  outputPath: string;
  audioStreamIndex: number | null;
  resolveExecutablePath: (configuredPath: string, commandName: string) => string;
  resolvedPaths: {
    alassPath: string;
    ffsubsyncPath: string;
  };
  runCommand: (command: string, args: string[]) => Promise<SubsyncCommandResult>;
}

export interface SubsyncEngineProvider {
  engine: SubsyncEngine;
  execute: (context: SubsyncEngineExecutionContext) => Promise<SubsyncCommandResult>;
}

type SubsyncEngineProviderFactory = () => SubsyncEngineProvider;

const subsyncEngineProviderFactories = new Map<SubsyncEngine, SubsyncEngineProviderFactory>();

export function registerSubsyncEngineProvider(
  engine: SubsyncEngine,
  factory: SubsyncEngineProviderFactory,
): void {
  if (subsyncEngineProviderFactories.has(engine)) {
    return;
  }
  subsyncEngineProviderFactories.set(engine, factory);
}

export function createSubsyncEngineProvider(engine: SubsyncEngine): SubsyncEngineProvider | null {
  const factory = subsyncEngineProviderFactories.get(engine);
  if (!factory) return null;
  return factory();
}

function registerDefaultSubsyncEngineProviders(): void {
  registerSubsyncEngineProvider('alass', () => ({
    engine: 'alass',
    execute: async (context: SubsyncEngineExecutionContext) => {
      const alassPath = context.resolveExecutablePath(context.resolvedPaths.alassPath, 'alass');
      return context.runCommand(alassPath, [
        context.referenceFilePath,
        context.inputSubtitlePath,
        context.outputPath,
      ]);
    },
  }));

  registerSubsyncEngineProvider('ffsubsync', () => ({
    engine: 'ffsubsync',
    execute: async (context: SubsyncEngineExecutionContext) => {
      const ffsubsyncPath = context.resolveExecutablePath(
        context.resolvedPaths.ffsubsyncPath,
        'ffsubsync',
      );
      const args = [context.videoPath, '-i', context.inputSubtitlePath, '-o', context.outputPath];
      if (context.audioStreamIndex !== null) {
        args.push('--reference-stream', `0:${context.audioStreamIndex}`);
      }
      return context.runCommand(ffsubsyncPath, args);
    },
  }));
}

registerDefaultSubsyncEngineProviders();
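
For context, the only way to exercise this registry was roughly the call below; tracing imports from the live runtime path found no such call site, which is what marked the module dead. A hypothetical sketch with a stubbed `runCommand`:

// Hypothetical caller for the deleted registry; no such call site existed in src/.
const provider = createSubsyncEngineProvider('ffsubsync');
if (provider) {
  const result = await provider.execute({
    referenceFilePath: '/tmp/reference.srt',
    videoPath: '/tmp/episode.mkv',
    inputSubtitlePath: '/tmp/input.srt',
    outputPath: '/tmp/output.srt',
    audioStreamIndex: 1,
    resolveExecutablePath: (configuredPath) => configuredPath,
    resolvedPaths: { alassPath: 'alass', ffsubsyncPath: 'ffsubsync' },
    // Stub that echoes the command line instead of spawning a process.
    runCommand: async (command, args) => ({
      ok: true,
      code: 0,
      stderr: '',
      stdout: `${command} ${args.join(' ')}`,
    }),
  });
  console.log(result.stdout);
  // ffsubsync /tmp/episode.mkv -i /tmp/input.srt -o /tmp/output.srt --reference-stream 0:1
}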

View File: src/subtitle/pipeline.ts

@@ -1,43 +0,0 @@
import { TokenMergerProvider } from '../token-mergers';
import { TokenizerProvider } from '../tokenizers';
import { SubtitleData } from '../types';
import { normalizeDisplayText, normalizeTokenizerInput } from './stages/normalize';
import { tokenizeStage } from './stages/tokenize';
import { mergeStage } from './stages/merge';

export interface SubtitlePipelineDeps {
  getTokenizer: () => TokenizerProvider | null;
  getTokenMerger: () => TokenMergerProvider | null;
}

export class SubtitlePipeline {
  private readonly deps: SubtitlePipelineDeps;

  constructor(deps: SubtitlePipelineDeps) {
    this.deps = deps;
  }

  async process(text: string): Promise<SubtitleData> {
    if (!text) {
      return { text, tokens: null };
    }

    const displayText = normalizeDisplayText(text);
    if (!displayText) {
      return { text, tokens: null };
    }

    const tokenizeText = normalizeTokenizerInput(displayText);

    try {
      const tokens = await tokenizeStage(this.deps.getTokenizer(), tokenizeText);
      const mergedTokens = mergeStage(this.deps.getTokenMerger(), tokens);
      if (!mergedTokens || mergedTokens.length === 0) {
        return { text: displayText, tokens: null };
      }
      return { text: displayText, tokens: mergedTokens };
    } catch {
      return { text: displayText, tokens: null };
    }
  }
}

View File: src/subtitle/stages/merge.ts

@@ -1,12 +0,0 @@
import { TokenMergerProvider } from '../../token-mergers';
import { MergedToken, Token } from '../../types';

export function mergeStage(
  mergerProvider: TokenMergerProvider | null,
  tokens: Token[] | null,
): MergedToken[] | null {
  if (!mergerProvider || !tokens || tokens.length === 0) {
    return null;
  }
  return mergerProvider.merge(tokens);
}

View File: src/subtitle/stages/normalize.test.ts

@@ -1,10 +0,0 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { normalizeTokenizerInput } from './normalize';

test('normalizeTokenizerInput collapses zero-width separators between Japanese segments', () => {
  const input = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが';
  const normalized = normalizeTokenizerInput(input);
  assert.equal(normalized, 'キリキリと かかってこい このヘナチョコ冒険者どもめが');
});

View File: src/subtitle/stages/normalize.ts

@@ -1,13 +0,0 @@
export function normalizeDisplayText(text: string): string {
  return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
}

const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;

export function normalizeTokenizerInput(displayText: string): string {
  return displayText
    .replace(/\n/g, ' ')
    .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

View File: src/subtitle/stages/tokenize.ts

@@ -1,12 +0,0 @@
import { TokenizerProvider } from '../../tokenizers';
import { Token } from '../../types';

export async function tokenizeStage(
  tokenizerProvider: TokenizerProvider | null,
  input: string,
): Promise<Token[] | null> {
  if (!tokenizerProvider || !input) {
    return null;
  }
  return tokenizerProvider.tokenize(input);
}

View File: src/token-mergers/index.ts

@@ -1,27 +0,0 @@
import { mergeTokens as defaultMergeTokens } from '../token-merger';
import { MergedToken, Token } from '../types';

export interface TokenMergerProvider {
  id: string;
  merge: (tokens: Token[]) => MergedToken[];
}

type TokenMergerProviderFactory = () => TokenMergerProvider;

const tokenMergerProviderFactories = new Map<string, TokenMergerProviderFactory>();

export function registerTokenMergerProvider(id: string, factory: TokenMergerProviderFactory): void {
  if (tokenMergerProviderFactories.has(id)) {
    return;
  }
  tokenMergerProviderFactories.set(id, factory);
}

function registerDefaultTokenMergerProviders(): void {
  registerTokenMergerProvider('default', () => ({
    id: 'default',
    merge: (tokens: Token[]) => defaultMergeTokens(tokens),
  }));
}

registerDefaultTokenMergerProviders();

View File: src/tokenizers/index.ts

@@ -1,36 +0,0 @@
import { MecabTokenizer } from '../mecab-tokenizer';
import { MecabStatus, Token } from '../types';

export interface TokenizerProvider {
  id: string;
  checkAvailability: () => Promise<boolean>;
  tokenize: (text: string) => Promise<Token[] | null>;
  getStatus: () => MecabStatus;
  setEnabled: (enabled: boolean) => void;
}

type TokenizerProviderFactory = () => TokenizerProvider;

const tokenizerProviderFactories = new Map<string, TokenizerProviderFactory>();

export function registerTokenizerProvider(id: string, factory: TokenizerProviderFactory): void {
  if (tokenizerProviderFactories.has(id)) {
    return;
  }
  tokenizerProviderFactories.set(id, factory);
}

function registerDefaultTokenizerProviders(): void {
  registerTokenizerProvider('mecab', () => {
    const mecab = new MecabTokenizer();
    return {
      id: 'mecab',
      checkAvailability: () => mecab.checkAvailability(),
      tokenize: (text: string) => mecab.tokenize(text),
      getStatus: () => mecab.getStatus(),
      setEnabled: (enabled: boolean) => mecab.setEnabled(enabled),
    };
  });
}

registerDefaultTokenizerProviders();

View File: src/translators/index.ts

@@ -1,101 +0,0 @@
import axios from 'axios';

export interface TranslationRequest {
  sentence: string;
  apiKey: string;
  baseUrl: string;
  model: string;
  targetLanguage: string;
  systemPrompt: string;
  timeoutMs?: number;
}

export interface TranslationProvider {
  id: string;
  translate: (request: TranslationRequest) => Promise<string | null>;
}

type TranslationProviderFactory = () => TranslationProvider;

const translationProviderFactories = new Map<string, TranslationProviderFactory>();

export function registerTranslationProvider(id: string, factory: TranslationProviderFactory): void {
  if (translationProviderFactories.has(id)) {
    return;
  }
  translationProviderFactories.set(id, factory);
}

export function createTranslationProvider(id = 'openai-compatible'): TranslationProvider | null {
  const factory = translationProviderFactories.get(id);
  if (!factory) return null;
  return factory();
}

function extractAiText(content: unknown): string {
  if (typeof content === 'string') {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return '';
  }
  const parts: string[] = [];
  for (const item of content) {
    if (
      item &&
      typeof item === 'object' &&
      'type' in item &&
      (item as { type?: unknown }).type === 'text' &&
      'text' in item &&
      typeof (item as { text?: unknown }).text === 'string'
    ) {
      parts.push((item as { text: string }).text);
    }
  }
  return parts.join('').trim();
}

function normalizeOpenAiBaseUrl(baseUrl: string): string {
  const trimmed = baseUrl.trim().replace(/\/+$/, '');
  if (/\/v1$/i.test(trimmed)) {
    return trimmed;
  }
  return `${trimmed}/v1`;
}

function registerDefaultTranslationProviders(): void {
  registerTranslationProvider('openai-compatible', () => ({
    id: 'openai-compatible',
    translate: async (request: TranslationRequest): Promise<string | null> => {
      const response = await axios.post(
        `${normalizeOpenAiBaseUrl(request.baseUrl)}/chat/completions`,
        {
          model: request.model,
          temperature: 0,
          messages: [
            { role: 'system', content: request.systemPrompt },
            {
              role: 'user',
              content: `Translate this text to ${request.targetLanguage}:\n\n${request.sentence}`,
            },
          ],
        },
        {
          headers: {
            Authorization: `Bearer ${request.apiKey}`,
            'Content-Type': 'application/json',
          },
          timeout: request.timeoutMs ?? 15000,
        },
      );

      const content = (response.data as { choices?: unknown[] })?.choices?.[0] as
        | { message?: { content?: unknown } }
        | undefined;
      const translated = extractAiText(content?.message?.content);
      return translated || null;
    },
  }));
}

registerDefaultTranslationProviders();