diff --git a/backlog/tasks/task-87.5 - Dead-architecture-cleanup-delete-unused-registry-and-pipeline-modules-that-are-off-the-live-path.md b/backlog/tasks/task-87.5 - Dead-architecture-cleanup-delete-unused-registry-and-pipeline-modules-that-are-off-the-live-path.md
index 5639863..f88c8e2 100644
--- a/backlog/tasks/task-87.5 - Dead-architecture-cleanup-delete-unused-registry-and-pipeline-modules-that-are-off-the-live-path.md
+++ b/backlog/tasks/task-87.5 - Dead-architecture-cleanup-delete-unused-registry-and-pipeline-modules-that-are-off-the-live-path.md
@@ -3,10 +3,10 @@ id: TASK-87.5
 title: >-
   Dead architecture cleanup: delete unused registry and pipeline modules that
   are off the live path
-status: To Do
+status: Done
 assignee: []
 created_date: '2026-03-06 03:20'
-updated_date: '2026-03-06 03:21'
+updated_date: '2026-03-06 11:05'
 labels:
   - tech-debt
   - dead-code
@@ -40,10 +40,10 @@ The review found several modules that appear self-contained but unused from the
 
-- [ ] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
-- [ ] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
-- [ ] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
-- [ ] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
+- [x] #1 Each candidate module identified in the review is either removed as dead code or justified and reconnected to a real supported execution path.
+- [x] #2 Any stale exports, imports, or tests associated with the removed or consolidated modules are cleaned up so the codebase has a single obvious path for the affected behavior.
+- [x] #3 The cleanup does not regress live tokenization or subtitle sync behavior and the relevant verification commands remain green.
+- [x] #4 Contributor-facing documentation or internal notes no longer imply that removed duplicate architecture is part of the current design.
 
 ## Implementation Plan
 
@@ -55,3 +55,10 @@ The review found several modules that appear self-contained but unused from the
 
 3. Pay special attention to subtitle sync and tokenization surfaces, since duplicate architecture exists near active code.
 4. Verify the relevant tokenization and subsync commands/tests still pass and update any stale docs or notes.
+
+## Implementation Notes
+
+- Traced imports from `src/main.ts`, `src/main/runtime/**`, `src/core/services/subsync-runner.ts`, and `src/core/services/tokenizer.ts`; confirmed the candidate registry/pipeline modules were isolated from the maintained runtime path.
+- Deleted dead modules: `src/translators/index.ts`, `src/subsync/engines.ts`, `src/subtitle/pipeline.ts`, `src/subtitle/stages/{merge,normalize,tokenize}.ts`, `src/subtitle/stages/normalize.test.ts`, `src/tokenizers/index.ts`, and `src/token-mergers/index.ts`.
+- Moved the useful zero-width separator normalization into the live tokenizer path in `src/core/services/tokenizer.ts` and added regression coverage plus a repository-level dead-architecture guard in `src/dead-architecture-cleanup.test.ts`.
+- Verified with `bun test src/core/services/tokenizer.test.ts`, `bun test src/dead-architecture-cleanup.test.ts`, `bun test src/core/services/subsync.test.ts src/subsync/utils.test.ts`, `bun run tsc`, and `bun run test:src`.
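The core of the change described in the notes above is a three-step input normalization. As a minimal sketch of the chain the hunks below put on the live path (the `toTokenizerInput` wrapper name is illustrative; the shipped change inlines this chain in `tokenizeSubtitle`):

```ts
// Collapse invisible separators first, then fold newlines, then squeeze whitespace.
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g; // U+200B, U+2060, U+FEFF

function toTokenizerInput(displayText: string): string {
  return displayText
    .replace(INVISIBLE_SEPARATOR_PATTERN, ' ') // zero-width separators become plain spaces
    .replace(/\n/g, ' ')                       // newlines become spaces
    .replace(/\s+/g, ' ')                      // runs of whitespace collapse to one space
    .trim();
}

// toTokenizerInput('キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!')
//   → 'キリキリと かかってこい このヘナチョコ冒険者どもめが!'
```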
diff --git a/src/core/services/tokenizer.test.ts b/src/core/services/tokenizer.test.ts
index ff52f3d..6e1c911 100644
--- a/src/core/services/tokenizer.test.ts
+++ b/src/core/services/tokenizer.test.ts
@@ -1235,6 +1235,30 @@ test('tokenizeSubtitle normalizes newlines before Yomitan parse request', async
   assert.equal(result.tokens, null);
 });
 
+test('tokenizeSubtitle collapses zero-width separators before Yomitan parse request', async () => {
+  let parseInput = '';
+  const result = await tokenizeSubtitle(
+    'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!',
+    makeDeps({
+      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
+      getYomitanParserWindow: () =>
+        ({
+          isDestroyed: () => false,
+          webContents: {
+            executeJavaScript: async (script: string) => {
+              parseInput = script;
+              return null;
+            },
+          },
+        }) as unknown as Electron.BrowserWindow,
+    }),
+  );
+
+  assert.match(parseInput, /キリキリと かかってこい このヘナチョコ冒険者どもめが!/);
+  assert.equal(result.text, 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!');
+  assert.equal(result.tokens, null);
+});
+
 test('tokenizeSubtitle returns null tokens when Yomitan parsing is unavailable', async () => {
   const result = await tokenizeSubtitle('猫です', makeDeps());
 
diff --git a/src/core/services/tokenizer.ts b/src/core/services/tokenizer.ts
index ca8174b..6af3def 100644
--- a/src/core/services/tokenizer.ts
+++ b/src/core/services/tokenizer.ts
@@ -106,6 +106,7 @@ const DEFAULT_ANNOTATION_POS1_EXCLUSIONS = resolveAnnotationPos1ExclusionSet(
 const DEFAULT_ANNOTATION_POS2_EXCLUSIONS = resolveAnnotationPos2ExclusionSet(
   DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG,
 );
+const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
 
 function getKnownWordLookup(
   deps: TokenizerServiceDeps,
@@ -563,7 +564,11 @@ export async function tokenizeSubtitle(
     return { text, tokens: null };
   }
 
-  const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
+  const tokenizeText = displayText
+    .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
+    .replace(/\n/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
 
   const annotationOptions = getAnnotationOptions(deps);
   const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
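Read together, the two hunks above pin down a useful contract: only the string handed to the Yomitan parser is normalized, while the returned `text` keeps the original display string. A condensed sketch of that contract (illustrative only; `deps` stands in for the stubbed dependencies the test builds with `makeDeps`):

```ts
// Mirrors the assertions in the zero-width separator test above.
const raw = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!';
const result = await tokenizeSubtitle(raw, deps);

// The display text comes back untouched, zero-width separator and newline included...
console.assert(result.text === raw);
// ...while the parser input has both collapsed to single spaces:
// 'キリキリと かかってこい このヘナチョコ冒険者どもめが!'
```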
diff --git a/src/dead-architecture-cleanup.test.ts b/src/dead-architecture-cleanup.test.ts
new file mode 100644
index 0000000..01bca85
--- /dev/null
+++ b/src/dead-architecture-cleanup.test.ts
@@ -0,0 +1,70 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import path from 'node:path';
+
+const DEAD_MODULE_PATHS = [
+  'src/translators/index.ts',
+  'src/subsync/engines.ts',
+  'src/subtitle/pipeline.ts',
+  'src/subtitle/stages/merge.ts',
+  'src/subtitle/stages/normalize.ts',
+  'src/subtitle/stages/normalize.test.ts',
+  'src/subtitle/stages/tokenize.ts',
+  'src/tokenizers/index.ts',
+  'src/token-mergers/index.ts',
+] as const;
+
+const FORBIDDEN_IMPORT_PATTERNS = [
+  /from ['"]\.\.?\/tokenizers['"]/,
+  /from ['"]\.\.?\/token-mergers['"]/,
+  /from ['"]\.\.?\/subtitle\/pipeline['"]/,
+  /from ['"]\.\.?\/subsync\/engines['"]/,
+  /from ['"]\.\.?\/translators['"]/,
+] as const;
+
+function readWorkspaceFile(relativePath: string): string {
+  return fs.readFileSync(path.join(process.cwd(), relativePath), 'utf8');
+}
+
+function collectSourceFiles(rootDir: string): string[] {
+  const absoluteRoot = path.join(process.cwd(), rootDir);
+  const out: string[] = [];
+
+  const visit = (currentDir: string) => {
+    for (const entry of fs.readdirSync(currentDir, { withFileTypes: true })) {
+      const fullPath = path.join(currentDir, entry.name);
+      if (entry.isDirectory()) {
+        visit(fullPath);
+        continue;
+      }
+      if (!fullPath.endsWith('.ts') && !fullPath.endsWith('.tsx')) {
+        continue;
+      }
+      out.push(path.relative(process.cwd(), fullPath).replaceAll('\\', '/'));
+    }
+  };
+
+  visit(absoluteRoot);
+  out.sort();
+  return out;
+}
+
+test('dead registry and pipeline modules stay removed from the repository', () => {
+  for (const relativePath of DEAD_MODULE_PATHS) {
+    assert.equal(
+      fs.existsSync(path.join(process.cwd(), relativePath)),
+      false,
+      `${relativePath} should stay deleted`,
+    );
+  }
+});
+
+test('live source tree no longer imports dead registry and pipeline modules', () => {
+  for (const relativePath of collectSourceFiles('src')) {
+    const source = readWorkspaceFile(relativePath);
+    for (const pattern of FORBIDDEN_IMPORT_PATTERNS) {
+      assert.doesNotMatch(source, pattern, `${relativePath} should not import ${pattern.source}`);
+    }
+  }
+});
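For illustration, an import like the following anywhere under `src/` would now trip the second guard (hypothetical file, not part of the patch; since the module no longer exists, `bun run tsc` would reject it as well):

```ts
// src/hypothetical-consumer.ts — matches /from ['"]\.\.?\/tokenizers['"]/ and
// fails 'live source tree no longer imports dead registry and pipeline modules'.
import { registerTokenizerProvider } from './tokenizers';
```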
diff --git a/src/subsync/engines.ts b/src/subsync/engines.ts
deleted file mode 100644
index c440e74..0000000
--- a/src/subsync/engines.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-export type SubsyncEngine = 'alass' | 'ffsubsync';
-
-export interface SubsyncCommandResult {
-  ok: boolean;
-  code: number | null;
-  stderr: string;
-  stdout: string;
-  error?: string;
-}
-
-export interface SubsyncEngineExecutionContext {
-  referenceFilePath: string;
-  videoPath: string;
-  inputSubtitlePath: string;
-  outputPath: string;
-  audioStreamIndex: number | null;
-  resolveExecutablePath: (configuredPath: string, commandName: string) => string;
-  resolvedPaths: {
-    alassPath: string;
-    ffsubsyncPath: string;
-  };
-  runCommand: (command: string, args: string[]) => Promise<SubsyncCommandResult>;
-}
-
-export interface SubsyncEngineProvider {
-  engine: SubsyncEngine;
-  execute: (context: SubsyncEngineExecutionContext) => Promise<SubsyncCommandResult>;
-}
-
-type SubsyncEngineProviderFactory = () => SubsyncEngineProvider;
-
-const subsyncEngineProviderFactories = new Map<SubsyncEngine, SubsyncEngineProviderFactory>();
-
-export function registerSubsyncEngineProvider(
-  engine: SubsyncEngine,
-  factory: SubsyncEngineProviderFactory,
-): void {
-  if (subsyncEngineProviderFactories.has(engine)) {
-    return;
-  }
-  subsyncEngineProviderFactories.set(engine, factory);
-}
-
-export function createSubsyncEngineProvider(engine: SubsyncEngine): SubsyncEngineProvider | null {
-  const factory = subsyncEngineProviderFactories.get(engine);
-  if (!factory) return null;
-  return factory();
-}
-
-function registerDefaultSubsyncEngineProviders(): void {
-  registerSubsyncEngineProvider('alass', () => ({
-    engine: 'alass',
-    execute: async (context: SubsyncEngineExecutionContext) => {
-      const alassPath = context.resolveExecutablePath(context.resolvedPaths.alassPath, 'alass');
-      return context.runCommand(alassPath, [
-        context.referenceFilePath,
-        context.inputSubtitlePath,
-        context.outputPath,
-      ]);
-    },
-  }));
-
-  registerSubsyncEngineProvider('ffsubsync', () => ({
-    engine: 'ffsubsync',
-    execute: async (context: SubsyncEngineExecutionContext) => {
-      const ffsubsyncPath = context.resolveExecutablePath(
-        context.resolvedPaths.ffsubsyncPath,
-        'ffsubsync',
-      );
-      const args = [context.videoPath, '-i', context.inputSubtitlePath, '-o', context.outputPath];
-      if (context.audioStreamIndex !== null) {
-        args.push('--reference-stream', `0:${context.audioStreamIndex}`);
-      }
-      return context.runCommand(ffsubsyncPath, args);
-    },
-  }));
-}
-
-registerDefaultSubsyncEngineProviders();
diff --git a/src/subtitle/pipeline.ts b/src/subtitle/pipeline.ts
deleted file mode 100644
index 7809059..0000000
--- a/src/subtitle/pipeline.ts
+++ /dev/null
@@ -1,43 +0,0 @@
-import { TokenMergerProvider } from '../token-mergers';
-import { TokenizerProvider } from '../tokenizers';
-import { SubtitleData } from '../types';
-import { normalizeDisplayText, normalizeTokenizerInput } from './stages/normalize';
-import { tokenizeStage } from './stages/tokenize';
-import { mergeStage } from './stages/merge';
-
-export interface SubtitlePipelineDeps {
-  getTokenizer: () => TokenizerProvider | null;
-  getTokenMerger: () => TokenMergerProvider | null;
-}
-
-export class SubtitlePipeline {
-  private readonly deps: SubtitlePipelineDeps;
-
-  constructor(deps: SubtitlePipelineDeps) {
-    this.deps = deps;
-  }
-
-  async process(text: string): Promise<SubtitleData> {
-    if (!text) {
-      return { text, tokens: null };
-    }
-
-    const displayText = normalizeDisplayText(text);
-    if (!displayText) {
-      return { text, tokens: null };
-    }
-
-    const tokenizeText = normalizeTokenizerInput(displayText);
-
-    try {
-      const tokens = await tokenizeStage(this.deps.getTokenizer(), tokenizeText);
-      const mergedTokens = mergeStage(this.deps.getTokenMerger(), tokens);
-      if (!mergedTokens || mergedTokens.length === 0) {
-        return { text: displayText, tokens: null };
-      }
-      return { text: displayText, tokens: mergedTokens };
-    } catch {
-      return { text: displayText, tokens: null };
-    }
-  }
-}
diff --git a/src/subtitle/stages/merge.ts b/src/subtitle/stages/merge.ts
deleted file mode 100644
index f94491b..0000000
--- a/src/subtitle/stages/merge.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { TokenMergerProvider } from '../../token-mergers';
-import { MergedToken, Token } from '../../types';
-
-export function mergeStage(
-  mergerProvider: TokenMergerProvider | null,
-  tokens: Token[] | null,
-): MergedToken[] | null {
-  if (!mergerProvider || !tokens || tokens.length === 0) {
-    return null;
-  }
-  return mergerProvider.merge(tokens);
-}
diff --git a/src/subtitle/stages/normalize.test.ts b/src/subtitle/stages/normalize.test.ts
deleted file mode 100644
index 9584b06..0000000
--- a/src/subtitle/stages/normalize.test.ts
+++ /dev/null
@@ -1,10 +0,0 @@
-import test from 'node:test';
-import assert from 'node:assert/strict';
-import { normalizeTokenizerInput } from './normalize';
-
-test('normalizeTokenizerInput collapses zero-width separators between Japanese segments', () => {
-  const input = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!';
-  const normalized = normalizeTokenizerInput(input);
-
-  assert.equal(normalized, 'キリキリと かかってこい このヘナチョコ冒険者どもめが!');
-});
diff --git a/src/subtitle/stages/normalize.ts b/src/subtitle/stages/normalize.ts
deleted file mode 100644
index 6cbe0f8..0000000
--- a/src/subtitle/stages/normalize.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-export function normalizeDisplayText(text: string): string {
-  return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
-}
-
-const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
-
-export function normalizeTokenizerInput(displayText: string): string {
-  return displayText
-    .replace(/\n/g, ' ')
-    .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
-    .replace(/\s+/g, ' ')
-    .trim();
-}
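One detail worth noting: the deleted `normalizeTokenizerInput` above folded newlines before collapsing invisible separators, while the relocated chain in `src/core/services/tokenizer.ts` collapses separators first. The two orders are equivalent, since the patterns match disjoint characters and each match is independently rewritten to a space before the final `\s+` squeeze. A quick check (not from the patch):

```ts
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;
const input = 'キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!';

// Order used by the deleted normalizeTokenizerInput: newlines, then separators.
const oldOrder = input
  .replace(/\n/g, ' ')
  .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
  .replace(/\s+/g, ' ')
  .trim();

// Order used by the live tokenizeSubtitle: separators, then newlines.
const newOrder = input
  .replace(INVISIBLE_SEPARATOR_PATTERN, ' ')
  .replace(/\n/g, ' ')
  .replace(/\s+/g, ' ')
  .trim();

console.assert(oldOrder === newOrder); // both: 'キリキリと かかってこい このヘナチョコ冒険者どもめが!'
```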
diff --git a/src/subtitle/stages/tokenize.ts b/src/subtitle/stages/tokenize.ts
deleted file mode 100644
index 0ad8ea7..0000000
--- a/src/subtitle/stages/tokenize.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { TokenizerProvider } from '../../tokenizers';
-import { Token } from '../../types';
-
-export async function tokenizeStage(
-  tokenizerProvider: TokenizerProvider | null,
-  input: string,
-): Promise<Token[] | null> {
-  if (!tokenizerProvider || !input) {
-    return null;
-  }
-  return tokenizerProvider.tokenize(input);
-}
diff --git a/src/token-mergers/index.ts b/src/token-mergers/index.ts
deleted file mode 100644
index 0fbc28c..0000000
--- a/src/token-mergers/index.ts
+++ /dev/null
@@ -1,27 +0,0 @@
-import { mergeTokens as defaultMergeTokens } from '../token-merger';
-import { MergedToken, Token } from '../types';
-
-export interface TokenMergerProvider {
-  id: string;
-  merge: (tokens: Token[]) => MergedToken[];
-}
-
-type TokenMergerProviderFactory = () => TokenMergerProvider;
-
-const tokenMergerProviderFactories = new Map<string, TokenMergerProviderFactory>();
-
-export function registerTokenMergerProvider(id: string, factory: TokenMergerProviderFactory): void {
-  if (tokenMergerProviderFactories.has(id)) {
-    return;
-  }
-  tokenMergerProviderFactories.set(id, factory);
-}
-
-function registerDefaultTokenMergerProviders(): void {
-  registerTokenMergerProvider('default', () => ({
-    id: 'default',
-    merge: (tokens: Token[]) => defaultMergeTokens(tokens),
-  }));
-}
-
-registerDefaultTokenMergerProviders();
diff --git a/src/tokenizers/index.ts b/src/tokenizers/index.ts
deleted file mode 100644
index 2804dbf..0000000
--- a/src/tokenizers/index.ts
+++ /dev/null
@@ -1,36 +0,0 @@
-import { MecabTokenizer } from '../mecab-tokenizer';
-import { MecabStatus, Token } from '../types';
-
-export interface TokenizerProvider {
-  id: string;
-  checkAvailability: () => Promise<boolean>;
-  tokenize: (text: string) => Promise<Token[] | null>;
-  getStatus: () => MecabStatus;
-  setEnabled: (enabled: boolean) => void;
-}
-
-type TokenizerProviderFactory = () => TokenizerProvider;
-
-const tokenizerProviderFactories = new Map<string, TokenizerProviderFactory>();
-
-export function registerTokenizerProvider(id: string, factory: TokenizerProviderFactory): void {
-  if (tokenizerProviderFactories.has(id)) {
-    return;
-  }
-  tokenizerProviderFactories.set(id, factory);
-}
-
-function registerDefaultTokenizerProviders(): void {
-  registerTokenizerProvider('mecab', () => {
-    const mecab = new MecabTokenizer();
-    return {
-      id: 'mecab',
-      checkAvailability: () => mecab.checkAvailability(),
-      tokenize: (text: string) => mecab.tokenize(text),
-      getStatus: () => mecab.getStatus(),
-      setEnabled: (enabled: boolean) => mecab.setEnabled(enabled),
-    };
-  });
-}
-
-registerDefaultTokenizerProviders();
diff --git a/src/translators/index.ts b/src/translators/index.ts
deleted file mode 100644
index 092269e..0000000
--- a/src/translators/index.ts
+++ /dev/null
@@ -1,101 +0,0 @@
-import axios from 'axios';
-
-export interface TranslationRequest {
-  sentence: string;
-  apiKey: string;
-  baseUrl: string;
-  model: string;
-  targetLanguage: string;
-  systemPrompt: string;
-  timeoutMs?: number;
-}
-
-export interface TranslationProvider {
-  id: string;
-  translate: (request: TranslationRequest) => Promise<string | null>;
-}
-
-type TranslationProviderFactory = () => TranslationProvider;
-
-const translationProviderFactories = new Map<string, TranslationProviderFactory>();
-
-export function registerTranslationProvider(id: string, factory: TranslationProviderFactory): void {
-  if (translationProviderFactories.has(id)) {
-    return;
-  }
-  translationProviderFactories.set(id, factory);
-}
-
-export function createTranslationProvider(id = 'openai-compatible'): TranslationProvider | null {
-  const factory = translationProviderFactories.get(id);
-  if (!factory) return null;
-  return factory();
-}
-
-function extractAiText(content: unknown): string {
-  if (typeof content === 'string') {
-    return content.trim();
-  }
-  if (!Array.isArray(content)) {
-    return '';
-  }
-  const parts: string[] = [];
-  for (const item of content) {
-    if (
-      item &&
-      typeof item === 'object' &&
-      'type' in item &&
-      (item as { type?: unknown }).type === 'text' &&
-      'text' in item &&
-      typeof (item as { text?: unknown }).text === 'string'
-    ) {
-      parts.push((item as { text: string }).text);
-    }
-  }
-  return parts.join('').trim();
-}
-
-function normalizeOpenAiBaseUrl(baseUrl: string): string {
-  const trimmed = baseUrl.trim().replace(/\/+$/, '');
-  if (/\/v1$/i.test(trimmed)) {
-    return trimmed;
-  }
-  return `${trimmed}/v1`;
-}
-
-function registerDefaultTranslationProviders(): void {
-  registerTranslationProvider('openai-compatible', () => ({
-    id: 'openai-compatible',
-    translate: async (request: TranslationRequest): Promise<string | null> => {
-      const response = await axios.post(
-        `${normalizeOpenAiBaseUrl(request.baseUrl)}/chat/completions`,
-        {
-          model: request.model,
-          temperature: 0,
-          messages: [
-            { role: 'system', content: request.systemPrompt },
-            {
-              role: 'user',
-              content: `Translate this text to ${request.targetLanguage}:\n\n${request.sentence}`,
-            },
-          ],
-        },
-        {
-          headers: {
-            Authorization: `Bearer ${request.apiKey}`,
-            'Content-Type': 'application/json',
-          },
-          timeout: request.timeoutMs ?? 15000,
-        },
-      );
-
-      const content = (response.data as { choices?: unknown[] })?.choices?.[0] as
-        | { message?: { content?: unknown } }
-        | undefined;
-      const translated = extractAiText(content?.message?.content);
-      return translated || null;
-    },
-  }));
-}
-
-registerDefaultTranslationProviders();
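Of the deleted helpers, `normalizeOpenAiBaseUrl` is the one with non-obvious behavior: it tolerated both bare hosts and already `/v1`-suffixed endpoints. For the record, worked examples of the removed logic (sample inputs, not from the patch):

```ts
// Behavior of the deleted normalizeOpenAiBaseUrl on sample inputs:
normalizeOpenAiBaseUrl('https://api.openai.com');     // → 'https://api.openai.com/v1' (suffix appended)
normalizeOpenAiBaseUrl('https://api.openai.com/v1/'); // → 'https://api.openai.com/v1' (trailing slash stripped)
normalizeOpenAiBaseUrl('http://localhost:8080/V1');   // → 'http://localhost:8080/V1' (/v1 matched case-insensitively)
```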