This commit is contained in:
2026-02-17 22:50:57 -08:00
parent ffeef9c136
commit f20d019c11
315 changed files with 9876 additions and 12537 deletions

View File

@@ -1,12 +1,9 @@
import { TokenMergerProvider } from "../token-mergers";
import { TokenizerProvider } from "../tokenizers";
import { SubtitleData } from "../types";
import {
normalizeDisplayText,
normalizeTokenizerInput,
} from "./stages/normalize";
import { tokenizeStage } from "./stages/tokenize";
import { mergeStage } from "./stages/merge";
import { TokenMergerProvider } from '../token-mergers';
import { TokenizerProvider } from '../tokenizers';
import { SubtitleData } from '../types';
import { normalizeDisplayText, normalizeTokenizerInput } from './stages/normalize';
import { tokenizeStage } from './stages/tokenize';
import { mergeStage } from './stages/merge';
export interface SubtitlePipelineDeps {
getTokenizer: () => TokenizerProvider | null;
@@ -33,10 +30,7 @@ export class SubtitlePipeline {
const tokenizeText = normalizeTokenizerInput(displayText);
try {
const tokens = await tokenizeStage(
this.deps.getTokenizer(),
tokenizeText,
);
const tokens = await tokenizeStage(this.deps.getTokenizer(), tokenizeText);
const mergedTokens = mergeStage(this.deps.getTokenMerger(), tokens);
if (!mergedTokens || mergedTokens.length === 0) {
return { text: displayText, tokens: null };

View File

@@ -1,5 +1,5 @@
import { TokenMergerProvider } from "../../token-mergers";
import { MergedToken, Token } from "../../types";
import { TokenMergerProvider } from '../../token-mergers';
import { MergedToken, Token } from '../../types';
export function mergeStage(
mergerProvider: TokenMergerProvider | null,

View File

@@ -1,11 +1,7 @@
/**
 * Normalizes raw subtitle text for display.
 *
 * Converts CRLF pairs and the literal two-character sequences `\N` and `\n`
 * (a backslash followed by the letter; presumably subtitle-format line-break
 * escapes — confirm against the subtitle source) into real newline characters,
 * then trims leading and trailing whitespace.
 *
 * @param text - Raw subtitle text.
 * @returns The text with unified line breaks and no surrounding whitespace.
 */
export function normalizeDisplayText(text: string): string {
  // Single return: the previous body carried a second, unreachable copy of
  // this statement.
  return text
    .replace(/\r\n/g, '\n')
    .replace(/\\N/g, '\n')
    .replace(/\\n/g, '\n')
    .trim();
}
/**
 * Flattens display text into a single line suitable for tokenizer input.
 *
 * Every run of whitespace (spaces, tabs, newlines) is collapsed into one
 * space, and leading/trailing whitespace is removed.
 *
 * @param displayText - Text that may contain newlines and repeated whitespace.
 * @returns A single-line string with single-space separators.
 */
export function normalizeTokenizerInput(displayText: string): string {
  // `\s+` already matches newlines, so a separate newline-to-space pass is
  // unnecessary; the result is identical.
  return displayText.replace(/\s+/g, ' ').trim();
}

View File

@@ -1,5 +1,5 @@
import { TokenizerProvider } from "../../tokenizers";
import { Token } from "../../types";
import { TokenizerProvider } from '../../tokenizers';
import { Token } from '../../types';
export async function tokenizeStage(
tokenizerProvider: TokenizerProvider | null,