mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 18:22:42 -08:00
pretty
This commit is contained in:
@@ -1,12 +1,9 @@
|
||||
import { TokenMergerProvider } from "../token-mergers";
|
||||
import { TokenizerProvider } from "../tokenizers";
|
||||
import { SubtitleData } from "../types";
|
||||
import {
|
||||
normalizeDisplayText,
|
||||
normalizeTokenizerInput,
|
||||
} from "./stages/normalize";
|
||||
import { tokenizeStage } from "./stages/tokenize";
|
||||
import { mergeStage } from "./stages/merge";
|
||||
import { TokenMergerProvider } from '../token-mergers';
|
||||
import { TokenizerProvider } from '../tokenizers';
|
||||
import { SubtitleData } from '../types';
|
||||
import { normalizeDisplayText, normalizeTokenizerInput } from './stages/normalize';
|
||||
import { tokenizeStage } from './stages/tokenize';
|
||||
import { mergeStage } from './stages/merge';
|
||||
|
||||
export interface SubtitlePipelineDeps {
|
||||
getTokenizer: () => TokenizerProvider | null;
|
||||
@@ -33,10 +30,7 @@ export class SubtitlePipeline {
|
||||
const tokenizeText = normalizeTokenizerInput(displayText);
|
||||
|
||||
try {
|
||||
const tokens = await tokenizeStage(
|
||||
this.deps.getTokenizer(),
|
||||
tokenizeText,
|
||||
);
|
||||
const tokens = await tokenizeStage(this.deps.getTokenizer(), tokenizeText);
|
||||
const mergedTokens = mergeStage(this.deps.getTokenMerger(), tokens);
|
||||
if (!mergedTokens || mergedTokens.length === 0) {
|
||||
return { text: displayText, tokens: null };
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { TokenMergerProvider } from "../../token-mergers";
|
||||
import { MergedToken, Token } from "../../types";
|
||||
import { TokenMergerProvider } from '../../token-mergers';
|
||||
import { MergedToken, Token } from '../../types';
|
||||
|
||||
export function mergeStage(
|
||||
mergerProvider: TokenMergerProvider | null,
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
export function normalizeDisplayText(text: string): string {
|
||||
return text
|
||||
.replace(/\r\n/g, "\n")
|
||||
.replace(/\\N/g, "\n")
|
||||
.replace(/\\n/g, "\n")
|
||||
.trim();
|
||||
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
|
||||
}
|
||||
|
||||
export function normalizeTokenizerInput(displayText: string): string {
|
||||
return displayText.replace(/\n/g, " ").replace(/\s+/g, " ").trim();
|
||||
return displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { TokenizerProvider } from "../../tokenizers";
|
||||
import { Token } from "../../types";
|
||||
import { TokenizerProvider } from '../../tokenizers';
|
||||
import { Token } from '../../types';
|
||||
|
||||
export async function tokenizeStage(
|
||||
tokenizerProvider: TokenizerProvider | null,
|
||||
|
||||
Reference in New Issue
Block a user