mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 06:22:45 -08:00
306 lines
8.0 KiB
TypeScript
306 lines
8.0 KiB
TypeScript
import { BrowserWindow, Extension, session } from "electron";
|
|
import { MergedToken, PartOfSpeech, SubtitleData } from "../../types";
|
|
|
|
/**
 * Loosely typed view of a single headword entry attached to a Yomitan parse
 * segment. The payload comes from the extension at runtime, so the field is
 * `unknown` and must be narrowed before use.
 */
interface YomitanParseHeadword {
  /** Headword text; only used when it turns out to be a string. */
  term?: unknown;
}
|
|
|
|
/**
 * Loosely typed view of one segment inside a parsed line returned by the
 * Yomitan parser. Every field is `unknown` because the data crosses the
 * extension boundary untyped; consumers narrow each field before reading it.
 */
interface YomitanParseSegment {
  /** Surface text of the segment; segments without a non-empty string are skipped. */
  text?: unknown;
  /** Reading for the segment; appended to the token reading when it is a string. */
  reading?: unknown;
  /** Nested headword groups; expected to be an array of arrays of headword objects. */
  headwords?: unknown;
}
|
|
|
|
/**
 * Loosely typed view of one top-level entry in the array returned by the
 * Yomitan `parseText` request. Fields stay `unknown` until validated.
 */
interface YomitanParseResultItem {
  /** Parser identifier; only entries equal to "scanning-parser" are consumed. */
  source?: unknown;
  /** Position of the entry; the entry with index 0 is preferred when present. */
  index?: unknown;
  /** Parsed lines; must be an array (of segment arrays) to be usable. */
  content?: unknown;
}
|
|
|
|
/**
 * Dependencies injected into the tokenizer service. The service keeps no
 * module-level state of its own: the parser window and the promises tracking
 * its lifecycle are read and written through these accessors.
 */
export interface TokenizerServiceDeps {
  /** Returns the Yomitan extension, or `null` when it is unavailable. */
  getYomitanExt: () => Extension | null;

  /** Returns the hidden window used to talk to Yomitan, if one has been stored. */
  getYomitanParserWindow: () => BrowserWindow | null;

  /** Stores (or clears, with `null`) the Yomitan parser window. */
  setYomitanParserWindow: (window: BrowserWindow | null) => void;

  /** Returns the promise tracking the parser page load, if any. */
  getYomitanParserReadyPromise: () => Promise<void> | null;

  /** Stores (or clears, with `null`) the promise tracking the parser page load. */
  setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;

  /** Returns the in-flight parser-window initialization, if any. */
  getYomitanParserInitPromise: () => Promise<boolean> | null;

  /** Stores (or clears, with `null`) the in-flight parser-window initialization. */
  setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;

  /** Fallback tokenizer used when the Yomitan parser yields no tokens. */
  tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
}
|
|
|
|
/**
 * Picks the headword term for a segment.
 *
 * Looks only at the first entry of the first headword group. Anything that
 * does not match the expected nested-array shape, or whose `term` is not a
 * string, yields an empty string.
 *
 * @param segment - Parsed segment whose `headwords` field is inspected.
 * @returns The first headword term, or `""` when none can be read.
 */
function extractYomitanHeadword(segment: YomitanParseSegment): string {
  const groups = segment.headwords;
  const leadingGroup: unknown =
    Array.isArray(groups) && groups.length > 0 ? groups[0] : undefined;

  if (!Array.isArray(leadingGroup) || leadingGroup.length === 0) {
    return "";
  }

  const candidate = leadingGroup[0] as YomitanParseHeadword;
  const term = candidate?.term;
  if (typeof term === "string") {
    return term;
  }
  return "";
}
|
|
|
|
/**
 * Converts the raw value returned by the Yomitan `parseText` request into
 * `MergedToken`s.
 *
 * Only entries whose `source` is "scanning-parser" and whose `content` is an
 * array are considered; the entry with `index === 0` wins, otherwise the first
 * such entry is used. Each array inside `content` becomes one token whose
 * surface is the concatenation of its segments' texts. Offsets are cumulative
 * surface lengths, starting at 0.
 *
 * NOTE(review): segments without a string `reading` contribute nothing to the
 * token reading - confirm whether such segments should fall back to their text.
 *
 * @param parseResults - Untyped value received from the parser window.
 * @returns The tokens, or `null` when the input has an unexpected shape or
 *   produces no tokens.
 */
function mapYomitanParseResultsToMergedTokens(
  parseResults: unknown,
): MergedToken[] | null {
  if (!Array.isArray(parseResults) || parseResults.length === 0) {
    return null;
  }

  // Collect every usable scanning-parser entry.
  const candidates: YomitanParseResultItem[] = [];
  for (const entry of parseResults) {
    const item = entry as YomitanParseResultItem;
    if (
      item &&
      item.source === "scanning-parser" &&
      Array.isArray(item.content)
    ) {
      candidates.push(item);
    }
  }
  if (candidates.length === 0) {
    return null;
  }

  const chosen =
    candidates.find((candidate) => candidate.index === 0) || candidates[0];
  const parsedLines = chosen.content;
  if (!Array.isArray(parsedLines)) {
    return null;
  }

  const tokens: MergedToken[] = [];
  let cursor = 0;

  for (const parsedLine of parsedLines) {
    if (!Array.isArray(parsedLine)) {
      continue;
    }

    const surfaceParts: string[] = [];
    const readingParts: string[] = [];
    let headword = "";

    for (const rawSegment of parsedLine) {
      const segment = rawSegment as YomitanParseSegment;
      if (!segment || typeof segment !== "object") {
        continue;
      }

      const piece = segment.text;
      if (typeof piece !== "string" || piece.length === 0) {
        continue;
      }
      surfaceParts.push(piece);

      if (typeof segment.reading === "string") {
        readingParts.push(segment.reading);
      }

      // The first segment that yields a headword decides it for the token.
      if (!headword) {
        headword = extractYomitanHeadword(segment);
      }
    }

    const surface = surfaceParts.join("");
    if (!surface) {
      continue;
    }

    const startPos = cursor;
    cursor += surface.length;

    tokens.push({
      surface,
      reading: readingParts.join(""),
      headword: headword || surface,
      startPos,
      endPos: cursor,
      partOfSpeech: PartOfSpeech.other,
      isMerged: true,
    });
  }

  return tokens.length > 0 ? tokens : null;
}
|
|
|
|
/**
 * Makes sure a hidden window hosting the Yomitan `search.html` page exists
 * and has finished loading, creating it on demand.
 *
 * Concurrent callers share one initialization through the init promise stored
 * in `deps`. The window and its ready promise are also published through
 * `deps`, and are cleared again when the window closes or fails to load.
 *
 * @param deps - Accessors for the extension, the parser window and the
 *   promises tracking its lifecycle.
 * @returns `true` when a usable parser window is available; `false` when the
 *   extension is missing or the window could not be created or loaded.
 */
async function ensureYomitanParserWindow(
  deps: TokenizerServiceDeps,
): Promise<boolean> {
  const yomitanExt = deps.getYomitanExt();
  if (!yomitanExt) {
    return false;
  }

  // An in-flight initialization takes precedence over the live-window check:
  // the window is published to `deps` before its page has loaded, so a live
  // window alone does not mean the parser is ready to be used.
  const existingInitPromise = deps.getYomitanParserInitPromise();
  if (existingInitPromise) {
    return existingInitPromise;
  }

  const currentWindow = deps.getYomitanParserWindow();
  if (currentWindow && !currentWindow.isDestroyed()) {
    return true;
  }

  const initPromise = (async (): Promise<boolean> => {
    let parserWindow: BrowserWindow | null = null;
    try {
      // Created inside the try so that a constructor failure is reported as
      // `false` instead of leaving a rejected init promise behind.
      const createdWindow = new BrowserWindow({
        show: false,
        width: 800,
        height: 600,
        webPreferences: {
          contextIsolation: true,
          nodeIntegration: false,
          session: session.defaultSession,
        },
      });
      parserWindow = createdWindow;
      deps.setYomitanParserWindow(createdWindow);

      const readyPromise = new Promise<void>((resolve, reject) => {
        createdWindow.webContents.once("did-finish-load", () => resolve());
        createdWindow.webContents.once(
          "did-fail-load",
          (_event, _errorCode, errorDescription) => {
            reject(new Error(errorDescription));
          },
        );
      });
      // When the load fails, `loadURL` rejects first and the ready promise is
      // never awaited; mark it as handled so the failure does not also
      // surface as an unhandled rejection.
      readyPromise.catch(() => undefined);
      deps.setYomitanParserReadyPromise(readyPromise);

      createdWindow.on("closed", () => {
        // Only clear the shared state if it still refers to this window.
        if (deps.getYomitanParserWindow() === createdWindow) {
          deps.setYomitanParserWindow(null);
          deps.setYomitanParserReadyPromise(null);
        }
      });

      await createdWindow.loadURL(
        `chrome-extension://${yomitanExt.id}/search.html`,
      );
      const pendingReady = deps.getYomitanParserReadyPromise();
      if (pendingReady) {
        await pendingReady;
      }
      return true;
    } catch (err) {
      console.error(
        "Failed to initialize Yomitan parser window:",
        err instanceof Error ? err.message : String(err),
      );
      if (parserWindow) {
        if (!parserWindow.isDestroyed()) {
          parserWindow.destroy();
        }
        if (deps.getYomitanParserWindow() === parserWindow) {
          deps.setYomitanParserWindow(null);
          deps.setYomitanParserReadyPromise(null);
        }
      }
      return false;
    }
  })().finally(() => {
    // Chained rather than written as a `finally` block inside the async
    // function: this callback always runs after the registration below, even
    // when the body completes without ever awaiting.
    deps.setYomitanParserInitPromise(null);
  });

  deps.setYomitanParserInitPromise(initPromise);
  return initPromise;
}
|
|
|
|
/**
 * Tokenizes `text` with Yomitan's internal parser by running a script inside
 * the hidden parser window.
 *
 * The script sends two messages through `chrome.runtime.sendMessage`:
 * `optionsGetFull`, to read the current profile index and its scan length
 * (defaulting to 40), then `parseText` with the internal parser enabled and
 * the MeCab parser disabled.
 *
 * Never rejects: any failure while preparing the window or running the
 * request is logged and reported as `null`, so the caller can fall back to
 * another tokenizer.
 *
 * @param text - Text to tokenize; an empty string short-circuits to `null`.
 * @param deps - Accessors for the extension and the parser window.
 * @returns The mapped tokens, or `null` when the extension or window is
 *   unavailable, the request fails, or no tokens are produced.
 */
async function parseWithYomitanInternalParser(
  text: string,
  deps: TokenizerServiceDeps,
): Promise<MergedToken[] | null> {
  const yomitanExt = deps.getYomitanExt();
  if (!text || !yomitanExt) {
    return null;
  }

  // `JSON.stringify` turns the subtitle text into a safely quoted literal
  // before it is spliced into the script source.
  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex = optionsFull.profileCurrent;
      const scanLength =
        optionsFull.profiles?.[profileIndex]?.options?.scanning?.length ?? 40;

      return await invoke("parseText", {
        text: ${JSON.stringify(text)},
        optionsContext: { index: profileIndex },
        scanLength,
        useInternalParser: true,
        useMecabParser: false
      });
    })();
  `;

  try {
    // Window preparation sits inside the try as well, so that a rejection
    // there is reported as `null` rather than escaping to the caller.
    const isReady = await ensureYomitanParserWindow(deps);
    const parserWindow = deps.getYomitanParserWindow();
    if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
      return null;
    }

    const parseResults: unknown =
      await parserWindow.webContents.executeJavaScript(script, true);
    return mapYomitanParseResultsToMergedTokens(parseResults);
  } catch (err) {
    console.error(
      "Yomitan parser request failed:",
      err instanceof Error ? err.message : String(err),
    );
    return null;
  }
}
|
|
|
|
/**
 * Normalizes a subtitle line and tokenizes it.
 *
 * The display text has CRLF pairs and literal `\N` / `\n` sequences
 * (presumably subtitle line-break markers - verify against the subtitle
 * source) turned into real newlines, then is trimmed. The text handed to the
 * tokenizers additionally has every whitespace run collapsed into one space.
 *
 * The Yomitan parser is tried first; MeCab is consulted only when Yomitan
 * yields no tokens. A MeCab failure is logged and treated as "no tokens".
 *
 * @param text - Raw subtitle text.
 * @param deps - Tokenizer dependencies (parser window accessors and MeCab).
 * @returns The display text with its tokens, or `tokens: null` when nothing
 *   could be tokenized. When the normalized text is empty, the original
 *   `text` is returned unchanged with `tokens: null`.
 */
export async function tokenizeSubtitleService(
  text: string,
  deps: TokenizerServiceDeps,
): Promise<SubtitleData> {
  const withUnixNewlines = text.replace(/\r\n/g, "\n");
  const withRealLineBreaks = withUnixNewlines
    .replace(/\\N/g, "\n")
    .replace(/\\n/g, "\n");
  const displayText = withRealLineBreaks.trim();

  if (displayText.length === 0) {
    return { text, tokens: null };
  }

  const singleLine = displayText.replace(/\n/g, " ");
  const tokenizeText = singleLine.replace(/\s+/g, " ").trim();

  let tokens: SubtitleData["tokens"] = null;

  const fromYomitan = await parseWithYomitanInternalParser(tokenizeText, deps);
  if (fromYomitan && fromYomitan.length > 0) {
    tokens = fromYomitan;
  }

  if (tokens === null) {
    try {
      const fromMecab = await deps.tokenizeWithMecab(tokenizeText);
      if (fromMecab && fromMecab.length > 0) {
        tokens = fromMecab;
      }
    } catch (err) {
      console.error("Tokenization error:", (err as Error).message);
    }
  }

  return { text: displayText, tokens };
}
|