mirror of https://github.com/ksyasuda/SubMiner.git (synced 2026-02-28 06:22:45 -08:00)
feat: integrate n+1 target highlighting

- Merge feature-branch changes for the n+1 target-only highlight flow
- Extend the merged token model and token-merger to mark exactly one unknown target (the selection rule is sketched below)
- Thread n+1 candidate metadata through the tokenizer and config systems
- Update the subtitle renderer/state to route configured colors and the new token class
- Resolve merge conflicts in core service tests, including subtitle and subsync behavior
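For orientation before the diffs: the rule the new tests pin down is that a token is highlighted as the n+1 target only when it is the sentence's single unknown token. Below is a minimal TypeScript sketch of that rule, assuming the MergedToken shape visible in the diff; the repository's actual markNPlusOneTargets (in token-merger) may differ in detail.

// Minimal sketch of the n+1 selection rule the tests below exercise.
interface MergedToken {
  surface: string;
  reading: string;
  headword: string;
  startPos: number;
  endPos: number;
  partOfSpeech: string; // an enum (PartOfSpeech) in the repository
  isMerged: boolean;
  isKnown: boolean;
  isNPlusOneTarget: boolean;
}

function markNPlusOneTargets(tokens: MergedToken[]): MergedToken[] {
  const unknowns = tokens.filter((token) => !token.isKnown);
  // Zero unknowns means everything is known; two or more means the line is
  // not an n+1 sentence. Either way, nothing gets highlighted.
  if (unknowns.length !== 1) {
    return tokens;
  }
  const target = unknowns[0];
  return tokens.map((token) =>
    token === target ? { ...token, isNPlusOneTarget: true } : token,
  );
}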
@@ -12,17 +12,17 @@ test("sendToVisibleOverlayRuntimeService restores visibility flag when opening h
   let visibleOverlayVisible = false;

   const ok = sendToVisibleOverlayRuntimeService({
     mainWindow: {
       isDestroyed: () => false,
       webContents: {
         isLoading: () => false,
         send: (...args: unknown[]) => {
           sent.push(args);
         },
       },
     } as unknown as Electron.BrowserWindow,
     visibleOverlayVisible,
-    setVisibleOverlayVisible: (visible) => {
+    setVisibleOverlayVisible: (visible: boolean) => {
       visibleOverlayVisible = visible;
     },
     channel: "runtime-options:open",
@@ -313,7 +313,7 @@ test("runSubsyncManualService resolves string sid values from mpv stream propert
   writeExecutableScript(alassPath, "#!/bin/sh\nexit 0\n");
   writeExecutableScript(
     ffsubsyncPath,
-    `#!/bin/sh\n: > "${ffsubsyncLogPath}"\nfor arg in "$@"; do\n printf '%s\\n' "$arg" >> "${ffsubsyncLogPath}"\ndone\nprev=""\nfor arg in "$@"; do\n if [ "$prev" = "-o" ]; then\n : > "$arg"\n fi\n prev="$arg"\ndone`,
+    `#!/bin/sh\nmkdir -p "${tmpDir}"\n: > "${ffsubsyncLogPath}"\nfor arg in "$@"; do printf '%s\\n' "$arg" >> "${ffsubsyncLogPath}"; done\nprev=""\nout=""\nfor arg in "$@"; do\n if [ "$prev" = "--reference-stream" ]; then :; fi\n if [ "$prev" = "-o" ]; then out="$arg"; fi\n prev="$arg"\ndone\nif [ -n "$out" ]; then : > "$out"; fi`,
   );

   const deps = makeDeps({
@@ -354,9 +354,11 @@ test("runSubsyncManualService resolves string sid values from mpv stream propert

   assert.equal(result.ok, true);
   assert.equal(result.message, "Subtitle synchronized with ffsubsync");
-  const ffsubsyncArgs = fs.readFileSync(ffsubsyncLogPath, "utf8").trim().split("\n");
-  const outputIndex = ffsubsyncArgs.findIndex((value) => value === "-o");
-  assert.ok(outputIndex >= 0);
-  const outputPath = ffsubsyncArgs[outputIndex + 1];
+  const ffArgs = fs.readFileSync(ffsubsyncLogPath, "utf8").trim().split("\n");
+  const syncOutputIndex = ffArgs.indexOf("-o");
+  assert.equal(syncOutputIndex >= 0, true);
+  const outputPath = ffArgs[syncOutputIndex + 1];
+  assert.equal(typeof outputPath, "string");
+  assert.ok(outputPath.length > 0);
   assert.equal(fs.readFileSync(outputPath, "utf8"), "");
 });
@@ -69,6 +69,7 @@ test("tokenizeSubtitleService normalizes newlines before mecab fallback", async
           partOfSpeech: PartOfSpeech.other,
           isMerged: true,
           isKnown: false,
+          isNPlusOneTarget: false,
         },
       ];
     },
@@ -94,6 +95,7 @@ test("tokenizeSubtitleService falls back to mecab tokens when available", async
           partOfSpeech: PartOfSpeech.noun,
           isMerged: false,
           isKnown: false,
+          isNPlusOneTarget: false,
         },
       ],
     }),
@@ -157,6 +159,7 @@ test("tokenizeSubtitleService uses Yomitan parser result when available", async
   assert.equal(result.tokens?.[0]?.surface, "猫です");
   assert.equal(result.tokens?.[0]?.reading, "ねこです");
   assert.equal(result.tokens?.[0]?.isKnown, false);
+  assert.equal(result.tokens?.[0]?.isNPlusOneTarget, true);
 });

 test("tokenizeSubtitleService marks tokens as known using callback", async () => {
@@ -185,6 +188,125 @@ test("tokenizeSubtitleService marks tokens as known using callback", async () =>
   assert.equal(result.tokens?.[0]?.isKnown, true);
 });

+test("tokenizeSubtitleService selects one N+1 target token", async () => {
+  const result = await tokenizeSubtitleService(
+    "猫です",
+    makeDeps({
+      tokenizeWithMecab: async () => [
+        {
+          surface: "私",
+          reading: "ワタシ",
+          headword: "私",
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: true,
+          isNPlusOneTarget: false,
+        },
+        {
+          surface: "犬",
+          reading: "イヌ",
+          headword: "犬",
+          startPos: 1,
+          endPos: 2,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+      ],
+    }),
+  );
+
+  const targets = result.tokens?.filter((token) => token.isNPlusOneTarget) ?? [];
+  assert.equal(targets.length, 1);
+  assert.equal(targets[0]?.surface, "犬");
+});
+
+test("tokenizeSubtitleService does not mark target when sentence has multiple candidates", async () => {
+  const result = await tokenizeSubtitleService(
+    "猫犬",
+    makeDeps({
+      tokenizeWithMecab: async () => [
+        {
+          surface: "猫",
+          reading: "ネコ",
+          headword: "猫",
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+        {
+          surface: "犬",
+          reading: "イヌ",
+          headword: "犬",
+          startPos: 1,
+          endPos: 2,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+          isNPlusOneTarget: false,
+        },
+      ],
+    }),
+  );
+
+  assert.equal(
+    result.tokens?.some((token) => token.isNPlusOneTarget),
+    false,
+  );
+});
+
+test("tokenizeSubtitleService applies N+1 target marking to Yomitan results", async () => {
+  const parserWindow = {
+    isDestroyed: () => false,
+    webContents: {
+      executeJavaScript: async () => [
+        {
+          source: "scanning-parser",
+          index: 0,
+          content: [
+            [
+              {
+                text: "猫",
+                reading: "ねこ",
+                headwords: [[{ term: "猫" }]],
+              },
+            ],
+            [
+              {
+                text: "です",
+                reading: "です",
+                headwords: [[{ term: "です" }]],
+              },
+            ],
+          ],
+        },
+      ],
+    },
+  } as unknown as Electron.BrowserWindow;
+
+  const result = await tokenizeSubtitleService(
+    "猫です",
+    makeDeps({
+      getYomitanExt: () => ({ id: "dummy-ext" } as any),
+      getYomitanParserWindow: () => parserWindow,
+      tokenizeWithMecab: async () => null,
+      isKnownWord: (text) => text === "です",
+    }),
+  );
+
+  assert.equal(result.text, "猫です");
+  assert.equal(result.tokens?.length, 2);
+  assert.equal(result.tokens?.[0]?.surface, "猫");
+  assert.equal(result.tokens?.[0]?.isNPlusOneTarget, true);
+  assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
+});
+
 test("tokenizeSubtitleService checks known words by headword, not surface", async () => {
   const result = await tokenizeSubtitleService(
     "猫です",
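Before the service diff, one note on the Yomitan test above: it fakes the parser payload as nested arrays of segments. A hedged sketch of the segment shape and headword extraction, with field names taken from the test fixture; the fallback behavior is an assumption about what extractYomitanHeadword (shown below) does when no headword is present.

// Segment shape as faked by the Yomitan test fixture above.
interface YomitanParseSegment {
  text: string;
  reading: string;
  headwords?: { term: string }[][];
}

// Assumed behavior: take the first term of the first headword group,
// falling back to the segment's surface text when none is present.
function extractHeadwordSketch(segment: YomitanParseSegment): string {
  const first = segment.headwords?.[0]?.[0];
  return first?.term ?? segment.text;
}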
@@ -1,5 +1,5 @@
 import { BrowserWindow, Extension, session } from "electron";
-import { mergeTokens } from "../../token-merger";
+import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
 import {
   MergedToken,
   NPlusOneMatchMode,
@@ -93,6 +93,25 @@ function resolveKnownWordText(
   return matchMode === "surface" ? surface : headword;
 }

+function applyKnownWordMarking(
+  tokens: MergedToken[],
+  isKnownWord: (text: string) => boolean,
+  knownWordMatchMode: NPlusOneMatchMode,
+): MergedToken[] {
+  return tokens.map((token) => {
+    const matchText = resolveKnownWordText(
+      token.surface,
+      token.headword,
+      knownWordMatchMode,
+    );
+
+    return {
+      ...token,
+      isKnown: token.isKnown || (matchText ? isKnownWord(matchText) : false),
+    };
+  });
+}
+
 function extractYomitanHeadword(segment: YomitanParseSegment): string {
   const headwords = segment.headwords;
   if (!Array.isArray(headwords) || headwords.length === 0) {
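The new applyKnownWordMarking defers to resolveKnownWordText, whose one-line body appears in the hunk above: the configured NPlusOneMatchMode decides whether the known-word list is consulted with the surface form or the headword. A small self-contained illustration; the conjugated token is a hypothetical example, not taken from the tests.

type NPlusOneMatchMode = "surface" | "headword";

// Mirrors the one-liner shown in the hunk above.
function resolveKnownWordTextSketch(
  surface: string,
  headword: string,
  matchMode: NPlusOneMatchMode,
): string {
  return matchMode === "surface" ? surface : headword;
}

const known = new Set(["食べる"]);
// Hypothetical conjugated token: surface "食べた", headword "食べる".
console.log(known.has(resolveKnownWordTextSketch("食べた", "食べる", "headword"))); // true
console.log(known.has(resolveKnownWordTextSketch("食べた", "食べる", "surface"))); // false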
@@ -187,6 +206,7 @@ function mapYomitanParseResultsToMergedTokens(
       endPos: end,
       partOfSpeech: PartOfSpeech.other,
       isMerged: true,
+      isNPlusOneTarget: false,
       isKnown: (() => {
         const matchText = resolveKnownWordText(
           surface,
@@ -368,13 +388,23 @@ export async function tokenizeSubtitleService(

   const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
   if (yomitanTokens && yomitanTokens.length > 0) {
-    return { text: displayText, tokens: yomitanTokens };
+    const knownMarkedTokens = applyKnownWordMarking(
+      yomitanTokens,
+      deps.isKnownWord,
+      deps.getKnownWordMatchMode(),
+    );
+    return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
   }

   try {
     const mecabTokens = await deps.tokenizeWithMecab(tokenizeText);
     if (mecabTokens && mecabTokens.length > 0) {
-      return { text: displayText, tokens: mecabTokens };
+      const knownMarkedTokens = applyKnownWordMarking(
+        mecabTokens,
+        deps.isKnownWord,
+        deps.getKnownWordMatchMode(),
+      );
+      return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
     }
   } catch (err) {
     console.error("Tokenization error:", (err as Error).message);
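Net effect of the final hunk: both tokenizer paths (Yomitan, then the mecab fallback) now finish with the same two-step post-processing. As a fragment reusing the names from the hunk above (rawTokens stands in for whichever tokenizer's output):

// Known-word marking first, then single-target selection (sketch).
const tokens = markNPlusOneTargets(
  applyKnownWordMarking(rawTokens, deps.isKnownWord, deps.getKnownWordMatchMode()),
);
return { text: displayText, tokens };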