mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-03 06:22:41 -08:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
879ffe571d
|
|||
|
8f47e0f7f2
|
|||
|
18a555eb95
|
|||
|
1914c550a5
|
|||
|
2707b2ee96
|
|||
|
5241ff3fcc
|
|||
|
4b14ecbee6
|
|||
|
e2c164c43c
|
|||
|
87fe81ad3e
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "subminer",
|
||||
"version": "0.2.2",
|
||||
"version": "0.2.1",
|
||||
"description": "All-in-one sentence mining overlay with AnkiConnect and dictionary integration",
|
||||
"packageManager": "bun@1.3.5",
|
||||
"main": "dist/main-entry.js",
|
||||
|
||||
@@ -316,33 +316,3 @@ test('FieldGroupingMergeCollaborator deduplicates identical sentence, audio, and
|
||||
assert.equal(merged.Picture, '<img data-group-id="202" src="same.png">');
|
||||
assert.equal(merged.ExpressionAudio, merged.SentenceAudio);
|
||||
});
|
||||
|
||||
test('AnkiIntegration.formatMiscInfoPattern avoids leaking Jellyfin api_key query params', () => {
|
||||
const integration = new AnkiIntegration(
|
||||
{
|
||||
metadata: {
|
||||
pattern: '[SubMiner] %f (%t)',
|
||||
},
|
||||
} as never,
|
||||
{} as never,
|
||||
{
|
||||
currentSubText: '',
|
||||
currentVideoPath:
|
||||
'stream?static=true&api_key=secret-token&MediaSourceId=a762ab23d26d4347e3cacdb83aaae405&AudioStreamIndex=3',
|
||||
currentTimePos: 426,
|
||||
currentSubStart: 426,
|
||||
currentSubEnd: 428,
|
||||
currentAudioStreamIndex: 3,
|
||||
currentMediaTitle: '[Jellyfin/direct] Bocchi the Rock! - S01E02',
|
||||
send: () => true,
|
||||
} as unknown as never,
|
||||
);
|
||||
|
||||
const privateApi = integration as unknown as {
|
||||
formatMiscInfoPattern: (fallbackFilename: string, startTimeSeconds?: number) => string;
|
||||
};
|
||||
const result = privateApi.formatMiscInfoPattern('audio_123.mp3', 426);
|
||||
|
||||
assert.equal(result, '[SubMiner] [Jellyfin/direct] Bocchi the Rock! - S01E02 (00:07:06)');
|
||||
assert.equal(result.includes('api_key='), false);
|
||||
});
|
||||
|
||||
@@ -58,56 +58,6 @@ interface NoteInfo {
|
||||
|
||||
type CardKind = 'sentence' | 'audio';
|
||||
|
||||
function trimToNonEmptyString(value: unknown): string | null {
|
||||
if (typeof value !== 'string') return null;
|
||||
const trimmed = value.trim();
|
||||
return trimmed.length > 0 ? trimmed : null;
|
||||
}
|
||||
|
||||
function decodeURIComponentSafe(value: string): string {
|
||||
try {
|
||||
return decodeURIComponent(value);
|
||||
} catch {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
function extractFilenameFromMediaPath(rawPath: string): string {
|
||||
const trimmedPath = rawPath.trim();
|
||||
if (!trimmedPath) return '';
|
||||
|
||||
if (/^[a-zA-Z][a-zA-Z\d+\-.]*:\/\//.test(trimmedPath)) {
|
||||
try {
|
||||
const parsed = new URL(trimmedPath);
|
||||
return decodeURIComponentSafe(path.basename(parsed.pathname));
|
||||
} catch {
|
||||
// Fall through to separator-based handling below.
|
||||
}
|
||||
}
|
||||
|
||||
const separatorIndex = trimmedPath.search(/[?#]/);
|
||||
const pathWithoutQuery =
|
||||
separatorIndex >= 0 ? trimmedPath.slice(0, separatorIndex) : trimmedPath;
|
||||
return decodeURIComponentSafe(path.basename(pathWithoutQuery));
|
||||
}
|
||||
|
||||
function shouldPreferMediaTitleForMiscInfo(rawPath: string, filename: string): boolean {
|
||||
const loweredPath = rawPath.toLowerCase();
|
||||
const loweredFilename = filename.toLowerCase();
|
||||
if (loweredPath.includes('api_key=')) {
|
||||
return true;
|
||||
}
|
||||
if (loweredPath.startsWith('http://') || loweredPath.startsWith('https://')) {
|
||||
return true;
|
||||
}
|
||||
return (
|
||||
loweredFilename === 'stream' ||
|
||||
loweredFilename === 'master.m3u8' ||
|
||||
loweredFilename === 'index.m3u8' ||
|
||||
loweredFilename === 'playlist.m3u8'
|
||||
);
|
||||
}
|
||||
|
||||
export class AnkiIntegration {
|
||||
private client: AnkiConnectClient;
|
||||
private mediaGenerator: MediaGenerator;
|
||||
@@ -779,12 +729,8 @@ export class AnkiIntegration {
|
||||
}
|
||||
|
||||
const currentVideoPath = this.mpvClient.currentVideoPath || '';
|
||||
const videoFilename = extractFilenameFromMediaPath(currentVideoPath);
|
||||
const mediaTitle = trimToNonEmptyString(this.mpvClient.currentMediaTitle);
|
||||
const filenameWithExt =
|
||||
(shouldPreferMediaTitleForMiscInfo(currentVideoPath, videoFilename)
|
||||
? mediaTitle || videoFilename
|
||||
: videoFilename || mediaTitle) || fallbackFilename;
|
||||
const videoFilename = currentVideoPath ? path.basename(currentVideoPath) : '';
|
||||
const filenameWithExt = videoFilename || fallbackFilename;
|
||||
const filenameWithoutExt = filenameWithExt.replace(/\.[^.]+$/, '');
|
||||
|
||||
const currentTimePos =
|
||||
|
||||
@@ -57,26 +57,6 @@ test('MpvIpcClient handles sub-text property change and broadcasts tokenized sub
|
||||
assert.equal(events[0]!.isOverlayVisible, false);
|
||||
});
|
||||
|
||||
test('MpvIpcClient clears cached media title when media path changes', async () => {
|
||||
const client = new MpvIpcClient('/tmp/mpv.sock', makeDeps());
|
||||
|
||||
await invokeHandleMessage(client, {
|
||||
event: 'property-change',
|
||||
name: 'media-title',
|
||||
data: '[Jellyfin/direct] Episode 1',
|
||||
});
|
||||
assert.equal(client.currentMediaTitle, '[Jellyfin/direct] Episode 1');
|
||||
|
||||
await invokeHandleMessage(client, {
|
||||
event: 'property-change',
|
||||
name: 'path',
|
||||
data: '/tmp/new-episode.mkv',
|
||||
});
|
||||
|
||||
assert.equal(client.currentVideoPath, '/tmp/new-episode.mkv');
|
||||
assert.equal(client.currentMediaTitle, null);
|
||||
});
|
||||
|
||||
test('MpvIpcClient parses JSON line protocol in processBuffer', () => {
|
||||
const client = new MpvIpcClient('/tmp/mpv.sock', makeDeps());
|
||||
const seen: Array<Record<string, unknown>> = [];
|
||||
|
||||
@@ -134,7 +134,6 @@ export class MpvIpcClient implements MpvClient {
|
||||
private firstConnection = true;
|
||||
private hasConnectedOnce = false;
|
||||
public currentVideoPath = '';
|
||||
public currentMediaTitle: string | null = null;
|
||||
public currentTimePos = 0;
|
||||
public currentSubStart = 0;
|
||||
public currentSubEnd = 0;
|
||||
@@ -331,7 +330,6 @@ export class MpvIpcClient implements MpvClient {
|
||||
this.emit('media-path-change', payload);
|
||||
},
|
||||
emitMediaTitleChange: (payload) => {
|
||||
this.currentMediaTitle = payload.title;
|
||||
this.emit('media-title-change', payload);
|
||||
},
|
||||
emitSubtitleMetricsChange: (patch) => {
|
||||
@@ -366,7 +364,6 @@ export class MpvIpcClient implements MpvClient {
|
||||
},
|
||||
setCurrentVideoPath: (value: string) => {
|
||||
this.currentVideoPath = value;
|
||||
this.currentMediaTitle = null;
|
||||
},
|
||||
emitSecondarySubtitleVisibility: (payload) => {
|
||||
this.emit('secondary-subtitle-visibility', payload);
|
||||
|
||||
@@ -297,43 +297,6 @@ test('tokenizeSubtitle starts Yomitan frequency lookup and MeCab enrichment in p
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, 77);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle appends trailing kana to merged Yomitan readings when headword equals surface', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'断じて見ていない',
|
||||
makeDeps({
|
||||
getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
|
||||
getYomitanParserWindow: () =>
|
||||
({
|
||||
isDestroyed: () => false,
|
||||
webContents: {
|
||||
executeJavaScript: async () => [
|
||||
{
|
||||
source: 'scanning-parser',
|
||||
index: 0,
|
||||
content: [
|
||||
[
|
||||
{ text: '断', reading: 'だん', headwords: [[{ term: '断じて' }]] },
|
||||
{ text: 'じて', reading: '', headwords: [[{ term: 'じて' }]] },
|
||||
],
|
||||
[
|
||||
{ text: '見', reading: 'み', headwords: [[{ term: '見る' }]] },
|
||||
{ text: 'ていない', reading: '', headwords: [[{ term: 'ていない' }]] },
|
||||
],
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}) as unknown as Electron.BrowserWindow,
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 2);
|
||||
assert.equal(result.tokens?.[0]?.surface, '断じて');
|
||||
assert.equal(result.tokens?.[0]?.reading, 'だんじて');
|
||||
assert.equal(result.tokens?.[1]?.surface, '見ていない');
|
||||
assert.equal(result.tokens?.[1]?.reading, 'み');
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle queries headword frequencies with token reading for disambiguation', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'鍛えた',
|
||||
@@ -388,58 +351,6 @@ test('tokenizeSubtitle queries headword frequencies with token reading for disam
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, 2847);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle falls back to term-only Yomitan frequency lookup when reading is noisy', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'断じて',
|
||||
makeDeps({
|
||||
getFrequencyDictionaryEnabled: () => true,
|
||||
getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
|
||||
getYomitanParserWindow: () =>
|
||||
({
|
||||
isDestroyed: () => false,
|
||||
webContents: {
|
||||
executeJavaScript: async (script: string) => {
|
||||
if (script.includes('getTermFrequencies')) {
|
||||
if (!script.includes('"term":"断じて","reading":null')) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
{
|
||||
term: '断じて',
|
||||
reading: null,
|
||||
dictionary: 'freq-dict',
|
||||
frequency: 7082,
|
||||
displayValue: '7082',
|
||||
displayValueParsed: true,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
return [
|
||||
{
|
||||
source: 'scanning-parser',
|
||||
index: 0,
|
||||
content: [
|
||||
[
|
||||
{
|
||||
text: '断じて',
|
||||
reading: 'だん',
|
||||
headwords: [[{ term: '断じて' }]],
|
||||
},
|
||||
],
|
||||
],
|
||||
},
|
||||
];
|
||||
},
|
||||
},
|
||||
}) as unknown as Electron.BrowserWindow,
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 1);
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, 7082);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle avoids headword term-only fallback rank when reading-specific frequency exists', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'無人',
|
||||
|
||||
@@ -249,50 +249,6 @@ function normalizeFrequencyLookupText(rawText: string): string {
|
||||
return rawText.trim().toLowerCase();
|
||||
}
|
||||
|
||||
function isKanaChar(char: string): boolean {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
}
|
||||
|
||||
function getTrailingKanaSuffix(surface: string): string {
|
||||
const chars = Array.from(surface);
|
||||
let splitIndex = chars.length;
|
||||
while (splitIndex > 0 && isKanaChar(chars[splitIndex - 1]!)) {
|
||||
splitIndex -= 1;
|
||||
}
|
||||
if (splitIndex <= 0 || splitIndex >= chars.length) {
|
||||
return '';
|
||||
}
|
||||
return chars.slice(splitIndex).join('');
|
||||
}
|
||||
|
||||
function normalizeYomitanMergedReading(token: MergedToken): string {
|
||||
const reading = token.reading ?? '';
|
||||
if (!reading || token.headword !== token.surface) {
|
||||
return reading;
|
||||
}
|
||||
const trailingKanaSuffix = getTrailingKanaSuffix(token.surface);
|
||||
if (!trailingKanaSuffix || reading.endsWith(trailingKanaSuffix)) {
|
||||
return reading;
|
||||
}
|
||||
return `${reading}${trailingKanaSuffix}`;
|
||||
}
|
||||
|
||||
function normalizeSelectedYomitanTokens(tokens: MergedToken[]): MergedToken[] {
|
||||
return tokens.map((token) => ({
|
||||
...token,
|
||||
reading: normalizeYomitanMergedReading(token),
|
||||
}));
|
||||
}
|
||||
|
||||
function resolveFrequencyLookupText(
|
||||
token: MergedToken,
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
@@ -320,24 +276,17 @@ function buildYomitanFrequencyTermReadingList(
|
||||
tokens: MergedToken[],
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
): Array<{ term: string; reading: string | null }> {
|
||||
const termReadingList: Array<{ term: string; reading: string | null }> = [];
|
||||
for (const token of tokens) {
|
||||
return tokens
|
||||
.map((token) => {
|
||||
const term = resolveFrequencyLookupText(token, matchMode).trim();
|
||||
if (!term) {
|
||||
continue;
|
||||
return null;
|
||||
}
|
||||
|
||||
const readingRaw =
|
||||
token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null;
|
||||
termReadingList.push({ term, reading: readingRaw });
|
||||
|
||||
// Yomitan parse readings can be noisy/truncated on merged tokens; include term-only fallback.
|
||||
if (readingRaw !== null) {
|
||||
termReadingList.push({ term, reading: null });
|
||||
}
|
||||
}
|
||||
|
||||
return termReadingList;
|
||||
return { term, reading: readingRaw };
|
||||
})
|
||||
.filter((pair): pair is { term: string; reading: string | null } => pair !== null);
|
||||
}
|
||||
|
||||
function buildYomitanFrequencyRankMap(
|
||||
@@ -478,17 +427,16 @@ async function parseWithYomitanInternalParser(
|
||||
if (!selectedTokens || selectedTokens.length === 0) {
|
||||
return null;
|
||||
}
|
||||
const normalizedSelectedTokens = normalizeSelectedYomitanTokens(selectedTokens);
|
||||
|
||||
if (deps.getYomitanGroupDebugEnabled?.() === true) {
|
||||
logSelectedYomitanGroups(text, normalizedSelectedTokens);
|
||||
logSelectedYomitanGroups(text, selectedTokens);
|
||||
}
|
||||
|
||||
const frequencyRankPromise: Promise<Map<string, number>> = options.frequencyEnabled
|
||||
? (async () => {
|
||||
const frequencyMatchMode = options.frequencyMatchMode;
|
||||
const termReadingList = buildYomitanFrequencyTermReadingList(
|
||||
normalizedSelectedTokens,
|
||||
selectedTokens,
|
||||
frequencyMatchMode,
|
||||
);
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||
@@ -501,19 +449,19 @@ async function parseWithYomitanInternalParser(
|
||||
try {
|
||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||
return await enrichTokensWithMecab(normalizedSelectedTokens, mecabTokens);
|
||||
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logger.warn(
|
||||
'Failed to enrich Yomitan tokens with MeCab POS:',
|
||||
error.message,
|
||||
`tokenCount=${normalizedSelectedTokens.length}`,
|
||||
`tokenCount=${selectedTokens.length}`,
|
||||
`textLength=${text.length}`,
|
||||
);
|
||||
return normalizedSelectedTokens;
|
||||
return selectedTokens;
|
||||
}
|
||||
})()
|
||||
: Promise.resolve(normalizedSelectedTokens);
|
||||
: Promise.resolve(selectedTokens);
|
||||
|
||||
const [yomitanRankByTerm, enrichedTokens] = await Promise.all([
|
||||
frequencyRankPromise,
|
||||
|
||||
@@ -79,7 +79,7 @@ test('computeWordClass preserves known and n+1 classes while adding JLPT classes
|
||||
assert.equal(computeWordClass(nPlusOneJlpt), 'word word-n-plus-one word-jlpt-n2');
|
||||
});
|
||||
|
||||
test('computeWordClass composes known class with frequency class while keeping N+1 exclusive', () => {
|
||||
test('computeWordClass keeps known/N+1 color classes exclusive over frequency classes', () => {
|
||||
const known = createToken({
|
||||
isKnown: true,
|
||||
frequencyRank: 10,
|
||||
@@ -103,7 +103,7 @@ test('computeWordClass composes known class with frequency class while keeping N
|
||||
singleColor: '#000000',
|
||||
bandedColors: ['#000000', '#000000', '#000000', '#000000', '#000000'] as const,
|
||||
}),
|
||||
'word word-known word-frequency-single',
|
||||
'word word-known',
|
||||
);
|
||||
assert.equal(
|
||||
computeWordClass(nPlusOne, {
|
||||
|
||||
@@ -429,7 +429,7 @@ export function computeWordClass(
|
||||
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||
}
|
||||
|
||||
if (!token.isNPlusOneTarget) {
|
||||
if (!token.isKnown && !token.isNPlusOneTarget) {
|
||||
const frequencyClass = getFrequencyDictionaryClass(token, resolvedFrequencySettings);
|
||||
if (frequencyClass) {
|
||||
classes.push(frequencyClass);
|
||||
|
||||
@@ -124,7 +124,6 @@ export interface NotificationOptions {
|
||||
export interface MpvClient {
|
||||
currentSubText: string;
|
||||
currentVideoPath: string;
|
||||
currentMediaTitle?: string | null;
|
||||
currentTimePos: number;
|
||||
currentSubStart: number;
|
||||
currentSubEnd: number;
|
||||
|
||||
Reference in New Issue
Block a user