mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 03:16:46 -07:00
feat(tracking): store secondary subtitle text and source path in occurrence data
- Add secondary_text column to imm_subtitle_lines with migration
- Pass currentSecondarySubText through recordSubtitleLine flow
- Include secondaryText and sourcePath in word/kanji occurrence queries
- Update all type interfaces (backend + frontend)
This commit is contained in:
@@ -54,6 +54,7 @@ const VOCABULARY_STATS = [
|
||||
pos3: null,
|
||||
frequency: 100,
|
||||
frequencyRank: 42,
|
||||
animeCount: 2,
|
||||
firstSeen: Date.now(),
|
||||
lastSeen: Date.now(),
|
||||
},
|
||||
@@ -75,6 +76,8 @@ const OCCURRENCES = [
|
||||
animeTitle: 'Little Witch Academia',
|
||||
videoId: 2,
|
||||
videoTitle: 'Episode 4',
|
||||
sourcePath: '/media/anime/lwa/ep04.mkv',
|
||||
secondaryText: null,
|
||||
sessionId: 3,
|
||||
lineIndex: 7,
|
||||
segmentStartMs: 12_000,
|
||||
|
||||
@@ -612,6 +612,7 @@ export class ImmersionTrackerService {
|
||||
startSec: number,
|
||||
endSec: number,
|
||||
tokens?: MergedToken[] | null,
|
||||
secondaryText?: string | null,
|
||||
): void {
|
||||
if (!this.sessionState || !text.trim()) return;
|
||||
const cleaned = normalizeText(text);
|
||||
@@ -692,6 +693,7 @@ export class ImmersionTrackerService {
|
||||
segmentStartMs: secToMs(startSec),
|
||||
segmentEndMs: secToMs(endSec),
|
||||
text: cleaned,
|
||||
secondaryText: secondaryText ?? null,
|
||||
wordOccurrences: Array.from(wordOccurrences.values()),
|
||||
kanjiOccurrences: Array.from(kanjiCounts.entries()).map(([kanji, occurrenceCount]) => ({
|
||||
kanji,
|
||||
|
||||
@@ -221,11 +221,17 @@ export function getVocabularyStats(
|
||||
? `WHERE (part_of_speech IS NULL OR part_of_speech NOT IN (${placeholders}))`
|
||||
: '';
|
||||
const stmt = db.prepare(`
|
||||
SELECT id AS wordId, headword, word, reading,
|
||||
part_of_speech AS partOfSpeech, pos1, pos2, pos3,
|
||||
frequency, frequency_rank AS frequencyRank,
|
||||
first_seen AS firstSeen, last_seen AS lastSeen
|
||||
FROM imm_words ${whereClause} ORDER BY frequency DESC LIMIT ?
|
||||
SELECT w.id AS wordId, w.headword, w.word, w.reading,
|
||||
w.part_of_speech AS partOfSpeech, w.pos1, w.pos2, w.pos3,
|
||||
w.frequency, w.frequency_rank AS frequencyRank,
|
||||
w.first_seen AS firstSeen, w.last_seen AS lastSeen,
|
||||
COUNT(DISTINCT sl.anime_id) AS animeCount
|
||||
FROM imm_words w
|
||||
LEFT JOIN imm_word_line_occurrences o ON o.word_id = w.id
|
||||
LEFT JOIN imm_subtitle_lines sl ON sl.line_id = o.line_id AND sl.anime_id IS NOT NULL
|
||||
${whereClause ? whereClause.replace('part_of_speech', 'w.part_of_speech') : ''}
|
||||
GROUP BY w.id
|
||||
ORDER BY w.frequency DESC LIMIT ?
|
||||
`);
|
||||
const params = hasExclude ? [...excludePos, limit] : [limit];
|
||||
return stmt.all(...params) as VocabularyStatsRow[];
|
||||
@@ -528,6 +534,8 @@ export function getWordOccurrences(
|
||||
a.canonical_title AS animeTitle,
|
||||
l.video_id AS videoId,
|
||||
v.canonical_title AS videoTitle,
|
||||
v.source_path AS sourcePath,
|
||||
l.secondary_text AS secondaryText,
|
||||
l.session_id AS sessionId,
|
||||
l.line_index AS lineIndex,
|
||||
l.segment_start_ms AS segmentStartMs,
|
||||
@@ -562,6 +570,8 @@ export function getKanjiOccurrences(
|
||||
a.canonical_title AS animeTitle,
|
||||
l.video_id AS videoId,
|
||||
v.canonical_title AS videoTitle,
|
||||
v.source_path AS sourcePath,
|
||||
l.secondary_text AS secondaryText,
|
||||
l.session_id AS sessionId,
|
||||
l.line_index AS lineIndex,
|
||||
l.segment_start_ms AS segmentStartMs,
|
||||
|
||||
@@ -505,6 +505,7 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
text TEXT NOT NULL,
|
||||
secondary_text TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
|
||||
@@ -644,6 +645,7 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
segment_start_ms INTEGER,
|
||||
segment_end_ms INTEGER,
|
||||
text TEXT NOT NULL,
|
||||
secondary_text TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE,
|
||||
@@ -679,6 +681,10 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
addColumnIfMissing(db, 'imm_words', 'frequency_rank', 'INTEGER');
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version && currentVersion.schema_version < 10) {
|
||||
addColumnIfMissing(db, 'imm_subtitle_lines', 'secondary_text', 'TEXT');
|
||||
}
|
||||
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_anime_normalized_title
|
||||
ON imm_anime(normalized_title_key)
|
||||
@@ -820,9 +826,9 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
|
||||
subtitleLineInsertStmt: db.prepare(`
|
||||
INSERT INTO imm_subtitle_lines (
|
||||
session_id, event_id, video_id, anime_id, line_index, segment_start_ms,
|
||||
segment_end_ms, text, CREATED_DATE, LAST_UPDATE_DATE
|
||||
segment_end_ms, text, secondary_text, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
`),
|
||||
wordIdSelectStmt: db.prepare(`
|
||||
@@ -958,6 +964,7 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
|
||||
write.segmentStartMs ?? null,
|
||||
write.segmentEndMs ?? null,
|
||||
write.text,
|
||||
write.secondaryText ?? null,
|
||||
Date.now(),
|
||||
Date.now(),
|
||||
);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export const SCHEMA_VERSION = 9;
|
||||
export const SCHEMA_VERSION = 10;
|
||||
export const DEFAULT_QUEUE_CAP = 1_000;
|
||||
export const DEFAULT_BATCH_SIZE = 25;
|
||||
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
|
||||
@@ -163,6 +163,7 @@ interface QueuedSubtitleLineWrite {
|
||||
segmentStartMs: number | null;
|
||||
segmentEndMs: number | null;
|
||||
text: string;
|
||||
secondaryText?: string | null;
|
||||
wordOccurrences: CountedWordOccurrence[];
|
||||
kanjiOccurrences: CountedKanjiOccurrence[];
|
||||
firstSeen: number;
|
||||
@@ -243,6 +244,7 @@ export interface VocabularyStatsRow {
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
frequencyRank: number | null;
|
||||
animeCount: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
@@ -282,6 +284,8 @@ export interface WordOccurrenceRow {
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sourcePath: string | null;
|
||||
secondaryText: string | null;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
@@ -295,6 +299,8 @@ export interface KanjiOccurrenceRow {
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sourcePath: string | null;
|
||||
secondaryText: string | null;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
|
||||
@@ -4,13 +4,14 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
|
||||
appState: {
|
||||
initialArgs?: { jellyfinPlay?: unknown } | null;
|
||||
overlayRuntimeInitialized: boolean;
|
||||
mpvClient: { connected?: boolean } | null;
|
||||
mpvClient: { connected?: boolean; currentSecondarySubText?: string } | null;
|
||||
immersionTracker: {
|
||||
recordSubtitleLine?: (
|
||||
text: string,
|
||||
start: number,
|
||||
end: number,
|
||||
tokens?: MergedToken[] | null,
|
||||
secondaryText?: string | null,
|
||||
) => void;
|
||||
handleMediaTitleUpdate?: (title: string) => void;
|
||||
recordPlaybackPosition?: (time: number) => void;
|
||||
@@ -70,25 +71,26 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
|
||||
if (!tracker?.recordSubtitleLine) {
|
||||
return;
|
||||
}
|
||||
const secondaryText = deps.appState.mpvClient?.currentSecondarySubText || null;
|
||||
const cachedTokens =
|
||||
deps.appState.currentSubtitleData?.text === text
|
||||
? deps.appState.currentSubtitleData.tokens
|
||||
: null;
|
||||
if (cachedTokens) {
|
||||
tracker.recordSubtitleLine(text, start, end, cachedTokens);
|
||||
tracker.recordSubtitleLine(text, start, end, cachedTokens, secondaryText);
|
||||
return;
|
||||
}
|
||||
if (!deps.tokenizeSubtitleForImmersion) {
|
||||
tracker.recordSubtitleLine(text, start, end, null);
|
||||
tracker.recordSubtitleLine(text, start, end, null, secondaryText);
|
||||
return;
|
||||
}
|
||||
void deps
|
||||
.tokenizeSubtitleForImmersion(text)
|
||||
.then((payload) => {
|
||||
tracker.recordSubtitleLine?.(text, start, end, payload?.tokens ?? null);
|
||||
tracker.recordSubtitleLine?.(text, start, end, payload?.tokens ?? null, secondaryText);
|
||||
})
|
||||
.catch(() => {
|
||||
tracker.recordSubtitleLine?.(text, start, end, null);
|
||||
tracker.recordSubtitleLine?.(text, start, end, null, secondaryText);
|
||||
});
|
||||
},
|
||||
hasSubtitleTimingTracker: () => Boolean(deps.appState.subtitleTimingTracker),
|
||||
|
||||
@@ -59,6 +59,7 @@ export interface VocabularyEntry {
|
||||
pos3: string | null;
|
||||
frequency: number;
|
||||
frequencyRank: number | null;
|
||||
animeCount: number;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
@@ -76,6 +77,8 @@ export interface VocabularyOccurrenceEntry {
|
||||
animeTitle: string | null;
|
||||
videoId: number;
|
||||
videoTitle: string;
|
||||
sourcePath: string | null;
|
||||
secondaryText: string | null;
|
||||
sessionId: number;
|
||||
lineIndex: number;
|
||||
segmentStartMs: number | null;
|
||||
|
||||
Reference in New Issue
Block a user