mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-25 00:11:26 -07:00
fix(immersion): special-case youtube media paths in runtime and tracking
This commit is contained in:
@@ -20,6 +20,7 @@ import {
|
||||
getOrCreateAnimeRecord,
|
||||
getOrCreateVideoRecord,
|
||||
linkVideoToAnimeRecord,
|
||||
linkYoutubeVideoToAnimeRecord,
|
||||
type TrackerPreparedStatements,
|
||||
updateVideoMetadataRecord,
|
||||
updateVideoTitleRecord,
|
||||
@@ -161,6 +162,7 @@ const YOUTUBE_COVER_RETRY_MS = 5 * 60 * 1000;
|
||||
// NOTE(review): the consumer of this limit is outside this excerpt — presumably an upper bound, in seconds, on where a cover screenshot is taken; confirm against the caller.
const YOUTUBE_SCREENSHOT_MAX_SECONDS = 120;
|
||||
// oEmbed endpoint on www.youtube.com. NOTE(review): the request code is not visible here — confirm how it is queried.
const YOUTUBE_OEMBED_ENDPOINT = 'https://www.youtube.com/oembed';
|
||||
// Matches a whole string of six or more characters drawn from ASCII letters, digits, "_" and "-".
const YOUTUBE_ID_PATTERN = /^[A-Za-z0-9_-]{6,}$/;
|
||||
// 24 hours in milliseconds. Subtracted from Date.now() to build the fetched_at_ms cutoff used by the YouTube metadata backfill queries.
const YOUTUBE_METADATA_REFRESH_MS = 24 * 60 * 60 * 1000;
|
||||
|
||||
function isValidYouTubeVideoId(value: string | null): boolean {
|
||||
return Boolean(value && YOUTUBE_ID_PATTERN.test(value));
|
||||
@@ -535,11 +537,15 @@ export class ImmersionTrackerService {
|
||||
}
|
||||
|
||||
/**
 * Returns the media library rows and then triggers a YouTube metadata
 * backfill for the library.
 *
 * The rows are read before the backfill is triggered, so the returned array
 * reflects what was stored at the time of the call.
 *
 * @returns The rows produced by the `getMediaLibrary` query helper.
 */
async getMediaLibrary(): Promise<MediaLibraryRow[]> {
  const rows = getMediaLibrary(this.db);
  // Must run on every call: an unconditional early return in front of this
  // line would leave the backfill unreachable.
  this.backfillYoutubeMetadataForLibrary();
  return rows;
}
|
||||
|
||||
/**
 * Returns the detail row for one video and then triggers a YouTube metadata
 * backfill for that same video.
 *
 * The detail is read before the backfill is triggered, so the returned value
 * reflects what was stored at the time of the call.
 *
 * @param videoId - Identifier of the video to look up.
 * @returns The value produced by the `getMediaDetail` query helper (a row or `null`).
 */
async getMediaDetail(videoId: number): Promise<MediaDetailRow | null> {
  const detail = getMediaDetail(this.db, videoId);
  // Must run on every call: an unconditional early return in front of this
  // line would leave the per-video backfill unreachable.
  this.backfillYoutubeMetadataForVideo(videoId);
  return detail;
}
|
||||
|
||||
async getMediaSessions(videoId: number, limit = 100): Promise<SessionSummaryQueryRow[]> {
|
||||
@@ -555,10 +561,12 @@ export class ImmersionTrackerService {
|
||||
}
|
||||
|
||||
/**
 * Returns the anime library rows.
 *
 * YouTube-backed videos are relinked to their anime records first, so the
 * query that follows runs against the relinked data.
 *
 * @returns The rows produced by the `getAnimeLibrary` query helper.
 */
async getAnimeLibrary(): Promise<AnimeLibraryRow[]> {
  this.relinkYoutubeAnimeLibrary();
  const libraryRows = getAnimeLibrary(this.db);
  return libraryRows;
}
|
||||
|
||||
/**
 * Returns the detail row for one anime record.
 *
 * YouTube-backed videos are relinked to their anime records first, so the
 * lookup that follows runs against the relinked data.
 *
 * @param animeId - Identifier of the anime record to look up.
 * @returns The value produced by the `getAnimeDetail` query helper (a row or `null`).
 */
async getAnimeDetail(animeId: number): Promise<AnimeDetailRow | null> {
  this.relinkYoutubeAnimeLibrary();
  const detailRow = getAnimeDetail(this.db, animeId);
  return detailRow;
}
|
||||
|
||||
@@ -909,6 +917,7 @@ export class ImmersionTrackerService {
|
||||
return;
|
||||
}
|
||||
upsertYoutubeVideoMetadata(this.db, videoId, metadata);
|
||||
linkYoutubeVideoToAnimeRecord(this.db, videoId, metadata);
|
||||
if (metadata.videoTitle?.trim()) {
|
||||
updateVideoTitleRecord(this.db, videoId, metadata.videoTitle.trim());
|
||||
}
|
||||
@@ -927,6 +936,174 @@ export class ImmersionTrackerService {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Picks at most one library video whose YouTube metadata is missing or
 * incomplete and requests a metadata capture for it.
 *
 * A video qualifies when all of the following hold:
 * - its source type is `SOURCE_TYPE_REMOTE` and its source URL starts with one
 *   of the listed https YouTube hosts (compared case-insensitively);
 * - it has a row in `imm_lifetime_media`;
 * - it has no `imm_youtube_videos` row, or that row lacks a video title,
 *   channel name or channel thumbnail URL;
 * - it was never fetched, or was last fetched at least
 *   `YOUTUBE_METADATA_REFRESH_MS` ago.
 *
 * Only the first match, ordered by `last_watched_ms` then `video_id` (both
 * descending), is handed to `captureYoutubeMetadataAsync` per call.
 */
private backfillYoutubeMetadataForLibrary(): void {
  const staleVideoQuery = `
    SELECT
      v.video_id AS videoId,
      v.source_url AS sourceUrl
    FROM imm_videos v
    JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
    LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
    WHERE
      v.source_type = ?
      AND v.source_url IS NOT NULL
      AND (
        LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
      )
      AND (
        yv.video_id IS NULL
        OR yv.video_title IS NULL
        OR yv.channel_name IS NULL
        OR yv.channel_thumbnail_url IS NULL
      )
      AND (
        yv.fetched_at_ms IS NULL
        OR yv.fetched_at_ms <= ?
      )
    ORDER BY lm.last_watched_ms DESC, v.video_id DESC
    LIMIT 1
  `;
  const statement = this.db.prepare(staleVideoQuery);

  // Rows fetched at or before this instant are eligible for another attempt.
  const refreshCutoffMs = Date.now() - YOUTUBE_METADATA_REFRESH_MS;
  const staleVideo = statement.get(SOURCE_TYPE_REMOTE, refreshCutoffMs) as {
    videoId: number;
    sourceUrl: string | null;
  } | null;

  const sourceUrl = staleVideo?.sourceUrl;
  if (staleVideo && sourceUrl) {
    this.captureYoutubeMetadataAsync(staleVideo.videoId, sourceUrl);
  }
}
|
||||
|
||||
/**
 * Requests a YouTube metadata capture for one video when its stored metadata
 * is missing or incomplete.
 *
 * The video qualifies when all of the following hold:
 * - its source type is `SOURCE_TYPE_REMOTE` and its source URL starts with one
 *   of the listed https YouTube hosts (compared case-insensitively);
 * - it has no `imm_youtube_videos` row, or that row lacks a video title,
 *   channel name or channel thumbnail URL;
 * - it was never fetched, or was last fetched at least
 *   `YOUTUBE_METADATA_REFRESH_MS` ago.
 *
 * @param videoId - Identifier of the video to check.
 */
private backfillYoutubeMetadataForVideo(videoId: number): void {
  const staleVideoQuery = `
    SELECT
      v.source_url AS sourceUrl
    FROM imm_videos v
    LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
    WHERE
      v.video_id = ?
      AND v.source_type = ?
      AND v.source_url IS NOT NULL
      AND (
        LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
        OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
      )
      AND (
        yv.video_id IS NULL
        OR yv.video_title IS NULL
        OR yv.channel_name IS NULL
        OR yv.channel_thumbnail_url IS NULL
      )
      AND (
        yv.fetched_at_ms IS NULL
        OR yv.fetched_at_ms <= ?
      )
  `;
  const statement = this.db.prepare(staleVideoQuery);

  // Rows fetched at or before this instant are eligible for another attempt.
  const refreshCutoffMs = Date.now() - YOUTUBE_METADATA_REFRESH_MS;
  const staleVideo = statement.get(videoId, SOURCE_TYPE_REMOTE, refreshCutoffMs) as {
    sourceUrl: string | null;
  } | null;

  const sourceUrl = staleVideo?.sourceUrl;
  if (sourceUrl) {
    this.captureYoutubeMetadataAsync(videoId, sourceUrl);
  }
}
|
||||
|
||||
/**
 * Re-links stored YouTube videos to anime records using the metadata already
 * saved in `imm_youtube_videos`.
 *
 * A video is a candidate when its source type is `SOURCE_TYPE_REMOTE`, its
 * source URL starts with one of the listed https YouTube hosts, it has a
 * stored channel name, and any of the following is true:
 * - it is not linked to an anime record;
 * - the linked record has no metadata JSON, or that JSON does not contain
 *   `"source":"youtube-channel"`;
 * - the linked record has no canonical title, or its trimmed title differs
 *   from the trimmed channel name.
 *
 * Candidates missing a YouTube video id or video URL are skipped. The lifetime
 * summary tables are rebuilt only when at least one candidate was actually
 * passed to `linkYoutubeVideoToAnimeRecord`, so rows that can never be linked
 * do not trigger a rebuild on every call.
 */
private relinkYoutubeAnimeLibrary(): void {
  const candidates = this.db
    .prepare(
      `
      SELECT
        v.video_id AS videoId,
        yv.youtube_video_id AS youtubeVideoId,
        yv.video_url AS videoUrl,
        yv.video_title AS videoTitle,
        yv.video_thumbnail_url AS videoThumbnailUrl,
        yv.channel_id AS channelId,
        yv.channel_name AS channelName,
        yv.channel_url AS channelUrl,
        yv.channel_thumbnail_url AS channelThumbnailUrl,
        yv.uploader_id AS uploaderId,
        yv.uploader_url AS uploaderUrl,
        yv.description AS description,
        yv.metadata_json AS metadataJson
      FROM imm_videos v
      JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
      LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
      LEFT JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
      WHERE
        v.source_type = ?
        AND v.source_url IS NOT NULL
        AND (
          LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
          OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
          OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
          OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
        )
        AND yv.channel_name IS NOT NULL
        AND (
          v.anime_id IS NULL
          OR a.metadata_json IS NULL
          OR a.metadata_json NOT LIKE '%"source":"youtube-channel"%'
          OR a.canonical_title IS NULL
          OR TRIM(a.canonical_title) != TRIM(yv.channel_name)
        )
      ORDER BY lm.last_watched_ms DESC, v.video_id DESC
      `,
    )
    .all(SOURCE_TYPE_REMOTE) as Array<{
    videoId: number;
    youtubeVideoId: string | null;
    videoUrl: string | null;
    videoTitle: string | null;
    videoThumbnailUrl: string | null;
    channelId: string | null;
    channelName: string | null;
    channelUrl: string | null;
    channelThumbnailUrl: string | null;
    uploaderId: string | null;
    uploaderUrl: string | null;
    description: string | null;
    metadataJson: string | null;
  }>;

  let relinkedCount = 0;
  for (const candidate of candidates) {
    // Pull the two required fields into locals so the null checks narrow them
    // to `string`; the remaining columns are forwarded unchanged.
    const { videoId, youtubeVideoId, videoUrl, ...optionalMetadata } = candidate;
    if (!youtubeVideoId || !videoUrl) {
      continue;
    }
    linkYoutubeVideoToAnimeRecord(this.db, videoId, {
      youtubeVideoId,
      videoUrl,
      ...optionalMetadata,
    });
    relinkedCount += 1;
  }

  // Skip the rebuild when nothing was relinked (no candidates, or every
  // candidate was skipped above).
  if (relinkedCount > 0) {
    rebuildLifetimeSummaryTables(this.db);
  }
}
|
||||
|
||||
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
|
||||
const normalizedPath = normalizeMediaPath(mediaPath);
|
||||
const normalizedTitle = normalizeText(mediaTitle);
|
||||
@@ -971,14 +1148,14 @@ export class ImmersionTrackerService {
|
||||
`Starting immersion session for path=${normalizedPath} videoId=${sessionInfo.videoId}`,
|
||||
);
|
||||
this.startSession(sessionInfo.videoId, sessionInfo.startedAtMs);
|
||||
if (sourceType === SOURCE_TYPE_REMOTE) {
|
||||
const youtubeVideoId = extractYouTubeVideoId(normalizedPath);
|
||||
if (youtubeVideoId) {
|
||||
void this.ensureYouTubeCoverArt(sessionInfo.videoId, normalizedPath, youtubeVideoId);
|
||||
this.captureYoutubeMetadataAsync(sessionInfo.videoId, normalizedPath);
|
||||
}
|
||||
const youtubeVideoId =
|
||||
sourceType === SOURCE_TYPE_REMOTE ? extractYouTubeVideoId(normalizedPath) : null;
|
||||
if (youtubeVideoId) {
|
||||
void this.ensureYouTubeCoverArt(sessionInfo.videoId, normalizedPath, youtubeVideoId);
|
||||
this.captureYoutubeMetadataAsync(sessionInfo.videoId, normalizedPath);
|
||||
} else {
|
||||
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
|
||||
}
|
||||
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
|
||||
this.captureVideoMetadataAsync(sessionInfo.videoId, sourceType, normalizedPath);
|
||||
}
|
||||
|
||||
@@ -1006,6 +1183,7 @@ export class ImmersionTrackerService {
|
||||
}
|
||||
|
||||
const startMs = secToMs(startSec);
|
||||
const endMs = secToMs(endSec);
|
||||
const subtitleKey = `${startMs}:${cleaned}`;
|
||||
if (this.recordedSubtitleKeys.has(subtitleKey)) {
|
||||
return;
|
||||
@@ -1019,6 +1197,9 @@ export class ImmersionTrackerService {
|
||||
this.sessionState.currentLineIndex += 1;
|
||||
this.sessionState.linesSeen += 1;
|
||||
this.sessionState.tokensSeen += tokenCount;
|
||||
if (this.sessionState.lastMediaMs === null || endMs > this.sessionState.lastMediaMs) {
|
||||
this.sessionState.lastMediaMs = endMs;
|
||||
}
|
||||
this.sessionState.pendingTelemetry = true;
|
||||
|
||||
const wordOccurrences = new Map<string, CountedWordOccurrence>();
|
||||
@@ -1068,8 +1249,8 @@ export class ImmersionTrackerService {
|
||||
sessionId: this.sessionState.sessionId,
|
||||
videoId: this.sessionState.videoId,
|
||||
lineIndex: this.sessionState.currentLineIndex,
|
||||
segmentStartMs: secToMs(startSec),
|
||||
segmentEndMs: secToMs(endSec),
|
||||
segmentStartMs: startMs,
|
||||
segmentEndMs: endMs,
|
||||
text: cleaned,
|
||||
secondaryText: secondaryText ?? null,
|
||||
wordOccurrences: Array.from(wordOccurrences.values()),
|
||||
|
||||
Reference in New Issue
Block a user