sudacode ce605006f5
All checks were successful
Build Docker Image / build (push) Successful in 14m39s
update add_video route and change queue method
- change add to queue method from `GET` on `/` to `POST` on `/queue`
- update `/add_video` route to accept single video url
2025-02-25 02:02:54 -08:00

124 lines
3.9 KiB
Python

import logging
from urllib.parse import urlparse
import bleach
from yt_dlp import YoutubeDL
def fetch_video_info(video_url):
    """
    Look up metadata for a single video through yt-dlp without downloading it.

    Returns a dict with the keys ``video_url``, ``video_name``,
    ``channel_url``, ``channel_name``, ``category``, ``view_count``,
    ``subscriber_count``, ``thumbnail_url`` and ``upload_date``.
    If anything raises during extraction, the error is logged and None is
    returned instead.
    """
    options = {"format": "best", "quiet": True, "noplaylist": True}

    with YoutubeDL(options) as downloader:
        try:
            metadata = downloader.extract_info(video_url, download=False)

            # Only the first listed category is kept; fall back to "Unknown"
            # when the field is absent or empty.
            has_categories = "categories" in metadata and metadata["categories"]
            category = metadata["categories"][0] if has_categories else "Unknown"

            # Map yt-dlp field names onto the names used by this application.
            video_info = {
                "video_url": video_url,
                "video_name": metadata.get("title", "Unknown Title"),
                "channel_url": metadata.get("channel_url", ""),
                "channel_name": metadata.get("uploader", "Unknown Channel"),
                "category": category,
                "view_count": metadata.get("view_count", 0),
                "subscriber_count": metadata.get("channel_follower_count", 0),
                "thumbnail_url": metadata.get("thumbnail", ""),
                "upload_date": metadata.get("upload_date", None),
            }
            return video_info
        except Exception as exc:
            # Best-effort lookup: report the failure and signal it with None.
            logging.error(f"Error fetching info for {video_url}: {exc}")
            return None
# Helper functions for validation and sanitization
def is_valid_url(url, allowed_domains=None):
    """Return True if *url* is a well-formed http(s) URL on an allowed host.

    Args:
        url: Candidate URL. Anything other than a non-empty string is
            rejected.
        allowed_domains: Optional iterable of domain names (a single string
            is accepted as one domain). When given, the URL's hostname must
            equal one of the domains or be a dot-delimited subdomain of it,
            e.g. ``"youtube.com"`` allows ``youtube.com`` and
            ``www.youtube.com`` but not ``notyoutube.com`` or
            ``youtube.com.evil.net``. Empty or non-string entries are
            ignored.

    Returns:
        bool: True when the URL has an ``http``/``https`` scheme, a hostname,
        and (if ``allowed_domains`` is given) an allowed host.
    """
    if not url or not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
        # Use .hostname rather than .netloc: it drops userinfo and port and
        # is lower-cased, so "http://youtube.com@evil.net" resolves to
        # "evil.net" instead of matching on the userinfo part.
        host = parsed.hostname
    except ValueError:
        # urlparse raises ValueError for malformed netlocs
        # (e.g. unbalanced IPv6 brackets).
        return False

    if parsed.scheme not in ("http", "https") or not host:
        return False

    if not allowed_domains:
        return True

    # A bare string would otherwise be iterated character by character.
    if isinstance(allowed_domains, str):
        allowed_domains = [allowed_domains]

    host = host.rstrip(".")  # tolerate a fully-qualified trailing dot
    for domain in allowed_domains:
        if not isinstance(domain, str):
            continue
        domain = domain.strip().strip(".").lower()
        # Match on label boundaries only; a plain substring test would let
        # look-alike hosts through.
        if domain and (host == domain or host.endswith("." + domain)):
            return True
    return False
def validate_video_data(data):
    """Validate the fields of a video-data mapping.

    Checks performed:
      * ``video_url`` and ``channel_url`` must be valid http(s) URLs;
        ``thumbnail_url`` is checked only when it is non-empty.
      * ``video_name``, ``channel_name`` and ``category`` must not exceed
        500, 200 and 100 characters respectively. Missing, ``None`` or
        non-string values are not length-checked.
      * ``view_count`` and ``subscriber_count`` (plus the legacy
        ``subscriber`` key), when not ``None``, must be convertible with
        ``int()``.

    Args:
        data: Mapping of field name to value (must support ``.get``).

    Returns:
        dict: field name -> error message for each failing field; empty
        when every check passes.
    """
    errors = {}

    # URL validation
    if not is_valid_url(data.get("video_url")):
        errors["video_url"] = "Invalid video URL format"
    if not is_valid_url(data.get("channel_url")):
        errors["channel_url"] = "Invalid channel URL format"
    thumbnail_url = data.get("thumbnail_url")
    if thumbnail_url and not is_valid_url(thumbnail_url):
        errors["thumbnail_url"] = "Invalid thumbnail URL format"

    # String length validation. The isinstance guard keeps a present-but-None
    # value from raising TypeError in len().
    length_limits = (
        ("video_name", 500, "Video name too long (max 500 characters)"),
        ("channel_name", 200, "Channel name too long (max 200 characters)"),
        ("category", 100, "Category too long (max 100 characters)"),
    )
    for field, limit, message in length_limits:
        value = data.get(field)
        if isinstance(value, str) and len(value) > limit:
            errors[field] = message

    # Type validation for numeric fields. "subscriber_count" is the key the
    # rest of this module produces; "subscriber" is still checked so callers
    # using the old key keep getting the same error entry.
    numeric_fields = (
        ("view_count", "View count must be a valid integer"),
        ("subscriber_count", "Subscriber count must be a valid integer"),
        ("subscriber", "Subscriber count must be a valid integer"),
    )
    for field, message in numeric_fields:
        value = data.get(field)
        if value is None:
            continue
        try:
            int(value)
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers float infinities, which int() rejects.
            errors[field] = message

    # TODO: validate "upload_date" once the expected date format is settled;
    # it is currently accepted as-is.

    return errors
def sanitize_video_data(data):
    """Return a copy of *data* with every string value cleaned by bleach.

    String values are passed through ``bleach.clean(..., strip=True)`` to
    guard against XSS; values of any other type are copied unchanged.
    The input mapping itself is not modified.
    """
    # NOTE(review): bleach.clean may also HTML-escape characters such as "&",
    # which would alter URL query strings -- confirm this is acceptable for
    # the *_url fields.
    return {
        field: bleach.clean(raw, strip=True) if isinstance(raw, str) else raw
        for field, raw in data.items()
    }