All checks were successful
Build Docker Image / build (push) Successful in 14m39s
- change add to queue method from `GET` on `/` to `POST` on `/queue` - update `/add_video` route to accept single video url
124 lines
3.9 KiB
Python
124 lines
3.9 KiB
Python
import logging
|
|
from urllib.parse import urlparse
|
|
|
|
import bleach
|
|
from yt_dlp import YoutubeDL
|
|
|
|
|
|
def fetch_video_info(video_url):
    """
    Fetch comprehensive video information using yt-dlp.

    Only metadata is extracted (``download=False``); nothing is downloaded.

    Args:
        video_url: URL of the video to look up.

    Returns:
        A dictionary with the keys ``video_url``, ``video_name``,
        ``channel_url``, ``channel_name``, ``category``, ``view_count``,
        ``subscriber_count``, ``thumbnail_url`` and ``upload_date``, or
        None if extraction fails (the error is logged).
    """
    ydl_opts = {
        "format": "best",
        "quiet": True,
        "noplaylist": True,
    }

    with YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(video_url, download=False)

            # Use the first category; fall back to "Unknown" when the list
            # is missing, None or empty.
            categories = info.get("categories") or ["Unknown"]

            # `.get(key, default)` only applies the default when the key is
            # absent. `or` also covers keys that are present but hold None
            # (or an empty value), so callers always get the documented
            # fallback instead of None.
            return {
                "video_url": video_url,
                "video_name": info.get("title") or "Unknown Title",
                "channel_url": info.get("channel_url") or "",
                "channel_name": info.get("uploader") or "Unknown Channel",
                "category": categories[0],
                "view_count": info.get("view_count") or 0,
                "subscriber_count": info.get("channel_follower_count") or 0,
                "thumbnail_url": info.get("thumbnail") or "",
                "upload_date": info.get("upload_date"),
            }

        except Exception as e:
            # Best-effort lookup: any extraction failure is logged and
            # reported to the caller as None rather than propagated.
            logging.error("Error fetching info for %s: %s", video_url, e)
            return None
|
|
|
|
|
|
# Helper functions for validation and sanitization
|
|
def is_valid_url(url, allowed_domains=None):
    """Validate URL format and optionally restrict it to allowed domains.

    Args:
        url: Candidate URL. Anything that is not a non-empty string is
            rejected.
        allowed_domains: Optional iterable of domain names. When given and
            non-empty, the URL's host must equal one of the domains or be a
            subdomain of it (``www.example.com`` matches ``example.com``).

    Returns:
        True if the URL uses http/https, has a network location and (when
        ``allowed_domains`` is given) its host is allowed; False otherwise.
    """
    if not url or not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
        # `hostname` is lower-cased and excludes userinfo and port, unlike
        # `netloc`.
        hostname = parsed.hostname
    except ValueError:
        # urlparse raises ValueError for malformed input such as an
        # unterminated IPv6 literal.
        return False

    # Check for valid scheme and netloc
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        return False

    if not allowed_domains:
        return True

    if not hostname:
        return False

    # Match on the host name by exact or dot-separated suffix. A plain
    # substring test on netloc would accept look-alikes such as
    # "example.com.evil.test" or "example.com@evil.test".
    def _matches(domain):
        domain = domain.lower().lstrip(".")
        return bool(domain) and (
            hostname == domain or hostname.endswith("." + domain)
        )

    return any(_matches(domain) for domain in allowed_domains)
|
|
|
|
|
|
def validate_video_data(data):
    """Validate all fields in the video data.

    Args:
        data: Mapping of video fields (``video_url``, ``channel_url``,
            ``thumbnail_url``, ``video_name``, ``channel_name``,
            ``category``, ``view_count``, ``subscriber_count``, ...).

    Returns:
        A dict mapping each invalid field name to an error message. The
        dict is empty when no problems were found.
    """
    errors = {}

    # URL validation: video and channel URLs are required, the thumbnail
    # is only checked when one is supplied.
    if not is_valid_url(data.get("video_url")):
        errors["video_url"] = "Invalid video URL format"

    if not is_valid_url(data.get("channel_url")):
        errors["channel_url"] = "Invalid channel URL format"

    thumbnail_url = data.get("thumbnail_url")
    if thumbnail_url and not is_valid_url(thumbnail_url):
        errors["thumbnail_url"] = "Invalid thumbnail URL format"

    # String length validation
    length_limits = (
        ("video_name", "Video name", 500),
        ("channel_name", "Channel name", 200),
        ("category", "Category", 100),
    )
    for field, label, limit in length_limits:
        value = data.get(field)
        if not value:
            # Missing, None or empty values have nothing to measure.
            continue
        if not isinstance(value, str):
            # Report bad types as validation errors rather than letting
            # len() raise TypeError.
            errors[field] = f"{label} must be a string"
        elif len(value) > limit:
            errors[field] = f"{label} too long (max {limit} characters)"

    # Type validation for numeric fields. "subscriber_count" is the key
    # produced by fetch_video_info; the older "subscriber" key is still
    # checked so existing callers keep their error reporting.
    integer_fields = (
        ("view_count", "View count"),
        ("subscriber_count", "Subscriber count"),
        ("subscriber", "Subscriber count"),
    )
    for field, label in integer_fields:
        value = data.get(field)
        if value is None:
            continue
        try:
            int(value)
        except (ValueError, TypeError, OverflowError):
            errors[field] = f"{label} must be a valid integer"

    # Date validation
    # TODO: implement upload_date validation once the expected format is
    # settled; the value is currently accepted as-is.

    return errors
|
|
|
|
|
|
def sanitize_video_data(data):
    """Return a sanitized copy of the video data to prevent XSS.

    Every string value is passed through ``bleach.clean(..., strip=True)``;
    values of any other type are copied over unchanged. The input mapping
    itself is not modified.

    NOTE(review): bleach.clean also HTML-escapes text (e.g. ``&``), which
    would affect URL fields with query strings -- confirm this is intended.
    """
    return {
        field: bleach.clean(raw, strip=True) if isinstance(raw, str) else raw
        for field, raw in data.items()
    }
|