#!/usr/bin/env python
import logging
import os
import time
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from pathlib import Path
from typing import Optional, Tuple

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Root logger: timestamped records at INFO level and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# URL templates; "{}" is replaced with the episode number via str.format().
AUDIO_BASE_URL = "https://www.heypera.com/listen/nihongo-con-teppei-for-beginners/{}/next"
SUB_BASE_URL = (
    "https://storage.googleapis.com/pera-transcripts/"
    "nihongo-con-teppei-for-beginners/transcripts/{}.vtt"
)

# Defaults for pacing, HTTP timeouts and retry count.
DEFAULT_DELAY = 5  # seconds between requests
DEFAULT_TIMEOUT = 30  # seconds for HTTP requests
MAX_RETRIES = 3


class TeppeiDownloader:
    """Download audio and subtitle files for Nihongo con Teppei episodes.

    Audio URLs are scraped from the episode page with headless Chrome
    (Selenium); subtitle URLs are built directly from ``SUB_BASE_URL``.
    All file downloads go through one shared ``requests.Session``.
    """

    def __init__(self, delay: float = DEFAULT_DELAY, timeout: int = DEFAULT_TIMEOUT):
        """Create a downloader.

        Args:
            delay: Seconds to sleep between requests and before retries.
            timeout: Timeout in seconds passed to every HTTP request.
        """
        self.delay = delay
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'
        })

    def get_audio_url(self, episode_num: int) -> Optional[str]:
        """Scrape the audio URL of an episode with headless Chrome.

        Args:
            episode_num: Episode number substituted into ``AUDIO_BASE_URL``.

        Returns:
            The ``src`` attribute of the page's first ``<audio>`` element, or
            ``None`` if it is empty or any error occurs (errors are logged).
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--window-size=1920,1080")

        driver = None
        try:
            driver = webdriver.Chrome(options=chrome_options)
            driver.get(AUDIO_BASE_URL.format(episode_num))

            # Wait (up to 15 s) for the audio element to be present
            audio_element = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, "audio"))
            )
            audio_url = audio_element.get_attribute("src")

            if audio_url:
                logger.info(f"Found audio URL for episode {episode_num}")
                return audio_url

            logger.error(f"No audio URL found for episode {episode_num}")
            return None

        except Exception as e:
            # Scraping is best-effort: report the failure and let the caller
            # treat the episode's audio as unavailable.
            logger.error(f"Error getting audio URL for episode {episode_num}: {e}")
            return None
        finally:
            # Always shut the browser down, even when the scrape failed.
            if driver:
                driver.quit()

    def get_sub_url(self, episode_num: int) -> str:
        """Return the subtitle URL for an episode (direct URL construction)."""
        return SUB_BASE_URL.format(episode_num)

    def verify_download(self, filename: str, expected_size: Optional[int] = None) -> bool:
        """Check that a downloaded file exists and has plausible content.

        Args:
            filename: Path of the file to check.
            expected_size: Expected size in bytes; when given, the file size
                must be within 10% of it.

        Returns:
            ``True`` if every check passes; ``False`` if the file is missing,
            empty, unreadable, a ``.vtt`` file that is not valid UTF-8 or has
            fewer than 10 non-whitespace-trimmed characters, or its size is
            outside the tolerance. Each failure is logged as a warning.
        """
        if not os.path.exists(filename):
            logger.warning(f"File verification failed: {filename} does not exist")
            return False

        try:
            file_size = os.path.getsize(filename)
            if file_size == 0:
                logger.warning(f"File verification failed: {filename} is empty (0 bytes)")
                return False

            # For subtitle files, check that they have at least some content
            if filename.endswith('.vtt'):
                with open(filename, 'r', encoding='utf-8') as f:
                    content = f.read().strip()
                if len(content) < 10:  # Very basic check for minimal content
                    logger.warning(f"File verification failed: {filename} appears to have insufficient content")
                    return False

            # If an expected size is provided, the actual size must be close
            # to it; a mismatch is a failed verification, not just a log line.
            if expected_size is not None and abs(file_size - expected_size) > expected_size * 0.1:
                logger.warning(f"File verification failed: {filename} size ({file_size} bytes) differs significantly from expected ({expected_size} bytes)")
                return False

            logger.debug(f"File verification passed: {filename} ({file_size} bytes)")
            return True

        except (OSError, UnicodeDecodeError) as e:
            # UnicodeDecodeError: a .vtt file that is not valid UTF-8 is
            # treated as a failed download rather than crashing the run.
            logger.warning(f"File verification failed: {filename} - {e}")
            return False

    @staticmethod
    def _remove_quietly(filename: str) -> None:
        """Delete *filename*, ignoring the case where it cannot be removed."""
        try:
            os.remove(filename)
        except OSError:
            pass  # Best effort: file may not exist or may not be removable

    def download_file(self, url: str, filename: str, retries: int = MAX_RETRIES) -> bool:
        """Download *url* to *filename*, retrying on transient failures.

        ``self.delay`` seconds are slept before every retry, regardless of
        whether the previous attempt failed with a network error, an HTTP
        error status, or a failed verification. 4xx responses other than
        408 and 429 are treated as permanent and are not retried.

        Args:
            url: URL to fetch.
            filename: Destination path; overwritten if it exists.
            retries: Maximum number of attempts.

        Returns:
            ``True`` once a download has been written and verified, otherwise
            ``False``.
        """
        for attempt in range(retries):
            # Back off before each retry so failures of any kind never turn
            # into back-to-back requests against the server.
            if attempt > 0:
                time.sleep(self.delay)

            logger.info(f"Downloading {filename} (attempt {attempt + 1}/{retries})")
            try:
                response = self.session.get(url, timeout=self.timeout)
            except requests.exceptions.RequestException as e:
                logger.warning(f"Download attempt {attempt + 1} failed for {filename}: {e}")
                continue

            status = response.status_code
            if status != 200:
                logger.warning(f"HTTP {status} for {filename}")
                if 400 <= status < 500 and status not in (408, 429):
                    # Client errors such as 403/404 will not change on retry.
                    logger.error(f"Not retrying {filename}: HTTP {status} is not a transient error")
                    return False
                continue

            payload = response.content
            try:
                with open(filename, "wb") as file:
                    file.write(payload)
            except OSError as e:
                # A local disk problem will not be fixed by re-downloading.
                logger.error(f"Could not write {filename}: {e}")
                self._remove_quietly(filename)
                return False

            # Verify the download was successful
            if self.verify_download(filename, len(payload)):
                logger.info(f"Successfully downloaded and verified {filename}")
                return True

            logger.warning(f"Download verification failed for {filename}, retrying...")
            # Remove the corrupted/incomplete file
            self._remove_quietly(filename)

        logger.error(f"Failed to download {filename} after {retries} attempts")
        return False

    def file_exists(self, filepath: str) -> bool:
        """Return ``True`` if *filepath* exists and is not empty."""
        if not os.path.exists(filepath):
            return False

        # Check if file has content (not empty)
        try:
            return os.path.getsize(filepath) > 0
        except OSError:
            return False

    def download_episode(self, episode_num: int, output_dir: str, force: bool = False) -> Tuple[bool, bool]:
        """Download both audio and subtitle files for an episode.

        Args:
            episode_num: Episode number.
            output_dir: Directory for the files; created if missing.
            force: Re-download even if a non-empty file already exists.

        Returns:
            ``(audio_success, sub_success)``. A file that already exists and
            is skipped counts as a success.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        audio_filename = output_path / f"Nihongo-Con-Teppei-E{episode_num:02d}.mp3"
        sub_filename = output_path / f"Nihongo-Con-Teppei-E{episode_num:02d}.vtt"

        audio_success = True
        sub_success = True

        # Download audio file
        if force or not self.file_exists(str(audio_filename)):
            audio_url = self.get_audio_url(episode_num)
            if audio_url:
                audio_success = self.download_file(audio_url, str(audio_filename))
            else:
                audio_success = False
                logger.error(f"Could not get audio URL for episode {episode_num}")

            # Add delay between audio and subtitle downloads
            if audio_success:
                time.sleep(self.delay)
        else:
            logger.info(f"Audio file already exists: {audio_filename}")

        # Download subtitle file
        if force or not self.file_exists(str(sub_filename)):
            sub_url = self.get_sub_url(episode_num)
            sub_success = self.download_file(sub_url, str(sub_filename))
        else:
            logger.info(f"Subtitle file already exists: {sub_filename}")

        return audio_success, sub_success

    def download_range(self, start: int, end: int, output_dir: str, force: bool = False) -> None:
        """Download episodes *start* through *end* (inclusive), logging progress.

        Args:
            start: First episode number.
            end: Last episode number; must not be less than *start*.
            output_dir: Directory for the downloaded files.
            force: Re-download even if files already exist.
        """
        if start > end:
            logger.error("Start episode must be less than or equal to end episode")
            return

        total_episodes = end - start + 1
        successful_downloads = 0

        logger.info(f"Starting download of episodes {start} to {end} ({total_episodes} episodes)")

        for episode in range(start, end + 1):
            logger.info(f"Processing episode {episode}/{end} ({episode-start+1}/{total_episodes})")

            audio_success, sub_success = self.download_episode(episode, output_dir, force)

            # An episode counts only when both of its files succeeded
            if audio_success and sub_success:
                successful_downloads += 1

            # Add delay between episodes to be respectful to the server
            if episode < end:  # Don't delay after the last episode
                logger.info(f"Waiting {self.delay} seconds before next episode...")
                time.sleep(self.delay)

        logger.info(f"Download complete! Successfully downloaded {successful_downloads}/{total_episodes} episodes")


def parse_args(argv: Optional[list] = None):
    """Parse the command-line arguments.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read them from ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace`` with attributes ``episode_num``,
        ``start``, ``end``, ``download``, ``output``, ``force``, ``delay``
        and ``timeout``.

    Raises:
        SystemExit: Raised by argparse (status 2) on unparsable arguments, a
            negative ``--delay`` or a non-positive ``--timeout``.
    """
    parser = ArgumentParser(
        description="Download Nihongo Con Teppei episodes with audio and subtitles",
        formatter_class=RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s 11 --download                    # Download episode 11
  %(prog)s --start 11 --end 15 --download   # Download episodes 11-15
  %(prog)s 11 --download --force           # Force re-download episode 11
  %(prog)s --start 1 --end 20 --download --output ./teppei_episodes
        """
    )

    # Single episode mode
    parser.add_argument(
        "episode_num",
        type=int,
        nargs='?',
        help="Episode number to download (for single episode mode)"
    )

    # Range mode
    parser.add_argument(
        "--start",
        type=int,
        help="Starting episode number for range download"
    )
    parser.add_argument(
        "--end",
        type=int,
        help="Ending episode number for range download"
    )

    # Download options
    parser.add_argument(
        "-d", "--download",
        action="store_true",
        help="Download the files (if not specified, only show URLs)"
    )
    parser.add_argument(
        "-o", "--output",
        default=".",
        help="Output directory (default: current directory)"
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force re-download even if files already exist"
    )
    parser.add_argument(
        "--delay",
        type=float,
        default=DEFAULT_DELAY,
        help=f"Delay between requests in seconds (default: {DEFAULT_DELAY})"
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=DEFAULT_TIMEOUT,
        help=f"HTTP request timeout in seconds (default: {DEFAULT_TIMEOUT})"
    )

    args = parser.parse_args(argv)

    # Reject values that would otherwise only fail later with a ValueError:
    # time.sleep() refuses negative durations, and a timeout must be positive.
    if args.delay < 0:
        parser.error("--delay must not be negative")
    if args.timeout <= 0:
        parser.error("--timeout must be greater than 0")

    return args


def _print_episode_urls(downloader, episode: int) -> bool:
    """Print the audio and subtitle URLs of one episode.

    Returns True when the audio URL could be retrieved.
    """
    audio_url = downloader.get_audio_url(episode)
    sub_url = downloader.get_sub_url(episode)
    if audio_url:
        print(f"Audio URL: {audio_url}")
    else:
        print("Could not retrieve audio URL")
    print(f"Subtitle URL: {sub_url}")
    return bool(audio_url)


def main():
    """Command-line entry point.

    Parses the arguments, validates the episode selection, then either
    prints the episode URLs (without ``--download``) or downloads the files
    (with ``--download``), for a single episode or a ``--start``/``--end``
    range.

    Raises:
        SystemExit: With status 2 when the episode selection is invalid, and
            with status 1 when a single-episode download fails or an audio
            URL cannot be retrieved in URL-only mode. A range download
            reports its results through the log only.
    """
    args = parse_args()

    range_mode = args.start is not None and args.end is not None

    # Validate arguments
    if range_mode:
        if args.start < 1 or args.end < 1:
            logger.error("Episode numbers must be greater than 0")
            raise SystemExit(2)
        if args.start > args.end:
            logger.error("Start episode must be less than or equal to end episode")
            raise SystemExit(2)
    elif args.episode_num is not None:
        if args.episode_num < 1:
            logger.error("Episode number must be greater than 0")
            raise SystemExit(2)
    else:
        logger.error("Must specify either a single episode number or a range (--start and --end)")
        raise SystemExit(2)

    # Create downloader instance
    downloader = TeppeiDownloader(delay=args.delay, timeout=args.timeout)

    if not args.download:
        # URL-only mode. It applies to ranges too, as the --download help
        # text promises ("if not specified, only show URLs").
        if range_mode:
            episodes = range(args.start, args.end + 1)
        else:
            episodes = (args.episode_num,)

        all_found = True
        for position, episode in enumerate(episodes):
            if position:
                # Pace the page loads between consecutive episodes.
                time.sleep(args.delay)
            if range_mode:
                print(f"Episode {episode}:")
            if not _print_episode_urls(downloader, episode):
                all_found = False

        if not all_found:
            raise SystemExit(1)
        return

    if range_mode:
        # Range download mode
        downloader.download_range(args.start, args.end, args.output, args.force)
        return

    # Single episode download
    episode = args.episode_num
    audio_success, sub_success = downloader.download_episode(episode, args.output, args.force)
    if audio_success and sub_success:
        logger.info(f"Successfully downloaded episode {episode}")
    else:
        logger.error(f"Failed to download episode {episode}")
        # Signal the failure to the calling shell instead of exiting with 0.
        raise SystemExit(1)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()