Files
autosnatch/autosnatch/tools.py

206 lines
8.3 KiB
Python

from beacon_snatch import BeaconSeries, BeaconContent
from colorama import Fore, Back, Style
from selenium import webdriver
from selenium.webdriver.common.by import By
from .config import Config
from .logger import getLogger
import logging
import json
import os
import requests
import shutil
import subprocess
from datetime import timedelta, date, datetime
from pathlib import Path
# Module-wide logger; level is taken from the project's Config so all
# tools in this module log consistently.
logger = getLogger(__name__)
logger.setLevel(Config.LOG_LEVEL)
def ffmpegRun(command):
    """Run an ffmpeg command to completion, logging its stderr output.

    command: full argv list whose first element is 'ffmpeg'. Launch
    failures (ffmpeg missing, not executable) are logged rather than
    raised, keeping the module's best-effort style.
    """
    logger.debug(" ".join(command))
    try:
        # ffmpeg writes its progress/stats to stderr; subprocess.run
        # captures it and waits in one call (the old Popen/read/wait
        # combo also passed the redundant universal_newlines=True
        # alongside text=True).
        result = subprocess.run(command, stderr=subprocess.PIPE, text=True)
        logger.debug(f"{Fore.YELLOW} {result.stderr}")
    except OSError as e:
        # Narrow catch: only failures to start the process land here.
        logger.error(f"Failed to start ffmpeg: {e}")
def mergeSubtitles(dl_path, episode_id):
    """Mux <episode_id>.mp4 and <episode_id>.srt in dl_path into one .mkv
    container (streams are copied, not re-encoded) and return the path of
    the resulting .mkv file."""
    video = f"{dl_path}/{episode_id}.mp4"
    subs = f"{dl_path}/{episode_id}.srt"
    merged = f"{dl_path}/{episode_id}.mkv"
    command = [
        'ffmpeg',
        '-i', video,                # input 0: original video/audio
        '-i', subs,                 # input 1: subtitle track
        '-map', '0', '-map', '1',   # keep every stream from both inputs
        '-c', 'copy',               # remux only, no transcoding
        '-v', 'quiet', '-stats', '-y',
        merged,
    ]
    ffmpegRun(command)
    # Optionally clean up the now-redundant source files.
    if Config.DELETE_MP4_SRT_AFTER_DOWNLOAD:
        for leftover in (video, subs):
            os.remove(leftover)
    return merged
def convertToMkv(dl_path, episode_id):
    """Repackage <episode_id>.mp4 in dl_path into an .mkv container
    (streams copied, no re-encode) and return the new .mkv path.

    Unlike mergeSubtitles(), no subtitle track is involved here.
    """
    input_path = f"{dl_path}/{episode_id}.mp4"
    output_path = f"{dl_path}/{episode_id}.mkv"
    ffmpegRun([
        'ffmpeg',
        '-i', input_path,
        '-c', 'copy',  # remux only; no transcoding
        '-v', 'quiet', '-stats', '-y',
        output_path,
    ])
    # Optionally remove the source .mp4 once the .mkv exists.
    if Config.DELETE_MP4_SRT_AFTER_DOWNLOAD:
        os.remove(input_path)
    return output_path
def getCache(series_id):
    """Locate the JSON episode cache for a series.

    Returns (cache_file, age_hours): the Path of the cache file under
    Config.CACHE_DIR, and its age in *hours* (rounded to 2 d.p.), or 0
    when the file does not exist yet.
    """
    cache_file = Path(f'{Config.CACHE_DIR}/{series_id}.json')
    if not cache_file.is_file():
        return (cache_file, 0)
    modified = datetime.fromtimestamp(cache_file.stat().st_mtime)
    age_hours = round((datetime.today() - modified).total_seconds() / 3600, 2)
    return (cache_file, age_hours)
def bodgeEpisodeNumber(episode_slug, series_id):
'''Take the slug of an episode, return the episode number as an int.'''
match series_id:
case "campaign-4": return int(episode_slug.strip("c4-e").split("-")[0]) # c4-e017-the-place-of-wings
case "critical-cooldown": return int(episode_slug.strip("cr-cooldown-c").split("4-e")[1]) # cr-cooldown-c4-e017
def episodesInOrder(auth, series):
    '''Take a BeaconSeries object, return an ordered list of all episodes
    as (number, slug, title) entries, caching the result on disk for
    Config.CACHE_MAX_AGE hours.'''
    cache_file, cache_age = getCache(series.id)
    # Fast path: a cache file that is younger than the max age is reused.
    if cache_file.is_file() and cache_age < Config.CACHE_MAX_AGE:
        logger.info(f"Cache of episodes for this series is fresh enough to re-use ({cache_age} hr old, max is {Config.CACHE_MAX_AGE} hr)")
        with open(cache_file) as f:
            return json.load(f)
    if not cache_file.is_file():
        # logger.warn is a deprecated alias for logger.warning.
        logger.warning("Cache of episodes for this series does not exist. Generating new cache via beacon-snatch")
    else:
        # >= closes the old boundary gap where cache_age == CACHE_MAX_AGE
        # matched neither branch.
        logger.info(f"Cache of episodes for this series is outdated ({cache_age} hr old, max is {Config.CACHE_MAX_AGE} hr.). Generating new cache via beacon-snatch")
        os.remove(cache_file)
    episodes = []
    series.fetch(auth)
    logger.debug(series.id)
    for episode in series.content:
        logger.debug(f"Checking episode '{episode.slug}'")
        # Ignore any specials or extras: regular episodes share a
        # per-series slug prefix.
        if episode.slug.startswith(Config.EPISODE_HINTS[series.id]):
            # Compute the number once instead of twice per episode.
            number = bodgeEpisodeNumber(episode.slug, series.id)
            logger.debug(f"{Fore.GREEN}Found episode! #{number}: '{episode.title}' ({episode.slug}) {Style.RESET_ALL}")
            episodes.append((number, episode.slug, episode.title))
    result = sorted(episodes, key=lambda entry: entry[0])
    with open(cache_file, 'w') as f:
        json.dump(result, f, indent=2)
    return result
def grabEpisodeInfo(auth, series_id, desired_episode):
    '''Take the ID of a Beacon series and an episode number (or the string
    "latest"), return a dict with the episode's number, id (slug), title
    and series, fetched from Beacon.tv. Returns None when the requested
    episode number is not present.'''
    # Grab all episodes from the chosen series.
    series = BeaconSeries.create(auth, series_id)
    logger.debug("Gathering a list of all episodes")
    episodes = episodesInOrder(auth, series)

    def asInfo(entry):
        # entry is a (number, slug, title) tuple from episodesInOrder.
        return {
            "number": entry[0],
            "id": entry[1],
            "title": entry[2],
            "series": series_id,
        }

    if desired_episode == "latest":
        return asInfo(episodes[-1])
    wanted = int(desired_episode)  # convert once, not per iteration
    for entry in episodes:
        if entry[0] == wanted:
            return asInfo(entry)
    # Previously this fell off the end silently; keep the implicit None
    # for callers but make the miss visible in the log.
    logger.warning(f"Episode {desired_episode} not found in series '{series_id}'")
def constructDownloadPath(episode):
'''Take a dict of an episode, return a Jellyfin-compliant download path.'''
match episode["series"]:
case "campaign-4": return Path(Config.DL_ROOT) / "Critical Role" / "Season 4" / f"Episode {episode['number']}"
case "critical-cooldown": return Path(Config.DL_ROOT) / "Critical Cooldown" / "Season 4" / f"Episode {episode['number']}"
def scrapeThumbnail(episode, download_path):
match episode["series"]:
case "campaign-4": hint = ' '.join(episode['id'].split("-")[0:2]).upper()
case "critical-cooldown": hint = ' '.join(episode['id'].split("-")[2:]).upper()
logging.debug("Starting up Selenium webdriver on virtual display")
driver = webdriver.Chrome()
logging.debug("Opening series webpage")
driver.get(f"https://beacon.tv/series/{episode['series']}")
logging.debug("Trying to find a matching image...")
img_element = driver.find_element(By.XPATH, f"//img[contains(@alt, '{hint}')]")
thumb_url = img_element.get_attribute("src")
logging.debug(f"Found image URL - {thumb_url}")
response = requests.get(thumb_url, stream=True)
if response.status_code == 200:
with open(f"{download_path}/{episode['id']}.png", 'wb') as out_file:
response.raw.decode_content = True
shutil.copyfileobj(response.raw, out_file)
def seriesTitle(series_id):
    """Map a Beacon series slug to its human-readable display title.

    Unknown slugs yield None, matching the original match fall-through.
    """
    titles = {
        "campaign-4": "Critical Role: Campaign 4",
        "critical-cooldown": "Critical Cooldown",
    }
    return titles.get(series_id)
def scrapeSubtitles(episode, download_path, webvtt_url):
    """Fetch the episode's WebVTT subtitle stream and convert it to SRT
    via ffmpeg; return the path of the resulting .srt file."""
    subtitles_path = f"{download_path}/{episode['id']}.srt"
    command = ["ffmpeg", "-i", webvtt_url, "-y", "-v", "quiet", "-stats", subtitles_path]
    ffmpegRun(command)
    return subtitles_path
def downloadEpisode(auth, episode):
    '''Download the given episode (a dict from grabEpisodeInfo) to its
    Jellyfin-compliant path, grab its thumbnail, and produce an .mkv —
    with subtitles embedded when Beacon provides closed captions.'''
    download_path = constructDownloadPath(episode)
    logger.debug(f"Downloading to {download_path}")
    logger.info("Getting stream from Beacon...")
    content = BeaconContent.create(auth, episode["id"])
    # Guard clause: the old code silently did nothing when the fetch failed.
    if not content:
        logger.error(f"Could not fetch content for '{episode['id']}' from Beacon")
        return
    logger.info(f"Found! Downloading '{episode['title']}'")
    stream = content.video_and_audio_streams[0]  # Selects the highest resolution stream
    content.download(stream, destination_folder=download_path)
    logger.info("Finished")
    logger.info("Grabbing thumbnail for episode")
    scrapeThumbnail(episode, download_path)
    if content.closedCaptions is None:  # identity check, not == None
        # logger.warn is a deprecated alias for logger.warning.
        logger.warning("Either failed to grab subtitle link, or episode does not have subtitles to begin with.")
        logger.info("Converting to .mkv (no subtitles)")
        convertToMkv(download_path, episode['id'])
    else:
        logger.info("Grabbing subtitles for episode")
        scrapeSubtitles(episode, download_path, content.closedCaptions)
        logger.info(f"Embedding downloaded subtitles {episode['id']}.srt into video file {episode['id']}.mp4")
        mergeSubtitles(download_path, episode['id'])
        logger.info(f"New video file {episode['id']}.mkv")