Files
the-office/server/cli.py

194 lines
7.5 KiB
Python

"""
cli.py
CLI entrypoint for fetching, processing and compiling quote data.
"""
import logging
import os
import sys
import time
from typing import List, Tuple
import click
import enlighten
import requests
# Extend the first search-path entry to its parent directory, presumably so the
# `server` package imported just below resolves when this file is run directly.
# os.path.join picks the platform separator: the old hard-coded '\\..' suffix only
# formed a valid path on Windows (where this produces the exact same string).
sys.path[0] = os.path.join(sys.path[0], os.pardir)
from server.process import get_episodes, get_filepath, sleep_from, verify_episode
# Root logging is configured at INFO, while this module's own 'cli' logger is
# lowered to DEBUG so the debug messages emitted by the commands below are not
# filtered out by the logger itself.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('cli')
logger.setLevel(logging.DEBUG)
# Module-level enlighten manager. NOTE(review): `fetch` opens its own
# `enlighten.Manager()` under the same local name, so this instance is not the
# one driving that command's progress bar.
manager = enlighten.get_manager()
@click.group()
def cli() -> None:
    """Base command group."""
    # Intentionally empty: sub-commands and sub-groups register themselves on
    # this group through the @cli.command / @cli.group decorators further down.
    # The docstring above is what click shows as the top-level help text.
@cli.command('fetch')
@click.option('-s', '--season', type=int,
              help='Season to be fetched. Without --episode, will download all episodes in a season.')
@click.option('-e', '--episode', type=int, help='Specific episode to be fetched. Requires --season to be specified.')
@click.option('-d', '--delay', type=float, default=0.5, help='Delay between each request')
@click.option('--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
def fetch(season: int, episode: int, delay: float, all: bool, overwrite: bool, silent_skip: bool) -> None:
    """
    Downloads raw quote pages from 'officequotes.net'.

    Fetches quote pages, placing them in 'html' folder in unmodified UTF-8 HTML files.
    """
    # Maintainer notes (kept out of the docstring, which click prints as --help):
    #   season / episode -- None when the option is omitted on the command line.
    #   delay            -- minimum number of seconds between consecutive requests.
    #   all              -- shadows the builtin, but click derives the keyword from
    #                       the '--all' flag, so the name is part of the interface.
    #   overwrite        -- re-download even if the target HTML file already exists.
    #   silent_skip      -- suppress the per-file "skipping" debug message.

    # --- Work out which (season, episode) pairs were requested -----------------
    episodes: List[Tuple[int, int]]
    if all:
        episodes = list(get_episodes())
    elif not season:
        # Neither --all nor --season: nothing sensible can be selected.
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to fetch.')
        logger.info('Check --help for more information on this command.')
        return
    elif not episode:
        # Season without episode: take every episode of that season.
        episodes = list(get_episodes(season=season))
        logger.info(f'Fetching Season {season}...')
    elif verify_episode(season, episode):
        episodes = [(season, episode)]
    else:
        logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
        return

    # Pluralise for every count except exactly one (so zero reads "0 quote pages").
    logger.debug(f'Ready to start fetching {len(episodes)} quote page{"" if len(episodes) == 1 else "s"}')

    # Pretend the previous request happened `delay` seconds ago so the very first
    # request is not held back.
    last_request = time.time() - delay

    # Both the HTTP session and the progress manager are context-managed so their
    # resources are released even if an unexpected error escapes the loop.
    # `progress_manager` is deliberately not called `manager`, to avoid shadowing
    # the module-level object of that name.
    with requests.Session() as session, enlighten.Manager() as progress_manager:
        with progress_manager.counter(total=len(episodes), desc='Fetching...', unit='episodes') as pbar:
            for _season, _episode in episodes:
                filepath = get_filepath(_season, _episode, 'html')

                # Check if HTML file exists
                if not overwrite and os.path.exists(filepath):
                    if not silent_skip:
                        logger.debug(f'Skipping Season {_season}, Episode {_episode}: File already exists.')
                else:
                    logger.info(f'Fetching Season {_season}, Episode {_episode}...')

                    # Generate link, make request
                    link = f"http://officequotes.net/no{_season}-{str(_episode).zfill(2)}.php"

                    sleep_from(delay, last_request, progress_manager)  # Sleep at least :delay: seconds.

                    try:
                        resp = session.get(link)
                    except requests.RequestException as error:
                        # A single unreachable page should not abort the whole
                        # batch; report it like a bad status code and move on.
                        logger.error(f'Fetching failed. Request error: {error}')
                    else:
                        if resp.ok:
                            # Write data to file
                            with open(filepath, 'w', encoding='utf-8') as file:
                                file.write(resp.text)
                            logger.debug('Successfully fetched.')
                        else:
                            logger.error(f'Fetching failed. Erroneous response code {resp.status_code}.')
                    finally:
                        # Count failed attempts too, so the delay is still honoured.
                        last_request = time.time()

                # Skipped and attempted episodes both advance the progress bar.
                pbar.update()

    logger.info('Fetching complete.')
@cli.command('preprocess')
@click.option('-s', '--season', type=int,
              help='Season to be pre-processed. Without --episode, will pre-process all episodes in a season.')
@click.option('-e', '--episode', type=int,
              help='Specific episode to be pre-processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Pre-process all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
@click.option('-d', '--dry-run', is_flag=True)
def preprocess(season: int, episode: int, all: bool, overwrite: bool, silent_skip: bool, dry_run: bool) -> None:
    """
    Pre-processes raw HTML files into mangled custom quote data.

    Custom quote data requires manual inspection and formatting, making it a dangerous operation that may overwrite
    precious quote data.
    """
    # Maintainer notes (kept out of the docstring, which click prints as --help):
    #   season / episode -- None when the option is omitted on the command line.
    #   all              -- shadows the builtin, but click derives the keyword from
    #                       the '--all' flag, so the name is part of the interface.
    #   overwrite, silent_skip, dry_run -- accepted but not read anywhere in this
    #                       function yet.
    # NOTE(review): only the episode selection exists so far; the resulting
    # `episodes` list is not consumed by any later step in this function.

    episodes: List[Tuple[int, int]]
    if all:
        episodes = list(get_episodes())
    elif not season:
        # Neither --all nor --season: nothing sensible can be selected.
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to pre-process.')
        logger.info('Check --help for more information on this command.')
        # Explicit return, matching the sibling commands, so code added below
        # never runs with `episodes` unassigned.
        return
    elif not episode:
        # Season without episode: take every episode of that season.
        episodes = list(get_episodes(season=season))
        logger.info(f'Preprocessing Season {season}...')
    elif verify_episode(season, episode):
        episodes = [(season, episode)]
    else:
        logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
        return
@cli.command('process')
@click.option('-s', '--season', type=int,
              help='Season to be processed. Without --episode, will process all episodes in a season.')
@click.option('-e', '--episode', type=int, help='Specific episode to be processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
def process(season: int, episode: int, all: bool, overwrite: bool, silent_skip: bool) -> None:
    """
    Processes manually processed raw quote data into JSON.
    """
    # Maintainer notes (kept out of the docstring, which click prints as --help):
    #   season / episode -- None when the option is omitted on the command line.
    #   all              -- shadows the builtin, but click derives the keyword from
    #                       the '--all' flag, so the name is part of the interface.
    #   overwrite, silent_skip -- accepted but not read anywhere in this function yet.
    # NOTE(review): only the episode selection exists so far; the resulting
    # `episodes` list is not consumed by any later step in this function.

    episodes: List[Tuple[int, int]]
    if all:
        episodes = list(get_episodes())
    elif not season:
        # Neither --all nor --season: nothing sensible can be selected.
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to process.')
        logger.info('Check --help for more information on this command.')
        return
    elif not episode:
        # Season without episode: take every episode of that season.
        episodes = list(get_episodes(season=season))
        logger.info(f'Processing Season {season}...')
    elif verify_episode(season, episode):
        episodes = [(season, episode)]
    else:
        logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
        return
@cli.group('build')
def build() -> None:
    """Build final data files used by Algolia and the backend API."""
    # Container group only: the 'algolia' and 'final' sub-commands attach to it
    # through @build.command. The docstring doubles as click's help text.
@build.command('algolia')
def algolia() -> None:
    """
    Generates algolia.json, an all-encompassing file for Algolia's search index.
    """
    # Candidate 'processed' file path for every known episode, narrowed to the
    # ones that actually exist on disk.
    candidates = [get_filepath(season, episode, 'processed') for season, episode in get_episodes()]
    # NOTE(review): `files` is not consumed yet -- no step that writes
    # algolia.json exists in this function.
    files = [path for path in candidates if os.path.exists(path)]
@build.command('final')
def final() -> None:
    """Generates the latest application static data.json file, used by the backend API."""
    # Build the full list of candidate 'processed' paths first, then keep only
    # those present on disk.
    candidates = [get_filepath(season, episode, 'processed') for season, episode in get_episodes()]
    # NOTE(review): `files` is not consumed yet -- no step that writes
    # data.json exists in this function.
    files = [path for path in candidates if os.path.exists(path)]
# Dispatch to the click command group only when executed as a script, so that
# importing this module does not trigger argument parsing.
if __name__ == "__main__":
    cli()