mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-12 12:13:18 -06:00
194 lines
7.5 KiB
Python
194 lines
7.5 KiB
Python
"""
cli.py

CLI entrypoint for fetching, processing and compiling quote data.
"""
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
from typing import List, Tuple
|
|
|
|
import click
|
|
import enlighten
|
|
import requests
|
|
|
|
# Point the first import-path entry at its parent directory, presumably so the
# `server.process` import that follows resolves when this file is run directly.
# Built with os.path so the hop works with any path separator; the previous
# hard-coded backslash only produced a valid path on Windows.
sys.path[0] = os.path.join(sys.path[0], os.pardir)
|
|
from server.process import get_episodes, get_filepath, sleep_from, verify_episode
|
|
|
|
# Root logging is configured at INFO, while the CLI's own logger is lowered to
# DEBUG so its per-episode messages are emitted.
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger('cli')
logger.setLevel(logging.DEBUG)

# Module-level enlighten manager obtained at import time.
manager = enlighten.get_manager()
|
|
|
|
|
|
# Root click group; the sub-commands in this file attach to it via
# `@cli.command(...)` / `@cli.group(...)`. The docstring doubles as the
# help text click prints, so it is kept short.
@click.group()
def cli():
    """Base command group."""
|
|
|
|
|
|
@cli.command('fetch')
@click.option('-s', '--season', type=int,
              help='Season to be fetched. Without --episode, will download all episodes in a season.')
@click.option('-e', '--episode', type=int, help='Specific episode to be fetched. Requires --season to be specified.')
@click.option('-d', '--delay', type=float, default=0.5, help='Delay between each request')
@click.option('--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
def fetch(season: int, episode: int, delay: float, all: bool, overwrite: bool, silent_skip: bool):
    """
    Downloads raw quote pages from 'officequotes.net'.

    Fetches quote pages, placing them in 'html' folder in unmodified UTF-8 HTML files.
    """
    # NOTE: click prints the docstring above as this command's --help text, so
    # maintainer-facing notes are kept in comments instead.
    #
    # Selection rules: --all wins; otherwise --season (optionally narrowed by
    # --episode) picks the episodes; anything else is rejected with a hint.
    # `all` shadows the builtin, but click passes options by keyword, so the
    # parameter name has to match the option name.
    episodes: List[Tuple[int, int]]

    if all:
        episodes = list(get_episodes())
    elif season:
        if episode:
            if verify_episode(season, episode):
                episodes = [(season, episode)]
            else:
                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
                return
        else:
            episodes = list(get_episodes(season=season))
            logger.info(f'Fetching Season {season}...')
    else:
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to fetch.')
        logger.info('Check --help for more information on this command.')
        return

    count = len(episodes)
    # Only exactly one page is singular ("0 quote pages", "1 quote page").
    logger.debug(f'Ready to start fetching {count} quote page{"" if count == 1 else "s"}')

    # Without a timeout a stalled connection would block the whole run forever.
    request_timeout = 30  # seconds
    # Back-date the "last request" so the first download is not delayed.
    last_request = time.time() - delay

    # The session is a context manager so its pooled connections are released
    # even if the loop exits early. The progress manager gets its own local
    # name to avoid shadowing the module-level `manager`.
    with requests.Session() as session, enlighten.Manager() as progress:
        with progress.counter(total=count, desc='Fetching...', unit='episodes') as pbar:
            for _season, _episode in episodes:
                filepath = get_filepath(_season, _episode, 'html')

                # Existing files are left untouched unless --overwrite is given.
                if not overwrite and os.path.exists(filepath):
                    if not silent_skip:
                        logger.debug(f'Skipping Season {_season}, Episode {_episode}: File already exists.')
                    pbar.update()
                    continue

                logger.info(f'Fetching Season {_season}, Episode {_episode}...')

                # Generate link, make request
                link = f"http://officequotes.net/no{_season}-{str(_episode).zfill(2)}.php"

                sleep_from(delay, last_request, progress)  # Sleep at least :delay: seconds.

                try:
                    resp = session.get(link, timeout=request_timeout)
                except requests.RequestException as error:
                    # A network failure on one page should not abort the batch.
                    last_request = time.time()
                    logger.error(f'Fetching failed. Request error: {error}')
                    pbar.update()
                    continue
                last_request = time.time()

                if resp.ok:
                    # Write data to file
                    with open(filepath, 'w', encoding='utf-8') as file:
                        file.write(resp.text)
                    logger.debug('Successfully fetched.')
                else:
                    logger.error(f'Fetching failed. Erroneous response code {resp.status_code}.')
                pbar.update()

    logger.info('Fetching complete.')
|
|
|
|
|
|
@cli.command('preprocess')
@click.option('-s', '--season', type=int,
              help='Season to be pre-processed. Without --episode, will pre-process all episodes in a season.')
@click.option('-e', '--episode', type=int,
              help='Specific episode to be pre-processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Pre-process all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
@click.option('-d', '--dry-run', is_flag=True)
def preprocess(season: int, episode: int, all: bool, overwrite: bool, silent_skip: bool, dry_run: bool):
    """
    Pre-processes raw HTML files into mangled custom quote data.

    Custom quote data requires manual inspection and formatting, making it a dangerous operation that may overwrite
    precious quote data.
    """
    # NOTE: click prints the docstring above as this command's --help text.
    #
    # NOTE(review): only the episode selection is implemented here; `episodes`,
    # `overwrite`, `silent_skip` and `dry_run` are accepted but not used yet.
    #
    # Selection rules: --all wins; otherwise --season (optionally narrowed by
    # --episode) picks the episodes; anything else is rejected with a hint.
    episodes: List[Tuple[int, int]]

    if all:
        episodes = list(get_episodes())
    elif season:
        if episode:
            if verify_episode(season, episode):
                episodes = [(season, episode)]
            else:
                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
                return
        else:
            episodes = list(get_episodes(season=season))
            logger.info(f'Preprocessing Season {season}...')
    else:
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to pre-process.')
        logger.info('Check --help for more information on this command.')
        # Stop here so later work never runs with `episodes` unassigned.
        return
|
|
|
|
|
|
@cli.command('process')
@click.option('-s', '--season', type=int,
              help='Season to be processed. Without --episode, will process all episodes in a season.')
@click.option('-e', '--episode', type=int,
              help='Specific episode to be processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
@click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
def process(season: int, episode: int, all: bool, overwrite: bool, silent_skip: bool):
    """
    Processes manually processed raw quote data into JSON.
    """
    # NOTE: click prints the docstring above as this command's --help text.
    #
    # NOTE(review): only the episode selection is implemented here; `episodes`,
    # `overwrite` and `silent_skip` are accepted but not used yet.
    #
    # Selection rules: --all wins; otherwise --season (optionally narrowed by
    # --episode) picks the episodes; anything else is rejected with a hint.
    episodes: List[Tuple[int, int]]

    if all:
        episodes = list(get_episodes())
    elif season:
        if episode:
            if verify_episode(season, episode):
                episodes = [(season, episode)]
            else:
                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
                return
        else:
            episodes = list(get_episodes(season=season))
            logger.info(f'Processing Season {season}...')
    else:
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to process.')
        logger.info('Check --help for more information on this command.')
        return
|
|
|
|
|
|
# Nested click group: its sub-commands are registered with `@build.command(...)`.
# The docstring is the help text click shows for `build`.
@cli.group('build')
def build():
    """Build final data files used by Algolia and the backend API."""
|
|
|
|
|
|
@build.command('algolia')
def algolia():
    """
    Generates algolia.json, an all-encompassing file for Algolia's search index.
    """
    # NOTE: click prints the docstring above as this command's --help text.
    #
    # NOTE(review): the command currently only collects the 'processed' files
    # that exist on disk; nothing is written from `files` yet.
    candidates = (get_filepath(season, episode, 'processed') for season, episode in get_episodes())
    files = [path for path in candidates if os.path.exists(path)]
|
|
|
|
|
|
@build.command('final')
def final():
    """Generates the latest application static data.json file, used by the backend API."""
    # Gather the 'processed' file path of every known episode, keeping only
    # the paths that are present on disk.
    files = []
    for season, episode in get_episodes():
        processed_path = get_filepath(season, episode, 'processed')
        if os.path.exists(processed_path):
            files.append(processed_path)
|
|
|
|
|
|
if __name__ == '__main__':
    # Executed as a script: hand control to the click command group.
    cli()
|