diff --git a/server/cli.py b/server/cli.py
index e6a9042..5fcc962 100644
--- a/server/cli.py
+++ b/server/cli.py
@@ -8,6 +8,7 @@ import os
 import re
 import sys
 import time
+from collections import defaultdict
 from typing import List, Optional, Tuple, Union
 
 import click
@@ -15,8 +16,10 @@ import enlighten
 import requests
 from bs4 import BeautifulSoup
 
 sys.path[0] += '\\..'
-from server.process import DATA_DIR, get_characters, get_episodes, get_filepath, load_file, \
+# Keep `server.*` imports below the sys.path tweak above, next to the existing server.process import.
+from server.helpers import algolia_transform
+from server.process import DATA_DIR, get_appearances, get_episodes, get_filepath, load_file, \
     save_file, sleep_from, \
     verify_episode
 
@@ -31,6 +34,65 @@ def cli():
     pass
 
 
+@cli.group()
+def misc():
+    pass
+
+
+@misc.command('characters')
+@click.option('-s', '--season', type=int, help='Season to be processed for character names')
+@click.option('-e', '--episode', type=int, help='Episode to be processed. Requires --season to be specified.')
+@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
+@click.option('-i', '--individual', is_flag=True,
+              help='List characters from individual episodes instead of just compiling a masterlist')
+def characters(season: int, episode: int, all: bool, individual: bool):
+    """
+    Retrieves all characters from all quotes available.
+    Used in order to compile a list of characters for the character page, for scanning speakers for anomalies
+    and mistakes, as well as for compiling a list of verified 'main' characters.
+    """
+
+    if all:
+        episodes = list(get_episodes())
+    elif season:
+        if episode:
+            if verify_episode(season, episode):
+                episodes = [(season, episode)]
+            else:
+                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
+                return
+        else:
+            episodes = list(get_episodes(season=season))
+            logger.info(f'Fetching Season {season}...')
+    else:
+        if episode:
+            logger.info('You must specify more than just an episode.')
+        else:
+            logger.info('You must specify which episodes to process.')
+        logger.info('Check --help for more information on this command.')
+        return
+
+    master = {}
+    for _season, _episode in episodes:
+        appearances = get_appearances(_season, _episode)
+
+        if not appearances:
+            continue
+
+        if individual:
+            logger.info(', '.join(item['name'] for item in appearances))
+
+        for item in appearances:
+            if item['id'] in master:
+                master[item['id']]['appearances'] += item['appearances']
+            else:
+                master[item['id']] = item
+
+    # Log the masterlist, characters with the most appearances first.
+    logger.info(
+        ', '.join(item['name'] for item in sorted(master.values(), reverse=True, key=lambda item: item['appearances'])))
+
+
 @cli.command('fetch')
 @click.option('-s', '--season', type=int,
               help='Season to be fetched. Without --episode, will download all episodes in a season.')
@@ -317,6 +379,20 @@ def algolia(silent_skip: bool, process_: bool):
     save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True)
 
 
+@build.command('character')
+def character():
+    """
+    Uses algolia.json to build a characters.json file, a masterlist of quotes separated by the speaker.
+    Speakers not considered 'main characters' are excluded from the list.
+    This file also pulls information to build character descriptions and other relevant information.
+    """
+    data = load_file(os.path.join(DATA_DIR, 'algolia.json'), True)
+    key_list = [('speaker',), ('text',), ('season',), ('episode_rel', 'episode'), ('section_rel', 'scene'),
+                ('quote_rel', 'quote')]
+    # TODO: exclude speakers that are not main characters, group quotes by speaker and save characters.json.
+    master = [algolia_transform(item, key_list) for item in data]
+
+
 @build.command('final')
 @click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
 @click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.')
@@ -338,7 +414,8 @@ def final(silent_skip: bool, process_: bool):
                 episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True)
             except FileNotFoundError:
                 if not silent_skip:
-                    logger.warning(f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
+                    logger.warning(
+                        f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
                 episode_data = None
 
             description = descriptions[season_id - 1][episode_id - 1]
@@ -347,7 +424,7 @@ def final(silent_skip: bool, process_: bool):
                     'title': description['title'].strip(),
                     'description': description['description'].strip(),
                     'episode_id': episode_id,
-                    'characters': get_characters(season_id, episode_id),
+                    'characters': get_appearances(season_id, episode_id),
                     'scenes': episode_data
                 }
             )
diff --git a/server/helpers.py b/server/helpers.py
index 3f0296b..cda86f7 100644
--- a/server/helpers.py
+++ b/server/helpers.py
@@ -4,7 +4,7 @@ helpers.py
 
 """
 
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23]
 
@@ -30,3 +30,23 @@ def get_neighbors(array: List, index: int, distance: int = 2) -> Tuple[List, Lis
         if below_index < len(array):
             below.append(array[below_index])
     return top[::-1], below
+
+
+def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[str]]]) -> dict:
+    """
+    Transforms a dictionary object of a quote (from algolia.json) into an API-ready quote.
+    Used for cli.character (i.e. characters.json)
+    :param old_dictionary: The original Algolia dictionary
+    :param key_list: A list of tuples naming the keys to keep. A one-item tuple keeps the key's original name; a
+    second item requests a 'rename' of that key in the new dictionary.
+    :return: The reformatted dictionary.
+    """
+
+    new_dictionary = {}
+    for key_item in key_list:
+        source_key = key_item[0]
+        # A second tuple item, when present, is the name the value is stored under.
+        target_key = key_item[1] if len(key_item) > 1 else source_key
+        new_dictionary[target_key] = old_dictionary[source_key]
+
+    return new_dictionary
diff --git a/server/process.py b/server/process.py
index 95b9ac7..96e4e9f 100644
--- a/server/process.py
+++ b/server/process.py
@@ -103,7 +103,7 @@ def sleep_from(wait_time: float, moment: float, manager: enlighten.Manager = Non
     return 0
 
 
-def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
+def get_appearances(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
     """
     Extracts all characters and their number of appearances from a specific episode.
     Prepared in a list of dictionary, preferable storage/for loop method.
@@ -120,3 +120,12 @@ def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]
     characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()}
                   for character, appearances in characters.items()]
     return list(sorted(characters, key=lambda item: item['appearances'], reverse=True))
+
+
+def get_character(character):
+    """
+    Retrieves a character's appearances from every season and episode available.
+    :param character: The character's name
+    :return: A list of dictionary quotes including reference IDs
+    """
+    # TODO: not implemented yet; the function has no body beyond its docstring and returns None.