Begin working on characters.json: add CLI data aggregation methods and an Algolia quote object transform (this needs to be standardized, maybe in the next 10,000 years); rename get_characters to get_appearances

This commit is contained in:
Xevion
2020-09-14 12:01:20 -05:00
parent 61945ce336
commit 21b471e9d4
3 changed files with 108 additions and 5 deletions

View File

@@ -8,6 +8,7 @@ import os
import re import re
import sys import sys
import time import time
from collections import defaultdict
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union
import click import click
@@ -15,8 +16,10 @@ import enlighten
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from server.helpers import algolia_transform
sys.path[0] += '\\..' sys.path[0] += '\\..'
from server.process import DATA_DIR, get_characters, get_episodes, get_filepath, load_file, \ from server.process import DATA_DIR, get_appearances, get_episodes, get_filepath, load_file, \
save_file, sleep_from, \ save_file, sleep_from, \
verify_episode verify_episode
@@ -31,6 +34,65 @@ def cli():
pass pass
@cli.group()
def misc():
    # Command group with no behavior of its own; it only serves as the parent
    # that subcommands attach to via @misc.command(...).
    pass
@misc.command('characters')
@click.option('-s', '--season', type=int, help='Season to be processed for character names')
@click.option('-e', '--episode', type=int, help='Episode to be processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
@click.option('-i', '--individual', is_flag=True,
              help='List characters from individual episodes instead of just compiling a masterlist')
def characters(season: int, episode: int, all: bool, individual: bool):
    """
    Retrieves all characters from all quotes available.
    Used in order to compile a list of characters for the character page, for scanning speakers for anomalies
    and mistakes, as well as for compiling a list of verified 'main' characters.
    """
    # Guard: neither --all nor --season was given, so there is nothing to select.
    if not all and not season:
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to process.')
        logger.info('Check --help for more information on this command.')
        return

    # Resolve the (season, episode) pairs to scan; --all wins over any other option.
    if all:
        targets = list(get_episodes())
    elif not episode:
        targets = list(get_episodes(season=season))
        logger.info(f'Fetching Season {season}...')
    elif verify_episode(season, episode):
        targets = [(season, episode)]
    else:
        logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
        return

    # Accumulate one entry per character id, summing appearances across episodes.
    totals = {}
    for season_id, episode_id in targets:
        found = get_appearances(season_id, episode_id)
        if not found:
            continue

        if individual:
            logger.info(' '.join(entry['name'] for entry in found))

        for entry in found:
            existing = totals.get(entry['id'])
            if existing is None:
                # First sighting: the episode's own entry becomes the running total.
                totals[entry['id']] = entry
            else:
                existing['appearances'] += entry['appearances']

    # Report the masterlist, most frequently appearing characters first.
    ranked = sorted(totals.values(), key=lambda entry: entry['appearances'], reverse=True)
    logger.info(', '.join(entry['name'] for entry in ranked))
@cli.command('fetch') @cli.command('fetch')
@click.option('-s', '--season', type=int, @click.option('-s', '--season', type=int,
help='Season to be fetched. Without --episode, will download all episodes in a season.') help='Season to be fetched. Without --episode, will download all episodes in a season.')
@@ -317,6 +379,19 @@ def algolia(silent_skip: bool, process_: bool):
save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True) save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True)
@build.command('character')
def character():
    """
    Uses algolia.json to build a characters.json file, a masterlist of quotes separated by the speaker.
    Speakers not considered 'main characters' are excluded from the list.
    This file also pulls information to build character descriptions and other relevant information.
    """
    # NOTE: work in progress. Only the per-quote transform pipeline is set up below;
    # nothing is grouped by speaker or written to characters.json yet.
    data = load_file(os.path.join(DATA_DIR, 'algolia.json'), True)

    # One-item tuples keep the key name; two-item tuples are (source key, new key) renames.
    key_list = [('speaker',), ('text',), ('season',), ('episode_rel', 'episode'), ('section_rel', 'scene'),
                ('quote_rel', 'quote')]

    # Placeholder predicate that keeps every quote until the 'main character' filter exists.
    # It must accept the item: filter() calls the predicate with one argument, so the
    # previous zero-argument lambda would raise TypeError as soon as this was iterated.
    # TODO: `master` is a lazy iterator and is not consumed yet.
    master = map(lambda item: algolia_transform(item, key_list), filter(lambda item: True, data))
@build.command('final') @build.command('final')
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently') @click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
@click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.') @click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.')
@@ -338,7 +413,8 @@ def final(silent_skip: bool, process_: bool):
episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True) episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True)
except FileNotFoundError: except FileNotFoundError:
if not silent_skip: if not silent_skip:
logger.warning(f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.') logger.warning(
f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
episode_data = None episode_data = None
description = descriptions[season_id - 1][episode_id - 1] description = descriptions[season_id - 1][episode_id - 1]
@@ -347,7 +423,7 @@ def final(silent_skip: bool, process_: bool):
'title': description['title'].strip(), 'title': description['title'].strip(),
'description': description['description'].strip(), 'description': description['description'].strip(),
'episode_id': episode_id, 'episode_id': episode_id,
'characters': get_characters(season_id, episode_id), 'characters': get_appearances(season_id, episode_id),
'scenes': episode_data 'scenes': episode_data
} }
) )

View File

@@ -4,7 +4,7 @@ helpers.py
""" """
from typing import List, Tuple from typing import List, Tuple, Optional
episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23] episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23]
@@ -30,3 +30,23 @@ def get_neighbors(array: List, index: int, distance: int = 2) -> Tuple[List, Lis
if below_index < len(array): if below_index < len(array):
below.append(array[below_index]) below.append(array[below_index])
return top[::-1], below return top[::-1], below
def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[str]]]) -> dict:
    """
    Transforms a dictionary object of a quote (from algolia.json) into a API-ready quote.
    Used for cli.character (i.e. characters.json)

    :param old_dictionary: The original Algolia dictionary
    :param key_list: A list of tuples naming the keys to keep. A one-item tuple ``(key,)`` keeps the key under its
    original name; a two-item tuple ``(key, new_name)`` stores the value under ``new_name`` instead. A second item of
    ``None`` is treated the same as a one-item tuple.
    :return: The reformatted dictionary, containing only the requested keys.
    :raises KeyError: If a requested source key is missing from ``old_dictionary``.
    """
    new_dictionary = {}
    for key_item in key_list:
        source_key = key_item[0]
        # The annotation allows (key, None); without this check the value would be stored under the key None.
        rename = key_item[1] if len(key_item) > 1 else None
        target_key = source_key if rename is None else rename
        new_dictionary[target_key] = old_dictionary[source_key]
    return new_dictionary

View File

@@ -103,7 +103,7 @@ def sleep_from(wait_time: float, moment: float, manager: enlighten.Manager = Non
return 0 return 0
def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]: def get_appearances(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
""" """
Extracts all characters and their number of appearances from a specific episode. Extracts all characters and their number of appearances from a specific episode.
Prepared in a list of dictionary, preferable storage/for loop method. Prepared in a list of dictionary, preferable storage/for loop method.
@@ -120,3 +120,10 @@ def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]
characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()} characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()}
for character, appearances in characters.items()] for character, appearances in characters.items()]
return list(sorted(characters, key=lambda item: item['appearances'], reverse=True)) return list(sorted(characters, key=lambda item: item['appearances'], reverse=True))
def get_character(character: str):
    """
    Retrieves a character's appearances from every season and episode available.

    NOTE: Not implemented yet. The body consists solely of this docstring, so calling the function currently
    returns None rather than the list described below.

    :param character: The character's name
    :return: A list of dictionary quotes including reference IDs
    """
    # TODO: implement the lookup described in the docstring.