mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-14 14:13:22 -06:00
Begin working on characters.json CLI data aggregation methods and the Algolia quote object transform (this needs to be standardized, maybe in the next 10,000 years); rename get_characters to get_appearances
This commit is contained in:
@@ -8,6 +8,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import click
|
||||
@@ -15,8 +16,10 @@ import enlighten
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from server.helpers import algolia_transform
|
||||
|
||||
sys.path[0] += '\\..'
|
||||
from server.process import DATA_DIR, get_characters, get_episodes, get_filepath, load_file, \
|
||||
from server.process import DATA_DIR, get_appearances, get_episodes, get_filepath, load_file, \
|
||||
save_file, sleep_from, \
|
||||
verify_episode
|
||||
|
||||
@@ -31,6 +34,65 @@ def cli():
|
||||
pass
|
||||
|
||||
|
||||
@cli.group()
def misc():
    # Container group for miscellaneous sub-commands (e.g. `misc characters`); it does no work itself.
    pass
|
||||
|
||||
|
||||
@misc.command('characters')
@click.option('-s', '--season', type=int, help='Season to be processed for character names')
@click.option('-e', '--episode', type=int, help='Episode to be processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
@click.option('-i', '--individual', is_flag=True,
              help='List characters from individual episodes instead of just compiling a masterlist')
def characters(season: int, episode: int, all: bool, individual: bool):
    """
    Retrieves all characters from all quotes available.
    Used in order to compile a list of characters for the character page, for scanning speakers for anomalies
    and mistakes, as well as for compiling a list of verified 'main' characters.
    """
    # NOTE: the `all` parameter shadows the builtin, but click derives the name from the `--all` flag,
    # so it is kept to leave the command's interface untouched.

    # Resolve which (season, episode) pairs to scan. `--all` wins over any other selection.
    if all:
        episodes = list(get_episodes())
    elif season:
        if episode:
            if not verify_episode(season, episode):
                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
                return
            episodes = [(season, episode)]
        else:
            episodes = list(get_episodes(season=season))
            logger.info(f'Fetching Season {season}...')
    else:
        # Nothing usable was specified: explain what is missing and bail out.
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to process.')
        logger.info('Check --help for more information on this command.')
        return

    # Masterlist keyed by character id, accumulating appearance counts across the selected episodes.
    master = {}
    for _season, _episode in episodes:
        appearances = get_appearances(_season, _episode)

        # Episodes with no appearance data contribute nothing.
        if not appearances:
            continue

        if individual:
            logger.info(' '.join(item['name'] for item in appearances))

        for item in appearances:
            entry = master.get(item['id'])
            if entry is None:
                # Store a copy so the running total never mutates the dict handed back by get_appearances.
                master[item['id']] = dict(item)
            else:
                entry['appearances'] += item['appearances']

    # Report every character name, most appearances first.
    ranked = sorted(master.values(), reverse=True, key=lambda item: item['appearances'])
    logger.info(', '.join(item['name'] for item in ranked))
|
||||
|
||||
|
||||
@cli.command('fetch')
|
||||
@click.option('-s', '--season', type=int,
|
||||
help='Season to be fetched. Without --episode, will download all episodes in a season.')
|
||||
@@ -317,6 +379,19 @@ def algolia(silent_skip: bool, process_: bool):
|
||||
save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True)
|
||||
|
||||
|
||||
@build.command('character')
def character():
    """
    Uses algolia.json to build a characters.json file, a masterlist of quotes separated by the speaker.
    Speakers not considered 'main characters' are excluded from the list.
    This file also pulls information to build character descriptions and other relevant information.
    """
    data = load_file(os.path.join(DATA_DIR, 'algolia.json'), True)

    # Keys to carry over from each Algolia record; a second tuple item renames the key in the output.
    key_list = [('speaker',), ('text',), ('season',), ('episode_rel', 'episode'), ('section_rel', 'scene'),
                ('quote_rel', 'quote')]

    # Lazily transform every record. The previous `filter(lambda: True, data)` placeholder used a
    # zero-argument predicate, which raises TypeError as soon as the pipeline is consumed.
    # TODO: filter out non-main characters, group quotes by speaker and write characters.json;
    # `master` is not consumed or saved anywhere yet.
    master = (algolia_transform(item, key_list) for item in data)
|
||||
|
||||
|
||||
@build.command('final')
|
||||
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
|
||||
@click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.')
|
||||
@@ -338,7 +413,8 @@ def final(silent_skip: bool, process_: bool):
|
||||
episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True)
|
||||
except FileNotFoundError:
|
||||
if not silent_skip:
|
||||
logger.warning(f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
|
||||
logger.warning(
|
||||
f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
|
||||
episode_data = None
|
||||
|
||||
description = descriptions[season_id - 1][episode_id - 1]
|
||||
@@ -347,7 +423,7 @@ def final(silent_skip: bool, process_: bool):
|
||||
'title': description['title'].strip(),
|
||||
'description': description['description'].strip(),
|
||||
'episode_id': episode_id,
|
||||
'characters': get_characters(season_id, episode_id),
|
||||
'characters': get_appearances(season_id, episode_id),
|
||||
'scenes': episode_data
|
||||
}
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@ helpers.py
|
||||
|
||||
"""
|
||||
|
||||
from typing import List, Tuple
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23]
|
||||
|
||||
@@ -30,3 +30,23 @@ def get_neighbors(array: List, index: int, distance: int = 2) -> Tuple[List, Lis
|
||||
if below_index < len(array):
|
||||
below.append(array[below_index])
|
||||
return top[::-1], below
|
||||
|
||||
|
||||
def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[str]]]) -> dict:
    """
    Transforms a dictionary object of a quote (from algolia.json) into an API-ready quote.
    Used for cli.character (i.e. characters.json)

    :param old_dictionary: The original Algolia dictionary
    :param key_list: A list of tuples naming the keys to keep. A one-item tuple keeps the key under its
    original name; a second item requests a rename of that key in the result. A second item of None is
    treated the same as a one-item tuple.
    :return: The reformatted dictionary, containing only the requested keys.
    :raises KeyError: If a requested key is missing from old_dictionary.
    """
    new_dictionary = {}
    for key_item in key_list:
        source_key = key_item[0]
        # The type hint permits (name, None); fall back to the source name rather than emitting a None key.
        rename = key_item[1] if len(key_item) > 1 else None
        target_key = source_key if rename is None else rename
        new_dictionary[target_key] = old_dictionary[source_key]

    return new_dictionary
|
||||
|
||||
@@ -103,7 +103,7 @@ def sleep_from(wait_time: float, moment: float, manager: enlighten.Manager = Non
|
||||
return 0
|
||||
|
||||
|
||||
def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
|
||||
def get_appearances(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
|
||||
"""
|
||||
Extracts all characters and their number of appearances from a specific episode.
|
||||
Prepared in a list of dictionary, preferable storage/for loop method.
|
||||
@@ -120,3 +120,10 @@ def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]
|
||||
characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()}
|
||||
for character, appearances in characters.items()]
|
||||
return list(sorted(characters, key=lambda item: item['appearances'], reverse=True))
|
||||
|
||||
def get_character(character):
    """
    Retrieves a character's appearances from every season and episode available.

    :param character: The character's name
    :return: A list of dictionary quotes including reference IDs
    """
    # TODO: not implemented yet. The docstring describes the intended contract;
    # for now the function has no body and therefore returns None.
|
||||
|
||||
Reference in New Issue
Block a user