mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-14 16:13:19 -06:00
begin working on characters.json CLI data aggregation methods, algolia quote object transform (this needs to be standardized, maybe in the next 10,000 years), rename get_characters to get_appearances
This commit is contained in:
@@ -8,6 +8,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
from collections import defaultdict
|
||||||
from typing import List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
|
|
||||||
import click
|
import click
|
||||||
@@ -15,8 +16,10 @@ import enlighten
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from server.helpers import algolia_transform
|
||||||
|
|
||||||
sys.path[0] += '\\..'
|
sys.path[0] += '\\..'
|
||||||
from server.process import DATA_DIR, get_characters, get_episodes, get_filepath, load_file, \
|
from server.process import DATA_DIR, get_appearances, get_episodes, get_filepath, load_file, \
|
||||||
save_file, sleep_from, \
|
save_file, sleep_from, \
|
||||||
verify_episode
|
verify_episode
|
||||||
|
|
||||||
@@ -31,6 +34,65 @@ def cli():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@cli.group()
def misc():
    # Container group only: sub-commands attach to it via @misc.command(...), the callback itself does nothing.
    return None
|
||||||
|
|
||||||
|
|
||||||
|
@misc.command('characters')
@click.option('-s', '--season', type=int, help='Season to be processed for character names')
@click.option('-e', '--episode', type=int, help='Episode to be processed. Requires --season to be specified.')
@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.')
@click.option('-i', '--individual', is_flag=True,
              help='List characters from individual episodes instead of just compiling a masterlist')
def characters(season: int, episode: int, all: bool, individual: bool):
    """
    Retrieves all characters from all quotes available.
    Used in order to compile a list of characters for the character page, for scanning speakers for anomalies
    and mistakes, as well as for compiling a list of verified 'main' characters.
    """
    # Resolve which (season, episode) pairs to scan. --all takes priority over --season/--episode.
    if all:
        episodes = list(get_episodes())
    elif season:
        if episode:
            if not verify_episode(season, episode):
                logger.error(f'Season {season}, Episode {episode} is not a valid combination.')
                return
            episodes = [(season, episode)]
        else:
            episodes = list(get_episodes(season=season))
            logger.info(f'Fetching Season {season}...')
    else:
        # Nothing usable was specified; explain why and bail out.
        if episode:
            logger.info('You must specify more than just an episode.')
        else:
            logger.info('You must specify which episodes to process.')
        logger.info('Check --help for more information on this command.')
        return

    # Character id -> aggregated record ('name', 'id', summed 'appearances') across the selected episodes.
    master = {}
    for _season, _episode in episodes:
        appearances = get_appearances(_season, _episode)

        # Episodes without any appearance data contribute nothing.
        if not appearances:
            continue

        if individual:
            # Comma-separated, like the masterlist below, so multi-word names stay distinguishable.
            logger.info(', '.join(item['name'] for item in appearances))

        for item in appearances:
            entry = master.get(item['id'])
            if entry is None:
                # Store a copy so summing later episodes never mutates the record returned by get_appearances.
                master[item['id']] = dict(item)
            else:
                entry['appearances'] += item['appearances']

    # Masterlist of names, most appearances first.
    ranked = sorted(master.values(), key=lambda item: item['appearances'], reverse=True)
    logger.info(', '.join(item['name'] for item in ranked))
|
||||||
|
|
||||||
|
|
||||||
@cli.command('fetch')
|
@cli.command('fetch')
|
||||||
@click.option('-s', '--season', type=int,
|
@click.option('-s', '--season', type=int,
|
||||||
help='Season to be fetched. Without --episode, will download all episodes in a season.')
|
help='Season to be fetched. Without --episode, will download all episodes in a season.')
|
||||||
@@ -317,6 +379,19 @@ def algolia(silent_skip: bool, process_: bool):
|
|||||||
save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True)
|
save_file(os.path.join(DATA_DIR, 'algolia.json'), data, True)
|
||||||
|
|
||||||
|
|
||||||
|
@build.command('character')
def character():
    """
    Uses algolia.json to build a characters.json file, a masterlist of quotes separated by the speaker.
    Speakers not considered 'main characters' are excluded from the list.
    This file also pulls information to build character descriptions and other relevant information.
    """
    data = load_file(os.path.join(DATA_DIR, 'algolia.json'), True)

    # Keys carried over from each Algolia record: a one-item tuple keeps the key name,
    # a two-item tuple is (name in algolia.json, name in the transformed quote).
    key_list = [('speaker',), ('text',), ('season',), ('episode_rel', 'episode'), ('section_rel', 'scene'),
                ('quote_rel', 'quote')]

    # Lazily transform every record. The previous filter(lambda: True, ...) used a zero-argument predicate,
    # which raises TypeError as soon as the iterator is consumed.
    # NOTE(review): nothing consumes or saves `master` yet, and no 'main character' filtering is applied here;
    # the docstring describes the intended end result rather than what this code currently does.
    master = (algolia_transform(item, key_list) for item in data)
|
||||||
|
|
||||||
|
|
||||||
@build.command('final')
|
@build.command('final')
|
||||||
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
|
@click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently')
|
||||||
@click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.')
|
@click.option('--process', 'process_', is_flag=True, help='Run processing before building final data.')
|
||||||
@@ -338,7 +413,8 @@ def final(silent_skip: bool, process_: bool):
|
|||||||
episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True)
|
episode_data = load_file(get_filepath(season_id, episode_id, 'processed'), True)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
if not silent_skip:
|
if not silent_skip:
|
||||||
logger.warning(f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
|
logger.warning(
|
||||||
|
f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.')
|
||||||
episode_data = None
|
episode_data = None
|
||||||
|
|
||||||
description = descriptions[season_id - 1][episode_id - 1]
|
description = descriptions[season_id - 1][episode_id - 1]
|
||||||
@@ -347,7 +423,7 @@ def final(silent_skip: bool, process_: bool):
|
|||||||
'title': description['title'].strip(),
|
'title': description['title'].strip(),
|
||||||
'description': description['description'].strip(),
|
'description': description['description'].strip(),
|
||||||
'episode_id': episode_id,
|
'episode_id': episode_id,
|
||||||
'characters': get_characters(season_id, episode_id),
|
'characters': get_appearances(season_id, episode_id),
|
||||||
'scenes': episode_data
|
'scenes': episode_data
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ helpers.py
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple, Optional
|
||||||
|
|
||||||
episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23]
|
episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23]
|
||||||
|
|
||||||
@@ -30,3 +30,23 @@ def get_neighbors(array: List, index: int, distance: int = 2) -> Tuple[List, Lis
|
|||||||
if below_index < len(array):
|
if below_index < len(array):
|
||||||
below.append(array[below_index])
|
below.append(array[below_index])
|
||||||
return top[::-1], below
|
return top[::-1], below
|
||||||
|
|
||||||
|
|
||||||
|
def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[str]]]) -> dict:
    """
    Transforms a dictionary object of a quote (from algolia.json) into an API-ready quote.
    Used for cli.character (i.e. characters.json)

    :param old_dictionary: The original Algolia dictionary
    :param key_list: A list of tuples naming the keys to keep. A one-item tuple ``(key,)`` keeps the value under
    its original key; a two-item tuple ``(key, new_name)`` stores the value under ``new_name`` instead. A second
    item of ``None`` is treated like a one-item tuple.
    :return: The reformatted dictionary, containing only the requested keys.
    :raises KeyError: If a requested key is missing from old_dictionary.
    """
    new_dictionary = {}
    for key_item in key_list:
        source_key = key_item[0]
        # Fall back to the original name when no rename is given. An explicit None (allowed by the
        # Optional[str] annotation) must not end up as a literal None key in the result.
        if len(key_item) > 1 and key_item[1] is not None:
            target_key = key_item[1]
        else:
            target_key = source_key
        new_dictionary[target_key] = old_dictionary[source_key]

    return new_dictionary
|
||||||
|
|||||||
@@ -103,7 +103,7 @@ def sleep_from(wait_time: float, moment: float, manager: enlighten.Manager = Non
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
|
def get_appearances(season, episode) -> Optional[List[Dict[str, Union[int, str]]]]:
|
||||||
"""
|
"""
|
||||||
Extracts all characters and their number of appearances from a specific episode.
|
Extracts all characters and their number of appearances from a specific episode.
|
||||||
Prepared in a list of dictionary, preferable storage/for loop method.
|
Prepared in a list of dictionary, preferable storage/for loop method.
|
||||||
@@ -120,3 +120,10 @@ def get_characters(season, episode) -> Optional[List[Dict[str, Union[int, str]]]
|
|||||||
characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()}
|
characters = [{'name': character, 'appearances': appearances, 'id': '-'.join(character.split(' ')).lower()}
|
||||||
for character, appearances in characters.items()]
|
for character, appearances in characters.items()]
|
||||||
return list(sorted(characters, key=lambda item: item['appearances'], reverse=True))
|
return list(sorted(characters, key=lambda item: item['appearances'], reverse=True))
|
||||||
|
|
||||||
|
def get_character(character):
    """
    Look up a character's appearances across every season and episode available.

    Not implemented yet: the function has no logic beyond this docstring, so it currently returns None.

    :param character: The character's name
    :return: Intended to be a list of dictionary quotes including reference IDs; None for now
    """
    return None
|
||||||
|
|||||||
Reference in New Issue
Block a user