mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-10 22:08:48 -06:00
Absolve linter warnings with variable renames
This commit is contained in:
@@ -122,7 +122,7 @@ def truth():
|
|||||||
@cli.command('merge')
|
@cli.command('merge')
|
||||||
def merge():
|
def merge():
|
||||||
"""Step 2: Merge all Speaker Mappings from source into one file."""
|
"""Step 2: Merge all Speaker Mappings from source into one file."""
|
||||||
speakerList = Counter()
|
speaker_list = Counter()
|
||||||
|
|
||||||
truth_files: List[str] = os.listdir(EPISODES_DIR)
|
truth_files: List[str] = os.listdir(EPISODES_DIR)
|
||||||
logger.debug(f"{len(truth_files)} truth files available.")
|
logger.debug(f"{len(truth_files)} truth files available.")
|
||||||
@@ -133,36 +133,36 @@ def merge():
|
|||||||
with open(truth_path, 'r') as truth_file:
|
with open(truth_path, 'r') as truth_file:
|
||||||
root = etree.parse(truth_file)
|
root = etree.parse(truth_file)
|
||||||
for speaker in root.xpath('//SceneList/Scene/Quote/Speaker/text()'):
|
for speaker in root.xpath('//SceneList/Scene/Quote/Speaker/text()'):
|
||||||
speakerList[speaker] += 1
|
speaker_list[speaker] += 1
|
||||||
pbar.update()
|
pbar.update()
|
||||||
|
|
||||||
logger.debug('Speakers acquired from Truth files.')
|
logger.debug('Speakers acquired from Truth files.')
|
||||||
|
|
||||||
speakerMapping = OrderedDict()
|
speaker_mapping = OrderedDict()
|
||||||
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as speaker_mapping_file:
|
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as speaker_mapping_file:
|
||||||
rootMappingElement: etree.ElementBase = etree.parse(speaker_mapping_file)
|
root_mapping_element: etree.ElementBase = etree.parse(speaker_mapping_file)
|
||||||
for mappingElement in rootMappingElement.xpath('//SpeakerMappings/Mapping'):
|
for mappingElement in root_mapping_element.xpath('//SpeakerMappings/Mapping'):
|
||||||
source, destination = mappingElement.xpath('.//Source/text()')[0], mappingElement.xpath('.//Destination/text()')[0]
|
source, destination = mappingElement.xpath('.//Source/text()')[0], mappingElement.xpath('.//Destination/text()')[0]
|
||||||
speakerMapping[source] = destination
|
speaker_mapping[source] = destination
|
||||||
|
|
||||||
logger.debug('Mappings loaded.')
|
logger.debug('Mappings loaded.')
|
||||||
|
|
||||||
root = etree.Element('CharacterList')
|
root = etree.Element('CharacterList')
|
||||||
pbar = enlighten.Counter(total=len(speakerList.keys()), unit='Speakers')
|
pbar = enlighten.Counter(total=len(speaker_list.keys()), unit='Speakers')
|
||||||
seen = set()
|
seen = set()
|
||||||
|
|
||||||
logger.debug('Merging Speaker Mappings...')
|
logger.debug('Merging Speaker Mappings...')
|
||||||
for speaker in speakerList.keys():
|
for speaker in speaker_list.keys():
|
||||||
while speakerMapping.get(speaker) is not None:
|
while speaker_mapping.get(speaker) is not None:
|
||||||
if speakerMapping.get(speaker) == speaker:
|
if speaker_mapping.get(speaker) == speaker:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
speaker = speakerMapping[speaker]
|
speaker = speaker_mapping[speaker]
|
||||||
|
|
||||||
if speaker not in seen:
|
if speaker not in seen:
|
||||||
seen.add(speaker)
|
seen.add(speaker)
|
||||||
characterElement = etree.SubElement(root, 'Character')
|
character_element = etree.SubElement(root, 'Character')
|
||||||
characterElement.text = speaker
|
character_element.text = speaker
|
||||||
pbar.update()
|
pbar.update()
|
||||||
|
|
||||||
logger.debug("Speaker mappings merged. Exporting to `characters.xml`")
|
logger.debug("Speaker mappings merged. Exporting to `characters.xml`")
|
||||||
@@ -209,59 +209,59 @@ def ids():
|
|||||||
logger.debug('Identifier file exists already. Pre-existing Speakers will be kept.')
|
logger.debug('Identifier file exists already. Pre-existing Speakers will be kept.')
|
||||||
|
|
||||||
with open(ConstantPaths.IDENTIFIERS, 'r') as identifier_file:
|
with open(ConstantPaths.IDENTIFIERS, 'r') as identifier_file:
|
||||||
preidentifiers: etree.ElementBase = etree.parse(identifier_file)
|
pre_identifiers: etree.ElementBase = etree.parse(identifier_file)
|
||||||
|
|
||||||
pre_existing = OrderedDict()
|
pre_existing = OrderedDict()
|
||||||
for speaker in preidentifiers.xpath('//SpeakerList/Speaker'):
|
for speaker in pre_identifiers.xpath('//SpeakerList/Speaker'):
|
||||||
speakerName = speaker.xpath('./RawText/text()')[0]
|
speaker_name = speaker.xpath('./RawText/text()')[0]
|
||||||
pre_existing[speakerName] = speaker
|
pre_existing[speaker_name] = speaker
|
||||||
|
|
||||||
root = etree.Element('SpeakerList')
|
root = etree.Element('SpeakerList')
|
||||||
splitPatterns: List[str] = [r'\s*,\s*',
|
split_patterns: List[str] = [r'\s*,\s*',
|
||||||
r'\s*&\s*',
|
r'\s*&\s*',
|
||||||
r'\s+and,?(?:\s+|$)',
|
r'\s+and,?(?:\s+|$)',
|
||||||
r'\s*[\\/]\s*']
|
r'\s*[\\/]\s*']
|
||||||
splitPattern: str = '|'.join(splitPatterns)
|
split_pattern: str = '|'.join(split_patterns)
|
||||||
|
|
||||||
existing_characters_count: int = 0
|
existing_characters_count: int = 0
|
||||||
new_characters_count: int = 0
|
new_characters_count: int = 0
|
||||||
|
|
||||||
# Pre-existing character identifiers are kept at the top, in order.
|
# Pre-existing character identifiers are kept at the top, in order.
|
||||||
for speakerName in characters:
|
for speaker_name in characters:
|
||||||
if pre_existing is not None:
|
if pre_existing is not None:
|
||||||
if speakerName in pre_existing.keys():
|
if speaker_name in pre_existing.keys():
|
||||||
root.append(pre_existing[speakerName])
|
root.append(pre_existing[speaker_name])
|
||||||
del pre_existing[speakerName]
|
del pre_existing[speaker_name]
|
||||||
existing_characters_count += 1
|
existing_characters_count += 1
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
logger.debug(f'New speaker: `{speakerName}`')
|
logger.debug(f'New speaker: `{speaker_name}`')
|
||||||
new_characters_count += 1
|
new_characters_count += 1
|
||||||
|
|
||||||
# New speaker to insert
|
# New speaker to insert
|
||||||
speaker_element = etree.SubElement(root, 'Speaker', annotated="false")
|
speaker_element = etree.SubElement(root, 'Speaker', annotated="false")
|
||||||
raw_text_element = etree.SubElement(speaker_element, "RawText")
|
raw_text_element = etree.SubElement(speaker_element, "RawText")
|
||||||
raw_text_element.text = speakerName
|
raw_text_element.text = speaker_name
|
||||||
|
|
||||||
split_text: List[str] = re.split(splitPattern, speakerName)
|
split_text: List[str] = re.split(split_pattern, speaker_name)
|
||||||
split_text = [split for split in split_text if re.match(r'\w{2,}', split) is not None]
|
split_text = [split for split in split_text if re.match(r'\w{2,}', split) is not None]
|
||||||
|
|
||||||
isCompound: bool = len(split_text) > 1
|
is_compound: bool = len(split_text) > 1
|
||||||
isBackground: bool = re.search(r'#\d', speakerName) is not None # Not fool-proof, but filters some out.
|
is_background: bool = re.search(r'#\d', speaker_name) is not None # Not fool-proof, but filters some out.
|
||||||
|
|
||||||
if isCompound:
|
if is_compound:
|
||||||
speaker_element.attrib['annotated'] = "true"
|
speaker_element.attrib['annotated'] = "true"
|
||||||
annotated_text_element = etree.SubElement(speaker_element, 'AnnotatedText')
|
annotated_text_element = etree.SubElement(speaker_element, 'AnnotatedText')
|
||||||
characters_element = etree.SubElement(speaker_element, 'Characters')
|
characters_element = etree.SubElement(speaker_element, 'Characters')
|
||||||
annotated_text_element.text = speakerName
|
annotated_text_element.text = speaker_name
|
||||||
for sub_character in split_text:
|
for sub_character in split_text:
|
||||||
subcharacter_element = etree.SubElement(characters_element, 'Character')
|
subcharacter_element = etree.SubElement(characters_element, 'Character')
|
||||||
subcharacter_element.text = valuify(sub_character)
|
subcharacter_element.text = valuify(sub_character)
|
||||||
subcharacter_element.attrib['type'] = 'null'
|
subcharacter_element.attrib['type'] = 'null'
|
||||||
else:
|
else:
|
||||||
character_element = etree.SubElement(speaker_element, 'Character')
|
character_element = etree.SubElement(speaker_element, 'Character')
|
||||||
character_element.attrib['type'] = 'background' if isBackground else 'null'
|
character_element.attrib['type'] = 'background' if is_background else 'null'
|
||||||
character_element.text = valuify(speakerName)
|
character_element.text = valuify(speaker_name)
|
||||||
|
|
||||||
logger.debug(f'{new_characters_count} new speaker elements added. {existing_characters_count} speaker elements preserved.')
|
logger.debug(f'{new_characters_count} new speaker elements added. {existing_characters_count} speaker elements preserved.')
|
||||||
|
|
||||||
@@ -316,9 +316,9 @@ def meta() -> None:
|
|||||||
logger.debug('Meta file written.')
|
logger.debug('Meta file written.')
|
||||||
|
|
||||||
|
|
||||||
@cli.command('all')
|
@cli.command('run_all')
|
||||||
@click.option('--confirm', is_flag=True, help='Force confirm through the confirmation prompt')
|
@click.option('--confirm', is_flag=True, help='Force confirm through the confirmation prompt')
|
||||||
def all(confirm: bool) -> None:
|
def run_all(confirm: bool) -> None:
|
||||||
"""Runs all commands in order one after another."""
|
"""Runs all commands in order one after another."""
|
||||||
logger.warning('`all` command running...')
|
logger.warning('`all` command running...')
|
||||||
if confirm or click.confirm("This command can be very destructive to unstaged/uncommitted data, are you sure?"):
|
if confirm or click.confirm("This command can be very destructive to unstaged/uncommitted data, are you sure?"):
|
||||||
@@ -345,19 +345,19 @@ def similar(text: str, destination: Optional[bool], results: int, reversed: bool
|
|||||||
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as mapping_file:
|
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as mapping_file:
|
||||||
root: etree.ElementBase = etree.parse(mapping_file)
|
root: etree.ElementBase = etree.parse(mapping_file)
|
||||||
|
|
||||||
mappingType: str = "Source"
|
mapping_type: str = "Source"
|
||||||
if destination:
|
if destination:
|
||||||
mappingType = "Destination"
|
mapping_type = "Destination"
|
||||||
|
|
||||||
counts: Union[List[int], List[str]] = list(
|
counts: Union[List[int], List[str]] = list(
|
||||||
map(int, root.xpath('//SpeakerMappings/Mapping/@count'))) # Parse counts into integers for merge
|
map(int, root.xpath('//SpeakerMappings/Mapping/@count'))) # Parse counts into integers for merge
|
||||||
speakers = root.xpath(f"//SpeakerMappings/Mapping/{mappingType}/text()")
|
speakers = root.xpath(f"//SpeakerMappings/Mapping/{mapping_type}/text()")
|
||||||
if not no_merge: speakers, counts = marked_item_merge(speakers, counts) # Merge identical speakers together
|
if not no_merge: speakers, counts = marked_item_merge(speakers, counts) # Merge identical speakers together
|
||||||
if results == -1:
|
if results == -1:
|
||||||
results = len(speakers)
|
results = len(speakers)
|
||||||
|
|
||||||
resultIndexes: List[int] = get_close_matches_indexes(text, speakers, results, 0)
|
result_indexes: List[int] = get_close_matches_indexes(text, speakers, results, 0)
|
||||||
results = [f'{speakers[i]} ({counts[i]})' for i in resultIndexes]
|
results = [f'{speakers[i]} ({counts[i]})' for i in result_indexes]
|
||||||
results = [f'{i}. {item}' for i, item in enumerate(results, start=1)]
|
results = [f'{i}. {item}' for i, item in enumerate(results, start=1)]
|
||||||
if reversed: results.reverse()
|
if reversed: results.reverse()
|
||||||
|
|
||||||
@@ -375,8 +375,8 @@ def compile() -> None:
|
|||||||
speaker_mapping: Dict[str, str] = OrderedDict()
|
speaker_mapping: Dict[str, str] = OrderedDict()
|
||||||
logger.debug('Parsing speaker mappings...')
|
logger.debug('Parsing speaker mappings...')
|
||||||
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as speaker_mapping_file:
|
with open(ConstantPaths.SPEAKER_MAPPING, 'r') as speaker_mapping_file:
|
||||||
speakering_mapping_root: etree.ElementBase = etree.parse(speaker_mapping_file)
|
speaker_mapping_root: etree.ElementBase = etree.parse(speaker_mapping_file)
|
||||||
for mapping_element in speakering_mapping_root.xpath('//SpeakerMappings/Mapping'):
|
for mapping_element in speaker_mapping_root.xpath('//SpeakerMappings/Mapping'):
|
||||||
source = mapping_element.xpath('./Source/text()')[0]
|
source = mapping_element.xpath('./Source/text()')[0]
|
||||||
destination = mapping_element.xpath('./Destination/text()')[0]
|
destination = mapping_element.xpath('./Destination/text()')[0]
|
||||||
|
|
||||||
@@ -434,21 +434,21 @@ def compile() -> None:
|
|||||||
|
|
||||||
# This is the (possibly annotated) list of characters referenced by this quote's raw speaker.
|
# This is the (possibly annotated) list of characters referenced by this quote's raw speaker.
|
||||||
character_mapping: etree.ElementBase = character_mappings[speaker_mapping[truth_speaker]]
|
character_mapping: etree.ElementBase = character_mappings[speaker_mapping[truth_speaker]]
|
||||||
isAnnotated = character_mapping.attrib.get("annotated", "false") == "true"
|
is_annotated = character_mapping.attrib.get("annotated", "false") == "true"
|
||||||
|
|
||||||
# Speaker Text - the text displayed, annotated or not, that shows who exactly is speaking
|
# Speaker Text - the text displayed, annotated or not, that shows who exactly is speaking
|
||||||
speaker_text_element = etree.SubElement(speaker_element, "SpeakerText")
|
speaker_text_element = etree.SubElement(speaker_element, "SpeakerText")
|
||||||
speaker_text_element.attrib["annotated"] = "true" if isAnnotated else "false"
|
speaker_text_element.attrib["annotated"] = "true" if is_annotated else "false"
|
||||||
if isAnnotated:
|
if is_annotated:
|
||||||
speaker_text_element.text = character_mapping.find('AnnotatedText').text
|
speaker_text_element.text = character_mapping.find('AnnotatedText').text
|
||||||
else:
|
else:
|
||||||
speaker_text_element.text = character_mapping.find('RawText').text
|
speaker_text_element.text = character_mapping.find('RawText').text
|
||||||
|
|
||||||
# The constituent referenced characters in the SpeakerText element
|
# The constituent referenced characters in the SpeakerText element
|
||||||
characters_element = etree.SubElement(speaker_element, 'Characters')
|
characters_element = etree.SubElement(speaker_element, 'Characters')
|
||||||
hasMultiple = character_mapping.find("Characters") is not None
|
has_multiple = character_mapping.find("Characters") is not None
|
||||||
|
|
||||||
if hasMultiple:
|
if has_multiple:
|
||||||
for character in character_mapping.xpath('./Characters/Character'):
|
for character in character_mapping.xpath('./Characters/Character'):
|
||||||
characters_element.append(copy.deepcopy(
|
characters_element.append(copy.deepcopy(
|
||||||
character
|
character
|
||||||
@@ -457,9 +457,6 @@ def compile() -> None:
|
|||||||
characters_element.append(copy.deepcopy(
|
characters_element.append(copy.deepcopy(
|
||||||
character_mapping.find('Character')
|
character_mapping.find('Character')
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed while processing `{file}`", exc_info=e)
|
logger.error(f"Failed while processing `{file}`", exc_info=e)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user