mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-16 06:13:33 -06:00
Add normalization.meta command, add command help to truth
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -35,6 +36,7 @@ class Constants:
|
|||||||
|
|
||||||
@cli.command('truth')
|
@cli.command('truth')
|
||||||
def truth():
|
def truth():
|
||||||
|
"""Step 1: Builds raw files into truth files."""
|
||||||
logger.info("Processing all raw files into normalized truth files.")
|
logger.info("Processing all raw files into normalized truth files.")
|
||||||
|
|
||||||
speakers = Counter()
|
speakers = Counter()
|
||||||
@@ -103,7 +105,7 @@ def truth():
|
|||||||
|
|
||||||
@cli.command('merge')
|
@cli.command('merge')
|
||||||
def merge():
|
def merge():
|
||||||
"""Merge all Speaker Mappings from source into one file."""
|
"""Step 2: Merge all Speaker Mappings from source into one file."""
|
||||||
speakerList = Counter()
|
speakerList = Counter()
|
||||||
|
|
||||||
truth_files: List[str] = os.listdir(os.path.join(TRUTH_DIR, 'episodes'))
|
truth_files: List[str] = os.listdir(os.path.join(TRUTH_DIR, 'episodes'))
|
||||||
@@ -172,7 +174,7 @@ def valuify(value: str) -> str:
|
|||||||
|
|
||||||
@cli.command('ids')
|
@cli.command('ids')
|
||||||
def ids():
|
def ids():
|
||||||
"""Builds an XML file for identifying character id mappings"""
|
"""Step 3: Builds an XML file for identifying character id mappings"""
|
||||||
|
|
||||||
logger.info("Building ID Character mapping file...")
|
logger.info("Building ID Character mapping file...")
|
||||||
IDENTIFIER_FILE: str = os.path.join(CHARACTERS_DIR, 'identifiers.xml')
|
IDENTIFIER_FILE: str = os.path.join(CHARACTERS_DIR, 'identifiers.xml')
|
||||||
@@ -261,12 +263,41 @@ def ids():
|
|||||||
identifier_file.write(etree.tostring(root, encoding=str, pretty_print=True))
|
identifier_file.write(etree.tostring(root, encoding=str, pretty_print=True))
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command('meta')
def meta() -> None:
    """Step 4: Builds meta.json, mapping each character name to its type.

    Reads ``identifiers.xml`` from CHARACTERS_DIR, walks every ``Character``
    element found under each ``Speaker``, and writes a JSON object of
    ``name -> type`` to ``meta.json`` in TRUTH_DIR. A ``type`` attribute of
    ``'null'`` is stored as JSON ``null``.

    Raises:
        KeyError: if a ``Character`` element has no ``type`` attribute.
    """
    logger.debug('Creating meta.json')

    identifiers_path = os.path.join(CHARACTERS_DIR, 'identifiers.xml')
    with open(identifiers_path, 'r') as identifiers_file:
        speakers = etree.parse(identifiers_file).xpath('//SpeakerList/Speaker')
    logger.debug(f'{len(speakers)} speakers parsed.')

    meta_data = OrderedDict()

    for speaker in speakers:
        # Characters may be wrapped in a <Characters> element or sit directly
        # under the speaker; the wrapped form wins when it yields any matches.
        characters = speaker.xpath('./Characters/Character') or speaker.xpath('./Character')
        for character in characters:
            name = character.text
            character_type = character.attrib['type']
            if character_type == 'null':
                character_type = None

            # A concrete type always overwrites the stored value; a null type
            # only fills in names that have not been recorded yet.
            if character_type is not None or name not in meta_data:
                meta_data[name] = character_type

    logger.debug(f'Writing {len(meta_data)} meta values to disk.')
    with open(os.path.join(TRUTH_DIR, 'meta.json'), 'w') as meta_file:
        json.dump(meta_data, meta_file, indent=4)
    logger.debug('Meta file written.')
|
||||||
|
|
||||||
|
|
||||||
@cli.command('all')
def all():
    """Runs all commands in order one after another."""
    # NOTE(review): if `cli` is a click group, these names are Command objects
    # rather than plain functions, and calling them directly may re-parse the
    # process arguments -- confirm against the `cli` definition.
    pipeline = (truth, merge, ids, meta)
    for step in pipeline:
        step()
|
||||||
|
|
||||||
|
|
||||||
@cli.command('similar')
|
@cli.command('similar')
|
||||||
|
|||||||
Reference in New Issue
Block a user