mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-11 00:08:51 -06:00
Improve normalization.merge command with logging
This commit is contained in:
@@ -104,8 +104,11 @@ def truth():
|
|||||||
def merge():
|
def merge():
|
||||||
"""Merge all Speaker Mappings from source into one file."""
|
"""Merge all Speaker Mappings from source into one file."""
|
||||||
speakerList = Counter()
|
speakerList = Counter()
|
||||||
|
|
||||||
truth_files: List[str] = os.listdir(os.path.join(TRUTH_DIR, 'episodes'))
|
truth_files: List[str] = os.listdir(os.path.join(TRUTH_DIR, 'episodes'))
|
||||||
|
logger.debug(f"{len(truth_files)} truth files available.")
|
||||||
pbar = enlighten.Counter(total=len(truth_files), unit='Files')
|
pbar = enlighten.Counter(total=len(truth_files), unit='Files')
|
||||||
|
|
||||||
for truth_filename in truth_files:
|
for truth_filename in truth_files:
|
||||||
truth_path = os.path.join(TRUTH_DIR, 'episodes', truth_filename)
|
truth_path = os.path.join(TRUTH_DIR, 'episodes', truth_filename)
|
||||||
with open(truth_path, 'r') as truth_file:
|
with open(truth_path, 'r') as truth_file:
|
||||||
@@ -114,6 +117,8 @@ def merge():
|
|||||||
speakerList[speaker] += 1
|
speakerList[speaker] += 1
|
||||||
pbar.update()
|
pbar.update()
|
||||||
|
|
||||||
|
logger.debug('Speakers acquired from Truth files.')
|
||||||
|
|
||||||
speakerMapping = OrderedDict()
|
speakerMapping = OrderedDict()
|
||||||
with open(os.path.join(TRUTH_DIR, Constants.SPEAKER_MAPPING_XML), 'r') as speaker_mapping_file:
|
with open(os.path.join(TRUTH_DIR, Constants.SPEAKER_MAPPING_XML), 'r') as speaker_mapping_file:
|
||||||
rootMappingElement: etree.ElementBase = etree.parse(speaker_mapping_file)
|
rootMappingElement: etree.ElementBase = etree.parse(speaker_mapping_file)
|
||||||
@@ -121,12 +126,13 @@ def merge():
|
|||||||
source, destination = mappingElement.xpath('.//Source/text()')[0], mappingElement.xpath('.//Destination/text()')[0]
|
source, destination = mappingElement.xpath('.//Source/text()')[0], mappingElement.xpath('.//Destination/text()')[0]
|
||||||
speakerMapping[source] = destination
|
speakerMapping[source] = destination
|
||||||
|
|
||||||
print('Mappings acquired')
|
logger.debug('Mappings loaded.')
|
||||||
|
|
||||||
root = etree.Element('CharacterList')
|
root = etree.Element('CharacterList')
|
||||||
pbar = enlighten.Counter(total=len(speakerList.keys()), unit='Speakers')
|
pbar = enlighten.Counter(total=len(speakerList.keys()), unit='Speakers')
|
||||||
seen = set()
|
seen = set()
|
||||||
|
|
||||||
|
logger.debug('Merging Speaker Mappings...')
|
||||||
for speaker in speakerList.keys():
|
for speaker in speakerList.keys():
|
||||||
while speakerMapping.get(speaker) is not None:
|
while speakerMapping.get(speaker) is not None:
|
||||||
if speakerMapping.get(speaker) == speaker:
|
if speakerMapping.get(speaker) == speaker:
|
||||||
@@ -140,10 +146,13 @@ def merge():
|
|||||||
characterElement.text = speaker
|
characterElement.text = speaker
|
||||||
pbar.update()
|
pbar.update()
|
||||||
|
|
||||||
|
logger.debug("Speaker mappings merged. Exporting to `characters.xml`")
|
||||||
|
|
||||||
with open(os.path.join(TRUTH_DIR, 'characters.xml'), 'w') as character_file:
|
with open(os.path.join(TRUTH_DIR, 'characters.xml'), 'w') as character_file:
|
||||||
etree.indent(root, space=" " * 4)
|
etree.indent(root, space=" " * 4)
|
||||||
character_file.write(etree.tostring(root, encoding=str, pretty_print=True))
|
character_file.write(etree.tostring(root, encoding=str, pretty_print=True))
|
||||||
|
|
||||||
|
|
||||||
@cli.command('ids')
|
@cli.command('ids')
|
||||||
def ids():
|
def ids():
|
||||||
"""Builds an XML file for identifying character id mappings"""
|
"""Builds an XML file for identifying character id mappings"""
|
||||||
|
|||||||
Reference in New Issue
Block a user