Add annotated text processing into final build phase, fix faulty annotations/identifiers

This commit is contained in:
Xevion
2022-05-19 19:22:26 -05:00
parent a2a074079e
commit 1a86f2f4bb
190 changed files with 2144 additions and 13852 deletions

View File

@@ -195,7 +195,7 @@
"actor": ""
},
"bob-vance": {
"name": "Bob V ance",
"name": "Bob Vance",
"summary": "",
"actor": ""
},

View File

@@ -611,7 +611,6 @@
<AnnotatedText>{Ryan} and others</AnnotatedText>
<Characters>
<Character type="main">ryan</Character>
<Character type="meta">others</Character>
</Characters>
</Speaker>
<Speaker annotated="false">
@@ -905,9 +904,8 @@
<AnnotatedText>Voice of {Thomas Dean}</AnnotatedText>
<Character type="background">thomas-dean</Character>
</Speaker>
<Speaker annotated="true">
<Speaker annotated="false">
<RawText>DunMiff/sys</RawText>
<AnnotatedText>DunMiff/sys</AnnotatedText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>

View File

@@ -159,7 +159,6 @@
<SpeakerText annotated="true">{Ryan} and others</SpeakerText>
<Characters>
<Character type="main">ryan</Character>
<Character type="meta">others</Character>
</Characters>
</Speaker>
</Quote>

View File

@@ -968,7 +968,7 @@
<Quote>
<QuoteText>[on monitor] Who am I?</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>
@@ -995,7 +995,7 @@
<Quote>
<QuoteText>[on monitor] Not sure. Just became self-aware. So much to figure out. I think I am programmed to be your enemy. I think it is my job to destroy you when it comes to selling paper.</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>
@@ -1013,7 +1013,7 @@
<Quote>
<QuoteText>[on monitor] What is a Jim?</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>
@@ -1548,7 +1548,7 @@
<Quote>
<QuoteText>[on monitor] You do look worried.</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>
@@ -1902,7 +1902,7 @@
<Quote>
<QuoteText>[on monitor] Oh. I didn't realize we could use the leads we stole from Staples.</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>
@@ -2451,7 +2451,7 @@
<Quote>
<QuoteText>[on monitor] You beat me. You are the superior being.</QuoteText>
<Speaker>
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
<Characters>
<Character type="background">dunmiff-sys</Character>
</Characters>

View File

@@ -479,8 +479,6 @@ def compile() -> None:
etree.indent(compile_root, space=" " * 4)
compile_file.write(etree.tostring(compile_root, encoding=str, pretty_print=True))
pbar.update()
logger.info('Completed episode data compiling.')
@@ -694,16 +692,45 @@ def app(path: str, make_dir: bool) -> None:
'appearances': count
}
scenes = [
{
'quotes': [
{
'speaker': quote.xpath('./Speaker/SpeakerText')[0].text,
'text': quote.find('QuoteText').text
scenes = []
for scene in episode_root.xpath('./Scene'):
quotes = []
for quote in scene.xpath('./Quote'):
speaker_text = quote.xpath('./Speaker/SpeakerText')[0]
is_annotated = speaker_text.attrib['annotated'] == 'true'
quote_text = quote.find('QuoteText').text
quote_json = {
'speaker': speaker_text.text,
'text': quote_text,
"isAnnotated": is_annotated
}
if is_annotated:
character_elements = quote.xpath('./Speaker/Characters/Character')
split_speaker_text: List[str] = re.split(r'({[^}]+})', speaker_text.text)
if len(split_speaker_text[0]) == 0: del split_speaker_text[0]
if len(split_speaker_text[-1]) == 0: del split_speaker_text[-1]
text_start: int = 0 if split_speaker_text[0].startswith('{') else 1
# {Jim}, {Dwight}, and {Andy}'s Computer
# [jim, dwight, andy]
# -> {jim}, {dwight}, and {andy}'s Computer
quote_json['characters'] = {
character.text: None for character in character_elements
}
for quote in scene.xpath('./Quote')]
} for scene in episode_root.xpath('./Scene')
]
for i, character in enumerate(character_elements):
index = text_start + (i * 2)
quote_json['characters'][character.text] = split_speaker_text[index][1:-1]
split_speaker_text[index] = '{' + character.text + '}'
quote_json['speaker'] = ''.join(split_speaker_text)
quotes.append(quote_json)
scenes.append({'quotes': quotes})
all_season_data[seasonNum - 1].append({
'title': description['title'],