mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-15 20:13:22 -06:00
Add annotated text processing into final build phase, fix faulty annotations/identifiers
This commit is contained in:
@@ -195,7 +195,7 @@
|
||||
"actor": ""
|
||||
},
|
||||
"bob-vance": {
|
||||
"name": "Bob V ance",
|
||||
"name": "Bob Vance",
|
||||
"summary": "",
|
||||
"actor": ""
|
||||
},
|
||||
|
||||
@@ -611,7 +611,6 @@
|
||||
<AnnotatedText>{Ryan} and others</AnnotatedText>
|
||||
<Characters>
|
||||
<Character type="main">ryan</Character>
|
||||
<Character type="meta">others</Character>
|
||||
</Characters>
|
||||
</Speaker>
|
||||
<Speaker annotated="false">
|
||||
@@ -905,9 +904,8 @@
|
||||
<AnnotatedText>Voice of {Thomas Dean}</AnnotatedText>
|
||||
<Character type="background">thomas-dean</Character>
|
||||
</Speaker>
|
||||
<Speaker annotated="true">
|
||||
<Speaker annotated="false">
|
||||
<RawText>DunMiff/sys</RawText>
|
||||
<AnnotatedText>DunMiff/sys</AnnotatedText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
|
||||
@@ -159,7 +159,6 @@
|
||||
<SpeakerText annotated="true">{Ryan} and others</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="main">ryan</Character>
|
||||
<Character type="meta">others</Character>
|
||||
</Characters>
|
||||
</Speaker>
|
||||
</Quote>
|
||||
|
||||
@@ -968,7 +968,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] Who am I?</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
@@ -995,7 +995,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] Not sure. Just became self-aware. So much to figure out. I think I am programmed to be your enemy. I think it is my job to destroy you when it comes to selling paper.</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
@@ -1013,7 +1013,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] What is a Jim?</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
@@ -1548,7 +1548,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] You do look worried.</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
@@ -1902,7 +1902,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] Oh. I didn't realize we could use the leads we stole from Staples.</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
@@ -2451,7 +2451,7 @@
|
||||
<Quote>
|
||||
<QuoteText>[on monitor] You beat me. You are the superior being.</QuoteText>
|
||||
<Speaker>
|
||||
<SpeakerText annotated="true">DunMiff/sys</SpeakerText>
|
||||
<SpeakerText annotated="false">DunMiff/sys</SpeakerText>
|
||||
<Characters>
|
||||
<Character type="background">dunmiff-sys</Character>
|
||||
</Characters>
|
||||
|
||||
@@ -479,8 +479,6 @@ def compile() -> None:
|
||||
etree.indent(compile_root, space=" " * 4)
|
||||
compile_file.write(etree.tostring(compile_root, encoding=str, pretty_print=True))
|
||||
|
||||
pbar.update()
|
||||
|
||||
logger.info('Completed episode data compiling.')
|
||||
|
||||
|
||||
@@ -694,16 +692,45 @@ def app(path: str, make_dir: bool) -> None:
|
||||
'appearances': count
|
||||
}
|
||||
|
||||
scenes = [
|
||||
{
|
||||
'quotes': [
|
||||
{
|
||||
'speaker': quote.xpath('./Speaker/SpeakerText')[0].text,
|
||||
'text': quote.find('QuoteText').text
|
||||
scenes = []
|
||||
for scene in episode_root.xpath('./Scene'):
|
||||
quotes = []
|
||||
|
||||
for quote in scene.xpath('./Quote'):
|
||||
speaker_text = quote.xpath('./Speaker/SpeakerText')[0]
|
||||
is_annotated = speaker_text.attrib['annotated'] == 'true'
|
||||
quote_text = quote.find('QuoteText').text
|
||||
|
||||
quote_json = {
|
||||
'speaker': speaker_text.text,
|
||||
'text': quote_text,
|
||||
"isAnnotated": is_annotated
|
||||
}
|
||||
|
||||
if is_annotated:
|
||||
character_elements = quote.xpath('./Speaker/Characters/Character')
|
||||
split_speaker_text: List[str] = re.split(r'({[^}]+})', speaker_text.text)
|
||||
if len(split_speaker_text[0]) == 0: del split_speaker_text[0]
|
||||
if len(split_speaker_text[-1]) == 0: del split_speaker_text[-1]
|
||||
text_start: int = 0 if split_speaker_text[0].startswith('{') else 1
|
||||
|
||||
# {Jim}, {Dwight}, and {Andy}'s Computer
|
||||
# [jim, dwight, andy]
|
||||
# -> {jim}, {dwight}, and {andy}'s Computer
|
||||
|
||||
quote_json['characters'] = {
|
||||
character.text: None for character in character_elements
|
||||
}
|
||||
for quote in scene.xpath('./Quote')]
|
||||
} for scene in episode_root.xpath('./Scene')
|
||||
]
|
||||
|
||||
for i, character in enumerate(character_elements):
|
||||
index = text_start + (i * 2)
|
||||
quote_json['characters'][character.text] = split_speaker_text[index][1:-1]
|
||||
split_speaker_text[index] = '{' + character.text + '}'
|
||||
|
||||
quote_json['speaker'] = ''.join(split_speaker_text)
|
||||
|
||||
quotes.append(quote_json)
|
||||
scenes.append({'quotes': quotes})
|
||||
|
||||
all_season_data[seasonNum - 1].append({
|
||||
'title': description['title'],
|
||||
|
||||
Reference in New Issue
Block a user