diff --git a/server/api.py b/server/api.py index c36e4d8..b963d73 100644 --- a/server/api.py +++ b/server/api.py @@ -8,8 +8,9 @@ import json import os from copy import deepcopy +# from flask_caching import cache import flask_wtf -from flask import current_app, jsonify, request +from flask import current_app, jsonify, request, send_from_directory from server.helpers import default, get_neighbors @@ -120,3 +121,8 @@ def api_character_quotes(character: str): return jsonify(quotes[index: index + 10]) else: return jsonify(quotes) + + +@current_app.route('/static/img/<filename>') +def custom_static(filename): + return send_from_directory('./data/img/', filename) diff --git a/server/cli.py b/server/cli.py index 67253b8..39fc16e 100644 --- a/server/cli.py +++ b/server/cli.py @@ -8,17 +8,18 @@ import os import re import sys import time -from collections import defaultdict +from collections import OrderedDict, defaultdict from pprint import pprint -from typing import List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union import click import enlighten import requests from bs4 import BeautifulSoup +from lxml import etree sys.path[0] += '\\..' -from server.helpers import algolia_transform, character_id +from server.helpers import algolia_transform, character_id, clean_string from server.process import DATA_DIR, get_appearances, get_episodes, get_filepath, load_file, \ save_file, sleep_from, \ verify_episode @@ -42,7 +43,7 @@ def misc(): @misc.command('characters') @click.option('-s', '--season', type=int, help='Season to be processed for character names') @click.option('-e', '--episode', type=int, help='Episode to be processed. 
Requires --season to be specified.') -@click.option('--all', is_flag=True, help='Process all episodes, regardless of previous specifications.') +@click.option('-a', '--all', is_flag=True, help='Process all episodes, regardless of previous specifications.') @click.option('-i', '--individual', is_flag=True, help='List characters from individual episodes instead of just compiling a masterlist') def characters(season: int, episode: int, all: bool, individual: bool): @@ -90,7 +91,7 @@ def characters(season: int, episode: int, all: bool, individual: bool): # print(master) logger.info( - ', '.join(item['name'] for item in sorted(master.values(), reverse=True, key=lambda item: item['appearances']))) + ', '.join(item['name'] for item in sorted(master.values(), reverse=True, key=lambda item: item['appearances']))) @cli.command('fetch') @@ -98,7 +99,7 @@ def characters(season: int, episode: int, all: bool, individual: bool): help='Season to be fetched. Without --episode, will download all episodes in a season.') @click.option('-e', '--episode', type=int, help='Specific episode to be fetched. 
Requires --season to be specified.') @click.option('-d', '--delay', type=float, default=0.5, help='Delay between each request') -@click.option('--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') +@click.option('-a', '--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') @click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.') @click.option('-ss', '--silent-skip', is_flag=True, help='Skip existing files silently') def fetch(season: int, episode: int, delay: float, all: bool, overwrite: bool, silent_skip: bool): @@ -136,7 +137,6 @@ def fetch(season: int, episode: int, delay: float, all: bool, overwrite: bool, s with enlighten.Manager() as manager: with manager.counter(total=len(episodes), desc='Fetching...', unit='episodes') as pbar: for _season, _episode in episodes: - filepath = get_filepath(_season, _episode, 'html') # Check if HTML file exists @@ -167,7 +167,7 @@ def fetch(season: int, episode: int, delay: float, all: bool, overwrite: bool, s @click.option('-s', '--season', type=int, help='Season to be fetched. Without --episode, will download all episodes in a season.') @click.option('-e', '--episode', type=int, help='Specific episode to be fetched. Requires --season to be specified.') -@click.option('--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') +@click.option('-a', '--all', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') @click.option('-o', '--overwrite', is_flag=True, help='Overwrite if a file already exists.') @click.option('-ss', '--silent-skip', is_flag=True, help='Skip missing/existing files silently') @click.option('-ssm', '--silent-skip-missing', is_flag=True, help='Skip missing files silently') @@ -239,7 +239,7 @@ def preprocess(season: int, episode: int, all: bool, overwrite: bool, silent_ski @click.option('-s', '--season', type=int, help='Season to be fetched. 
Without --episode, will download all episodes in a season.') @click.option('-e', '--episode', type=int, help='Specific episode to be fetched. Requires --season to be specified.') -@click.option('--all', 'all_', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') +@click.option('-a', '--all', 'all_', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') @click.option('-r', '--report', is_flag=True, help='Report quote statistics once processing completed.') def process(season: Optional[int], episode: Optional[int], all_: bool, report: bool): """ @@ -267,6 +267,9 @@ def process(season: Optional[int], episode: Optional[int], all_: bool, report: b logger.info('Check --help for more information on this command.') return + speakers: Dict = load_file(os.path.join(DATA_DIR, 'speakers.json'), True) + speakers = {original: new for original, new in speakers.items() if original != new and type(new) == str} + quote: Union[str, List[str]] section_num: int for _season, _episode in episodes: @@ -284,11 +287,12 @@ def process(season: Optional[int], episode: Optional[int], all_: bool, report: b for quote in section_data: quote = quote.split('|', 1) + section['quotes'].append( - { - 'speaker': quote[0], - 'text': quote[1] - } + { + 'speaker': clean_string(speakers.get(quote[0], quote[0])), + 'text': clean_string(quote[1]) + } ) sections.append(section) except FileNotFoundError: @@ -298,7 +302,7 @@ def process(season: Optional[int], episode: Optional[int], all_: bool, report: b logger.exception(f'Skipped Season {_season}, Episode {_episode}: Malformed data.') if quote: logger.info( - f'Last quote seen "{quote if type(quote) is str else "|".join(quote)}" in section {section_num}') + f'Last quote seen "{quote if type(quote) is str else "|".join(quote)}" in section {section_num}') else: # Save processed data save_file(get_filepath(_season, _episode, 'processed'), sections, True) @@ -324,6 +328,81 @@ def process(season: Optional[int], 
episode: Optional[int], all_: bool, report: b logger.info(', '.join(speakers)) +@cli.command('xml') +@click.option('-s', '--season', type=int, help='Season to be fetched. Without --episode, will download all episodes in a season.') +@click.option('-e', '--episode', type=int, help='Specific episode to be fetched. Requires --season to be specified.') +@click.option('-a', '--all', 'all_', is_flag=True, help='Fetch all episodes, regardless of previous specifications.') +@click.option('-r', '--report', is_flag=True, help='Report quote statistics once processing completed.') +def xml(season: Optional[int], episode: Optional[int], all_: bool, report: bool): + """ + Converts processed episode quote data into XML. + """ + episodes: List[Tuple[int, int]] + + if all_: + episodes = list(get_episodes()) + elif season: + if episode: + if verify_episode(season, episode): + episodes = [(season, episode)] + else: + logger.error(f'Season {season}, Episode {episode} is not a valid combination.') + return + else: + episodes = list(get_episodes(season=season)) + logger.info(f'Processing Season {season}...') + else: + if episode: + logger.info('You must specify more than just an episode.') + else: + logger.info('You must specify which episodes to process.') + logger.info('Check --help for more information on this command.') + return + + for _season, _episode in episodes: + try: + processed_data = load_file(get_filepath(_season, _episode, 'processed'), True) + rootElement = etree.Element('SceneList') + for scene in processed_data: + sceneElement = etree.Element('Scene') + for quote in scene['quotes']: + charactersElement = etree.Element('Characters') + sceneElement.append(charactersElement) + + rootElement.append(sceneElement) + + save_file(get_filepath(_season, _episode, 'xml'), etree.tostring(rootElement).decode()) + except FileNotFoundError: + logger.info(f'Skipped Season {_season}, Episode {_episode}, no file found.') + continue + +@cli.command('truth') +def truth(): + """Modify ground-truth data (stub; not yet implemented).""" + + +@cli.command('characters') 
+def characters(): + """Collects all characters from every single processed JSON file.""" + episodes = list(get_episodes()) + speakersList = OrderedDict() + + for _season, _episode in episodes: + try: + processed_data = load_file(get_filepath(_season, _episode, 'processed'), True) + for scene in processed_data: + for quote in scene['quotes']: + speakersList[quote['speaker']] = None + except FileNotFoundError: + logger.warning(f"Skipped {_season}-{_episode}, no file found.") + + speaker_data = OrderedDict([(item, item) for item in sorted(speakersList.keys())]) + print(f'{len(speaker_data)} speakers identified.') + + pprint(list(speaker_data.keys())) + save_file(os.path.join(DATA_DIR, 'speakers.json'), speaker_data, True) + + @cli.group('build') def build(): """Build final data files used by Algolia and the backend API.""" @@ -437,18 +516,18 @@ def final(silent_skip: bool, process_: bool): except FileNotFoundError: if not silent_skip: logger.warning( - f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.') + f'No data for Season {season_id}, Episode {episode_id} available. Null data inserted.') episode_data = None description = descriptions[season_id - 1][episode_id - 1] seasons[season_id - 1]['episodes'].append( - { - 'title': description['title'].strip(), - 'description': description['description'].strip(), - 'episode_id': episode_id, - 'characters': get_appearances(season_id, episode_id), - 'scenes': episode_data - } + { + 'title': description['title'].strip(), + 'description': description['description'].strip(), + 'episode_id': episode_id, + 'characters': get_appearances(season_id, episode_id), + 'scenes': episode_data + } ) logger.info('Saving to data.json') diff --git a/server/data.py b/server/data.py new file mode 100644 index 0000000..267ef7e --- /dev/null +++ b/server/data.py @@ -0,0 +1,15 @@ +""" +data.py + +Manages API quote/character data, caching static responses and reloading from disk. 
+""" +import os +import json + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +with open(os.path.join(BASE_DIR, 'data', 'data.json'), 'r', encoding='utf-8') as file: + data = json.load(file) + +with open(os.path.join(BASE_DIR, 'data', 'characters.json'), 'r', encoding='utf-8') as file: + character_data = json.load(file) + diff --git a/server/helpers.py b/server/helpers.py index 0b6109f..3d91174 100644 --- a/server/helpers.py +++ b/server/helpers.py @@ -3,8 +3,16 @@ helpers.py """ +import random +import re +import string +import unicodedata +from collections import OrderedDict +from difflib import SequenceMatcher +from heapq import nlargest as _nlargest +from typing import List, Optional, Tuple -from typing import List, Tuple, Optional +import unidecode episode_counts = [6, 22, 23, 14, 26, 24, 24, 24, 23] @@ -55,5 +63,82 @@ def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[s def is_main_character(name: str) -> bool: return None + def character_id(name: str) -> str: return '-'.join(name.split(' ')).lower() + + +alphabet: str = string.ascii_letters + string.digits + + +def random_id(length: int = 8) -> str: + """Generate a random {length} character long string.""" + return ''.join(random.choices(alphabet, k=length)) + + +def char_filter(string): + latin = re.compile('[a-zA-Z]+') + for char in unicodedata.normalize('NFC', string): + decoded = unidecode.unidecode(char) + if latin.match(decoded): + yield char + else: + yield decoded + + +def clean_string(string): + return "".join(char_filter(string)) + + +def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6): + """Use SequenceMatcher to return a list of the indexes of the best + "good enough" matches. word is a sequence for which close matches + are desired (typically a string). + possibilities is a list of sequences against which to match word + (typically a list of strings). + Optional arg n (default 3) is the maximum number of close matches to + return. n must be > 0. 
+ Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities + that don't score at least that similar to word are ignored. + """ + + if not n > 0: + raise ValueError("n must be > 0: %r" % (n,)) + if not 0.0 <= cutoff <= 1.0: + raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,)) + result = [] + s = SequenceMatcher() + s.set_seq2(word) + for idx, x in enumerate(possibilities): + s.set_seq1(x) + if s.real_quick_ratio() >= cutoff and \ + s.quick_ratio() >= cutoff and \ + s.ratio() >= cutoff: + result.append((s.ratio(), idx)) + + # Move the best scorers to head of list + result = _nlargest(n, result) + + # Strip scores for the best n matches + return [x for score, x in result] + + +def marked_item_merge(keys: List[str], values: List[int]) -> Tuple[List[str], List[str]]: + """Add the values of identical keys together, then return both the keys and values""" + merge = OrderedDict() + for key, value in zip(keys, values): + # Already inserted, now make/keep it negative + if key in merge.keys(): + # Keys that haven't been turned over need to be made negative + if merge[key] > 0: + merge[key] = -merge[key] + + # And then subtract the value in all cases + merge[key] -= value + else: + # Values that are positive didn't merge with other counts. + merge[key] = value + + keys, values = zip(*merge.items()) + values = [f'{-value}*' if value < 0 else str(value) for value in values] + return keys, values