From 6ccc5c73d8a2dbd841f7ab72e4fc3c192d4ef19d Mon Sep 17 00:00:00 2001 From: Xevion Date: Wed, 11 May 2022 02:24:21 -0500 Subject: [PATCH] Switch to Rich for logging & progress bars, remove enlighten --- Pipfile | 3 +- Pipfile.lock | 58 +++++++++++++++++------------------- server/normalization/main.py | 17 +++++------ 3 files changed, 36 insertions(+), 42 deletions(-) diff --git a/Pipfile b/Pipfile index 1fdb196..cab30c6 100644 --- a/Pipfile +++ b/Pipfile @@ -5,17 +5,16 @@ name = "pypi" [packages] click = "~=7.1.2" -enlighten = "~=1.6.2" requests = "~=2.24.0" bs4 = "~=0.0.1" beautifulsoup4 = "~=4.9.1" Flask = "~=1.1.2" -coloredlogs = "*" markupsafe = "<2.1.0" flask_cors = "*" flask_wtf = "*" unidecode = "*" lxml = "*" +rich = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index dd03177..c67aa88 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "c149bd4c6287166da9864b2a6657f7128c53bf34b8ebe5630949a7cca4d63de7" + "sha256": "5ba400034ba62c71fdb8dfbdac533e8a93a4dd4e3ee9ddd5abf4344df59460a3" }, "pipfile-spec": 6, "requires": { @@ -38,7 +38,6 @@ "sha256:63b8554ae2e0e7f43749b6715c734cc8f3883010a809bf16790102563e6cf25b", "sha256:9a0d099695bf621d4680dd6c73f6ad547f6a3442fbdbe80c4b1daa1edbc492fc" ], - "markers": "python_version >= '2.7'", "version": "==1.19.1" }, "bs4": { @@ -70,13 +69,12 @@ "index": "pypi", "version": "==7.1.2" }, - "coloredlogs": { + "commonmark": { "hashes": [ - "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", - "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0" + "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", + "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9" ], - "index": "pypi", - "version": "==15.0.1" + "version": "==0.9.1" }, "enlighten": { "hashes": [ @@ -110,20 +108,11 @@ "index": "pypi", "version": "==1.0.1" }, - "humanfriendly": { - "hashes": [ - "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", - "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==10.0" - }, "idna": { "hashes": [ "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.10" }, "itsdangerous": { @@ -131,7 +120,6 @@ "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.1.0" }, "jinja2": { @@ -139,16 +127,15 @@ "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419", "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==2.11.3" }, "jinxed": { "hashes": [ - "sha256:6a61ccf963c16aa885304f27e6e5693783676897cea0c7f223270c8b8e78baf8", - "sha256:d8f1731f134e9e6b04d95095845ae6c10eb15cb223a5f0cabdea87d4a279c305" + "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5", + "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b" ], "markers": "platform_system == 'Windows'", - "version": "==1.1.0" + "version": "==1.2.0" }, "lxml": { "hashes": [ @@ -292,13 +279,12 @@ "index": "pypi", "version": "==2.0.1" }, - "pyreadline3": { + "pygments": { "hashes": [ - "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae", - "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb" + "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb", + "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519" ], - "markers": "python_version >= '3.8' and sys_platform == 'win32'", - "version": "==3.4.1" + "version": "==2.12.0" }, "requests": { "hashes": [ @@ -308,12 +294,19 @@ "index": "pypi", "version": "==2.24.0" }, + "rich": { + "hashes": [ + "sha256:d13c6c90c42e24eb7ce660db397e8c398edd58acb7f92a2a88a95572b838aaa4", + "sha256:d239001c0fb7de985e21ec9a4bb542b5150350330bbc1849f835b9cbc8923b91" + ], + "index": "pypi", + "version": "==12.4.1" + }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "soupsieve": { @@ -324,6 +317,14 @@ "markers": "python_version >= '3.0'", "version": "==2.3.2.post1" }, + "typing-extensions": { + "hashes": [ + "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708", + "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376" + ], + "markers": "python_version < '3.9'", + "version": "==4.2.0" + }, "unidecode": { "hashes": [ "sha256:8e4352fb93d5a735c788110d2e7ac8e8031eb06ccbfe8d324ab71735015f9342", @@ -337,7 +338,6 @@ "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2", "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.25.11" }, "wcwidth": { @@ -352,7 +352,6 @@ "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43", "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==1.0.1" }, "wtforms": { @@ -360,7 +359,6 @@ "sha256:6b351bbb12dd58af57ffef05bc78425d08d1914e0fd68ee14143b7ade023c5bc", "sha256:837f2f0e0ca79481b92884962b914eba4e72b7a2daaf1f939c890ed0124b834b" ], - "markers": "python_version >= '3.7'", "version": "==3.0.1" } }, diff --git a/server/normalization/main.py b/server/normalization/main.py index e29f479..7ac3479 100644 --- a/server/normalization/main.py +++ b/server/normalization/main.py @@ -13,8 +13,11 @@ from typing import List, Optional, Union import click from lxml import etree from helpers import clean_string, get_close_matches_indexes, marked_item_merge +from lxml import etree +from rich.logging import RichHandler +from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn, track -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.INFO, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)]) logger = logging.getLogger('normalization.main') logger.setLevel(logging.DEBUG) coloredlogs.install(level=logger.level, logger=logger) @@ -133,15 +136,13 @@ def merge(): truth_files: List[str] = os.listdir(EPISODES_DIR) logger.debug(f"{len(truth_files)} truth files available.") - pbar = enlighten.Counter(total=len(truth_files), unit='Files') - for truth_filename in truth_files: + for truth_filename in track(truth_files): truth_path = os.path.join(EPISODES_DIR, truth_filename) with open(truth_path, 'r') as truth_file: root = etree.parse(truth_file) for speaker in root.xpath('//SceneList/Scene/Quote/Speaker/text()'): speaker_list[speaker] += 1 - pbar.update() logger.debug('Speakers acquired from Truth files.') @@ -155,11 +156,10 @@ def merge(): logger.debug('Mappings loaded.') root = etree.Element('CharacterList') - pbar = enlighten.Counter(total=len(speaker_list.keys()), unit='Speakers') seen = set() logger.debug('Merging Speaker Mappings...') - for speaker in speaker_list.keys(): + for speaker in track(speaker_list.keys(), 'Merging Map...'): while speaker_mapping.get(speaker) is not None: if speaker_mapping.get(speaker) == speaker: break @@ -170,7 +170,6 @@ def merge(): seen.add(speaker) character_element = etree.SubElement(root, 'Character') character_element.text = speaker - pbar.update() logger.debug("Speaker mappings merged. Exporting to `characters.xml`") @@ -405,9 +404,7 @@ def compile() -> None: episode_files = os.listdir(EPISODES_DIR) logger.debug(f'Beginning processing for {len(episode_files)} episode files.') - pbar = enlighten.Counter(total=len(episode_files), unit='Episodes') - - for file in episode_files: + for file in track(episode_files, 'Compiling Episodes'): file_path = os.path.join(EPISODES_DIR, file) output_path = os.path.join(COMPILE_DIR, file)