Add meta.json value checking; add coloredlogs to make logging output easier to read

This commit is contained in:
Xevion
2022-05-07 00:32:27 -05:00
parent 0be969e5bf
commit 574bd4c449
3 changed files with 38 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ requests = "~=2.24.0"
bs4 = "~=0.0.1" bs4 = "~=0.0.1"
beautifulsoup4 = "~=4.9.1" beautifulsoup4 = "~=4.9.1"
Flask = "~=1.1.2" Flask = "~=1.1.2"
coloredlogs = "*"
[dev-packages] [dev-packages]

28
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "ba1c0e145f712b73a7cc267fef64a02068e76c3084ec627e3973cdcbe5217a12" "sha256": "c56576435e88c13e4d8b9cfa57a32a0f50bd7113e24c0bd28a0731a6abe4fd7c"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
@@ -70,6 +70,14 @@
"index": "pypi", "index": "pypi",
"version": "==7.1.2" "version": "==7.1.2"
}, },
"coloredlogs": {
"hashes": [
"sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934",
"sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"
],
"index": "pypi",
"version": "==15.0.1"
},
"enlighten": { "enlighten": {
"hashes": [ "hashes": [
"sha256:db00dfc4027a2dad2aaa4bff4b5fd8d8ab8376e175a02d02e156992f08062437", "sha256:db00dfc4027a2dad2aaa4bff4b5fd8d8ab8376e175a02d02e156992f08062437",
@@ -86,6 +94,14 @@
"index": "pypi", "index": "pypi",
"version": "==1.1.4" "version": "==1.1.4"
}, },
"humanfriendly": {
"hashes": [
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477",
"sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==10.0"
},
"idna": { "idna": {
"hashes": [ "hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
@@ -164,6 +180,14 @@
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==2.1.1" "version": "==2.1.1"
}, },
"pyreadline3": {
"hashes": [
"sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae",
"sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"
],
"markers": "python_version >= '3.8' and sys_platform == 'win32'",
"version": "==3.4.1"
},
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
@@ -177,7 +201,7 @@
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
], ],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0" "version": "==1.16.0"
}, },
"soupsieve": { "soupsieve": {

View File

@@ -4,6 +4,7 @@ import os
import re import re
import sys import sys
import enlighten import enlighten
import coloredlogs
from collections import Counter, OrderedDict from collections import Counter, OrderedDict
from pprint import pprint from pprint import pprint
from typing import List, Optional, Union from typing import List, Optional, Union
@@ -17,6 +18,7 @@ from server.helpers import clean_string, get_close_matches_indexes, marked_item_
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('normalization.main') logger = logging.getLogger('normalization.main')
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
coloredlogs.install(level=logger.level, logger=logger)
CUR_DIR = os.path.dirname(os.path.abspath(__file__)) CUR_DIR = os.path.dirname(os.path.abspath(__file__))
TRUTH_DIR = os.path.join(CUR_DIR, 'truth') TRUTH_DIR = os.path.join(CUR_DIR, 'truth')
@@ -38,12 +40,14 @@ class Constants:
CHARACTERS_XML = 'characters.xml' CHARACTERS_XML = 'characters.xml'
META_JSON = 'meta.json' META_JSON = 'meta.json'
class ConstantPaths: class ConstantPaths:
SPEAKER_MAPPING = os.path.join(TRUTH_DIR, Constants.SPEAKER_MAPPING_XML) SPEAKER_MAPPING = os.path.join(TRUTH_DIR, Constants.SPEAKER_MAPPING_XML)
IDENTIFIERS = os.path.join(CHARACTERS_DIR, Constants.CHARACTERS_XML) IDENTIFIERS = os.path.join(CHARACTERS_DIR, Constants.IDENTIFIERS_XML)
CHARACTERS = os.path.join(TRUTH_DIR, Constants.CHARACTERS_XML) CHARACTERS = os.path.join(TRUTH_DIR, Constants.CHARACTERS_XML)
META = os.path.join(TRUTH_DIR, Constants.META_JSON) META = os.path.join(TRUTH_DIR, Constants.META_JSON)
@cli.command('truth') @cli.command('truth')
def truth(): def truth():
"""Step 1: Builds raw files into truth files.""" """Step 1: Builds raw files into truth files."""
@@ -274,6 +278,7 @@ def ids():
@cli.command('meta') @cli.command('meta')
def meta() -> None: def meta() -> None:
"""Creates a meta file for storing each character identifier's meta meaning (main/recurring/background/meta)"""
logger.debug('Creating meta.json') logger.debug('Creating meta.json')
with open(ConstantPaths.IDENTIFIERS, 'r') as identifiers_file: with open(ConstantPaths.IDENTIFIERS, 'r') as identifiers_file:
@@ -284,6 +289,11 @@ def meta() -> None:
if os.path.exists(ConstantPaths.META): if os.path.exists(ConstantPaths.META):
with open(ConstantPaths.META, 'r') as meta_file: with open(ConstantPaths.META, 'r') as meta_file:
meta_data = OrderedDict(json.load(meta_file)) meta_data = OrderedDict(json.load(meta_file))
possible_values = [None, 'main', 'recurring', 'background', 'meta']
for character_id, character_type in meta_data.items():
if character_type not in possible_values:
logger.warning(f'Unexpected value for `{character_id}` = `{character_type}`')
else: else:
meta_data = OrderedDict() meta_data = OrderedDict()