Add meta.json value checking, and add coloredlogs to visually improve logging output

This commit is contained in:
Xevion
2022-05-07 00:32:27 -05:00
parent 0be969e5bf
commit 574bd4c449
3 changed files with 38 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ requests = "~=2.24.0"
bs4 = "~=0.0.1"
beautifulsoup4 = "~=4.9.1"
Flask = "~=1.1.2"
coloredlogs = "*"
[dev-packages]

28
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "ba1c0e145f712b73a7cc267fef64a02068e76c3084ec627e3973cdcbe5217a12"
"sha256": "c56576435e88c13e4d8b9cfa57a32a0f50bd7113e24c0bd28a0731a6abe4fd7c"
},
"pipfile-spec": 6,
"requires": {
@@ -70,6 +70,14 @@
"index": "pypi",
"version": "==7.1.2"
},
"coloredlogs": {
"hashes": [
"sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934",
"sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"
],
"index": "pypi",
"version": "==15.0.1"
},
"enlighten": {
"hashes": [
"sha256:db00dfc4027a2dad2aaa4bff4b5fd8d8ab8376e175a02d02e156992f08062437",
@@ -86,6 +94,14 @@
"index": "pypi",
"version": "==1.1.4"
},
"humanfriendly": {
"hashes": [
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477",
"sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==10.0"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
@@ -164,6 +180,14 @@
"markers": "python_version >= '3.7'",
"version": "==2.1.1"
},
"pyreadline3": {
"hashes": [
"sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae",
"sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"
],
"markers": "python_version >= '3.8' and sys_platform == 'win32'",
"version": "==3.4.1"
},
"requests": {
"hashes": [
"sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
@@ -177,7 +201,7 @@
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0"
},
"soupsieve": {

View File

@@ -4,6 +4,7 @@ import os
import re
import sys
import enlighten
import coloredlogs
from collections import Counter, OrderedDict
from pprint import pprint
from typing import List, Optional, Union
@@ -17,6 +18,7 @@ from server.helpers import clean_string, get_close_matches_indexes, marked_item_
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('normalization.main')
logger.setLevel(logging.DEBUG)
coloredlogs.install(level=logger.level, logger=logger)
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
TRUTH_DIR = os.path.join(CUR_DIR, 'truth')
@@ -38,12 +40,14 @@ class Constants:
CHARACTERS_XML = 'characters.xml'
META_JSON = 'meta.json'
class ConstantPaths:
SPEAKER_MAPPING = os.path.join(TRUTH_DIR, Constants.SPEAKER_MAPPING_XML)
IDENTIFIERS = os.path.join(CHARACTERS_DIR, Constants.CHARACTERS_XML)
IDENTIFIERS = os.path.join(CHARACTERS_DIR, Constants.IDENTIFIERS_XML)
CHARACTERS = os.path.join(TRUTH_DIR, Constants.CHARACTERS_XML)
META = os.path.join(TRUTH_DIR, Constants.META_JSON)
@cli.command('truth')
def truth():
"""Step 1: Builds raw files into truth files."""
@@ -274,6 +278,7 @@ def ids():
@cli.command('meta')
def meta() -> None:
"""Creates a meta file for storing each character identifier's meta meaning (main/recurring/background/meta)"""
logger.debug('Creating meta.json')
with open(ConstantPaths.IDENTIFIERS, 'r') as identifiers_file:
@@ -284,6 +289,11 @@ def meta() -> None:
if os.path.exists(ConstantPaths.META):
with open(ConstantPaths.META, 'r') as meta_file:
meta_data = OrderedDict(json.load(meta_file))
possible_values = [None, 'main', 'recurring', 'background', 'meta']
for character_id, character_type in meta_data.items():
if character_type not in possible_values:
logger.warning(f'Unexpected value for `{character_id}` = `{character_type}`')
else:
meta_data = OrderedDict()