Take necessary helpers from server.data.helpers for normalization; add unidecode to Pipfile

- General cleanup, getting ready to delete server.data
This commit is contained in:
Xevion
2022-05-10 00:46:24 -05:00
parent cb06edc0d3
commit fd107b0d60
5 changed files with 94 additions and 77 deletions

View File

@@ -12,9 +12,7 @@ from typing import List, Optional, Union
import click
from lxml import etree
sys.path[0] += '\\..\\..'
from server.helpers import clean_string, get_close_matches_indexes, marked_item_merge
from helpers import clean_string, get_close_matches_indexes, marked_item_merge
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('normalization.main')
@@ -218,9 +216,9 @@ def ids():
root = etree.Element('SpeakerList')
split_patterns: List[str] = [r'\s*,\s*',
r'\s*&\s*',
r'\s+and,?(?:\s+|$)',
r'\s*[\\/]\s*']
r'\s*&\s*',
r'\s+and,?(?:\s+|$)',
r'\s*[\\/]\s*']
split_pattern: str = '|'.join(split_patterns)
existing_characters_count: int = 0