mirror of
https://github.com/Xevion/the-office.git
synced 2025-12-11 00:08:51 -06:00
Take necessary helpers from server.data.helpers for normalization, Add unidecode to Pipfile
- General cleanup, getting ready to delete server.data
This commit is contained in:
@@ -12,9 +12,7 @@ from typing import List, Optional, Union
|
||||
|
||||
import click
|
||||
from lxml import etree
|
||||
|
||||
sys.path[0] += '\\..\\..'
|
||||
from server.helpers import clean_string, get_close_matches_indexes, marked_item_merge
|
||||
from helpers import clean_string, get_close_matches_indexes, marked_item_merge
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger('normalization.main')
|
||||
@@ -218,9 +216,9 @@ def ids():
|
||||
|
||||
root = etree.Element('SpeakerList')
|
||||
split_patterns: List[str] = [r'\s*,\s*',
|
||||
r'\s*&\s*',
|
||||
r'\s+and,?(?:\s+|$)',
|
||||
r'\s*[\\/]\s*']
|
||||
r'\s*&\s*',
|
||||
r'\s+and,?(?:\s+|$)',
|
||||
r'\s*[\\/]\s*']
|
||||
split_pattern: str = '|'.join(split_patterns)
|
||||
|
||||
existing_characters_count: int = 0
|
||||
|
||||
Reference in New Issue
Block a user