Take necessary helpers from server.data.helpers for normalization; add unidecode to Pipfile

- General cleanup, getting ready to delete server.data
This commit is contained in:
Xevion
2022-05-10 00:46:24 -05:00
parent cb06edc0d3
commit fd107b0d60
5 changed files with 94 additions and 77 deletions

View File

@@ -59,11 +59,6 @@ def algolia_transform(old_dictionary: dict, key_list: List[Tuple[str, Optional[s
return new_dictionary
def is_main_character(name: str) -> bool:
    """Placeholder for a main-character check.

    The body is unimplemented: it ignores ``name`` and always returns
    ``None``, despite the ``bool`` annotation.
    """
    return None
def character_id(name: str) -> str:
    """Build an identifier from ``name``: spaces become hyphens, then lowercase.

    Every single space character is replaced individually, so consecutive
    spaces produce consecutive hyphens.
    """
    hyphenated = name.replace(' ', '-')
    return hyphenated.lower()
@@ -76,69 +71,4 @@ def random_id(length: int = 8) -> str:
return ''.join(random.choices(alphabet, k=length))
def char_filter(string):
    """Yield the characters of ``string`` with non-Latin ones transliterated.

    The input is first normalized to NFC. Each character is then passed
    through ``unidecode``: when the transliteration starts with ASCII
    letters, the original character is yielded untouched; otherwise the
    transliteration itself is yielded in its place.
    """
    starts_with_letters = re.compile('[a-zA-Z]+')
    for original in unicodedata.normalize('NFC', string):
        transliterated = unidecode.unidecode(original)
        # Keep the source character whenever it maps onto plain letters;
        # anything else is swapped for its unidecode form.
        yield original if starts_with_letters.match(transliterated) else transliterated
def clean_string(string):
    """Return ``string`` rebuilt from the pieces produced by ``char_filter``."""
    pieces = char_filter(string)
    return "".join(pieces)
def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6):
    """Return the indexes of the entries in ``possibilities`` closest to ``word``.

    Each candidate is compared against ``word`` with a ``SequenceMatcher``;
    candidates scoring below ``cutoff`` are discarded and the indexes of the
    ``n`` highest-scoring survivors are returned, best first.

    Args:
        word: Sequence to match against (typically a string).
        possibilities: Iterable of candidate sequences (typically strings).
        n: Maximum number of indexes to return; must be greater than 0.
        cutoff: Minimum similarity ratio, a float within [0.0, 1.0].

    Returns:
        A list of positions into ``possibilities``, ordered from highest
        score to lowest.

    Raises:
        ValueError: If ``n`` is not positive or ``cutoff`` is out of range.
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))

    matcher = SequenceMatcher()
    matcher.set_seq2(word)

    scored = []
    for position, candidate in enumerate(possibilities):
        matcher.set_seq1(candidate)
        # The three ratios are tried in order; a candidate is dropped as
        # soon as one of them falls below the cutoff.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        if matcher.ratio() < cutoff:
            continue
        scored.append((matcher.ratio(), position))

    # Keep only the n best (score, index) pairs, highest first.
    best = _nlargest(n, scored)
    # Callers only want the positions, not the scores.
    return [position for _, position in best]
def marked_item_merge(keys: List[str], values: List[int]) -> Tuple[Tuple[str, ...], List[str]]:
    """Sum the values of identical keys and flag the keys that were merged.

    Keys are paired with values positionally. The first occurrence of a key
    fixes its position in the output; later occurrences add to its total.

    Args:
        keys: Labels, possibly containing duplicates.
        values: Counts aligned with ``keys``.

    Returns:
        A ``(keys, labels)`` pair. ``keys`` is a tuple of the distinct keys
        in first-seen order. ``labels`` is a list of the matching totals as
        strings, with a trailing ``*`` on every total that was built from
        more than one occurrence of its key. Empty input yields ``((), [])``.
    """
    totals = OrderedDict()
    # Merged keys are tracked explicitly rather than encoded in the sign of
    # the running total, so zero and negative values are labelled correctly.
    merged = set()
    for key, value in zip(keys, values):
        if key in totals:
            totals[key] += value
            merged.add(key)
        else:
            totals[key] = value

    labels = [
        f'{total}*' if key in merged else str(total)
        for key, total in totals.items()
    ]
    # tuple(totals) keeps the tuple-of-keys shape callers already receive,
    # and unlike unpacking zip(*items) it also works when there are no items.
    return tuple(totals), labels