[+] Add nlp utils
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
__version__ = "1.0.9"
|
||||
__version__ = "1.0.10"
|
||||
|
||||
import dataclasses
|
||||
import hashlib
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
"""
|
||||
Natural language processing utilities
|
||||
"""
|
||||
|
||||
|
||||
def camel_split(camel: str) -> list[str]:
    """
    Break a camelCase / PascalCase string into its component words

    A boundary is placed wherever the letter case flips between two
    neighbouring characters. A run of capitals stays together, except that
    its last capital is attached to the lowercase run that follows it
    (e.g. "HTTPResponse" gives ['HTTP', 'Response']).

    Credit: https://stackoverflow.com/a/58996565/7346633

    :param camel: E.g. HelloWorld or helloWorld
    :return: E.g. ['Hello', 'World']; a string that is entirely upper case,
        entirely lower case, or numeric is returned unsplit as a
        one-element list
    """
    # Nothing to split when there is no mix of cases
    if camel.isupper() or camel.islower() or camel.isnumeric():
        return [camel]

    upper_flags = [ch.isupper() for ch in camel]

    # Collect the indices at which a new word begins
    boundaries = [0]
    for pos, (prev_upper, cur_upper) in enumerate(
        zip(upper_flags, upper_flags[1:]), start=1
    ):
        if prev_upper == cur_upper:
            continue
        # Upper -> lower: the capital itself opens the word (pos - 1).
        # Lower -> upper: the word opens at the capital (pos).
        boundaries.append(pos - 1 if prev_upper else pos)
    boundaries.append(len(camel))

    # A lone capital between lowercase letters is recorded twice (once as the
    # end of "lU", once as the start of "Ul"); skip the resulting empty slices
    pieces = []
    for start, end in zip(boundaries, boundaries[1:]):
        if start < end:
            pieces.append(camel[start:end])
    return pieces
|
||||
Reference in New Issue
Block a user