# Recovered from patch 3b72889 ("[+] Add nlp utils", Hykilpikonna, 2022-07-25),
# which creates hypy_utils/nlp_utils.py and bumps hypy_utils/__init__.py
# __version__ from "1.0.9" to "1.0.10".
"""
Natural language processing utils
"""


def camel_split(camel: str) -> list[str]:
    """
    Split a camel-case identifier into its component words.

    A word boundary is placed wherever the case of neighbouring characters
    changes: before an uppercase character that follows a non-uppercase one
    ("lU"), and before the last uppercase character of a run that is followed
    by a non-uppercase one ("Ul"). Runs of capitals therefore stay together as
    acronyms. Characters that are not uppercase (digits, punctuation, spaces)
    are treated the same as lowercase letters.

    Credit: https://stackoverflow.com/a/58996565/7346633

    :param camel: E.g. HelloWorld, helloWorld or getHTTPResponse
    :return: E.g. ['Hello', 'World'], ['hello', 'World'] or
        ['get', 'HTTP', 'Response']. A string that is entirely uppercase,
        entirely lowercase or entirely numeric is returned unsplit as a
        one-element list; an empty string yields an empty list.

    >>> camel_split("HelloWorld")
    ['Hello', 'World']
    >>> camel_split("getHTTPResponse")
    ['get', 'HTTP', 'Response']
    >>> camel_split("")
    []
    """
    # Nothing to split; made explicit so the result does not depend on the
    # boundary bookkeeping below.
    if not camel:
        return []

    # Ignore all caps, all lower or all digits: there is no case change to find
    if camel.isupper() or camel.islower() or camel.isnumeric():
        return [camel]

    is_upper = [ch.isupper() for ch in camel]

    # Collect the start index of every word, in increasing order
    starts = [0]
    for i, (cur, nxt) in enumerate(zip(is_upper, is_upper[1:])):
        if cur == nxt:
            continue
        # "Ul": the uppercase char at i opens a word.
        # "lU": the uppercase char at i + 1 opens a word.
        boundary = i if cur else i + 1
        # In "lUl" the same "U" is reported by both rules (and index 0 is
        # already present), so skip a boundary that was just recorded.
        if boundary != starts[-1]:
            starts.append(boundary)

    # The largest possible boundary is len(camel) - 1, so this never duplicates
    starts.append(len(camel))

    return [camel[begin:end] for begin, end in zip(starts, starts[1:])]