diff --git a/sgs/__init__.py b/sgs/__init__.py index 450ff8d..fc30eae 100644 --- a/sgs/__init__.py +++ b/sgs/__init__.py @@ -1,4 +1,4 @@ import sgs.api import sgs.calculations -__version__ = '1.0.5' +__version__ = '1.0.6' diff --git a/sgs/api.py b/sgs/api.py index 0054566..124f7a7 100644 --- a/sgs/api.py +++ b/sgs/api.py @@ -16,6 +16,7 @@ Gender = Literal['f', 'm'] _kde_functions: dict[Feature, dict[Gender, gaussian_kde]] = {} +_kde_boundaries: dict[Feature, tuple[float, float]] = {} def load_kde() -> dict[Feature, dict[Gender, gaussian_kde]]: @@ -34,12 +35,17 @@ def load_kde() -> dict[Feature, dict[Gender, gaussian_kde]]: data = {k.lower(): data[k] for k in data} # Fit KDE functions + # Also find boundaries (99th percentile for fem and 1st percentile for masc) for feature in data: _kde_functions[feature] = {} for gender in data[feature]: kde = gaussian_kde(data[feature][gender], 'scott') _kde_functions[feature][gender] = kde + # Boundaries + _kde_boundaries[feature] = (np.percentile(data[feature]['m'], 1), + np.percentile(data[feature]['f'], 99)) + return _kde_functions @@ -63,6 +69,14 @@ def _calculate_fem_prob(feature: Feature, value: float) -> float: """ f = load_kde()[feature]['f'].evaluate([value])[0] m = load_kde()[feature]['m'].evaluate([value])[0] + + # Boundaries + m1, f99 = _kde_boundaries[feature] + if value > f99: + return 1 + if value < m1: + return 0 + return f / (f + m) diff --git a/test.py b/test.py index c0701bd..1c37df2 100644 --- a/test.py +++ b/test.py @@ -2,3 +2,4 @@ import sgs if __name__ == '__main__': print(sgs.api._calculate_fem_prob('pitch', 200)) + print(sgs.api._calculate_fem_prob('f1', 741))