[+] Create function to remove outliers
This commit is contained in:
+10
-7
@@ -1,25 +1,28 @@
|
||||
|
||||
####################
|
||||
# Data Collection
|
||||
# JSON5 is a human-readable JSON format that allows for things such as unquoted keys or comments.
|
||||
json5~=0.9.6
|
||||
|
||||
# Tweepy is a Python SDK for Twitter
|
||||
tweepy==4.4.0
|
||||
|
||||
# requests is for getting html from a website URL
|
||||
requests==2.26.0
|
||||
|
||||
# beautifulsoup is used to extract data from html
|
||||
beautifulsoup4==4.10.0
|
||||
|
||||
# 7zip packing utility for packing our processed data
|
||||
py7zr==0.16.3
|
||||
|
||||
#####################
|
||||
# Data visualization
|
||||
# Data Visualization
|
||||
# Print table data
|
||||
tabulate==0.8.9
|
||||
# Draw local graphs
|
||||
matplotlib==3.5.0
|
||||
# Calculate data statistics
|
||||
numpy==1.21.4
|
||||
|
||||
####################
|
||||
# Data Packing
|
||||
# 7zip packing utility for packing our processed data
|
||||
py7zr==0.16.3
|
||||
|
||||
#####################
|
||||
# Testing and code checking
|
||||
|
||||
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import json5
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -98,6 +99,29 @@ def read(file: str) -> str:
|
||||
return f.read()
|
||||
|
||||
|
||||
def remove_outliers(points: list[float], z_threshold: float = 3.5) -> list[float]:
    """
    Create list with outliers removed for graphing

    A point is treated as an outlier when its modified z-score (computed from
    the median and the median absolute deviation instead of the mean and the
    standard deviation) is greater than z_threshold.

    Credit to: https://stackoverflow.com/a/11886564/7346633

    :param points: Input points list
    :param z_threshold: Z threshold for identifying whether or not a point is an outlier
    :return: List with outliers removed (original order is preserved, the input is not modified)
    """
    # Nothing to filter; also avoids np.median on an empty array (NaN + RuntimeWarning)
    if len(points) == 0:
        return []

    values = np.array(points)
    # Treat a flat list of numbers as a column of one-dimensional observations
    if len(values.shape) == 1:
        values = values[:, None]

    # Distance of every observation from the median
    median = np.median(values, axis=0)
    diff = np.sqrt(np.sum((values - median) ** 2, axis=-1))
    med_abs_deviation = np.median(diff)

    # When at least half of the points equal the median, the MAD is zero. The division
    # then gives NaN for points on the median (NaN > threshold is False, so they are kept)
    # and inf for every other point (so they are removed). That is the desired result,
    # so only the floating point warnings are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        modified_z_score = 0.6745 * diff / med_abs_deviation
        is_outlier = modified_z_score > z_threshold

    # Filter the original elements so the caller gets back the same objects it passed in
    return [point for point, outlier in zip(points, is_outlier) if not outlier]
|
||||
|
||||
|
||||
class EnhancedJSONEncoder(json.JSONEncoder):
|
||||
def default(self, o):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user