Added module docstring to utils.py

Updated filter_days_avg and updated comments in utils.py
Fixed typos in twitter_process.py
This commit is contained in:
MstrPikachu
2021-12-09 19:55:32 -05:00
parent ab8a685c8c
commit b4c5fc254d
2 changed files with 31 additions and 16 deletions
+2 -2
View File
@@ -110,7 +110,7 @@ class UserSample:
def select_user_sample() -> None:
"""
Select our sample of 500 most popular users and 500 random users who meet the criteria. The
Select our sample of the 500 most popular users and 500 random users who meet the criteria. The
criteria we use are that the user must have at least 150 followers, and must have a number of
postings in between 1000 and 3250. Analyzing someone who doesn't post or someone who doesn't have
enough followers for interaction might not reveal useful information. We also filter based on
@@ -284,7 +284,7 @@ def is_covid_related(text: str) -> bool:
:return: Whether the text is covid related
"""
# English
# We're hesitate to include words like "pandemic" or "vaccine" because they might refer to other
# We're hesitant to include words like "pandemic" or "vaccine" because they might refer to other
# pandemics or other vaccines. However, I think we need to include "the pandemic" because many
# posts refer to covid only as "the pandemic."
keywords = ['covid', 'the pandemic', 'lockdown', 'spikevax', 'comirnaty', 'vaxzevria',
+29 -14
View File
@@ -1,3 +1,10 @@
"""This module contains useful functions and classes, including:
- debug messages
- file I/O
- statistics functions, removing outliers and averaging values over a period
- date-related functions
- classes for configs, reports, statistics, and JSON"""
import dataclasses
import inspect
import json
@@ -57,7 +64,7 @@ def load_config(path: str = 'config.json5') -> Config:
def debug(msg: object) -> None:
"""
Output a debug message
Output a debug message, usually from another function
:param msg: Message
"""
@@ -71,7 +78,7 @@ def calculate_rate_delay(rate_limit: float) -> float:
Calculate the rate delay for each request given rate limit in request per minute
:param rate_limit: Rate limit in requests per minute
:return: Rate delay in seconds per request (added one second just to be safe)
:return: Rate delay in seconds per request
"""
return 1 / rate_limit * 60
@@ -263,7 +270,7 @@ def parse_date_only(iso: str) -> datetime:
def daterange(start_date: str, end_date: str) -> Generator[tuple[str, datetime], None, None]:
"""
Date range for looping
Date range for looping, excluding the end date
:param start_date: Start date in "YYYY-MM-DD" format
:param end_date: End date in "YYYY-MM-DD" format
@@ -278,7 +285,9 @@ def daterange(start_date: str, end_date: str) -> Generator[tuple[str, datetime],
def map_to_dates(y: dict[str, Union[int, float]], dates: list[str],
default: float = 0) -> list[float]:
"""
Map y axis to date
Takes y-axis data in the form of a mapping of date to values, and returns a list of all the
values mapped to the date in dates. If a date in dates isn't in y, then the default value is
used instead.
Preconditions:
- The date in dates must be in the same format as the dates in the keys of y
@@ -308,18 +317,24 @@ def filter_days_avg(y: list[float], n: int) -> list[float]:
if n % 2 != 1:
ValueError(f'n must be odd (you entered {n})')
# Calculate
results = []
buffer = [y[0] * n // 2]
# Sliding window; maintain a sum of an interval centered around i
# if the interval exceeds the beginning/end, pretend that the first/last elements are "extended"
radius = n // 2
current_sum = (radius + 1) * y[0] # current sum is sum(y[-r:0] + y[0:1] + y[1:r + 1])
for i in range(radius):
i = min(i, len(y) - 1)
current_sum += y[i] # adding the values in y[1:r + 1]
ret = []
for i in range(len(y)):
buffer.append(y[i])
results.append(sum(buffer) / len(buffer))
# Queue longer than n days, remove first
if len(buffer) > n:
buffer.pop(0)
return results
l, r = i - radius, i + radius
l = max(0, l) # avoid index out of bounds by "extending" first/last element
r = min(r, len(y) - 1)
current_sum += y[r] # extend sliding window
ret.append(current_sum / n)
current_sum -= y[l] # remove old values
return ret
def divide_zeros(numerator: list[float], denominator: list[float]) -> list[float]: