diff --git a/src/collect_others.py b/src/collect_others.py index 14600be..3af7d8d 100644 --- a/src/collect_others.py +++ b/src/collect_others.py @@ -1,3 +1,6 @@ +""" +This module uses web requests to collect and process other data we are using in our analysis. +""" from dataclasses import dataclass import requests diff --git a/src/constants.py b/src/constants.py index 757e82a..cef8dcb 100644 --- a/src/constants.py +++ b/src/constants.py @@ -1,6 +1,10 @@ -# Constants (The instructors said that we can use global constants here: -# https://piazza.com/class/ksovzjrlsye72f?cid=1664 -# They should not end with "/" +""" +This module stores the constants used in our project. + +Instructors said that we can use global constants: https://piazza.com/class/ksovzjrlsye72f?cid=1664 +""" + +# Paths, should not end with "/" DATA_DIR = '../data' TWEETS_DIR = f'{DATA_DIR}/twitter/user-tweets' USER_DIR = f'{DATA_DIR}/twitter/user' diff --git a/src/main.py b/src/main.py index 4bb6b77..db52016 100644 --- a/src/main.py +++ b/src/main.py @@ -1,3 +1,7 @@ +""" +This is the main module of our program, which runs the functions of the other modules +step by step. +""" from visualization import * from collect_twitter import * from report import serve_report diff --git a/src/processing.py b/src/processing.py index 70eaaea..cd16ad1 100644 --- a/src/processing.py +++ b/src/processing.py @@ -162,7 +162,7 @@ def get_english_news_channels() -> list[str]: Run this after download_all_tweets(api, 'TwitterNews') - Precondition: + Preconditions: - /user/TwitterNews.json exists. :return: A list of news channel screen names @@ -193,7 +193,7 @@ def filter_news_channels() -> None: """ Filter out news channels that don't exist anymore or have been banned by Twitter. - Precondition: + Preconditions: - Run this after downloading all tweets from the news channels in Step 2.3 in main. 
:return: None diff --git a/src/report.py b/src/report.py index 072d279..1d37fbc 100644 --- a/src/report.py +++ b/src/report.py @@ -1,3 +1,6 @@ +""" +This module generates report HTML and serves it in an HTTP server. +""" import json import os.path import shutil diff --git a/src/utils.py b/src/utils.py index 4ee4bec..5b21fc3 100644 --- a/src/utils.py +++ b/src/utils.py @@ -3,7 +3,8 @@ - file I/O - statistics functions, removing outliers and averaging values over a period - date-related functions -- classes for configs, reports, statistics, and JSON""" +- classes for configs, reports, statistics, and JSON +""" import dataclasses import inspect @@ -87,6 +88,9 @@ def write(file: str, text: str) -> None: """ Write text to a file + Preconditions: + - file != '' + :param file: File path (will be converted to lowercase) :param text: Text :return: None @@ -104,6 +108,9 @@ def read(file: str) -> str: """ Read file content + Preconditions: + - file != '' + :param file: File path (will be converted to lowercase) :return: None """ @@ -168,6 +175,9 @@ def remove_outliers(points: list[float], z_threshold: float = 3.5) -> list[float Credit to: https://stackoverflow.com/a/11886564/7346633 + Preconditions: + - len(points) > 0 + :param points: Input points list :param z_threshold: Z threshold for identifying whether or not a point is an outlier :return: List with outliers removed @@ -212,6 +222,9 @@ def get_statistics(points: list[float]) -> Stats: """ Calculate statistics for a set of points + Preconditions: + - len(points) > 0 + :param points: Input points :return: Statistics """ @@ -262,7 +275,7 @@ def parse_date_only(iso: str) -> datetime: Parse date faster. Preconditions: - - iso is in the format of "YYYY-MM-DD" (e.g. "2021-10-20") + - iso starts with the format of "YYYY-MM-DD" (e.g. 
"2021-10-20" or "2021-10-20T10:04:14") - iso is a valid date (this function does not check for the validity of the input) :param iso: Input date @@ -275,6 +288,10 @@ def daterange(start_date: str, end_date: str) -> Generator[tuple[str, datetime], """ Date range for looping, excluding the end date + Preconditions: + - start_date starts with the "YYYY-MM-DD" format + - end_date starts with the "YYYY-MM-DD" format + :param start_date: Start date in "YYYY-MM-DD" format :param end_date: End date in "YYYY-MM-DD" format :return: Generator for looping through the dates one day at a time. @@ -307,7 +324,7 @@ def filter_days_avg(y: list[float], n: int) -> list[float]: """ Filter y by taking an average over a n-days window. If n = 0, then return y without processing. - Precondition: + Preconditions: - n % 2 == 1 - len(y) > 0 @@ -391,6 +408,9 @@ def json_stringify(obj, indent: Union[int, None] = None) -> str: Serialize json string with support for dataclasses and datetime and sets and with custom configuration. + Preconditions: + - obj is not None + :param obj: Objects :param indent: Indent size or none :return: Json strings diff --git a/src/visualization.py b/src/visualization.py index 426f48c..5e394b9 100644 --- a/src/visualization.py +++ b/src/visualization.py @@ -87,7 +87,7 @@ class Sample: To prevent divide-by-zero, we ignored everyone who didn't post about covid and who didn't post at all. - Precondition: + Preconditions: - Downloaded tweets data are sorted by date """ debug(f'Calculating sample tweets data for {self.name}...')