def load_tweets(tweets_dir: str, username: str) -> list[Posting]:
    """
    Read one user's processed tweets file and rebuild its entries as Posting objects.

    The file is looked up at ``processed/<username>.json`` beneath ``tweets_dir``.

    :param tweets_dir: Tweets directory
    :param username: User's screen name (used as the JSON file's base name)
    :return: One Posting per entry stored in the user's processed JSON file
    """
    tweets_path = os.path.join(tweets_dir, f'processed/{username}.json')
    stored_entries = json.loads(read(tweets_path))

    # Each stored entry is unpacked positionally into the Posting constructor.
    postings = []
    for entry in stored_entries:
        postings.append(Posting(*entry))
    return postings