diff --git a/src/main.py b/src/main.py index af7b9e1..771b818 100644 --- a/src/main.py +++ b/src/main.py @@ -20,23 +20,36 @@ if __name__ == '__main__': ##################### # Data processing - Step P1 - # (After step C1) Process the downloaded twitter users by popularity - process_users_popularity() + # (After step C1) Process the downloaded twitter users, extract screen name, popularity, and + # number of tweets data. + # process_users_popularity() ##################### - # Data collection - Step C2 - # (After step P1) Load the downloaded twitter users by popularity, and start downloading all + # Data processing - Step P2 + # (After step P1) Select the 500 most popular users and 500 random users who meet particular + # criteria as our sample. + # select_user_sample() + + ##################### + # Data collection - Step C2.1 + # (After step P2) Load the selected user sample, and start downloading all # tweets from 500 of the most popular users. - # users = load_users_popularity()[:500] + # sample = load_user_sample() # Just curious, who are the 20 most popular individuals on twitter? - # print(tabulate(((u.username, u.popularity) for u in users[:20]), + # print(tabulate(((u.username, u.popularity) for u in sample.most_popular[:20]), # headers=['Name', 'Followers'])) # Start download - # for u in users: + # for u in sample.most_popular: # download_all_tweets(api, u.username) + ##################### + # Data collection - Step C2.2 + # (After step P2) Download all tweets from the 500 randomly selected users + for u in load_user_sample().random: + download_all_tweets(api, u.username) + ##################### # Data processing - Step P2 # (After step C2) Process the downloaded tweets, determine whether they are covid-related