diff --git a/src/main.py b/src/main.py
index af7b9e1..771b818 100644
--- a/src/main.py
+++ b/src/main.py
@@ -20,23 +20,36 @@ if __name__ == '__main__':
 
     #####################
     # Data processing - Step P1
-    # (After step C1) Process the downloaded twitter users by popularity
-    process_users_popularity()
+    # (After step C1) Process the downloaded twitter users, extracting each user's screen name,
+    # popularity, and number of tweets.
+    # process_users_popularity()
 
     #####################
-    # Data collection - Step C2
-    # (After step P1) Load the downloaded twitter users by popularity, and start downloading all
+    # Data processing - Step P2
+    # (After step P1) Select the 500 most popular users and 500 random users who meet particular
+    # criteria as our sample.
+    # select_user_sample()
+
+    #####################
+    # Data collection - Step C2.1
+    # (After step P2) Load the selected user sample, and start downloading all
     # tweets from 500 of the most popular users.
-    # users = load_users_popularity()[:500]
+    # sample = load_user_sample()
 
     # Just curious, who are the 20 most popular individuals on twitter?
-    # print(tabulate(((u.username, u.popularity) for u in users[:20]),
+    # print(tabulate(((u.username, u.popularity) for u in sample.most_popular[:20]),
     #                headers=['Name', 'Followers']))
 
     # Start download
-    # for u in users:
+    # for u in sample.most_popular:
     #     download_all_tweets(api, u.username)
 
+    #####################
+    # Data collection - Step C2.2
+    # (After step P2) Download all tweets from the 500 randomly selected users
+    for u in load_user_sample().random:
+        download_all_tweets(api, u.username)
+
     #####################
     # Data processing - Step P2
     # (After step C2) Process the downloaded tweets, determine whether they are covid-related