[+] Create main

2021-11-22 11:33:08 -05:00
parent 12327d6aab
commit a95374302a
1 changed files with 35 additions and 0 deletions
@@ -0,0 +1,35 @@
+from tabulate import tabulate
+
+from process.twitter_process import load_users_popularity, process_users_popularity
+from raw_collect.twitter import tweepy_login
+from utils import load_config
+
+if __name__ == '__main__':
+    # Load the settings from config.json5 and pass them to tweepy_login to obtain the API handle.
+    conf = load_config('config.json5')
+    api = tweepy_login(conf)
+
+    #####################
+    # Data collection - Step C1 (commented out; NOTE(review): the download_users_* imports are not visible here)
+    # Download a wide range of users from Twitter using follow-chaining, starting from a single user:
+    # download_users_start(api, 'sauricat')
+
+    # This task runs for a very long time in order to obtain a large dataset of Twitter users. If
+    # you stop the process, you can resume it later by running the following line instead:
+    # download_users_resume_progress(api)
+
+    #####################
+    # Data processing - Step P1
+    # (After step C1) Process the downloaded Twitter users by popularity.
+    users = process_users_popularity()
+
+
+
+    # Just curious: who are the 20 most popular users? (Assumes `users` is sorted most-popular-first - TODO confirm.)
+    print(tabulate(((u.username, u.popularity) for u in users[:20]), headers=['Name', 'Followers']))
+
+    #####################
+    # Data collection - Step C2
+    # TODO: Download as many posts of the most popular individuals as possible.
+
+