[+] Create visualization script

This commit is contained in:
Hykilpikonna
2021-11-23 21:14:17 -05:00
parent c91ac3433d
commit 464ab9502e
+48
View File
@@ -0,0 +1,48 @@
from matplotlib import pyplot as plt
from tabulate import tabulate
from process.twitter_process import *
def view_covid_tweets_freq(users: list[ProcessedUser],
sample_name: str,
tweets_dir: str = './data/twitter/user-tweets/'):
tweets_dir = normalize_directory(tweets_dir)
# Load tweets, and get the frequency of covid tweets for each user
user_frequency = []
for u in users:
# Load processed tweet
tweets = load_tweets(tweets_dir, u.username)
# Get the frequency of COVID-related tweets
freq = len([1 for t in tweets if t.covid_related]) / len(tweets)
user_frequency.append((u.username, freq))
# Sort by frequency
user_frequency.sort(key=lambda x: x[1], reverse=True)
# How many people didn't post about COVID?
print(f"In {sample_name} -")
print("How many people didn't post about COVID:",
len([a for a in user_frequency if a[1] == 0]))
print("How many people have less than 1% of their posts about COVID:",
len([a for a in user_frequency if a[1] <= 0.01]))
print()
# Top 20
print(f"20 Users of who post COVID-related tweets most frequently:")
print(tabulate([[u[0], f'{u[1] * 100:.1f}%'] for u in user_frequency[:20]],
['Username', 'Frequency']))
# Graph histogram
plt.title(f'COVID-related posting frequency for {sample_name}')
plt.xticks(rotation=90)
plt.tight_layout()
plt.hist([f[1] for f in user_frequency], bins=100, color='#ffcccc')
plt.show()
if __name__ == '__main__':
sample = load_user_sample()
view_covid_tweets_freq(sample.most_popular, '500 most popular Twitter users')
view_covid_tweets_freq(sample.random, '500 random Twitter users')