[+] Visualize covid tweets popularity ratio
This commit is contained in:
@@ -51,7 +51,67 @@ def view_covid_tweets_freq(users: list[ProcessedUser],
|
||||
plt.show()
|
||||
|
||||
|
||||
def view_covid_tweets_pop(users: list[ProcessedUser],
                          sample_name: str) -> None:
    """
    Visualize the relative popularity of the sampled users' posts about COVID.

    A user's relative popularity is the average popularity of their COVID-related posts divided
    by the average popularity of all of their posts (COVID-related ones included). Retweets are
    excluded from both averages. For example, if one person posted a COVID post and got 1000
    likes, while their posts overall got an average of 1 like, they will have a relative
    popularity of 1000. If, on the other hand, their COVID post got 1 like while their posts
    overall got an average of 1000 likes, they will have a relative popularity of 1/1000.

    A user is skipped whenever the ratio is undefined:

    - they have no non-retweet posts about COVID (this also covers users with no posts at all,
      since the COVID posts are a subset of all posts), or
    - the average popularity of all their posts is zero.

    The function prints how many users were skipped, prints a table of the 20 users with the
    highest ratio, and then shows a histogram of all ratios with a reference line at 1.

    :param users: Sample users
    :param sample_name: Name of the sample
    :return: None
    """
    # Load tweets, and get the relative popularity of covid tweets for each user
    user_popularity = []
    for u in users:
        # Load processed tweets, ignoring retweets
        tweets = [t for t in load_tweets(u.username) if not t.repost]
        # Filter covid tweets
        covid = [t for t in tweets if t.covid_related]

        # No covid posts means the numerator is undefined. Because covid is a subset of tweets,
        # this check also guarantees that tweets is non-empty below.
        if not covid:
            continue

        covid_avg = sum(t.popularity for t in covid) / len(covid)
        global_avg = sum(t.popularity for t in tweets) / len(tweets)

        # A user whose posts all have zero popularity would make the ratio divide by zero.
        if global_avg == 0:
            continue

        user_popularity.append((u.username, covid_avg / global_avg))

    # Sort by relative popularity, highest first
    user_popularity.sort(key=lambda x: x[1], reverse=True)

    # How many people are ignored
    print(f"In {sample_name} -")
    print("To prevent division by zero, we ignored people who didn't post about COVID, didn't "
          "post at all, or whose posts have zero average popularity. "
          f"We ignored {len(users) - len(user_popularity)} people in this list.")
    print()

    # Top 20
    print("20 Users of whose COVID-related posts are the most popular:")
    print(tabulate([[u[0], f'{u[1]:.2f}'] for u in user_popularity[:20]],
                   ['Username', 'Popularity Ratio']))

    # Graph histogram
    plt.title(f'COVID-related popularity ratios for {sample_name}')
    plt.hist([f[1] for f in user_popularity], bins=100, color='#ffcccc')
    # Reference line at ratio 1: COVID posts exactly as popular as the user's average post
    plt.axvline(x=1, color='lightgray')
    plt.xticks(rotation=90)
    # Adjust the layout only after everything has been drawn, so it accounts for all artists
    plt.tight_layout()
    plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load the user sample once, then run each visualization on both
    # sub-samples. Order is preserved: both frequency views first, then both popularity views.
    sample = load_user_sample()
    labelled_samples = (
        (sample.most_popular, '500 most popular Twitter users'),
        (sample.random, '500 random Twitter users'),
    )
    for view in (view_covid_tweets_freq, view_covid_tweets_pop):
        for sample_users, label in labelled_samples:
            view(sample_users, label)
|
||||
|
||||
Reference in New Issue
Block a user