# Extracted from patch ea27d0fec2a0d656ac72fae638e3c23a091b3cac
# ("[+] Visualize covid tweets popularity ratio", Hykilpikonna, 2021-11-24),
# which adds the code below to src/process/twitter_visualization.py.


def view_covid_tweets_pop(users: list[ProcessedUser],
                          sample_name: str) -> None:
    """
    Visualize the relative popularity of the sampled users' posts about COVID.

    For every user, the relative popularity is the average popularity of their COVID-related
    posts divided by the average popularity of all of their posts (COVID-related ones included).
    For example, if a user's COVID posts average a popularity of 1000 while all of their posts
    average 1, their relative popularity is 1000; in the opposite situation it is 1/1000.

    Retweets are excluded. To prevent division by zero, a user is skipped when they have no
    COVID-related posts (which also covers users without any posts) or when the average
    popularity of all their posts is zero.

    The function prints how many users were skipped, prints a table of the 20 users with the
    highest ratio, and shows a histogram of all ratios with a reference line at ratio 1.

    :param users: Sample users
    :param sample_name: Name of the sample, used in the printed header and the graph title
    :return: None
    """
    # Collect a (username, relative popularity) pair for each user that has usable data
    user_popularity = []
    for u in users:
        # Load processed tweets, ignoring retweets
        tweets = [t for t in load_tweets(u.username) if not t.repost]
        # Filter covid tweets
        covid = [t for t in tweets if t.covid_related]
        # covid is a subset of tweets, so this also skips users who didn't post at all
        if not covid:
            continue
        global_avg = sum(t.popularity for t in tweets) / len(tweets)
        # A user whose posts all have zero popularity would still divide by zero below
        if global_avg == 0:
            continue
        covid_avg = sum(t.popularity for t in covid) / len(covid)
        user_popularity.append((u.username, covid_avg / global_avg))

    # Sort by relative popularity, highest first
    user_popularity.sort(key=lambda x: x[1], reverse=True)

    # How many people are ignored
    print(f"In {sample_name} -")
    print("To prevent division by zero, we ignored people who didn't post about COVID, didn't "
          "post at all, or whose posts have an average popularity of zero. "
          f"We ignored {len(users) - len(user_popularity)} people in this list.")
    print()

    # Top 20
    print("20 Users of whose COVID-related posts are the most popular:")
    print(tabulate([[name, f'{ratio:.2f}'] for name, ratio in user_popularity[:20]],
                   ['Username', 'Popularity Ratio']))

    # Graph histogram
    plt.title(f'COVID-related popularity ratios for {sample_name}')
    plt.hist([ratio for _, ratio in user_popularity], bins=100, color='#ffcccc')
    # Reference line at ratio 1: COVID posts exactly as popular as the user's average post
    plt.axvline(1, color='lightgray')
    plt.xticks(rotation=90)
    # Compute the layout last so it accounts for the plotted data and the rotated tick labels
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    sample = load_user_sample()
    # The frequency views are currently disabled:
    # view_covid_tweets_freq(sample.most_popular, '500 most popular Twitter users')
    # view_covid_tweets_freq(sample.random, '500 random Twitter users')
    view_covid_tweets_pop(sample.most_popular, '500 most popular Twitter users')
    view_covid_tweets_pop(sample.random, '500 random Twitter users')