[U] Rename, restructure

This commit is contained in:
Hykilpikonna
2021-11-24 21:34:34 -05:00
parent 3c2ad5462a
commit cc90af631d
2 changed files with 46 additions and 35 deletions
+6 -6
View File
@@ -99,7 +99,7 @@ def get_user_popularity_ranking(user: str) -> int:
@dataclass()
class Sample:
class UserSample:
"""
This is a data class storing our different samples.
"""
@@ -147,7 +147,7 @@ def select_user_sample() -> None:
sample = random.sample(filtered, 500)
# Save
write(file, json_stringify(Sample(most_popular, sample, get_english_news_channels())))
write(file, json_stringify(UserSample(most_popular, sample, get_english_news_channels())))
def get_english_news_channels() -> list[str]:
@@ -202,16 +202,16 @@ def filter_news_channels() -> None:
write(f'{USER_DIR}/processed/sample.json', json_stringify(sample))
def load_user_sample() -> Sample:
def load_user_sample() -> UserSample:
"""
Load the selected sample
:return: The loaded user sample
"""
j = json.loads(read(f'{USER_DIR}/processed/sample.json'))
return Sample([ProcessedUser(*u) for u in j['most_popular']],
[ProcessedUser(*u) for u in j['random']],
j['english_news'])
return UserSample([ProcessedUser(*u) for u in j['most_popular']],
[ProcessedUser(*u) for u in j['random']],
j['english_news'])
class Posting(NamedTuple):
+40 -29
View File
@@ -53,7 +53,7 @@ class UserFloat:
@dataclass()
class UserSample:
class Sample:
name: str
users: list[str]
frequencies: list[UserFloat] = field(default_factory=list)
@@ -62,20 +62,15 @@ class UserSample:
tweets: list[Posting] = field(default_factory=list)
def view_covid_tweets_freq(users: list[str],
sample_name: str) -> None:
def view_covid_tweets_freq(sample: Sample) -> None:
"""
Visualize the frequency that the sampled users post about COVID. For example, someone who
posted every single tweet about COVID will have a frequency of 1, and someone who doesn't
post about COVID will have a frequency of 0.
:param users: Sample users
:param sample_name: Name of the sample
:param sample: Sample
:return: None
"""
# Init reporter
r = Reporter(f'{REPORT_DIR}/report.report.1-covid-tweet-frequency/{sample_name}.md')
r.print(f"In {sample_name} -")
r = Reporter(f'{REPORT_DIR}/report.report.1-covid-tweet-frequency/{sample.name}.md')
r.print(f"In {sample.name} -")
# How many people didn't post about COVID?
r.print("How many people didn't post about COVID:",
@@ -93,24 +88,17 @@ def view_covid_tweets_freq(users: list[str],
r.save()
# Graph histogram
plt.title(f'COVID-related posting frequency for {sample_name}')
plt.title(f'COVID-related posting frequency for {sample.name}')
plt.xticks(rotation=90)
plt.tight_layout()
plt.hist([f[1] for f in user_frequency], bins=100, color='#ffcccc')
plt.savefig(f'{REPORT_DIR}/report.report.1-covid-tweet-frequency/{sample_name}.png')
plt.savefig(f'{REPORT_DIR}/report.report.1-covid-tweet-frequency/{sample.name}.png')
def view_covid_tweets_pop(users: list[str],
sample_name: str) -> None:
"""
Visualize the relative popularity of the sampled users' posts about COVID. For example, if one
person posted a COVID post and got 1000 likes, while their other posts (including this one) got
an average of 1 like, they will have a relative popularity of 1000. If, on the other hand, one
person posted a COVID post and got 1 like, while their other posts (including this one) got an
average of 1000 likes, they will have a relative popularity of 1/1000.
To prevent divide-by-zero, we ignored everyone who didn't post about covid and who didn't post
at all.
:param users: Sample users
:param sample_name: Name of the sample
@@ -163,12 +151,44 @@ def view_covid_tweets_pop(users: list[str],
plt.savefig(f'{REPORT_DIR}/2-covid-tweet-popularity/{sample_name}.png')
def load_samples() -> list[Sample]:
    """
    Build the named samples and fill in their calculated data.

    The stored user sample is read through load_user_sample() and converted into three Sample
    objects ('500-pop', '500-rand' and 'eng-news') that hold usernames. Each sample then gets its
    frequencies, popularity ratios and tweets from calculate_sample_data().

    :return: The samples, in the order 500-pop, 500-rand, eng-news
    """
    # Read the stored selection and reduce every group to plain usernames
    stored = load_user_sample()
    users_by_name = {
        '500-pop': [u.username for u in stored.most_popular],
        '500-rand': [u.username for u in stored.random],
        'eng-news': list(stored.english_news),
    }

    # Create each sample, then attach the data calculated from its users
    result = []
    for name, users in users_by_name.items():
        sample = Sample(name, users)
        sample.frequencies, sample.popularity_ratios, sample.tweets = \
            calculate_sample_data(sample.users)
        result.append(sample)
    return result
def calculate_sample_data(users: list[str]) -> tuple[list[UserFloat], list[UserFloat], list[Posting]]:
"""
This function loads and calculates how frequently each user in a list posts about COVID, and
also calculates the relative popularity of their COVID posts.
This function also creates a combined list of all users in a sample
This function also creates a combined list of all users in a sample.
Frequency: the frequency that the sampled users post about COVID. For example, someone who
posted every single tweet about COVID will have a frequency of 1, and someone who doesn't
post about COVID will have a frequency of 0.
Popularity ratio: the relative popularity of the sampled users' posts about COVID. If one
person posted a COVID post and got 1000 likes, while their other posts (including this one) got
an average of 1 like, they will have a relative popularity of 1000. If, on the other hand, one
person posted a COVID post and got 1 like, while their other posts (including this one) got an
average of 1000 likes, they will have a relative popularity of 1/1000.
To prevent divide-by-zero, we ignore everyone who didn't post about COVID or who didn't post
at all.
:param users: Users in a sample
:return: Frequencies, Popularity ratios, Combined tweets list for the sample
@@ -225,15 +245,6 @@ def view_covid_tweets_date(tweets: list[Posting]):
if __name__ == '__main__':
# Load sample, convert format
samples = load_user_sample()
samples = [UserSample('500-pop', [u.username for u in samples.most_popular]),
UserSample('500-rand', [u.username for u in samples.random]),
UserSample('eng-news', list(samples.english_news))]
# Calculate frequencies and popularity ratios
for s in samples:
s.frequencies, s.popularity_ratios, s.tweets = calculate_sample_data(s.users)
view_covid_tweets_freq([u.username for u in samples.most_popular], '500-pop')
# view_covid_tweets_freq(sample.random, '500-rand')