diff --git a/src/process/twitter_visualization.py b/src/process/twitter_visualization.py
index a52a3f9..231c6dc 100644
--- a/src/process/twitter_visualization.py
+++ b/src/process/twitter_visualization.py
@@ -32,11 +32,16 @@ class Sample:
     user_freqs: list[UserFloat]
     # Total popularity ratios of all posts for each user across all dates (sorted)
     user_pops: list[UserFloat]
-    # Tweets by all users in a sample (always sorted by date)
-    tweets: list[Posting]
+    # Average popularity of all u's posts
+    user_all_pop_avg: dict[str, float]
+    # Average popularity of COVID tweets by a specific user on a specific date
+    # user_covid_tweets_pop[user][date] = Average popularity of COVID-posts by {user} on {date}
+    user_date_covid_pop_avg: dict[str, dict[str, float]]
+    # Total COVID-tweets frequency on a specific date for all users.
+    date_covid_freq: dict[str, float]
     # dates[i] = The i-th day since the first tweet
     dates: list[datetime]
-    # date_freqs[i] = Total frequency of all posts from all users in this sample on date[i]
+    # date_freqs[i] = COVID frequency of all posts from all users in this sample on date[i]
     date_freqs: list[float]
     # date_pops[i] = Average popularity ratio of all posts from all users in this sample on date[i]
     date_pops: list[float]
@@ -67,11 +72,17 @@ class Sample:
 
         To prevent divide-by-zero, we ignored everyone who didn't post about covid and who didn't
         post at all.
+
+        Precondition:
+          - Downloaded tweets data are sorted by date
         """
         debug(f'Calculating sample tweets data for {self.name}...')
         popularity = []
         frequency = []
-        all_tweets: list[Posting] = []
+        date_covid_count = dict()
+        date_all_count = dict()
+        self.user_all_pop_avg = dict()
+        self.user_date_covid_pop_avg = dict()
         for i in range(len(self.users)):
             u = self.users[i]
 
@@ -81,45 +92,72 @@ class Sample:
 
             # Load processed tweet
             tweets = load_tweets(u)
-            # Ignore retweets
-            tweets = [t for t in tweets if not t.repost]
-            all_tweets += tweets
+            # Ignore retweets, and ignore tweets that are earlier than the start of COVID
+            tweets = [t for t in tweets if not t.repost and t.date > '2020-01-01T01:01:01']
             # Filter covid tweets
             covid = [t for t in tweets if t.covid_related]
 
             # To prevent divide by zero, ignore people who didn't post at all
             if len(tweets) == 0:
+                frequency.append(UserFloat(u, 0))
                 continue
             # Calculate the frequency of COVID-related tweets
             freq = len(covid) / len(tweets)
             frequency.append(UserFloat(u, freq))
 
+            # Calculate date fields
+            # Assume tweets are sorted
+            # tweets.sort(key=lambda x: x.date)
+            # Calculate popularity by date
+            date_cp_sum = dict()
+            date_cp_count = dict()
+            for t in tweets:
+                d = t.date[:10]
+
+                # For covid popularity on date
+                if t.covid_related:
+                    if d not in date_cp_sum:
+                        date_cp_sum[d] = 0
+                        date_cp_count[d] = 0
+                    date_cp_sum[d] += t.popularity
+                    date_cp_count[d] += 1
+
+                # For frequency on date
+                if d not in date_covid_count:
+                    date_covid_count[d] = 0
+                    date_all_count[d] = 0
+                if t.covid_related:
+                    date_covid_count[d] += 1
+                date_all_count[d] += 1
+
+            self.user_date_covid_pop_avg[u] = \
+                {d: date_cp_sum[d] / date_cp_count[d] for d in date_cp_sum}
+
+            # Calculate total popularity ratio for a user
             # To prevent divide by zero, ignore everyone who didn't post about covid
             if len(covid) == 0:
                 continue
             # Get the average popularity for COVID-related tweets
-            covid_avg = sum(t.popularity for t in covid) / len(covid)
-            global_avg = sum(t.popularity for t in tweets) / len(tweets)
+            covid_pop_avg = sum(t.popularity for t in covid) / len(covid)
+            all_pop_avg = sum(t.popularity for t in tweets) / len(tweets)
+            # Save global_avg
+            self.user_all_pop_avg[u] = all_pop_avg
             # To prevent divide by zero, ignore everyone who literally have no likes on any post
-            if global_avg == 0:
+            if all_pop_avg == 0:
                 continue
             # Get the relative popularity
-            popularity.append(UserFloat(u, covid_avg / global_avg))
+            popularity.append(UserFloat(u, covid_pop_avg / all_pop_avg))
+
+        # Calculate frequency on date
+        self.date_covid_freq = {d: date_covid_count[d] / date_all_count[d] for d in date_covid_count}
 
         # Sort by relative popularity or frequency
         popularity.sort(key=lambda x: x.data, reverse=True)
         frequency.sort(key=lambda x: x.data, reverse=True)
 
-        # Sort by date, latest first
-        all_tweets.sort(key=lambda x: x.date)
-
-        # Ignore tweets that are earlier than the start of COVID
-        all_tweets = [t for t in all_tweets if t.date > '2020-01-01T01:01:01']
-
         # Assign to sample
         self.user_freqs = frequency
         self.user_pops = popularity
-        self.tweets = all_tweets
         debug('- Done.')
 
     def calculate_change_data(self) -> None:
@@ -136,41 +174,38 @@ class Sample:
 
         :return: None
         """
-        # List indicies are days since the first tweet
-        covid_count = [0]
-        covid_popularity = [0]
-        all_count = [0]
-        all_popularity = [0]
-        current_date = self.tweets[0][:10]
-        i = 0
+        self.dates = []
+        self.date_freqs = []
+        self.date_pops = []
 
-        # Loop through all tweets
-        for tweet in self.tweets:
-            # Move on to the next date
-            tweet_date = tweet.date[:10]
-            if tweet_date != current_date:
-                current_date = tweet_date
-                covid_count.append(0)
-                covid_popularity.append(0)
-                all_count.append(0)
-                all_popularity.append(0)
-                i += 1
+        # Loop through all dates from the start of COVID to when the data is obtained
+        for (ds, dt) in daterange('2020-01-01', '2021-11-25'):
+            self.dates.append(dt)
 
-            # Add current tweet data
-            all_count[i] += 1
-            all_popularity[i] += tweet.popularity
-            if tweet.covid_related:
-                covid_count[i] += 1
-                covid_popularity[i] += tweet.popularity
+            # Convert date covid freq format
+            if ds in self.date_covid_freq:
+                self.date_freqs.append(self.date_covid_freq[ds])
+            else:
+                self.date_freqs.append(0)
 
-        # Calculate frequency and popularity ratio for each date, which will be our y-axis
-        self.date_freqs = divide_zeros(covid_count, all_count)
-        self.date_pops = divide_zeros(divide_zeros(covid_popularity, covid_count),
-                                      divide_zeros(all_popularity, all_count))
+            # Calculate date covid popularity ratio
+            users_posted_today = [u for u in self.users if u in self.user_date_covid_pop_avg and
+                                  ds in self.user_date_covid_pop_avg[u]]
+            if len(users_posted_today) != 0:
+                user_pop_ratio_sum = sum(self.user_date_covid_pop_avg[u][ds] /
+                                         self.user_all_pop_avg[u] for u in users_posted_today
+                                         if self.user_all_pop_avg[u] != 0)
+                pops_i = user_pop_ratio_sum / len(users_posted_today)
 
-        # Convert indicies to dates, which will be our x-axis
-        first_date = parse_date(self.tweets[0].date).replace(hour=0, minute=0, second=0)
-        self.dates = [first_date + timedelta(days=j) for j in range(len(all_count))]
+                if pops_i > 20:
+                    print('Date: ', ds)
+                    for u in users_posted_today:
+                        if self.user_all_pop_avg[u] != 0:
+                            print('-', u, self.user_date_covid_pop_avg[u][ds] /
+                                  self.user_all_pop_avg[u])
+            else:
+                pops_i = 1
+            self.date_pops.append(pops_i)
 
 
 def load_samples() -> list[Sample]:
@@ -215,7 +250,7 @@ def report_ignored(samples: list[Sample]) -> None:
     :return: None
     """
     # For frequencies, report who didn't post
-    table = [["Total users"] + [str(len(s.user_freqs)) for s in samples],
+    table = [["Total users"] + [str(len(s.users)) for s in samples],
              ["Users who didn't post at all"] +
              [str(len([1 for a in s.user_freqs if a.data == 0])) for s in samples],
              ["Users who posted less than 1%"] +
@@ -390,16 +425,6 @@ def report_stats(samples: list[Sample]) -> None:
     Reporter('freq/stats.md').table(table, [s.name for s in samples], True)
 
 
-def view_covid_tweets_date(tweets: list[Posting]):
-    # Graph histogram
-    plt.title(f'COVID posting dates')
-    plt.xticks(rotation=45)
-    plt.yticks(rotation=45)
-    plt.tight_layout()
-    plt.hist([parse_date(t.date) for t in tweets if t.covid_related], bins=40, color='#ffcccc')
-    plt.show()
-
-
 def report_change_different_n(sample: Sample) -> None:
     """
     Experiment wth different n values for IIR filter
@@ -420,6 +445,7 @@ def report_change_graphs(sample: Sample) -> None:
     graph_line_plot(sample.dates, sample.date_freqs, f'change/freq/{sample.name}.png',
                     f'COVID-posting frequency over time for {sample.name} IIR(10)',
                     True, 10)
+    print(sum(sample.date_pops) / len(sample.dates))
 
 
 def report_all() -> None:
diff --git a/src/report/report_document.md b/src/report/report_document.md
index e69f948..558dba0 100644
--- a/src/report/report_document.md
+++ b/src/report/report_document.md
@@ -12,7 +12,7 @@ Our data come from three samples:
 
 ## COVID-19 Posting Frequency
 
-First, we analyzed how frequently the users in these three datasets are posing about COVID-19. Initially, we were expecting that most people will post about COVID-19 because this pandemic is very relevant to every one of us. However, we found that there are many people in our samples didn't post about COVID-19 at all. The following table shows how many people in each sample didn't post or posted less than 1% about COVID-19:
+First, we analyzed how frequently the users in these three datasets are posing about COVID-19 (ignoring retweets). Initially, we were expecting that most people will post about COVID-19 because this pandemic is very relevant to every one of us. However, we found that there are many people in our samples didn't post about COVID-19 at all. The following table shows how many people in each sample didn't post or posted less than 1% about COVID-19:
 
 @include `/freq/didnt-post.md`
 
@@ -80,7 +80,7 @@ $$ \text{freqs}_i = \frac{|\text{COVID-posts on date}_{i}|}{|\text{All posts on
 </blockquote>
 
 <blockquote>
-$$ \text{pops}_i = \left(\frac{\sum\text{Popularity of COVID-posts on date}_i}{|\text{COVID-posts on date}_i|}\right) / \left(\frac{\sum \text{Popularity of all posts on date}_i}{|\text{All posts on date}_i|}\right) $$
+$$ \text{pops}_i = \frac{ \sum_{u \in \text{Users}} \left(\frac{\sum\text{Popularity of u's COVID-posts on date}_i}{(\text{Average popularity of all u's posts}) \cdot |\text{u's COVID-posts on date}_i|}\right)}{(\text{Number of users posted on date}_i)} $$
 </blockquote>
 
 After calculation, we decided to plot line charts of `freqs` or `pops` against `dates`. When we plot the graph without a filter, we found that the graph is actually very noisy as shown in the first graph below. So, we experimented with different filters from the `scipy` library and different parameter values, and chose to use an IIR filter with `n = 10`.
diff --git a/src/utils.py b/src/utils.py
index 371277e..c6a4af0 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -4,9 +4,9 @@ import json
 import os
 import statistics
 from dataclasses import dataclass
-from datetime import datetime, date
+from datetime import datetime, date, timedelta
 from pathlib import Path
-from typing import Union, NamedTuple, Any
+from typing import Union, NamedTuple, Any, Generator
 
 import json5
 import numpy as np
@@ -230,7 +230,7 @@ def tabulate_stats(stats: list[Stats], percent: bool = False) -> list[list[str]]
             ]
 
 
-def parse_date(iso: str) -> datetime:
+def parse_date_time(iso: str) -> datetime:
     """
     Parse date faster. Running 1,000,000 trials, this parse_date function is 4.03 times faster than
     python's built-in dateutil.parser.isoparse() function.
@@ -246,6 +246,34 @@ def parse_date(iso: str) -> datetime:
                     int(iso[11:13]), int(iso[14:16]), int(iso[17:19]))
 
 
+def parse_date_only(iso: str) -> datetime:
+    """
+    Parse date faster.
+
+    Preconditions:
+      - iso is in the format of "YYYY-MM-DD" (e.g. "2021-10-20")
+      - iso is a valid date (this function does not check for the validity of the input)
+
+    :param iso: Input date
+    :return: Datetime object
+    """
+    return datetime(int(iso[:4]), int(iso[5:7]), int(iso[8:10]))
+
+
+def daterange(start_date: str, end_date: str) -> Generator[tuple[str, datetime], None, None]:
+    """
+    Date range for looping
+
+    :param start_date: Start date in "YYYY-MM-DD" format
+    :param end_date: End date in "YYYY-MM-DD" format
+    :return: Generator for looping through the dates one day at a time.
+    """
+    start = parse_date_only(start_date)
+    for n in range(int((parse_date_only(end_date) - start).days)):
+        dt = start + timedelta(n)
+        yield dt.strftime('%Y-%m-%d'), dt
+
+
 def divide_zeros(numerator: list[float], denominator: list[float]) -> list[float]:
     """
     Divide two lists of floats, ignoring zeros (anything dividing by zero will produce zero)