From a031e9b53e10f0ad277c16ade1cfbc7dc820f3d2 Mon Sep 17 00:00:00 2001 From: Hykilpikonna Date: Thu, 25 Nov 2021 16:55:28 -0500 Subject: [PATCH] [O] B64 encode markdown --- src/report/report.py | 7 ++++++- src/report/report_document.md | 12 +++++++++--- src/report/report_page.html | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/report/report.py b/src/report/report.py index 271e6e0..20ac94d 100644 --- a/src/report/report.py +++ b/src/report/report.py @@ -1,3 +1,4 @@ +import base64 import os.path import webbrowser from pathlib import Path @@ -67,8 +68,12 @@ def serve_report() -> None: :return: HTML report """ + # Generate markdown report and b64 encode it (this is to prevent interpretation by JS code) + md_b64 = base64.b64encode(generate_report().encode('utf-8')).decode('utf-8') + # Inject into HTML html = read(str(src_dir.joinpath('report_page.html'))) \ - .replace('{{markdown}}', generate_report().replace('`', '\\`')) + .replace('{{markdown}}', md_b64) + # Return return html @app.route('/') diff --git a/src/report/report_document.md b/src/report/report_document.md index 96b83e1..b07eb91 100644 --- a/src/report/report_document.md +++ b/src/report/report_document.md @@ -18,7 +18,7 @@ First, we analyzed how frequently the users in these three datasets are posing a The `eng-news` sample has the lowest number of users who didn't have COVID-related posts, the `500-rand` sample has the highest, while `500-pop` sits in between. This large difference between `eng-news` and the rest can be explained by the news channels' obligation to report news, which includes news about new outbreaks, progress of vaccination, new cross-border policies, etc. Also, we observed that `500-pop` has much more users who posted COVID-related content than `500-rand`, while they have similar amounts of users posting less than 1%. 
This finding might be explained by how influential people have more incentive to express their support toward slowing the spread of the pandemic than regular users, which doesn't require frequent posting like news channels. -We might graph the frequencies on a histogram to gain more insight: (You can click on the images to enlarge them, and hold down E to view full screen). +We calculated frequency by dividing the number of COVID-related tweets by the total number of tweets. We might graph the frequencies on a histogram to gain more insight: (You can click on the images to enlarge them, and hold down E to view full screen).
hist
@@ -48,7 +48,7 @@ Since there are many outliers, medians and IQR will more accurately represent th Then, we analyzed the popularity ratio of COVID-related posts for our three samples. The popularity of a post defines how much other people are interested in the post, measured by the total number of user interactions (likes and comments) on that post. From that data, the relative popularity ratio for COVID-related posts calculates how popular are COVID-related posts compared to all other posts, calculated by the equation, which is a ratio of the average popularity of both: -$$\\left(\\frac{\\sum\\text{Popularity of COVID-posts}}{\\text{Number of COVID-posts}}\\right) / \\left(\\frac{\\sum \\text{Popularity of all posts}}{\\text{Total number of posts}}\\right)$$ +$$\left(\frac{\sum\text{Popularity of COVID-posts}}{\text{Number of COVID-posts}}\right) / \left(\frac{\sum \text{Popularity of all posts}}{\text{Total number of posts}}\right)$$ There are three divisions in this equation, so there are three possible places where it might divide by zero. So, to prevent division by zero, we ignored people who didn't post about COVID-19 or didn't post anything at all, and we also ignored people who have literally 0 popularity on any of their posts. 
In our data, we ignored this amount of people for each sample: @@ -67,7 +67,13 @@ Graphing the results, we find that the ***TODO*** # Change Analysis -After we answered how frequently people posted about COVID-19 and how interested are people to view these posts, we analyze our data over the posting dates to answer the second part of our research question: **How does posting frequency and people's interests in COVID-19 posts changes from the beginning of the pandemic to now?** +After we answered how frequently people posted about COVID-19 and how interested are people to view these posts, we analyze our data over the posting dates to answer the second part of our research question: **How does posting frequency and people's interests in COVID-19 posts changes from the beginning of the pandemic to now?** + +## Method + +In this analysis, we defined the start of COVID-19 as `2020-01-01` and ignored all posts prior to this date. Then, we split the dataset into 7-day intervals, one for every day since `2020-01-01`, and calculated the frequency and popularity of each interval, which gave us a list of numbers `x` where: + + **_TODO_** diff --git a/src/report/report_page.html b/src/report/report_page.html index 63969fa..433c7fc 100644 --- a/src/report/report_page.html +++ b/src/report/report_page.html @@ -21,7 +21,7 @@ markdown = ` {{markdown}} ` document.getElementById('content').innerHTML = - marked.parse(markdown); + marked.parse(atob(markdown)); // Make images clickable // Improved from: https://stackoverflow.com/a/50430187/7346633