[+] Implement @include-cut
This commit is contained in:
@@ -234,7 +234,7 @@ def report_histograms(sample: Sample) -> None:
|
||||
x = [f.data for f in sample.frequencies]
|
||||
title = f'COVID-related posting frequency for {sample.name}'
|
||||
report_histogram(x, f'freq/{sample.name}-hist-outliers.png', title, False, 100)
|
||||
x = [p for p in x if p > 0.0005]
|
||||
x = [p for p in x if p > 0.001]
|
||||
report_histogram(x, f'freq/{sample.name}-hist.png', title, True)
|
||||
|
||||
x = [f.data for f in sample.popularity_ratios]
|
||||
@@ -259,7 +259,7 @@ def report_stats(samples: list[Sample]) -> None:
|
||||
Reporter('pop/stats.md').table(table, [s.name for s in samples], True)
|
||||
|
||||
xs = [[d.data for d in s.frequencies if d.data > 0.0005] for s in samples]
|
||||
table = tabulate_stats([get_statistics(remove_outliers(x)) for x in xs])
|
||||
table = tabulate_stats([get_statistics(x) for x in xs], percent=True)
|
||||
Reporter('freq/stats.md').table(table, [s.name for s in samples], True)
|
||||
|
||||
|
||||
@@ -289,7 +289,7 @@ def report_all() -> None:
|
||||
debug('Creating reports...')
|
||||
|
||||
report_ignored(samples)
|
||||
report_pop_stats(samples)
|
||||
report_stats(samples)
|
||||
for s in samples:
|
||||
report_top_20_tables(s)
|
||||
report_histograms(s)
|
||||
|
||||
+10
-3
@@ -24,9 +24,16 @@ def generate_report() -> str:
|
||||
for i in range(len(md)):
|
||||
line = md[i]
|
||||
if line.startswith('@include'):
|
||||
line = line[line.index('`') + 1:]
|
||||
line = line[:line.index('`')]
|
||||
md[i] = read(REPORT_DIR + line)
|
||||
path = line[line.index('`') + 1:]
|
||||
path = path[:path.index('`')]
|
||||
md[i] = read(REPORT_DIR + path)
|
||||
|
||||
if line.startswith('@include-cut'):
|
||||
args = [int(i) for i in line.split()[2:]]
|
||||
if len(args) == 1:
|
||||
md[i] = '\n'.join(md[i].split('\n')[args[0]:])
|
||||
if len(args) == 2:
|
||||
md[i] = '\n'.join(md[i].split('\n')[args[0]:args[1]])
|
||||
|
||||
return '\n'.join(md)
|
||||
|
||||
|
||||
@@ -24,6 +24,10 @@ We might graph the frequencies on a histogram to gain more insight: (You can cli
|
||||
<div><img src="/freq/eng-news-hist-outliers.png" alt="hist"></div>
|
||||
</div>
|
||||
|
||||
However, as you can see, the graphs are not very helpful because the majority of the sample has a posting frequency below 0.1%, while there are many outliers who post very frequently, some as high as 40%. For example, if we sort the samples by their frequency, we find a few outliers who post more than 20% even in the random sample:
|
||||
|
||||
@include-cut `/freq/500-rand-top-20.md` 0 10
|
||||
|
||||
## COVID-19 Popularity Ratios
|
||||
|
||||
To prevent division by zero, we ignored people who didn't post about COVID or didn't post at all.
|
||||
|
||||
+8
-4
@@ -187,16 +187,20 @@ def get_statistics(points: list[float]) -> Stats:
|
||||
return Stats(statistics.mean(points), statistics.median(points), statistics.stdev(points))
|
||||
|
||||
|
||||
def tabulate_stats(stats: list[Stats]) -> list[list[str]]:
|
||||
def tabulate_stats(stats: list[Stats], percent: bool = False) -> list[list[str]]:
|
||||
"""
|
||||
Create a table structure from statistics for tabulate
|
||||
|
||||
:param stats: Statistics
|
||||
:param percent: Whether the numbers are percentages
|
||||
:return: Table for tabulate
|
||||
"""
|
||||
return [['Mean'] + [f'{s.mean:.2f}' for s in stats],
|
||||
['Median'] + [f'{s.median:.2f}' for s in stats],
|
||||
['StdDev'] + [f'{s.stddev:.2f}' for s in stats]]
|
||||
def num(n: float) -> str:
|
||||
return f'{n:.2f}' if not percent else f'{n * 100:.1f}%'
|
||||
|
||||
return [['Mean'] + [num(s.mean) for s in stats],
|
||||
['Median'] + [num(s.median) for s in stats],
|
||||
['StdDev'] + [num(s.stddev) for s in stats]]
|
||||
|
||||
|
||||
def parse_date(iso: str) -> datetime:
|
||||
|
||||
Reference in New Issue
Block a user