Files
Azalea (on HyDEV-Daisy) 428d5afc58 [U] Update
2022-10-01 13:34:56 -04:00

61 lines
1.9 KiB
Python

import os
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
from dateutil.relativedelta import relativedelta
from pandas import DataFrame
def calculate_stats(date):
    """Summarise the pattern files of one snapshot and save a histogram.

    The snapshot directory is ``../../data.absolute/<YYYY-MM-DD>/patterns``
    (relative to the current working directory).  Every entry name in that
    directory is split on ``#`` and the second piece is parsed as an
    integer; the statistics are computed over those integers.
    NOTE(review): the meaning of that integer is not visible in this file.

    A histogram of the values is written to ``figs/<YYYY-MM-DD>.png``; the
    ``figs`` directory is created when missing.

    Args:
        date: Object with a ``strftime`` method (e.g. ``datetime``)
            identifying the snapshot.

    Returns:
        ``[date_string, count, mean, median, max]`` where ``count`` is the
        number of entries and the three statistics are scalars (``nan``
        when the directory is empty).

    Raises:
        FileNotFoundError: if the snapshot's ``patterns`` directory is absent.
        IndexError: if an entry name contains no ``#``.
        ValueError: if the piece after the first ``#`` is not an integer.
    """
    string = date.strftime('%Y-%m-%d')
    files = os.listdir(f"../../data.absolute/{string}/patterns")
    array = [int(file.split('#')[1]) for file in files]

    # A Series (rather than a one-column DataFrame) makes mean/median/max
    # return plain scalars, so each CSV cell holds a number instead of the
    # textual dump of a Series object.
    values = pd.Series(array, dtype='int64')

    # Draw on a dedicated figure and close it afterwards; plotting on the
    # implicit global figure would pile the histograms of successive calls
    # on top of each other.
    fig, ax = plt.subplots()
    ax.hist(array)
    os.makedirs('figs', exist_ok=True)
    fig.savefig(f'figs/{string}.png')
    plt.close(fig)

    return [string, len(values), values.mean(), values.median(), values.max()]
if __name__ == '__main__':
    # Walk the snapshot directories under ../../data.absolute in six-month
    # steps starting at 2001-06-01, collect one statistics row per snapshot,
    # and write them all to Patch-stats-fixminer.csv.
    data_path = Path('../../data.absolute')
    interval = relativedelta(months=6)
    current_date = datetime.strptime("2001-06-01", '%Y-%m-%d')

    # The first snapshot is processed unconditionally, so a missing start
    # directory fails loudly instead of silently producing an empty report.
    rows = [calculate_stats(current_date)]

    while True:
        # Advance first, then record: every existing snapshot contributes
        # exactly one row (no duplicate for the first one, and the last
        # existing snapshot is not dropped).
        current_date += interval
        if not os.path.isdir(data_path / current_date.strftime('%Y-%m-%d')):
            break
        rows.append(calculate_stats(current_date))

    df = DataFrame(rows, columns=('Time', 'Number of Patches',
                                  'Mean', 'Median', 'Max'))
    df.to_csv('Patch-stats-fixminer.csv')