53 lines
1.7 KiB
Python
53 lines
1.7 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from datetime import datetime, timezone, date
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pandas
|
|
from matplotlib import pyplot as plt
|
|
from pandas import DataFrame
|
|
|
|
if __name__ == '__main__':
|
|
path = Path('../../data.relative/0/commitsDF')
|
|
project_list=[p.replace('-fix.pickle.gz','') for p in os.listdir(path) if
|
|
p.endswith('pickle.gz')]
|
|
dates_by_repo: list[list[datetime]] = \
|
|
[list(pandas.read_pickle(path / str(p)).commitDate) for p in os.listdir(path) if
|
|
p.endswith('pickle.gz')]
|
|
dates = np.hstack(dates_by_repo)
|
|
|
|
plt.hist(dates, bins=50)
|
|
plt.xlabel('Date')
|
|
plt.ylabel('Total Commits')
|
|
plt.xlim([date(2001, 1, 1), date(2022, 6, 1)])
|
|
plt.savefig("fm-dates.png")
|
|
plt.show()
|
|
|
|
|
|
first_dates = [cs[-1] for cs in dates_by_repo]
|
|
plt.hist(first_dates, bins=20)
|
|
plt.xlabel('First Commit Date')
|
|
plt.xlim([date(2001, 1, 1), date(2022, 6, 1)])
|
|
#plt.show()
|
|
|
|
last_dates = [cs[0] for cs in dates_by_repo]
|
|
plt.hist(last_dates, bins=20)
|
|
plt.xlabel('Last Commit Date')
|
|
plt.xlim([date(2001, 1, 1), date(2022, 6, 1)])
|
|
#plt.show()
|
|
|
|
mean_dates = [datetime.fromtimestamp(
|
|
float(np.mean([c.replace(tzinfo=timezone.utc).timestamp() for c in cs]))) for cs in
|
|
dates_by_repo]
|
|
plt.hist(mean_dates, bins=20)
|
|
plt.xlabel('Mean Commit Date')
|
|
plt.xlim([date(2001, 1, 1), date(2022, 6, 1)])
|
|
#plt.show()
|
|
csv = []
|
|
for i in range(len(first_dates)):
|
|
csv.append((project_list[i],first_dates[i], last_dates[i]))
|
|
df = DataFrame(csv, columns=('project','first_dates','last commit dates'))
|
|
df.to_csv('commits-dates.csv')
|