64 lines
2.0 KiB
Python
64 lines
2.0 KiB
Python
def createDS(subject: str) -> None:
    """Run every dataset-preparation stage for ``subject``, in order.

    The ``job`` dispatch that once selected a single stage is commented out,
    so one call executes all stages back to back:

    1. ``caseClone(subject)`` and ``caseCollect(subject)``.
    2. ``caseFix(subject)``.
    3. ``caseBRDownload(subject)``.
    4. ``step1(subject)`` from ``bugReportParser``.
    5. The dataset stage: build (or load the cached) ``singleBR.pickle``
       table of ``bid`` / ``commit`` / ``project`` rows, add a ``repo``
       column looked up in ``subjects.csv``, and hand every
       ``[commit, repo]`` pair to ``prepareFiles`` through ``parallelRun``.

    Args:
        subject: Passed unchanged to every stage; also the filename prefix of
            ``<subject>bugReportsComplete.pickle`` under ``DATA_PATH``.

    Raises:
        IndexError: If a project in the commit table has no matching
            ``Subject`` row in ``subjects.csv``.
    """
    # NOTE(review): isfile, join, pd, logging, DATA_PATH, COMMIT_DFS,
    # load_zipped_pickle, save_zipped_pickle, parallelRunMerge and parallelRun
    # are not imported in this function, so they must exist as module globals.
    # If they used to arrive through a wildcard import, import them explicitly
    # at the top of the file -- TODO confirm.

    # Stage imports stay local so each project module is only loaded when this
    # function actually runs.  Wildcard imports are a SyntaxError inside a
    # function in Python 3, hence the explicit names.
    # NOTE(review): caseClone/caseCollect are presumed to be defined in
    # commitCollector (they followed its wildcard import) -- TODO confirm.

    # job == 'clone'
    from commitCollector import caseClone
    caseClone(subject)

    # job == 'collect'
    from commitCollector import caseCollect
    caseCollect(subject)

    # job == 'fix'
    from filterBugFixingCommits import caseFix
    caseFix(subject)

    # job == 'brDownload'
    from bugReportDownloader import caseBRDownload
    caseBRDownload(subject)

    # job == 'brParser'
    from bugReportParser import step1
    step1(subject)

    # job == 'dataset'
    single_br_path = join(DATA_PATH, 'singleBR.pickle')
    if not isfile(single_br_path):
        brs = load_zipped_pickle(join(DATA_PATH, subject + "bugReportsComplete.pickle"))

        def getCommit(x):
            """Return ``[bid, commit, project]`` for one bug report, or ``None``.

            ``None`` is returned unless exactly one row of the repository's
            commit table has ``fix == bid``.
            """
            bid, project = x
            # Deliberately self-contained (reads its own inputs instead of
            # capturing the caller's locals): how parallelRunMerge dispatches
            # workers is not visible here.
            subject_table = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
            repo = subject_table.query("Subject == '{0}'".format(project)).Repo.tolist()[0]
            repo_commits = load_zipped_pickle(join(DATA_PATH, COMMIT_DFS, repo + '.pickle'))
            matches = repo_commits.query("fix =='{0}'".format(bid)).commit.tolist()
            if len(matches) != 1:
                return None
            return [bid, matches[0], project]

        wl = brs[['bid', 'project']].values.tolist()
        dataL = parallelRunMerge(getCommit, wl)

        # Drop the reports that did not map to exactly one commit.  An explicit
        # identity test avoids relying on the truthiness of NotImplemented,
        # which is what filter(None.__ne__, ...) did.
        commits = pd.DataFrame(
            columns=['bid', 'commit', 'project'],
            data=[row for row in dataL if row is not None],
        )
        save_zipped_pickle(commits, single_br_path)
    else:
        commits = load_zipped_pickle(single_br_path)

    subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
    logging.info('done matching commits')
    commits['repo'] = commits.project.apply(
        lambda x: subjects.query("Subject == '{0}'".format(x)).Repo.tolist()[0])

    workList = commits[['commit', 'repo']].values.tolist()
    from dataset import prepareFiles
    parallelRun(prepareFiles, workList)