merge python scripts
This commit is contained in:
@@ -0,0 +1,64 @@
|
||||
# The wildcard form of ``import`` is only legal at module level, so the
# ``from commitCollector import *`` that used to sit inside createDS() lives
# here instead (inside the function it is a SyntaxError).
# NOTE(review): nothing else in this file imports isfile, join, pd, logging,
# DATA_PATH, COMMIT_DFS, load_zipped_pickle, save_zipped_pickle, parallelRun
# or parallelRunMerge, so they are presumably re-exported by commitCollector
# -- confirm, and prefer explicit imports once their real origin is known.
from commitCollector import *  # noqa: F401,F403


def createDS(subject):
    """Run every dataset-preparation stage for ``subject``, in order.

    The stages used to be selected one at a time through a ``job`` switch;
    they now run back to back: clone, collect, fix, brDownload, brParser and
    finally dataset.

    The dataset stage matches each bug report to the single commit whose
    ``fix`` column equals the report's ``bid``, caches the matches in
    ``singleBR.pickle`` under ``DATA_PATH`` (reused on later runs), and then
    hands the resulting ``[commit, repo]`` pairs to ``prepareFiles`` through
    ``parallelRun``.

    Args:
        subject: Passed unchanged to every stage function; also used as the
            prefix of the ``<subject>bugReportsComplete.pickle`` file name.

    Returns:
        None.
    """
    # job == 'clone'
    caseClone(subject)

    # job == 'collect'
    caseCollect(subject)

    # job == 'fix'
    from filterBugFixingCommits import caseFix

    caseFix(subject)

    # job == 'brDownload'
    from bugReportDownloader import caseBRDownload

    caseBRDownload(subject)

    # job == 'brParser'
    from bugReportParser import step1

    step1(subject)

    # job == 'dataset'
    # Read the subject -> repo table once; it is needed both while matching
    # commits and when the 'repo' column is attached afterwards.
    subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))

    def repoFor(project):
        """Return the Repo value of the first subjects.csv row for ``project``."""
        return subjects.query("Subject == '{0}'".format(project)).Repo.tolist()[0]

    singleBRPath = join(DATA_PATH, 'singleBR.pickle')

    if not isfile(singleBRPath):
        brs = load_zipped_pickle(join(DATA_PATH, subject + "bugReportsComplete.pickle"))

        def getCommit(x):
            """Return ``[bid, commit, project]`` for an unambiguous match.

            ``x`` is a ``[bid, project]`` pair. ``None`` is returned when zero
            or several commits carry that bug id, so the pair is dropped.
            """
            bid, project = x
            repo = repoFor(project)
            commits = load_zipped_pickle(join(DATA_PATH, COMMIT_DFS, repo + '.pickle'))
            correspondingCommit = commits.query("fix =='{0}'".format(bid)).commit.tolist()
            if len(correspondingCommit) == 1:
                return [bid, correspondingCommit[0], project]
            return None

        wl = brs[['bid', 'project']].values.tolist()
        dataL = parallelRunMerge(getCommit, wl)

        # Explicit identity test: filter(None.__ne__, ...) relied on the
        # truthiness of NotImplemented, which newer Python versions reject.
        matched = [row for row in dataL if row is not None]
        commits = pd.DataFrame(columns=['bid', 'commit', 'project'], data=matched)

        save_zipped_pickle(commits, singleBRPath)
    else:
        commits = load_zipped_pickle(singleBRPath)

    # NOTE(review): the flattened original does not show whether the steps
    # below belonged to the ``else`` branch only; they are run on both paths
    # here -- confirm against the intended behaviour.
    logging.info('done matching commits')
    commits['repo'] = commits.project.apply(repoFor)

    workList = commits[['commit', 'repo']].values.tolist()
    from dataset import prepareFiles

    parallelRun(prepareFiles, workList)
|
||||
Reference in New Issue
Block a user