diff --git a/python/common/commons.py b/python/common/commons.py index a081168..68056ba 100644 --- a/python/common/commons.py +++ b/python/common/commons.py @@ -569,7 +569,7 @@ def unique_everseen(iterable, key=None): seen_add(k) yield element -def plotBox(yList,labels, fn, rotate=False,limit=True): +def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True): import matplotlib matplotlib.use("TkAgg") import matplotlib.pyplot as plt @@ -583,7 +583,7 @@ def plotBox(yList,labels, fn, rotate=False,limit=True): flierprops = dict(markeredgecolor ='black',markerfacecolor= 'black',marker='.',markersize=2) - box = ax1.boxplot(yList, 0, flierprops=flierprops,widths=0.5, showmeans=False, vert=True,meanprops=meanpointsprops) + box = ax1.boxplot(yList, 0, flierprops=flierprops,widths=0.5, showmeans=True, vert=False,meanprops=meanpointsprops) for line in box['medians']: x,y = line.get_xydata()[1] line.set(linewidth=3) @@ -598,15 +598,17 @@ def plotBox(yList,labels, fn, rotate=False,limit=True): else: # ax1.set_xticklabels(labels) # ax1.set_xticklabels(None) - ax1.get_xaxis().set_ticklabels([]) + ax1.set_yticklabels(labels, rotation=45, ha='right') + ax1.get_yaxis().set_ticklabels(labels) # sns.boxplot(yList, ax=ax1) if limit: - ax1.set_ylim(top=1.1,bottom=0) + ax1.set_xlim(left=0) ax1.yaxis.set_ticks([0.0,1.0]) else: - ax1.set_yscale('log') - ax1.set_xlabel('Cluster Member Size') - ax1.set_ylabel('Folds') + # ax1.set_yscale('log') + ax1.set_xlim(left=0) + ax1.set_xlabel(xAxisLabel) + ax1.set_ylabel(yAxisLabel) plt.ion() plt.subplots_adjust(wspace=0, hspace=0) diff --git a/python/main.py b/python/main.py index a94fa00..8c1516d 100644 --- a/python/main.py +++ b/python/main.py @@ -33,7 +33,7 @@ if __name__ == '__main__': # subject = 'ALL' # rootType = 'if' - # job = 'validateCodeFlaws' + job = 'validateIntro' print(job) @@ -42,29 +42,55 @@ if __name__ == '__main__': createDS() elif job == 'introRes': - with open(join(DATA_PATH,'introTestResults186'),'r') as f: - lines = f.readlines() - success = [i for i in lines if i.strip().endswith('success')] + def readResultFile(resFile): + with open(join(DATA_PATH,resFile),'r') as f: + lines = f.readlines() - def getPatterns(x): - regex = r"fix (.*) by (.*) times:1, success" - matches = re.finditer(regex, x, re.MULTILINE) - match = list(matches) - fixes = [] - if len(match) >= 1: - for m in match: - t = m.group(1), m.group(2) - fixes.append(t) - return fixes + success = [i for i in lines if i.strip().endswith('success')] + patchCandidates = {} + def getPatterns(x): + regex = r"fix (.*) by (.*) times:1, success" + matches = re.finditer(regex, x, re.MULTILINE) + match = list(matches) + fixes = [] + if len(match) >= 1: + patchCandidates[x.split(',')[0]] =re.findall(r"@True[a-zA-z\:0-9\.\-\_]+@", x, re.MULTILINE) + for m in match: + t = m.group(1), m.group(2) + fixes.append(t) + return fixes - success = [getPatterns(i) for i in success] + success = [getPatterns(i) for i in success] + aDf = pd.DataFrame(columns=['bid','candidates']) + idx = 0 + for k,v in patchCandidates.items(): + aDf.loc[idx] = [k,[v]] + idx+=1 + aDf['noTested'] = aDf.candidates.apply(lambda x: len(x[0])) + aDf['pos'] = aDf.candidates.apply(lambda x: x[0][-1].split(':')[1]) + aDf['pos'] = aDf['pos'].apply(lambda x: int(x)) + return aDf,success + + aDf,success =readResultFile('introTestResults186') + bDf,success =readResultFile('introTestResultsWhite') + + plotBox([aDf['noTested'].values.tolist(),bDf['noTested'].values.tolist()], ['blackbox','whitebox'] ,'test.pdf',yAxisLabel='',xAxisLabel='Position of the first plausible patch', limit=False) patterns = pd.DataFrame(columns=['bug','pj','pattern']) for idx,suc in enumerate(success): bug,pattern =suc[0] pj =bug.split(':')[1] patterns.loc[idx] = [bug,pj,pattern.split(pj+'.c')[-1]] - patterns + fixPatterns = patterns.groupby(by=['pattern'], as_index=False).agg(lambda x: x.tolist()) + fixPatterns['count'] = fixPatterns.bug.apply(lambda x: len(x)) + fixPatterns.sort_values(by=['count'], ascending=False, inplace=True) + fixPatterns[['pattern','count']].to_latex(index=False) + + for i in fixPatterns.pattern.values.tolist(): + i = re.findall(r"((.*)\.c$)", i, re.MULTILINE)[0][1] + + shutil.copy2(join(DATA_PATH, 'patches', 'cocci', i), join(DATA_PATH, 'white', i)) + summary = patterns.groupby(by=['pj'], as_index=False).agg(lambda x: x.tolist()) summary['bCount'] = summary.bug.apply(lambda x:len(x)) @@ -74,6 +100,10 @@ if __name__ == '__main__': elif job =='dataset4c': from otherDatasets import core core() + + elif job =='datasetIntro': + from introDS import core + core() elif job =='richedit': dbDir = join(DATA_PATH, 'redis') stopDB(dbDir, REDIS_PORT) @@ -102,7 +132,7 @@ if __name__ == '__main__': elif job == 'cluster': from abstractPatch import cluster - dbDir = join(DATA_PATH, 'redis') + dbDir = join(ROOT_DIR,'data','redis') startDB(dbDir, REDIS_PORT, PROJECT_TYPE) cluster(join(DATA_PATH,'actions'),join(DATA_PATH, 'pairs'),'actions')