diff --git a/python/getIntroClass.py b/python/getIntroClass.py index d47358e..8d33911 100644 --- a/python/getIntroClass.py +++ b/python/getIntroClass.py @@ -59,6 +59,11 @@ def exportCore(bugName): if not os.path.exists(join(BUGDIR,bugName)): os.makedirs(join(BUGDIR,bugName,)) + cmd = 'docker cp dummy:/experiment/whitebox_test.sh ' + join(BUGDIR,bugName) + logging.info(cmd) + output, e = shellGitCheckout(cmd) + logging.info(output) + cmd = 'docker cp dummy:/experiment/'+ bugName.split(':')[1] + '.c ' + join(BUGDIR,bugName) logging.info(cmd) output, e = shellGitCheckout(cmd) diff --git a/python/main.py b/python/main.py index 531d9ec..a94fa00 100644 --- a/python/main.py +++ b/python/main.py @@ -33,7 +33,7 @@ if __name__ == '__main__': # subject = 'ALL' # rootType = 'if' - # job = 'validateIntro' + # job = 'validateCodeFlaws' print(job) @@ -41,6 +41,36 @@ if __name__ == '__main__': from javaDS import createDS createDS() + elif job == 'introRes': + with open(join(DATA_PATH,'introTestResults186'),'r') as f: + lines = f.readlines() + + success = [i for i in lines if i.strip().endswith('success')] + + def getPatterns(x): + regex = r"fix (.*) by (.*) times:1, success" + matches = re.finditer(regex, x, re.MULTILINE) + match = list(matches) + fixes = [] + if len(match) >= 1: + for m in match: + t = m.group(1), m.group(2) + fixes.append(t) + return fixes + + success = [getPatterns(i) for i in success] + patterns = pd.DataFrame(columns=['bug','pj','pattern']) + for idx,suc in enumerate(success): + bug,pattern =suc[0] + pj =bug.split(':')[1] + patterns.loc[idx] = [bug,pj,pattern.split(pj+'.c')[-1]] + patterns + summary = patterns.groupby(by=['pj'], as_index=False).agg(lambda x: x.tolist()) + summary['bCount'] = summary.bug.apply(lambda x:len(x)) + + + success + elif job =='dataset4c': from otherDatasets import core core() @@ -168,7 +198,9 @@ if __name__ == '__main__': elif job == 'validateMany': from patch_validate import patch_validate patch_validate() - + elif job == 'validateCodeFlaws': + from validateCodeFlaws import validate + validate() elif job == 'introclass': from getIntroClass import export export() diff --git a/python/test_patched_file.py b/python/test_patched_file.py index b1f5246..3164077 100755 --- a/python/test_patched_file.py +++ b/python/test_patched_file.py @@ -84,7 +84,7 @@ def testCore(t): # output += '@fail:' + str(pre_failure) + '@total:' + str(total) + ', ' # spfiles = listdir(join(DATASET, 'cocci')) - spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatternsL.pickle')) + spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle')) spfiles.sort_values(by='uFreq', inplace=True, ascending=False) spfiles = spfiles[['uid']] # print("patching... " + bugName) @@ -92,6 +92,8 @@ def testCore(t): if spfile == '.DS_Store': continue + # spfile ='if_8_102.cocci0-cocci_patches' + path = join(DATA_PATH,'introclass',bugName) patch = patchSourceFile(path,spfile,bugName) @@ -120,8 +122,15 @@ def testCore(t): # output += '@True@' output += '@True:' + str(idx) + ':' + patch.split('/')[-1] + '@' # print("Second_test:", end=' ') - post_test_outcomes = {} - post_failure_cases, post_failure, total, post_test_outcomes = test_all(bug, container, client) + + + #black + # post_test_outcomes = {} + # post_failure_cases, post_failure, total, post_test_outcomes = test_all(bug, container, client) + + validTests = readTestSuite(join(path, 'whitebox_test.sh')) + post_failure_cases, post_failure, total = test_all_white(bug, container, client,validTests) + # print("{}".format(post_failure), end=' ') output += str(post_failure) + ' ' if post_failure == 0: @@ -206,7 +215,7 @@ def patch_validate(): # t = 'introclass:syllables:99cbb4:000',6000 # testCore(t) # results = parallelRunMerge(testCore, bugList,max_workers=10) - results = parallelRunMerge(testCore, bugList) + results = parallelRunMerge(testCore, bugList , max_workers=10) print('\n'.join(results)) with open(join(DATA_PATH, 'introTestResults'), 'w', encoding='utf-8') as writeFile: @@ -219,6 +228,44 @@ def patch_validate(): from bugzoo import Patch, Client +def readTestSuite(testPath): + regex = r"([p|n0-9]+)\)" + with open(testPath,mode='r') as testFile: + test_str = testFile.read() + matches = re.finditer(regex, test_str, re.MULTILINE) + + testList = [] + for matchNum, match in enumerate(matches, start=1): + + for groupNum in range(0, len(match.groups())): + groupNum = groupNum + 1 + testList.append(match.group(groupNum)) + return testList + +def test_all_white(bug, container, client,validTests): + test_outcomes = {} # type: Dict[TestCase, TestOutcome] + failure_cases = [] + failure = 0 + total = len(validTests) + for test in validTests: + # if test.name in validTests: + cmd = './whitebox_test.sh {}'.format(test) + out = client.containers.exec(container=container, command=cmd, context='/experiment/') + + if 'passed' not in out.output or out.code != 0: + failure += 1 + failure_cases.append(test) + # test_outcomes.append(out.output) + break + + # test_outcomes[test] = client.containers.test(container, test) + # if test.expected_outcome != test_outcomes[test].passed: + # if test_outcomes[test].passed != True: + # failure.append(test.name) + # failure_cases.append(test.command) + # break + return failure_cases, failure, total + def test_all(bug, container, client): test_outcomes = {} # type: Dict[TestCase, TestOutcome] failure_cases = [] diff --git a/python/validateCodeFlaws.py b/python/validateCodeFlaws.py new file mode 100644 index 0000000..829a8c9 --- /dev/null +++ b/python/validateCodeFlaws.py @@ -0,0 +1,183 @@ +import bugzoo +from bugzoo import server, Container +import csv +import os +from common.commons import * +DATA_PATH = os.environ["DATA_PATH"] +ROOT_DIR = os.environ["ROOT_DIR"] +DATASET = os.environ["dataset"] +COCCI_PATH = join(os.environ["coccinelle"],'spatch') +def patchSourceFile(bugPath,spfile,bugName): + # print(bugPath) + # srcName = bugPath.split('/')[-1].split('-')[0] + srcPath = bugPath + patchName = bugName + + + + if(isfile(join(DATA_PATH,"codeflaws",bugName,'patched',patchName+spfile+'.c'))): + return join(DATA_PATH,"codeflaws",bugName,'patched',patchName+spfile+'.c') + + if not (isfile(join(DATA_PATH,"codeflaws",bugName,'patches',patchName+spfile+'.txt'))): + cmd = COCCI_PATH + ' --sp-file ' + join(DATASET, 'cocci', spfile) + ' ' + srcPath + ' --patch -o' + join( + DATA_PATH, "codeflaws", bugName, 'patches', patchName) + ' > ' + join(DATA_PATH, "codeflaws", bugName, + 'patches', + patchName + spfile + '.txt') + + output, e = shellGitCheckout(cmd) + # logging.info(output) + patchSize = os.path.getsize(join(DATA_PATH,"codeflaws",bugName,'patches',patchName+spfile+'.txt')) + if patchSize == 0 : + # os.remove(join(DATA_PATH,"introclass",bugName,'patches',patchName+spfile+'.txt')) + return None + else: + + cmd = 'patch -d '+'/'.join(srcPath.split('/')[:-1])+' -i '+join(DATA_PATH,"codeflaws",bugName,'patches',patchName+spfile+'.txt')+' -o '+join(DATA_PATH,"codeflaws",bugName,'patched',patchName+spfile+'.c') + o,e = shellGitCheckout(cmd) + return join(DATA_PATH, "codeflaws", bugName, 'patched', patchName + spfile + '.c') + + +def readTestSuite(testPath): + regex = r"([p|n0-9]+)\)" + with open(testPath,mode='r') as testFile: + test_str = testFile.read() + matches = re.finditer(regex, test_str, re.MULTILINE) + + testList = [] + for matchNum, match in enumerate(matches, start=1): + + for groupNum in range(0, len(match.groups())): + groupNum = groupNum + 1 + testList.append(match.group(groupNum)) + return testList + +def test_all(testerPath,validTests): + test_outcomes = {} # type: Dict[TestCase, TestOutcome] + failure_cases = [] + failure = 0 + total = len(validTests) + for test in validTests: + # if test.name in validTests: + cmd ='bash ' + testerPath + ' {}'.format(test) + out,e = shellGitCheckout(cmd) + # out = client.containers.exec(container=container, command=cmd, context='/experiment/') + + if 'Accepted' not in out or e != '': + failure += 1 + failure_cases.append(test) + # test_outcomes.append(out.output) + break + + # test_outcomes[test] = client.containers.test(container, test) + # if test.expected_outcome != test_outcomes[test].passed: + # if test_outcomes[test].passed != True: + # failure.append(test.name) + # failure_cases.append(test.command) + # break + return failure_cases, failure, total + +def validateCore(bugName): + + if not os.path.exists(join(DATA_PATH, 'codeflaws', bugName, 'patches')): + os.makedirs(join(DATA_PATH, 'codeflaws', bugName, 'patches')) + if not os.path.exists(join(join(DATA_PATH, 'codeflaws', bugName, 'patched'))): + os.makedirs(join(DATA_PATH, 'codeflaws', bugName, 'patched')) + + fix = 'failure' + output = '' + # print("bugName: {}".format(bugName), end=' ') + output += 'bugName:' + bugName + ', ' + + # spfiles = listdir(join(DATASET, 'cocci')) + spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle')) + + spfiles['uProjects'] = spfiles.uFiles.apply(lambda x: list(set([i.split('/{')[0].replace('(','') for i in x]))) + spfiles[~spfiles.uProjects.apply(lambda x: np.all([i == 'codeflaws' for i in x]))] + spfiles = spfiles[['uid']] + + + cmd = 'make -C ' + join(DATA_PATH, 'codeflaws', bugName) + ' clean' + o, e = shellGitCheckout(cmd) + # print("patching... " + bugName) + + contestid, problem, _, buggyId, acceptedId = bugName.split('-') + + + # for idx, spfile in enumerate(spfiles): + for idx, spfile in enumerate(spfiles.uid.values.tolist()): + if spfile == '.DS_Store': + continue + + # originalBugs = get_filepaths(join(DATA_PATH, 'manybugs', bugName, 'diffs'), preId) + buggyFileName = contestid+'-'+problem+'-'+buggyId+'.c' + path = join(DATA_PATH,'codeflaws',bugName,buggyFileName) + patch = patchSourceFile(path, spfile, bugName) + + times = 0 + if patch is None: + continue + + shutil.copy2(patch,join(DATA_PATH, 'codeflaws', bugName)) + + cmd = 'make -C ' + join(DATA_PATH, 'codeflaws', bugName) + ' FILENAME=' + bugName + spfile + o, e = shellGitCheckout(cmd) + + + # patch_result = output + # TODO logic + # if patch_result.successful: + if isfile(join(DATA_PATH,'codeflaws',bugName,bugName+spfile)): + + output += '@True:' + str(idx) + ':' + patch.split('/')[-1] + '@' + + validTests = readTestSuite(join(DATA_PATH, 'codeflaws', bugName, 'test-valid.sh')) + post_failure_cases, post_failure, total = test_all(join(DATA_PATH, 'codeflaws', bugName, 'test-valid.sh'), validTests) + + # print("{}".format(post_failure), end=' ') + output += str(post_failure) + ' ' + if post_failure == 0: + times += 1 + fix = 'success' + # print("fix {} by {}".format(bugName, patch_name)) + output += 'fix {} by {} '.format(bugName, patch) + break + # print("@fail:{}@total:{}".format(post_failure, total),end=' ') + # print("@post_failure_cases:{}".format(post_failure_cases)) + + # cmd = 'docker rm -fv {}'.format(container.id) + # out, e = shellGitCheckout(cmd) + + output += 'times:{}, '.format(times) + fix + print(output) + return output + + # failure_cases, failure, total, test_outcomes = test_all(bug, container, client) + # if failure == 0: + # fix = 'success' + # # print("fix {} by {}".format(bugName, patch_name)) + # output += 'fix {} by {} '.format(bugName, patch) + # break + # else: + # output += ' {}'.format(failure_cases) + # else: + # output += '@False:' + str(idx) + ':' + patch.split('/')[-1] + '@' + # output += 'times:{}, '.format(times) + fix + # + # return output + +def validate(): + bugs2test= listdir(join(DATA_PATH, 'codeflaws')) + + bugList = [] + for b in bugs2test: + if b == '.DS_Store' or b == 'README.md' or b == 'codeflaws-defect-detail-info.txt': + continue + bugList.append(b) + + # results = parallelRunMerge(testCore, bugList,max_workers=10) + results = parallelRunMerge(validateCore, bugList) + print('\n'.join(results)) + with open(join(DATA_PATH, 'codeFlawsResults'), 'w', + encoding='utf-8') as writeFile: + writeFile.write('\n'.join(results)) + validateCore(b) \ No newline at end of file