codeflaws and whitebox

2020-08-23 14:10:43 +02:00
parent cb293de686
commit e4a45bc0ec
4 changed files with 273 additions and 6 deletions
@@ -59,6 +59,11 @@ def exportCore(bugName):
    if not os.path.exists(join(BUGDIR,bugName)):
        os.makedirs(join(BUGDIR,bugName,))

+    cmd = 'docker cp dummy:/experiment/whitebox_test.sh ' + join(BUGDIR,bugName)
+    logging.info(cmd)
+    output, e = shellGitCheckout(cmd)
+    logging.info(output)
+
    cmd = 'docker cp dummy:/experiment/'+ bugName.split(':')[1] + '.c ' + join(BUGDIR,bugName)
    logging.info(cmd)
    output, e = shellGitCheckout(cmd)
@@ -33,7 +33,7 @@ if __name__ == '__main__':

        # subject = 'ALL'
        # rootType = 'if'
-        # job = 'validateIntro'
+        # job = 'validateCodeFlaws'
        print(job)


@@ -41,6 +41,36 @@ if __name__ == '__main__':
            from javaDS import createDS
            createDS()

+        elif job == 'introRes':
+            with open(join(DATA_PATH,'introTestResults186'),'r') as f:
+                lines = f.readlines()
+
+            success = [i for i in lines if i.strip().endswith('success')]
+
+            def getPatterns(x):
+                regex = r"fix (.*) by (.*) times:1, success"
+                matches = re.finditer(regex, x, re.MULTILINE)
+                match = list(matches)
+                fixes = []
+                if len(match) >= 1:
+                    for m in match:
+                        t = m.group(1), m.group(2)
+                        fixes.append(t)
+                return fixes
+
+            success = [getPatterns(i) for i in success]
+            patterns  = pd.DataFrame(columns=['bug','pj','pattern'])
+            for idx,suc in enumerate(success):
+                bug,pattern =suc[0]
+                pj =bug.split(':')[1]
+                patterns.loc[idx] = [bug,pj,pattern.split(pj+'.c')[-1]]
+            patterns
+            summary = patterns.groupby(by=['pj'], as_index=False).agg(lambda x: x.tolist())
+            summary['bCount'] = summary.bug.apply(lambda x:len(x))
+
+
+            success
+
        elif job =='dataset4c':
            from otherDatasets import core
            core()
@@ -168,7 +198,9 @@ if __name__ == '__main__':
        elif job == 'validateMany':
            from patch_validate import patch_validate
            patch_validate()
-
+        elif job == 'validateCodeFlaws':
+            from validateCodeFlaws import validate
+            validate()
        elif job == 'introclass':
            from getIntroClass import export
            export()
@@ -84,7 +84,7 @@ def testCore(t):
                # output += '@fail:' + str(pre_failure) + '@total:' + str(total) + ', '

                # spfiles = listdir(join(DATASET, 'cocci'))
-                spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatternsL.pickle'))
+                spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle'))
                spfiles.sort_values(by='uFreq', inplace=True, ascending=False)
                spfiles = spfiles[['uid']]
                # print("patching... " + bugName)
@@ -92,6 +92,8 @@ def testCore(t):
                    if spfile == '.DS_Store':
                        continue

+                    # spfile ='if_8_102.cocci0-cocci_patches'
+
                    path = join(DATA_PATH,'introclass',bugName)
                    patch = patchSourceFile(path,spfile,bugName)

@@ -120,8 +122,15 @@ def testCore(t):
                    # output += '@True@'
                    output += '@True:' + str(idx) + ':' + patch.split('/')[-1] + '@'
                    # print("Second_test:", end=' ')
-                    post_test_outcomes = {}
-                    post_failure_cases, post_failure, total, post_test_outcomes = test_all(bug, container, client)
+
+
+                    #black
+                    # post_test_outcomes = {}
+                    # post_failure_cases, post_failure, total, post_test_outcomes = test_all(bug, container, client)
+
+                    validTests = readTestSuite(join(path, 'whitebox_test.sh'))
+                    post_failure_cases, post_failure, total = test_all_white(bug, container, client,validTests)
+
                    # print("{}".format(post_failure), end=' ')
                    output += str(post_failure) + ' '
                    if post_failure == 0:
@@ -206,7 +215,7 @@ def patch_validate():
    # t = 'introclass:syllables:99cbb4:000',6000
    # testCore(t)
    # results = parallelRunMerge(testCore, bugList,max_workers=10)
-    results = parallelRunMerge(testCore, bugList)
+    results = parallelRunMerge(testCore, bugList , max_workers=10)
    print('\n'.join(results))
    with open(join(DATA_PATH, 'introTestResults'), 'w',
              encoding='utf-8') as writeFile:
@@ -219,6 +228,44 @@ def patch_validate():

 from bugzoo import Patch, Client

def readTestSuite(testPath):
    """Return the test labels declared in a shell test driver.

    The script at ``testPath`` is read as text, and every run of the
    characters ``p``, ``n`` and the digits that is immediately followed by
    ``)`` is reported, in the order it appears in the file (for example the
    ``p1)`` / ``n3)`` arms of a ``case`` statement).

    :param testPath: path of the shell script to scan.
    :return: list of label strings; empty when nothing matches.
    """
    with open(testPath, mode='r') as testFile:
        test_str = testFile.read()

    # Inside [...] a '|' is a literal pipe, not an alternation, so it is left
    # out of the class: otherwise shell text such as "||)" would be returned
    # as a test name.
    return re.findall(r"([pn0-9]+)\)", test_str)
+
def test_all_white(bug, container,  client,validTests):
    """Run the white-box tests inside a container, stopping at the first failure.

    Each label in ``validTests`` is passed to ``./whitebox_test.sh`` through
    ``client.containers.exec`` with ``/experiment/`` as context. A test counts
    as failed when its output does not contain ``passed`` or its exit code is
    non-zero.

    :param bug: not used by this function.
    :param container: container handle forwarded to ``client.containers.exec``.
    :param client: object exposing ``containers.exec``.
    :param validTests: labels to run, in order.
    :return: ``(failure_cases, failure, total)`` - the failing label (at most
        one, because the loop stops there), the failure count, and
        ``len(validTests)``.
    """
    total = len(validTests)
    failure_cases = []
    failure = 0

    for label in validTests:
        result = client.containers.exec(container=container,
                                        command='./whitebox_test.sh {}'.format(label),
                                        context='/experiment/')
        succeeded = 'passed' in result.output and result.code == 0
        if succeeded:
            continue
        # First failing test ends the run.
        failure_cases.append(label)
        failure += 1
        break

    return failure_cases, failure, total
+
 def test_all(bug, container,  client):
    test_outcomes = {}  # type: Dict[TestCase, TestOutcome]
    failure_cases = []
@@ -0,0 +1,183 @@
+import bugzoo
+from bugzoo import server, Container
+import csv
+import os
+from common.commons import *
+DATA_PATH = os.environ["DATA_PATH"]
+ROOT_DIR = os.environ["ROOT_DIR"]
+DATASET = os.environ["dataset"]
+COCCI_PATH = join(os.environ["coccinelle"],'spatch')
def patchSourceFile(bugPath,spfile,bugName):
    """Apply one semantic patch to a codeflaws source file.

    Files are kept under ``DATA_PATH/codeflaws/<bugName>``: the diff produced
    by the spatch binary goes to ``patches/<bugName><spfile>.txt`` and the
    patched source to ``patched/<bugName><spfile>.c``. Both are reused when
    they already exist.

    :param bugPath: path of the C source file to patch.
    :param spfile: name of the semantic patch under ``DATASET/cocci``.
    :param bugName: codeflaws bug directory name, also used as file prefix.
    :return: path of the patched ``.c`` file, or ``None`` when the diff file
        is empty.
    """
    patched_path = join(DATA_PATH, "codeflaws", bugName, 'patched', bugName + spfile + '.c')
    diff_path = join(DATA_PATH, "codeflaws", bugName, 'patches', bugName + spfile + '.txt')

    # A patched source from an earlier run short-circuits everything else.
    if isfile(patched_path):
        return patched_path

    if not isfile(diff_path):
        # The command's stdout is redirected into the diff file.
        spatch_parts = [
            COCCI_PATH,
            ' --sp-file ', join(DATASET, 'cocci', spfile),
            ' ', bugPath,
            ' --patch -o', join(DATA_PATH, "codeflaws", bugName, 'patches', bugName),
            ' > ', diff_path,
        ]
        _out, _err = shellGitCheckout(''.join(spatch_parts))

    # An empty diff file is reported to the caller as "no patch".
    if os.path.getsize(diff_path) == 0:
        return None

    source_dir = '/'.join(bugPath.split('/')[:-1])
    patch_cmd = 'patch -d ' + source_dir + ' -i ' + diff_path + ' -o ' + patched_path
    _out, _err = shellGitCheckout(patch_cmd)
    return patched_path
+
+
def readTestSuite(testPath):
    """Return the test labels declared in a shell test driver.

    The script at ``testPath`` is read as text, and every run of the
    characters ``p``, ``n`` and the digits that is immediately followed by
    ``)`` is reported, in the order it appears in the file (for example the
    ``p1)`` / ``n3)`` arms of a ``case`` statement).

    :param testPath: path of the shell script to scan.
    :return: list of label strings; empty when nothing matches.
    """
    with open(testPath, mode='r') as testFile:
        test_str = testFile.read()

    # Inside [...] a '|' is a literal pipe, not an alternation, so it is left
    # out of the class: otherwise shell text such as "||)" would be returned
    # as a test name.
    return re.findall(r"([pn0-9]+)\)", test_str)
+
def test_all(testerPath,validTests):
    """Run a shell test driver for each label, stopping at the first failure.

    For every label the command ``bash <testerPath> <label>`` is executed via
    ``shellGitCheckout``. A test counts as failed when the first returned
    value does not contain ``Accepted`` or the second returned value is not
    the empty string.

    :param testerPath: path of the test driver script.
    :param validTests: labels to run, in order.
    :return: ``(failure_cases, failure, total)`` - the failing label (at most
        one, because the loop stops there), the failure count, and
        ``len(validTests)``.
    """
    total = len(validTests)
    failure_cases = []
    failure = 0

    for label in validTests:
        out, e = shellGitCheckout('bash ' + testerPath + ' {}'.format(label))
        accepted = 'Accepted' in out and e == ''
        if accepted:
            continue
        # First failing test ends the run.
        failure_cases.append(label)
        failure += 1
        break

    return failure_cases, failure, total
+
def validateCore(bugName):
    """Try every known fix pattern on one codeflaws bug until one passes.

    For each pattern uid in ``uniquePatterns.pickle`` the buggy source is
    patched with ``patchSourceFile``, copied into the bug directory, built
    with ``make FILENAME=<bugName><uid>`` and, when the build produced that
    file, checked with ``test-valid.sh``. The loop stops at the first patch
    for which no test fails.

    :param bugName: bug directory name; it must split on ``-`` into exactly
        five parts (contest id, problem, an ignored field, buggy id,
        accepted id).
    :return: one summary line: ``bugName:<name>, `` followed by an
        ``@True:<idx>:<patch file>@<failures> `` entry per compiled patch,
        an optional ``fix <bug> by <patch> `` and finally
        ``times:<n>, success|failure``. The line is also printed.
    """
    bug_dir = join(DATA_PATH, 'codeflaws', bugName)
    for sub_dir in ('patches', 'patched'):
        if not os.path.exists(join(bug_dir, sub_dir)):
            os.makedirs(join(bug_dir, sub_dir))

    fix = 'failure'
    # Bound before the loop so the summary line below never hits an unbound
    # name when there is no pattern to iterate over.
    times = 0
    output = 'bugName:' + bugName + ', '

    spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle'))

    spfiles['uProjects'] = spfiles.uFiles.apply(lambda x: list(set([i.split('/{')[0].replace('(','') for i in x])))
    # NOTE(review): the result of this selection is discarded, so no pattern
    # is actually filtered out. Presumably meant to exclude patterns that come
    # only from codeflaws - confirm before assigning it back.
    spfiles[~spfiles.uProjects.apply(lambda x: np.all([i == 'codeflaws' for i in x]))]
    spfiles = spfiles[['uid']]

    shellGitCheckout('make -C ' + bug_dir + ' clean')

    contestid, problem, _, buggyId, acceptedId = bugName.split('-')

    # These do not depend on the pattern, so compute them once.
    buggyFileName = contestid + '-' + problem + '-' + buggyId + '.c'
    path = join(bug_dir, buggyFileName)
    testerPath = join(bug_dir, 'test-valid.sh')

    for idx, spfile in enumerate(spfiles.uid.values.tolist()):
        if spfile == '.DS_Store':
            continue

        patch = patchSourceFile(path, spfile, bugName)
        if patch is None:
            continue

        shutil.copy2(patch, bug_dir)
        shellGitCheckout('make -C ' + bug_dir + ' FILENAME=' + bugName + spfile)

        # The build is considered successful when the target file exists.
        if not isfile(join(bug_dir, bugName + spfile)):
            continue

        output += '@True:' + str(idx) + ':' + patch.split('/')[-1] + '@'

        validTests = readTestSuite(testerPath)
        post_failure_cases, post_failure, total = test_all(testerPath, validTests)

        output += str(post_failure) + ' '
        if post_failure == 0:
            times += 1
            fix = 'success'
            output += 'fix {} by {} '.format(bugName, patch)
            break

    output += 'times:{}, '.format(times) + fix
    print(output)
    return output
+
+    #         failure_cases, failure, total, test_outcomes = test_all(bug, container, client)
+    #         if failure == 0:
+    #             fix = 'success'
+    #             # print("fix {} by {}".format(bugName, patch_name))
+    #             output += 'fix {} by {} '.format(bugName, patch)
+    #             break
+    #         else:
+    #             output += ' {}'.format(failure_cases)
+    #     else:
+    #         output += '@False:' + str(idx) + ':' + patch.split('/')[-1] + '@'
+    # output += 'times:{}, '.format(times) + fix
+    #
+    # return output
+
def validate():
    """Validate every codeflaws bug and store the summary lines.

    Every entry of ``DATA_PATH/codeflaws`` except the known non-bug files is
    handed to ``validateCore`` through ``parallelRunMerge``. The returned
    lines are printed and written, newline-separated, to
    ``DATA_PATH/codeFlawsResults``.
    """
    skipped = ('.DS_Store', 'README.md', 'codeflaws-defect-detail-info.txt')
    bugList = [b for b in listdir(join(DATA_PATH, 'codeflaws')) if b not in skipped]

    results = parallelRunMerge(validateCore, bugList)
    print('\n'.join(results))
    # The former trailing validateCore(b) call is gone: it re-ran a single bug
    # using the leaked loop variable, which could be one of the skipped
    # entries and was unbound for an empty directory.
    with open(join(DATA_PATH, 'codeFlawsResults'), 'w',
              encoding='utf-8') as writeFile:
        writeFile.write('\n'.join(results))