From 8eccfee17911d84e82102aa70cd8c07716d0d660 Mon Sep 17 00:00:00 2001 From: anil Date: Thu, 3 Sep 2020 10:11:15 +0200 Subject: [PATCH] changes --- python/abstractPatch.py | 44 ++++++++++++++++++++++++++++--------- python/common/commons.py | 4 ++-- python/data/datasets.csv | 4 ---- python/introDS.py | 30 +++++++++++++++++++++++++ python/stats.py | 23 ++++++++++--------- python/test_patched_file.py | 26 +++++++++++++++++----- 6 files changed, 100 insertions(+), 31 deletions(-) create mode 100644 python/introDS.py diff --git a/python/abstractPatch.py b/python/abstractPatch.py index b6ff80c..45e9aca 100644 --- a/python/abstractPatch.py +++ b/python/abstractPatch.py @@ -105,6 +105,21 @@ def getMapping(pathMapping,x): +def decode_redis(src): + if isinstance(src, list): + rv = list() + for key in src: + rv.append(decode_redis(key)) + return rv + elif isinstance(src, dict): + rv = dict() + for key in src: + rv[key.decode()] = decode_redis(src[key]) + return rv + elif isinstance(src, bytes): + return src.decode() + else: + raise Exception("type not handled: " +type(src)) def cluster(clusterPath,pairsPath, level): @@ -115,11 +130,17 @@ def cluster(clusterPath,pairsPath, level): roots = [i for i in roots if not i.startswith('.')] port = REDIS_PORT + redis_db = redis.StrictRedis(host="localhost", port=port, db=1) + filenames= decode_redis(redis_db.hgetall('filenames')) + pairsPath = filenames + roots = list(set([i.split('-')[0] for i in filenames.keys()])) if level == 'tokens': redis_db = redis.StrictRedis(host="localhost", port=port, db=3) else: redis_db = redis.StrictRedis(host="localhost", port=port, db=2) + + keys = redis_db.hkeys("compared") compared = pd.DataFrame(keys, columns=['pairs_key']) compared['pairs_key'] = compared['pairs_key'].apply(lambda x: x.decode()) @@ -169,21 +190,24 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''): logging.info('Cluster size %d',len(subgraph.nodes())) cluster.append(subgraph.nodes()) cluster - pathMapping = dict() - if level == 'tokens': - indexFile = join(pairsPath, root, s,action+'.index') - elif level == 'actions': - indexFile = join(pairsPath, root, s + '.index') - # else: - # indexFile =join(pairsPath, root, s,action,token+'.index') - df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0]) - pathMapping = df.to_dict() + # pathMapping = dict() + # if level == 'tokens': + # indexFile = join(pairsPath, root, s,action+'.index') + # elif level == 'actions': + # indexFile = join(pairsPath, root, s + '.index') + # # else: + # # indexFile =join(pairsPath, root, s,action,token+'.index') + # df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0]) + # pathMapping = df.to_dict() workList = [] for idx, clus in enumerate(cluster): logging.info('exporting cluster %s %s %s %d', root,s,action,idx) for f in clus: - dumpFile = pathMapping[1][int(f)] + # redis_db = redis.StrictRedis(host="localhost", port=6399, db=1) + # dumpFile = redis_db.hget("filenames",root+'-'+s+'-'+f) + dumpFile = pairsPath[root+'-'+s+'-'+f] + # dumpFile = pathMapping[1][int(f)] t = dumpFile,root,level,clusterPath,s,action,token,idx workList.append(t) diff --git a/python/common/commons.py b/python/common/commons.py index 68056ba..cb94935 100644 --- a/python/common/commons.py +++ b/python/common/commons.py @@ -598,7 +598,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True): else: # ax1.set_xticklabels(labels) # ax1.set_xticklabels(None) - ax1.set_yticklabels(labels, rotation=45, ha='right') + ax1.set_yticklabels(labels, ha='right') ax1.get_yaxis().set_ticklabels(labels) # sns.boxplot(yList, ax=ax1) if limit: @@ -615,7 +615,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True): fig = plt.gcf() # fig.tight_layout() - fig.set_size_inches(7, 1, forward=True) + fig.set_size_inches(7, 2, forward=True) fig.savefig(fn, dpi=100, bbox_inches='tight') diff --git a/python/data/datasets.csv b/python/data/datasets.csv index 70f0248..3b37178 100644 --- a/python/data/datasets.csv +++ b/python/data/datasets.csv @@ -3,7 +3,6 @@ libtiff, https://gitlab.com/libtiff/libtiff.git FFmpeg, https://github.com/FFmpeg/FFmpeg.git cmake, https://gitlab.kitware.com/cmake/cmake.git redis, https://github.com/antirez/redis.git -ompi, https://github.com/open-mpi/ompi.git gzip, https://git.savannah.gnu.org/git/gzip.git libarchive, https://github.com/libarchive/libarchive cairo, https://gitlab.freedesktop.org/cairo/cairo.git @@ -19,15 +18,12 @@ php-src, https://github.com/php/php-src.git gtk, https://gitlab.gnome.org/GNOME/gtk.git gstreamer, https://gitlab.freedesktop.org/gstreamer/gstreamer.git openssh-portable, https://github.com/openssh/openssh-portable.git -openssl, https://github.com/openssl/openssl.git wireshark, https://github.com/wireshark/wireshark.git -git, https://github.com/git/git.git linux, git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git gmp,https://github.com/ryepdx/gmp lighttpd1.4,https://github.com/lighttpd/lighttpd1.4.git lighttpd2,https://github.com/lighttpd/lighttpd2.git xqemu,https://github.com/xqemu/xqemu -xqemu,https://github.com/xqemu/xqemu git,https://github.com/git/git gpdb,https://github.com/greenplum-db/gpdb MonetDBLite-C,https://github.com/MonetDB/MonetDBLite-C diff --git a/python/introDS.py b/python/introDS.py new file mode 100644 index 0000000..cdc2878 --- /dev/null +++ b/python/introDS.py @@ -0,0 +1,30 @@ +from common.commons import * +DATA_PATH = os.environ["DATA_PATH"] +COMMIT_DFS = os.environ["COMMIT_DFS"] +# DATASET_PATH = '/Users/anilkoyuncu/projects/datasets' +DATASET_PATH = os.environ["REPO_PATH"] +DATASET = os.environ["dataset"] +ROOT = os.environ["ROOT_DIR"] +PROJECT_LIST = os.environ["PROJECT_LIST"] + +def core(): + intros = listdir(join(DATA_PATH,'introclass')) + intros + if not os.path.exists(join(DATA_PATH,'patches','introclass')): + os.makedirs(join(DATA_PATH,'patches','introclass')) + if not os.path.exists(join(DATA_PATH,'patches','introclass','prevFiles')): + os.makedirs(join(DATA_PATH,'patches','introclass','prevFiles')) + if not os.path.exists(join(DATA_PATH,'patches','introclass','revFiles')): + os.makedirs(join(DATA_PATH,'patches','introclass','revFiles')) + if not os.path.exists(join(DATA_PATH,'patches','introclass','DiffEntries')): + os.makedirs(join(DATA_PATH,'patches','introclass','DiffEntries')) + for i in intros: + if i == '.DS_Store': + continue + _,bugName,bid,submission = i.split(':') + + shutil.copy2(join(DATA_PATH,'introclass',i,bugName+'.c'),join(DATA_PATH,'patches','introclass','prevFiles','prev_'+bid+'_'+bid+submission+'_'+bugName+'.c')) + shutil.copy2(join(DATA_PATH,'introclass',i,'oracle.c'),join(DATA_PATH,'patches','introclass','revFiles',bid+'_'+bid+submission+'_'+bugName+'.c')) + shutil.copy2(join(DATA_PATH,'introclass',i,'oracle.c.patch'),join(DATA_PATH,'patches','introclass','DiffEntries',bid+'_'+bid+submission+'_'+bugName+'.c.txt')) + + diff --git a/python/stats.py b/python/stats.py index 2aa5438..b0781db 100644 --- a/python/stats.py +++ b/python/stats.py @@ -8,19 +8,21 @@ INNER_DATA_PATH = join(ROOT,'data') def statsNormal(isFixminer=True): # tokens = join(DATA_PATH, 'tokens') # actions = join(DATA_PATH, 'actions') - import redis - redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0) - # keys = redis_db.scan(0, match='*', count='1000000') - keys = redis_db.hkeys("dump") # hkeys "dump" - matches = pd.DataFrame(keys, columns=['pairs_key']) - + # import redis + # redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0) + # # keys = redis_db.scan(0, match='*', count='1000000') + # keys = redis_db.hkeys("dump") # hkeys "dump" + # matches = pd.DataFrame(keys, columns=['pairs_key']) + matches = load_zipped_pickle(join(DATA_PATH,'matches.pickle')) # matches = load_zipped_pickle(join(DATA_PATH,'singleHunks')) matches['pairs_key'] = matches['pairs_key'].apply(lambda x: x.decode()) matches['root'] = matches['pairs_key'].apply(lambda x: x.split('/')[0]) matches['size'] = matches['pairs_key'].apply(lambda x: x.split('/')[1]) matches['file'] = matches['pairs_key'].apply(lambda x: x.split('/')[2]) - matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0]) - matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1]) + # matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0]) + matches['repo'] = matches['file'].apply(lambda x: re.split('_[0-9a-f]{6,40}',x)[0]) + # matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1]) + matches['commit'] = matches['file'].apply(lambda x: re.findall('_[0-9a-f]{6,40}',x)[0].replace('_','')) matches['hunk'] = matches['pairs_key'].apply(lambda x: x.split('/')[2].split('_')[-1]) matches['fileName'] = matches['pairs_key'].apply(lambda x: '_'.join(x.split('/')[2].split('_')[:-1])) test = matches[['fileName','hunk']] @@ -478,8 +480,9 @@ def exportAbstractPatterns(): try: dKey = '/'.join(id[0].split('-')[:-1]) + "/" + members[0] - lines = redis_db.hget("dump",dKey ) - + # lines = redis_db.hget("dump",dKey ) + lines = redis_db.hget(dKey,'actionTree') + lines = redis_db.hget(dKey,'shapeTree') cid = id[0].replace("-",'#') abstractPattern(cid,lines.decode(),isJava,members) diff --git a/python/test_patched_file.py b/python/test_patched_file.py index 3164077..f79a8a1 100755 --- a/python/test_patched_file.py +++ b/python/test_patched_file.py @@ -84,9 +84,13 @@ def testCore(t): # output += '@fail:' + str(pre_failure) + '@total:' + str(total) + ', ' # spfiles = listdir(join(DATASET, 'cocci')) - spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle')) - spfiles.sort_values(by='uFreq', inplace=True, ascending=False) + spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatternsMod.pickle')) + spfiles.sort_values(by='uProject', inplace=True, ascending=False) spfiles = spfiles[['uid']] + #['uid', 'uFreq', 'uFunction', 'uFilenames', 'uPatch', 'uProject'] + # spfiles = spfiles[spfiles.uFreq > 2] + spfiles = spfiles[['uid']] + # print("patching... " + bugName) for idx,spfile in enumerate(spfiles.uid.values.tolist()): if spfile == '.DS_Store': @@ -158,7 +162,8 @@ def testCore(t): try: client.shutdown() except Exception as e: - logging.error(e) + logging.debug(e) + # print(myProcess.pid) # os.killpg(myProcess.pid, signal.SIGTERM) # docker stop $(docker ps -q) @@ -184,6 +189,14 @@ def patch_validate(): ,'introclass:median:d6364e:007','introclass:median:489253:007','introclass:syllables:d12048:004','introclass:smallest:d9e7ea:002','introclass:syllables:035fe9:000' ,'introclass:syllables:c9d718:002','introclass:syllables:ea67b8:007','introclass:median:48b829:000','introclass:syllables:d9e7ea:001'] + black = load_zipped_pickle(join(DATA_PATH,'blackBugs')) + white = load_zipped_pickle(join(DATA_PATH,'whiteBugs')) + + # nonFail.append('introclass:grade-b1924d-001') + # nonFail.append('introclass:smallest-b1924d-002') + + # allFixed = list(set(white).union(black)) #288 + allFixed = white # cmd = 'bash ' + join(DATA_PATH,'startBugzoo.sh') # cmd = "/Users/anil.koyuncu/anaconda3/envs/python36/bin/bugzood --debug -p " + str(port) # output, errors = shellGitCheckout(cmd) @@ -196,6 +209,9 @@ def patch_validate(): if b in nonFail: continue + + if b not in allFixed: + continue t = b, port bugList.append(t) if port == 6300: @@ -215,9 +231,9 @@ def patch_validate(): # t = 'introclass:syllables:99cbb4:000',6000 # testCore(t) # results = parallelRunMerge(testCore, bugList,max_workers=10) - results = parallelRunMerge(testCore, bugList , max_workers=10) + results = parallelRunMerge(testCore, bugList , max_workers=12) print('\n'.join(results)) - with open(join(DATA_PATH, 'introTestResults'), 'w', + with open(join(DATA_PATH, 'introTestResultsWhiteuProject'), 'w', encoding='utf-8') as writeFile: # if levelPatch == 0: writeFile.write('\n'.join(results))