anil
2020-09-03 10:11:15 +02:00
parent 1b57a9930c
commit 8eccfee179
6 changed files with 100 additions and 31 deletions
+34 -10
@@ -105,6 +105,21 @@ def getMapping(pathMapping,x):
def decode_redis(src):
    if isinstance(src, list):
        rv = list()
        for key in src:
            rv.append(decode_redis(key))
        return rv
    elif isinstance(src, dict):
        rv = dict()
        for key in src:
            rv[key.decode()] = decode_redis(src[key])
        return rv
    elif isinstance(src, bytes):
        return src.decode()
    else:
        raise Exception("type not handled: " + str(type(src)))
def cluster(clusterPath,pairsPath, level):
@@ -115,11 +130,17 @@ def cluster(clusterPath,pairsPath, level):
roots = [i for i in roots if not i.startswith('.')]
port = REDIS_PORT
redis_db = redis.StrictRedis(host="localhost", port=port, db=1)
filenames= decode_redis(redis_db.hgetall('filenames'))
pairsPath = filenames
roots = list(set([i.split('-')[0] for i in filenames.keys()]))
if level == 'tokens':
redis_db = redis.StrictRedis(host="localhost", port=port, db=3)
else:
redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
keys = redis_db.hkeys("compared")
compared = pd.DataFrame(keys, columns=['pairs_key'])
compared['pairs_key'] = compared['pairs_key'].apply(lambda x: x.decode())
@@ -169,21 +190,24 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''):
logging.info('Cluster size %d',len(subgraph.nodes()))
cluster.append(subgraph.nodes())
cluster
pathMapping = dict()
if level == 'tokens':
indexFile = join(pairsPath, root, s,action+'.index')
elif level == 'actions':
indexFile = join(pairsPath, root, s + '.index')
# else:
# indexFile =join(pairsPath, root, s,action,token+'.index')
df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
pathMapping = df.to_dict()
# pathMapping = dict()
# if level == 'tokens':
# indexFile = join(pairsPath, root, s,action+'.index')
# elif level == 'actions':
# indexFile = join(pairsPath, root, s + '.index')
# # else:
# # indexFile =join(pairsPath, root, s,action,token+'.index')
# df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
# pathMapping = df.to_dict()
workList = []
for idx, clus in enumerate(cluster):
logging.info('exporting cluster %s %s %s %d', root,s,action,idx)
for f in clus:
dumpFile = pathMapping[1][int(f)]
# redis_db = redis.StrictRedis(host="localhost", port=6399, db=1)
# dumpFile = redis_db.hget("filenames",root+'-'+s+'-'+f)
dumpFile = pairsPath[root+'-'+s+'-'+f]
# dumpFile = pathMapping[1][int(f)]
t = dumpFile,root,level,clusterPath,s,action,token,idx
workList.append(t)
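The decode_redis helper added above recursively turns the bytes that redis-py returns for hash keys and values into plain strings, so the 'filenames' hash can be used directly as a path lookup in clusterCore. A minimal usage sketch, assuming the decode_redis defined above, a local Redis instance on the default port 6379 (the real code uses the project's REDIS_PORT), and an illustrative hash entry following the '<root>-<s>-<file>' key convention:

import redis

redis_db = redis.StrictRedis(host="localhost", port=6379, db=1)
# illustrative entry; real keys follow the '<root>-<s>-<file>' convention used in clusterCore
redis_db.hset("filenames", "proj-5-bug_abc123_0", "/tmp/dump/proj/bug_abc123_0")

filenames = decode_redis(redis_db.hgetall("filenames"))
# -> {'proj-5-bug_abc123_0': '/tmp/dump/proj/bug_abc123_0'} with plain str keys and values
roots = list(set(k.split('-')[0] for k in filenames.keys()))            # -> ['proj']
dumpFile = filenames['proj' + '-' + '5' + '-' + 'bug_abc123_0']         # same lookup shape as pairsPath[root+'-'+s+'-'+f]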
+2 -2
@@ -598,7 +598,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True):
else:
# ax1.set_xticklabels(labels)
# ax1.set_xticklabels(None)
ax1.set_yticklabels(labels, rotation=45, ha='right')
ax1.set_yticklabels(labels, ha='right')
ax1.get_yaxis().set_ticklabels(labels)
# sns.boxplot(yList, ax=ax1)
if limit:
@@ -615,7 +615,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True):
fig = plt.gcf()
# fig.tight_layout()
fig.set_size_inches(7, 1, forward=True)
fig.set_size_inches(7, 2, forward=True)
fig.savefig(fn, dpi=100, bbox_inches='tight')
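The plotBox change above drops the 45-degree rotation on the y tick labels and doubles the saved figure height from 1 to 2 inches. A self-contained sketch of the resulting layout, with illustrative data and labels (not taken from the project):

import matplotlib
matplotlib.use("Agg")  # assumption: headless rendering
import matplotlib.pyplot as plt

yList = [[1, 2, 3, 7], [2, 4, 4, 9]]   # illustrative data
labels = ['pattern A', 'pattern B']    # illustrative labels
fig, ax1 = plt.subplots()
ax1.boxplot(yList, vert=False)
ax1.set_yticklabels(labels, ha='right')   # horizontal, right-aligned labels
fig.set_size_inches(7, 2, forward=True)   # was 7x1 before this commit
fig.savefig('box.png', dpi=100, bbox_inches='tight')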
-4
@@ -3,7 +3,6 @@ libtiff, https://gitlab.com/libtiff/libtiff.git
FFmpeg, https://github.com/FFmpeg/FFmpeg.git
cmake, https://gitlab.kitware.com/cmake/cmake.git
redis, https://github.com/antirez/redis.git
ompi, https://github.com/open-mpi/ompi.git
gzip, https://git.savannah.gnu.org/git/gzip.git
libarchive, https://github.com/libarchive/libarchive
cairo, https://gitlab.freedesktop.org/cairo/cairo.git
@@ -19,15 +18,12 @@ php-src, https://github.com/php/php-src.git
gtk, https://gitlab.gnome.org/GNOME/gtk.git
gstreamer, https://gitlab.freedesktop.org/gstreamer/gstreamer.git
openssh-portable, https://github.com/openssh/openssh-portable.git
openssl, https://github.com/openssl/openssl.git
wireshark, https://github.com/wireshark/wireshark.git
git, https://github.com/git/git.git
linux, git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
gmp,https://github.com/ryepdx/gmp
lighttpd1.4,https://github.com/lighttpd/lighttpd1.4.git
lighttpd2,https://github.com/lighttpd/lighttpd2.git
xqemu,https://github.com/xqemu/xqemu
xqemu,https://github.com/xqemu/xqemu
git,https://github.com/git/git
gpdb,https://github.com/greenplum-db/gpdb
MonetDBLite-C,https://github.com/MonetDB/MonetDBLite-C
+30
@@ -0,0 +1,30 @@
from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
COMMIT_DFS = os.environ["COMMIT_DFS"]
# DATASET_PATH = '/Users/anilkoyuncu/projects/datasets'
DATASET_PATH = os.environ["REPO_PATH"]
DATASET = os.environ["dataset"]
ROOT = os.environ["ROOT_DIR"]
PROJECT_LIST = os.environ["PROJECT_LIST"]
def core():
intros = listdir(join(DATA_PATH,'introclass'))
intros
if not os.path.exists(join(DATA_PATH,'patches','introclass')):
os.makedirs(join(DATA_PATH,'patches','introclass'))
if not os.path.exists(join(DATA_PATH,'patches','introclass','prevFiles')):
os.makedirs(join(DATA_PATH,'patches','introclass','prevFiles'))
if not os.path.exists(join(DATA_PATH,'patches','introclass','revFiles')):
os.makedirs(join(DATA_PATH,'patches','introclass','revFiles'))
if not os.path.exists(join(DATA_PATH,'patches','introclass','DiffEntries')):
os.makedirs(join(DATA_PATH,'patches','introclass','DiffEntries'))
for i in intros:
if i == '.DS_Store':
continue
_,bugName,bid,submission = i.split(':')
shutil.copy2(join(DATA_PATH,'introclass',i,bugName+'.c'),join(DATA_PATH,'patches','introclass','prevFiles','prev_'+bid+'_'+bid+submission+'_'+bugName+'.c'))
shutil.copy2(join(DATA_PATH,'introclass',i,'oracle.c'),join(DATA_PATH,'patches','introclass','revFiles',bid+'_'+bid+submission+'_'+bugName+'.c'))
shutil.copy2(join(DATA_PATH,'introclass',i,'oracle.c.patch'),join(DATA_PATH,'patches','introclass','DiffEntries',bid+'_'+bid+submission+'_'+bugName+'.c.txt'))
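The loop above assumes IntroClass result directories are named 'introclass:<bugName>:<bid>:<submission>' and rebuilds the buggy and oracle file names from those parts. A worked example using one of the bug ids listed further down in this commit:

i = 'introclass:median:d6364e:007'
_, bugName, bid, submission = i.split(':')
# bugName = 'median', bid = 'd6364e', submission = '007'
prevName = 'prev_' + bid + '_' + bid + submission + '_' + bugName + '.c'  # 'prev_d6364e_d6364e007_median.c' (buggy version)
revName = bid + '_' + bid + submission + '_' + bugName + '.c'             # 'd6364e_d6364e007_median.c' (oracle / fixed version)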
+13 -10
@@ -8,19 +8,21 @@ INNER_DATA_PATH = join(ROOT,'data')
def statsNormal(isFixminer=True):
# tokens = join(DATA_PATH, 'tokens')
# actions = join(DATA_PATH, 'actions')
import redis
redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
# keys = redis_db.scan(0, match='*', count='1000000')
keys = redis_db.hkeys("dump") # hkeys "dump"
matches = pd.DataFrame(keys, columns=['pairs_key'])
# import redis
# redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
# # keys = redis_db.scan(0, match='*', count='1000000')
# keys = redis_db.hkeys("dump") # hkeys "dump"
# matches = pd.DataFrame(keys, columns=['pairs_key'])
matches = load_zipped_pickle(join(DATA_PATH,'matches.pickle'))
# matches = load_zipped_pickle(join(DATA_PATH,'singleHunks'))
matches['pairs_key'] = matches['pairs_key'].apply(lambda x: x.decode())
matches['root'] = matches['pairs_key'].apply(lambda x: x.split('/')[0])
matches['size'] = matches['pairs_key'].apply(lambda x: x.split('/')[1])
matches['file'] = matches['pairs_key'].apply(lambda x: x.split('/')[2])
matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0])
matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1])
# matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0])
matches['repo'] = matches['file'].apply(lambda x: re.split('_[0-9a-f]{6,40}',x)[0])
# matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1])
matches['commit'] = matches['file'].apply(lambda x: re.findall('_[0-9a-f]{6,40}',x)[0].replace('_',''))
matches['hunk'] = matches['pairs_key'].apply(lambda x: x.split('/')[2].split('_')[-1])
matches['fileName'] = matches['pairs_key'].apply(lambda x: '_'.join(x.split('/')[2].split('_')[:-1]))
test = matches[['fileName','hunk']]
@@ -478,8 +480,9 @@ def exportAbstractPatterns():
try:
dKey = '/'.join(id[0].split('-')[:-1]) + "/" + members[0]
lines = redis_db.hget("dump",dKey )
# lines = redis_db.hget("dump",dKey )
lines = redis_db.hget(dKey,'actionTree')
lines = redis_db.hget(dKey,'shapeTree')
cid = id[0].replace("-",'#')
abstractPattern(cid,lines.decode(),isJava,members)
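The statsNormal change above swaps the plain split('_') for a regular expression so that repository names containing underscores are still split correctly at the commit sha. A small sketch of the old and new behaviour on a hypothetical file name (the '<repo>_<sha>_<hunk>' layout is inferred from the code; the name itself is made up):

import re

f = 'some_repo_9d8c6c0ab12_5'                         # hypothetical '<repo>_<sha>_<hunk>' file name
f.split('_')[0]                                       # -> 'some'          (old behaviour, wrong for repos with '_')
re.split('_[0-9a-f]{6,40}', f)[0]                     # -> 'some_repo'     (new repo extraction)
re.findall('_[0-9a-f]{6,40}', f)[0].replace('_', '')  # -> '9d8c6c0ab12'   (new commit extraction)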
+21 -5
@@ -84,9 +84,13 @@ def testCore(t):
# output += '@fail:' + str(pre_failure) + '@total:' + str(total) + ', '
# spfiles = listdir(join(DATASET, 'cocci'))
spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle'))
spfiles.sort_values(by='uFreq', inplace=True, ascending=False)
spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatternsMod.pickle'))
spfiles.sort_values(by='uProject', inplace=True, ascending=False)
spfiles = spfiles[['uid']]
#['uid', 'uFreq', 'uFunction', 'uFilenames', 'uPatch', 'uProject']
# spfiles = spfiles[spfiles.uFreq > 2]
spfiles = spfiles[['uid']]
# print("patching... " + bugName)
for idx,spfile in enumerate(spfiles.uid.values.tolist()):
if spfile == '.DS_Store':
@@ -158,7 +162,8 @@ def testCore(t):
try:
client.shutdown()
except Exception as e:
logging.error(e)
logging.debug(e)
# print(myProcess.pid)
# os.killpg(myProcess.pid, signal.SIGTERM)
# docker stop $(docker ps -q)
@@ -184,6 +189,14 @@ def patch_validate():
,'introclass:median:d6364e:007','introclass:median:489253:007','introclass:syllables:d12048:004','introclass:smallest:d9e7ea:002','introclass:syllables:035fe9:000'
,'introclass:syllables:c9d718:002','introclass:syllables:ea67b8:007','introclass:median:48b829:000','introclass:syllables:d9e7ea:001']
black = load_zipped_pickle(join(DATA_PATH,'blackBugs'))
white = load_zipped_pickle(join(DATA_PATH,'whiteBugs'))
# nonFail.append('introclass:grade-b1924d-001')
# nonFail.append('introclass:smallest-b1924d-002')
# allFixed = list(set(white).union(black)) #288
allFixed = white
# cmd = 'bash ' + join(DATA_PATH,'startBugzoo.sh')
# cmd = "/Users/anil.koyuncu/anaconda3/envs/python36/bin/bugzood --debug -p " + str(port)
# output, errors = shellGitCheckout(cmd)
@@ -196,6 +209,9 @@ def patch_validate():
if b in nonFail:
continue
if b not in allFixed:
continue
t = b, port
bugList.append(t)
if port == 6300:
@@ -215,9 +231,9 @@ def patch_validate():
# t = 'introclass:syllables:99cbb4:000',6000
# testCore(t)
# results = parallelRunMerge(testCore, bugList,max_workers=10)
results = parallelRunMerge(testCore, bugList , max_workers=10)
results = parallelRunMerge(testCore, bugList , max_workers=12)
print('\n'.join(results))
with open(join(DATA_PATH, 'introTestResults'), 'w',
with open(join(DATA_PATH, 'introTestResultsWhiteuProject'), 'w',
encoding='utf-8') as writeFile:
# if levelPatch == 0:
writeFile.write('\n'.join(results))