@@ -105,6 +105,21 @@ def getMapping(pathMapping,x):
def decode_redis(src):
    if isinstance(src, list):
        rv = list()
        for key in src:
            rv.append(decode_redis(key))
        return rv
    elif isinstance(src, dict):
        rv = dict()
        for key in src:
            rv[key.decode()] = decode_redis(src[key])
        return rv
    elif isinstance(src, bytes):
        return src.decode()
    else:
        # str(...) needed: concatenating a type object to a str raises TypeError
        raise Exception("type not handled: " + str(type(src)))
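
For context, a minimal usage sketch of decode_redis (assuming a local Redis; the port, key, and field below are made up for illustration):

import redis

# hgetall returns bytes keys and values; decode_redis converts the
# whole structure to str recursively.
redis_db = redis.StrictRedis(host="localhost", port=6379, db=1)
redis_db.hset('filenames', 'redis-5-12', 'src/dict.c')
raw = redis_db.hgetall('filenames')  # {b'redis-5-12': b'src/dict.c'}
print(decode_redis(raw))             # {'redis-5-12': 'src/dict.c'}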
def cluster(clusterPath, pairsPath, level):

@@ -115,11 +130,17 @@ def cluster(clusterPath,pairsPath, level):
    roots = [i for i in roots if not i.startswith('.')]

    port = REDIS_PORT
    redis_db = redis.StrictRedis(host="localhost", port=port, db=1)
    filenames = decode_redis(redis_db.hgetall('filenames'))
    pairsPath = filenames
    roots = list(set([i.split('-')[0] for i in filenames.keys()]))
    if level == 'tokens':
        redis_db = redis.StrictRedis(host="localhost", port=port, db=3)
    else:
        redis_db = redis.StrictRedis(host="localhost", port=port, db=2)

    keys = redis_db.hkeys("compared")
    compared = pd.DataFrame(keys, columns=['pairs_key'])
    compared['pairs_key'] = compared['pairs_key'].apply(lambda x: x.decode())
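
The key convention assumed by the split above appears to be '<root>-<size>-<id>'; a stand-in illustration with made-up entries:

# Stand-in filenames mapping as decode_redis would return it.
filenames = {'redis-5-12': 'a.c', 'redis-5-13': 'b.c', 'gzip-3-7': 'g.c'}
roots = list(set([i.split('-')[0] for i in filenames.keys()]))
print(sorted(roots))  # ['gzip', 'redis'] (set order is not guaranteed)
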
@@ -169,21 +190,24 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''):
        logging.info('Cluster size %d', len(subgraph.nodes()))
        cluster.append(subgraph.nodes())

    pathMapping = dict()
    if level == 'tokens':
        indexFile = join(pairsPath, root, s, action + '.index')
    elif level == 'actions':
        indexFile = join(pairsPath, root, s + '.index')
    # else:
    #     indexFile = join(pairsPath, root, s, action, token + '.index')
    df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
    pathMapping = df.to_dict()
    # pathMapping = dict()
    # if level == 'tokens':
    #     indexFile = join(pairsPath, root, s, action + '.index')
    # elif level == 'actions':
    #     indexFile = join(pairsPath, root, s + '.index')
    # # else:
    # #     indexFile = join(pairsPath, root, s, action, token + '.index')
    # df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
    # pathMapping = df.to_dict()

    workList = []
    for idx, clus in enumerate(cluster):
        logging.info('exporting cluster %s %s %s %d', root, s, action, idx)
        for f in clus:
            dumpFile = pathMapping[1][int(f)]
            # redis_db = redis.StrictRedis(host="localhost", port=6399, db=1)
            # dumpFile = redis_db.hget("filenames", root + '-' + s + '-' + f)
            dumpFile = pairsPath[root + '-' + s + '-' + f]
            # dumpFile = pathMapping[1][int(f)]

            t = dumpFile, root, level, clusterPath, s, action, token, idx
            workList.append(t)
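
How the pathMapping[1][int(f)] lookup works: with header=None the columns are labelled 0 and 1, index_col=[0] consumes label 0, and df.to_dict() nests the remaining column under key 1. A self-contained sketch with made-up index contents:

import pandas as pd
from io import StringIO

# Assumed .index layout: '<numeric id>,<dump file name>'
indexFile = StringIO("0,redis-5-12\n1,redis-5-13\n")
df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
pathMapping = df.to_dict()
print(pathMapping[1][0])  # 'redis-5-12' -- column 1, keyed by the id column
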
@@ -598,7 +598,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True):
    else:
        # ax1.set_xticklabels(labels)
        # ax1.set_xticklabels(None)
        ax1.set_yticklabels(labels, rotation=45, ha='right')
        ax1.set_yticklabels(labels, ha='right')
        ax1.get_yaxis().set_ticklabels(labels)
    # sns.boxplot(yList, ax=ax1)
    if limit:

@@ -615,7 +615,7 @@ def plotBox(yList,labels, fn, xAxisLabel,yAxisLabel, rotate=False,limit=True):
    fig = plt.gcf()

    # fig.tight_layout()
    fig.set_size_inches(7, 1, forward=True)
    fig.set_size_inches(7, 2, forward=True)
    fig.savefig(fn, dpi=100, bbox_inches='tight')
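
A minimal, self-contained sketch of the two plotBox changes above (horizontal, right-aligned y-tick labels instead of a 45-degree rotation, and a taller 7x2-inch figure); the data and file name are made up:

import matplotlib
matplotlib.use('Agg')  # headless backend so the sketch runs anywhere
import matplotlib.pyplot as plt

yList = [[1, 2, 3, 10], [2, 3, 4, 5]]
labels = ['groupA', 'groupB']
fig, ax1 = plt.subplots()
ax1.boxplot(yList, vert=False)
ax1.set_yticklabels(labels, ha='right')  # no rotation, right-aligned
fig.set_size_inches(7, 2, forward=True)
fig.savefig('box.png', dpi=100, bbox_inches='tight')
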
@@ -3,7 +3,6 @@ libtiff, https://gitlab.com/libtiff/libtiff.git
FFmpeg, https://github.com/FFmpeg/FFmpeg.git
cmake, https://gitlab.kitware.com/cmake/cmake.git
redis, https://github.com/antirez/redis.git
ompi, https://github.com/open-mpi/ompi.git
gzip, https://git.savannah.gnu.org/git/gzip.git
libarchive, https://github.com/libarchive/libarchive
cairo, https://gitlab.freedesktop.org/cairo/cairo.git

@@ -19,15 +18,12 @@ php-src, https://github.com/php/php-src.git
gtk, https://gitlab.gnome.org/GNOME/gtk.git
gstreamer, https://gitlab.freedesktop.org/gstreamer/gstreamer.git
openssh-portable, https://github.com/openssh/openssh-portable.git
openssl, https://github.com/openssl/openssl.git
wireshark, https://github.com/wireshark/wireshark.git
git, https://github.com/git/git.git
linux, git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
gmp, https://github.com/ryepdx/gmp
lighttpd1.4, https://github.com/lighttpd/lighttpd1.4.git
lighttpd2, https://github.com/lighttpd/lighttpd2.git
xqemu, https://github.com/xqemu/xqemu
git, https://github.com/git/git
gpdb, https://github.com/greenplum-db/gpdb
MonetDBLite-C, https://github.com/MonetDB/MonetDBLite-C

@@ -0,0 +1,30 @@
from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
COMMIT_DFS = os.environ["COMMIT_DFS"]
# DATASET_PATH = '/Users/anilkoyuncu/projects/datasets'
DATASET_PATH = os.environ["REPO_PATH"]
DATASET = os.environ["dataset"]
ROOT = os.environ["ROOT_DIR"]
PROJECT_LIST = os.environ["PROJECT_LIST"]


def core():
    intros = listdir(join(DATA_PATH, 'introclass'))
    if not os.path.exists(join(DATA_PATH, 'patches', 'introclass')):
        os.makedirs(join(DATA_PATH, 'patches', 'introclass'))
    if not os.path.exists(join(DATA_PATH, 'patches', 'introclass', 'prevFiles')):
        os.makedirs(join(DATA_PATH, 'patches', 'introclass', 'prevFiles'))
    if not os.path.exists(join(DATA_PATH, 'patches', 'introclass', 'revFiles')):
        os.makedirs(join(DATA_PATH, 'patches', 'introclass', 'revFiles'))
    if not os.path.exists(join(DATA_PATH, 'patches', 'introclass', 'DiffEntries')):
        os.makedirs(join(DATA_PATH, 'patches', 'introclass', 'DiffEntries'))
    for i in intros:
        if i == '.DS_Store':
            continue
        # directory names look like 'introclass:<bugName>:<bid>:<submission>'
        _, bugName, bid, submission = i.split(':')

        shutil.copy2(join(DATA_PATH, 'introclass', i, bugName + '.c'),
                     join(DATA_PATH, 'patches', 'introclass', 'prevFiles',
                          'prev_' + bid + '_' + bid + submission + '_' + bugName + '.c'))
        shutil.copy2(join(DATA_PATH, 'introclass', i, 'oracle.c'),
                     join(DATA_PATH, 'patches', 'introclass', 'revFiles',
                          bid + '_' + bid + submission + '_' + bugName + '.c'))
        shutil.copy2(join(DATA_PATH, 'introclass', i, 'oracle.c.patch'),
                     join(DATA_PATH, 'patches', 'introclass', 'DiffEntries',
                          bid + '_' + bid + submission + '_' + bugName + '.c.txt'))
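
For reference, the directory-name convention core() relies on (this sample name and hash are hypothetical):

name = 'introclass:median:1a2b3c:007'
_, bugName, bid, submission = name.split(':')
print(bugName, bid, submission)  # median 1a2b3c 007
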
@@ -8,19 +8,21 @@ INNER_DATA_PATH = join(ROOT,'data')
def statsNormal(isFixminer=True):
    # tokens = join(DATA_PATH, 'tokens')
    # actions = join(DATA_PATH, 'actions')
    import redis
    redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
    # keys = redis_db.scan(0, match='*', count='1000000')
    keys = redis_db.hkeys("dump")  # hkeys "dump"
    matches = pd.DataFrame(keys, columns=['pairs_key'])

    # import redis
    # redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
    # # keys = redis_db.scan(0, match='*', count='1000000')
    # keys = redis_db.hkeys("dump")  # hkeys "dump"
    # matches = pd.DataFrame(keys, columns=['pairs_key'])
    matches = load_zipped_pickle(join(DATA_PATH, 'matches.pickle'))
    # matches = load_zipped_pickle(join(DATA_PATH, 'singleHunks'))
    matches['pairs_key'] = matches['pairs_key'].apply(lambda x: x.decode())
    matches['root'] = matches['pairs_key'].apply(lambda x: x.split('/')[0])
    matches['size'] = matches['pairs_key'].apply(lambda x: x.split('/')[1])
    matches['file'] = matches['pairs_key'].apply(lambda x: x.split('/')[2])
    matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0])
    matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1])
    # matches['repo'] = matches['file'].apply(lambda x: x.split('_')[0])
    matches['repo'] = matches['file'].apply(lambda x: re.split('_[0-9a-f]{6,40}', x)[0])
    # matches['commit'] = matches['file'].apply(lambda x: x.split('_')[1])
    matches['commit'] = matches['file'].apply(lambda x: re.findall('_[0-9a-f]{6,40}', x)[0].replace('_', ''))
    matches['hunk'] = matches['pairs_key'].apply(lambda x: x.split('/')[2].split('_')[-1])
    matches['fileName'] = matches['pairs_key'].apply(lambda x: '_'.join(x.split('/')[2].split('_')[:-1]))
    test = matches[['fileName', 'hunk']]
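
Why the regex-based columns replace the plain '_' split: repository names can themselves contain underscores, so anchoring on the 6-40 character hex commit id is more robust. A stand-in example with an invented file name:

import re

f = 'php_src_3f1a2b9_4'  # hypothetical '<repo>_<commit>_<hunk>' file name
repo = re.split('_[0-9a-f]{6,40}', f)[0]                       # 'php_src'
commit = re.findall('_[0-9a-f]{6,40}', f)[0].replace('_', '')  # '3f1a2b9'
print(repo, commit)
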
@@ -478,8 +480,9 @@ def exportAbstractPatterns():
    try:
        dKey = '/'.join(id[0].split('-')[:-1]) + "/" + members[0]
        lines = redis_db.hget("dump", dKey)

        # lines = redis_db.hget("dump", dKey)
        lines = redis_db.hget(dKey, 'actionTree')
        lines = redis_db.hget(dKey, 'shapeTree')
        cid = id[0].replace("-", '#')

        abstractPattern(cid, lines.decode(), isJava, members)
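
The revised lookups assume each dump key is itself a Redis hash with per-tree fields ('actionTree', 'shapeTree'), rather than a single field in one global 'dump' hash. A sketch with an invented key and value:

import redis

redis_db = redis.StrictRedis(host="localhost", port=6379, db=0)
dKey = 'redis/5/redis_3f1a2b9_0'  # hypothetical '<root>/<size>/<file>' key
redis_db.hset(dKey, 'shapeTree', 'UPD root ...')
lines = redis_db.hget(dKey, 'shapeTree')
print(lines.decode())
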
@@ -84,9 +84,13 @@ def testCore(t):
    # output += '@fail:' + str(pre_failure) + '@total:' + str(total) + ', '

    # spfiles = listdir(join(DATASET, 'cocci'))
    spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatterns.pickle'))
    spfiles.sort_values(by='uFreq', inplace=True, ascending=False)
    spfiles = load_zipped_pickle(join(DATA_PATH, 'uniquePatternsMod.pickle'))
    spfiles.sort_values(by='uProject', inplace=True, ascending=False)
    spfiles = spfiles[['uid']]
    # ['uid', 'uFreq', 'uFunction', 'uFilenames', 'uPatch', 'uProject']
    # spfiles = spfiles[spfiles.uFreq > 2]
    spfiles = spfiles[['uid']]

    # print("patching... " + bugName)
    for idx, spfile in enumerate(spfiles.uid.values.tolist()):
        if spfile == '.DS_Store':
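
The reordering step above, shown on a stand-in frame (per the commented column list, uProject ranks each pattern by how many projects it touches):

import pandas as pd

spfiles = pd.DataFrame({'uid': ['p1', 'p2', 'p3'], 'uProject': [3, 9, 5]})
spfiles.sort_values(by='uProject', inplace=True, ascending=False)
spfiles = spfiles[['uid']]
print(spfiles.uid.values.tolist())  # ['p2', 'p3', 'p1']
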
@@ -158,7 +162,8 @@ def testCore(t):
    try:
        client.shutdown()
    except Exception as e:
        logging.error(e)
        logging.debug(e)

    # print(myProcess.pid)
    # os.killpg(myProcess.pid, signal.SIGTERM)
    # docker stop $(docker ps -q)

@@ -184,6 +189,14 @@ def patch_validate():
        , 'introclass:median:d6364e:007', 'introclass:median:489253:007', 'introclass:syllables:d12048:004', 'introclass:smallest:d9e7ea:002', 'introclass:syllables:035fe9:000'
        , 'introclass:syllables:c9d718:002', 'introclass:syllables:ea67b8:007', 'introclass:median:48b829:000', 'introclass:syllables:d9e7ea:001']

    black = load_zipped_pickle(join(DATA_PATH, 'blackBugs'))
    white = load_zipped_pickle(join(DATA_PATH, 'whiteBugs'))

    # nonFail.append('introclass:grade-b1924d-001')
    # nonFail.append('introclass:smallest-b1924d-002')

    # allFixed = list(set(white).union(black))  # 288
    allFixed = white
    # cmd = 'bash ' + join(DATA_PATH, 'startBugzoo.sh')
    # cmd = "/Users/anil.koyuncu/anaconda3/envs/python36/bin/bugzood --debug -p " + str(port)
    # output, errors = shellGitCheckout(cmd)

@@ -196,6 +209,9 @@ def patch_validate():
        if b in nonFail:
            continue

        if b not in allFixed:
            continue
        t = b, port
        bugList.append(t)
        if port == 6300:
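
The effect of the new allFixed guard, on stand-in data: bugs outside the white list are now skipped before being queued.

nonFail = ['introclass:median:1a2b3c:007']     # hypothetical ids
allFixed = ['introclass:smallest:9e7ea0:002']
bugList = []
port = 6300
for b in ['introclass:median:1a2b3c:007', 'introclass:smallest:9e7ea0:002']:
    if b in nonFail:
        continue
    if b not in allFixed:
        continue
    bugList.append((b, port))
print(bugList)  # [('introclass:smallest:9e7ea0:002', 6300)]
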
@@ -215,9 +231,9 @@ def patch_validate():
    # t = 'introclass:syllables:99cbb4:000', 6000
    # testCore(t)
    # results = parallelRunMerge(testCore, bugList, max_workers=10)
    results = parallelRunMerge(testCore, bugList, max_workers=10)
    results = parallelRunMerge(testCore, bugList, max_workers=12)
    print('\n'.join(results))
    with open(join(DATA_PATH, 'introTestResults'), 'w',
    with open(join(DATA_PATH, 'introTestResultsWhiteuProject'), 'w',
              encoding='utf-8') as writeFile:
        # if levelPatch == 0:
        writeFile.write('\n'.join(results))