re-enabled token iteration

mimic
2020-06-09 14:51:19 +02:00
parent aa2a17bc4a
commit a26275cf4a
12 changed files with 256 additions and 464 deletions
+26 -21
@@ -6,6 +6,8 @@ from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
DATASET = os.environ["dataset"]
jdk8 = os.environ["JDK8"]
REDIS_PORT = os.environ["REDIS_PORT"]
# def localPairCore(aTuple):
# redis_db = redis.StrictRedis(host="localhost", port=6380, db=1)
# idx, key = aTuple
@@ -54,12 +56,15 @@ def loadPairMulti(root,clusterPath,level):
# root = 'BreakStatement'
logging.info(root)
port = 6399
port = REDIS_PORT
# if isfile(clusterPath +"/"+root+".pickle"):
# return load_zipped_pickle(clusterPath +"/"+root+".pickle")
# else:
# redis_db = redis.StrictRedis(host="localhost", port=port, db=1) #L1
redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
if level == 'tokens':
redis_db = redis.StrictRedis(host="localhost", port=port, db=3)
else:
redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
keys = redis_db.scan(0, match=root+'-*', count='100000000')
# keys = redis_db.hkeys("dump")
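The hunk above replaces the hardcoded port 6399 with the REDIS_PORT environment variable and routes token-level lookups to their own Redis database. A minimal sketch of the resulting connection logic, assuming redis-py and the same environment variables (connectFor is a hypothetical helper name; the int cast is added here for safety, the script passes the string through directly):

import os
import redis

REDIS_PORT = int(os.environ["REDIS_PORT"])

def connectFor(level):
    # per the diff: token pairs live in db 3, everything else in db 2
    db = 3 if level == 'tokens' else 2
    return redis.StrictRedis(host="localhost", port=REDIS_PORT, db=db)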
@@ -81,11 +86,11 @@ def loadPairMulti(root,clusterPath,level):
matches['path2']=matches['pairs'].apply(lambda x:x[1])
# matches['sizes']=matches['pairs_key'].apply(lambda x:x.split('_')[0].split('-')[1])
matches['sizes']=matches['pairs_key'].apply(lambda x:x.split(root)[1].split('/')[0].split('-')[1])
if level == 'actions':
matches['actions']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[2])
if level == 'tokens':
matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[3])
matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[2])
# if level == 'tokens':
# matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
# matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[3])
# save_zipped_pickle(matches,clusterPath +"/"+root+".pickle")
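The sizes/actions/tokens columns above are all carved out of the same pairs_key string. A sketch of the decomposition, using a hypothetical key that follows the layout the splits imply (root-size-action-token/suffix):

key = 'ExpressionStatement-3-0-0/12_34'    # hypothetical pairs_key
root = 'ExpressionStatement'
head = key.split(root)[1].split('/')[0]    # '-3-0-0'
parts = head.split('-')                    # ['', '3', '0', '0']
size, action, token = parts[1], parts[2], parts[3]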
@@ -119,19 +124,19 @@ def cluster(clusterPath,pairsPath, level):
for s in sizes:
match = matches[matches['sizes'] == s]
if level == 'actions':
actions = match['actions'].unique().tolist()
if level == 'tokens':
actions = match['tokens'].unique().tolist()
for action in actions:
match = match[match['actions'] == action]
match = match[match['tokens'] == action]
clusterCore(clusterPath, level, match, pairsPath, root, s,action)
elif level == 'tokens':
actions = match['actions'].unique().tolist()
for action in actions:
match = match[match['actions'] == action]
tokens = match['tokens'].unique().tolist()
for token in tokens:
match = match[match['tokens']==token]
clusterCore(clusterPath, level, match, pairsPath, root, s, action,token)
# elif level == 'tokens':
# actions = match['actions'].unique().tolist()
# for action in actions:
# match = match[match['actions'] == action]
# tokens = match['tokens'].unique().tolist()
# for token in tokens:
# match = match[match['tokens']==token]
# clusterCore(clusterPath, level, match, pairsPath, root, s, action,token)
else:
clusterCore(clusterPath, level, match, pairsPath, root, s,'')
@@ -158,12 +163,12 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''):
cluster.append(subgraph.nodes())
cluster
pathMapping = dict()
if level == 'actions':
if level == 'tokens':
indexFile = join(pairsPath, root, s,action+'.index')
elif level == 'shapes':
elif level == 'actions':
indexFile = join(pairsPath, root, s + '.index')
else:
indexFile =join(pairsPath, root, s,action,token+'.index')
# else:
# indexFile =join(pairsPath, root, s,action,token+'.index')
df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
pathMapping = df.to_dict()
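clusterCore resolves cluster member ordinals back to file paths through the two-column .index files. A small sketch of that lookup, with hypothetical index contents:

import pandas as pd

# cluster.index (hypothetical): one '<ordinal>,<filename>' row per member
#   0,repoA_fix1.txt
#   1,repoB_fix7.txt
df = pd.read_csv('cluster.index', header=None, usecols=[0, 1], index_col=[0])
pathMapping = df.to_dict()
# -> {1: {0: 'repoA_fix1.txt', 1: 'repoB_fix7.txt'}}: column 1, keyed by ordinal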
+15 -67
@@ -40,9 +40,7 @@ if __name__ == '__main__':
if job == 'dataset4j':
from javaDS import createDS
createDS()
# elif job == 'linuxDS':
# from linuxDataset import collectBugFixPatches
# collectBugFixPatches()
elif job =='dataset4c':
from otherDatasets import core
core()
@@ -53,20 +51,16 @@ if __name__ == '__main__':
output = shellCallTemplate(cmd)
logging.info(output)
# elif job =='loadRES':
# cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " LOAD " + rootType
# output = shellCallTemplate(cmd)
# logging.info(output)
elif job =='shapeSI':
from pairs import shapePairs
matches = shapePairs()
elif job =='actionSI':
from pairs import actionPairs
matches = actionPairs()
from pairs import createPairs
createPairs(matches)
from pairs import importShape
importShape()
from pairs import importAction
importAction()
elif job =='compare':
# cmd = "mvn exec:java -f '/data/fixminer_source/' -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
@@ -74,65 +68,28 @@ if __name__ == '__main__':
output = shellCallTemplate4jar(cmd)
logging.info(output)
# elif job == 'clusterAdditional':
# from addNewData import cluster
# cluster()
elif job == 'cluster':
from abstractPatch import cluster
dbDir = join(DATA_PATH, 'redis')
startDB(dbDir, "6399", PROJECT_TYPE)
cluster(join(DATA_PATH,'shapes'),join(DATA_PATH, 'pairs'),'shapes')
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
cluster(join(DATA_PATH,'actions'),join(DATA_PATH, 'pairs'),'actions')
# elif job =='actionSI':
# from pairs import actionPairs
# actionPairs(rootType)
#
# # elif job =='importActionPairs':
# from pairs import importAction
# importAction(rootType)
#
# elif job =='compareActions':
# # cmd = "JAVA_HOME='"+jdk8+"' java -Xmx8096m -Djava.util.concurrent.ForkJoinPool.common.parallelism=64 -jar "+ join(DATA_PATH,'CompareTrees.jar') + " action " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl1-gumInputALL.rdb"
#
# cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L2'
# output = shellCallTemplate(cmd)
# logging.info(output)
#
# elif job == 'clusterActions':
# from abstractPatch import cluster
#
# dbDir = join(DATA_PATH, 'redis')
# startDB(dbDir, "6399", PROJECT_TYPE)
# cluster( join(DATA_PATH, 'actions'),join(DATA_PATH, 'pairsAction'),'actions',rootType)
elif job == 'tokenSI':
elif job =='tokenSI':
from pairs import tokenPairs
tokenPairs()
from pairs import importToken
importToken()
elif job == 'compareTokens':
# cmd = "JAVA_HOME='"+jdk8+"' java -jar "+ join(DATA_PATH,'CompareTrees.jar') + " token " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl2-gumInputALL.rdb"
cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L3'
output = shellCallTemplate(cmd)
logging.info(output)
from pairs import importTokens
importTokens()
elif job == 'clusterTokens':
from abstractPatch import cluster
dbDir = join(DATA_PATH, 'redis')
startDB(dbDir, "6399", PROJECT_TYPE)
startDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")
cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'),'tokens')
stopDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
cluster( join(DATA_PATH, 'tokens'),join(DATA_PATH, 'pairsToken'),'tokens')
# elif job == 'additional':
# from addNewData import core
# core()
# # from addNewData import checkWrongMembers
# # checkWrongMembers()
elif job == 'codeflaws':
from otherDatasets import codeflaws
@@ -218,19 +175,10 @@ if __name__ == '__main__':
elif job == 'defects4j':
from stats import defects4jStats
defects4jStats()
elif job == 'exportPatterns':
elif job == 'patterns':
from stats import exportAbstractPatterns
exportAbstractPatterns()
elif job =='export':
patternPath = join(DATA_PATH,'actions','ExpressionStatement','3','0','0')
patterns = listdir(patternPath)
for pattern in patterns:
repo = pattern.split('_')[0]
file = pattern.replace(repo+'_','')
print(file)
filename = file.rsplit('_',1)[0]
print(join(DATA_PATH,'gumInput',repo,'DiffEntries',filename))
break
else:
logging.error('Unknown job %s',job)
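Taken together, the re-enabled jobs form a token-level pipeline. A condensed sketch of the order in which the jobs above run it (helper names as imported in the script; arguments abbreviated):

import os
from os.path import join
from pairs import tokenPairs, importToken, importTokens
from abstractPatch import cluster

DATA_PATH = os.environ["DATA_PATH"]
REDIS_PORT = os.environ["REDIS_PORT"]

tokenPairs()     # job 'tokenSI': enumerate token-level pairs
importToken()    # push the pair indexes into Redis
# job 'compareTokens' shells out to FixPatternMiner-1.0.1.jar with 'COMPARE L3'
importTokens()   # import the comparison results
# job 'clusterTokens': startDB(dbDir, REDIS_PORT, PROJECT_TYPE), then
cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')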
+29 -145
@@ -3,50 +3,20 @@ DATA_PATH = os.environ["DATA_PATH"]
ROOT = os.environ["ROOT_DIR"]
INNER_DATA_PATH = join(ROOT,'data')
PROJECT_TYPE = os.environ["PROJECT_TYPE"]
def importToken():
# global dbDir, portInner, redis_db, pairs, cluster, action, e, idx, v, key
dbDir = join(INNER_DATA_PATH, 'redis')
portInner = '6380'
dbDir = join(INNER_DATA_PATH, 'redis')
startDB(dbDir, portInner, "clusterl2-gumInputALL.rdb")
import redis
pairsToken = join(DATA_PATH, 'pairsToken')
redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
pairs = get_filepaths(pairsToken, '.txt')
for pair in pairs:
split = pair.split("/")
REDIS_PORT = os.environ["REDIS_PORT"]
shapeName = split[-4]
shapeSize = split[-3]
cluster = split[-2]
action = split[-1].replace('.txt', '')
# cmd ="bash " + join(DATA_PATH,'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-"+sizeCluster+"-"+actionCluster ;#+, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
cmd = "bash " + join(INNER_DATA_PATH,
'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + shapeSize + "-" + cluster + "-" + action; # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
o, e = shellGitCheckout(cmd)
o
indexFile = pair.replace('.txt', '.index')
with open(indexFile, 'r') as iFile:
idx = iFile.readlines()
for i in idx:
k, v = i.split(',')
key = shapeName + "-" + shapeSize + "-" + cluster + "-" + action + "-" + k
# redis_db.set(key, v.strip())
redis_db.hset('filenames', key, v.strip())
def importAction(rootType):
def importTokens():
# global dbDir, portInner, redis_db, pairs, pair, split, shapeName, shapeSize, cluster, cmd, o, e, indexFile, iFile, idx, i, k, v, key
dbDir = join(INNER_DATA_PATH, 'redis')
# portInner = '6380'
# startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
portInner = '6399'
portInner = REDIS_PORT
startDB(dbDir, portInner, PROJECT_TYPE)
import redis
# import pairs
pairsAction = join(DATA_PATH, 'pairsAction',rootType)
pairsAction = join(DATA_PATH, 'pairsToken')
redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
pairs = get_filepaths(pairsAction, '.txt')
for pair in pairs:
@@ -55,7 +25,7 @@ def importAction(rootType):
shapeSize = split[-2]
cluster = split[-1].replace('.txt', '')
cmd = "bash " + join(INNER_DATA_PATH,
'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + shapeSize + "-" + cluster; # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + shapeSize + "-" + cluster; # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
o, e = shellGitCheckout(cmd)
print(o)
@@ -67,49 +37,17 @@ def importAction(rootType):
key = shapeName + "-" + shapeSize + "-" + cluster + "-" + k
# redis_db.set(key, v.strip())
redis_db.hset('filenames', key, v.strip())
redis_db.set("level", "l2")
# def importShape():
# # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
# dbDir = join(DATA_PATH, 'redis')
# portInner = '6380'
# startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
# import redis
# pairsShapes = join(DATA_PATH, 'pairs')
# redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
# pairs = get_filepaths(pairsShapes, '.index')
# l = []
# for pair in pairs:
# # split = pair.split("/")
# # shapeName = split[-2]
# # sizeCluster = split[-1].replace('.txt', '')
# # cmd = "bash " + join(DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + sizeCluster;
# #
# # o, e = shellGitCheckout(cmd)
# # print(o)
# # indexFile = pair.replace('.txt', '.index')
# with open(pair, 'r') as iFile:
# idx = iFile.readlines()
# idx = [i.split(',')[1] for i in idx]
# l.append(idx)
# l = list(itertools.chain.from_iterable(l))
# l = [i for i in l if not (i.startswith('commons-math') or i.startswith('commons-lang') or i.startswith(
# 'closure-compiler') or i.startswith('joda-time') or i.startswith('mockito') or i.startswith('jfreechart'))]
# l
#
# # for i in idx:
# # k, v = i.split(',')
# # key = shapeName + "-" + sizeCluster + "-" + k
# # redis_db.set(key, v.strip())
def importShape():
def importAction():
# global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
dbDir = join(INNER_DATA_PATH, 'redis')
# portInner = '6380'
# startDB(dbDir, portInner, "clusterl0-gumInputALL.rdb")
portInner = '6399'
portInner = REDIS_PORT
startDB(dbDir, portInner, PROJECT_TYPE)
import redis
@@ -120,7 +58,7 @@ def importShape():
split = pair.split("/")
shapeName = split[-2]
sizeCluster = split[-1].replace('.txt', '')
cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + sizeCluster;
cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + sizeCluster;
o, e = shellGitCheckout(cmd)
print(o)
@@ -132,43 +70,45 @@ def importShape():
key = shapeName + "-" + sizeCluster + "-" + k
#redis_db.set(key, v.strip())
redis_db.hset('filenames',key,v.strip())
redis_db.set("level","l1")
def tokenPairs():
global shapes, shape, sizes, clusters, cluster, actions, action, idx, val, pairs
# global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
shapes = listdir(join(DATA_PATH, 'actions'))
shapes = [f for f in shapes if isdir(join(DATA_PATH, 'actions', f))]
if os.path.exists(join(DATA_PATH, 'pairsToken')):
import shutil
shutil.rmtree(join(DATA_PATH, 'pairsToken'))
# shapes = [rootType]
for shape in shapes:
sizes = listdir(join(DATA_PATH, 'actions', shape))
sizes = [f for f in sizes if isdir(join(DATA_PATH, 'actions', shape, f))]
for sf in sizes:
if sf.startswith('.'):
continue
if sf == '1':
continue
clusters = listdir(join(DATA_PATH, 'actions', shape, sf))
for cluster in clusters:
if cluster.startswith('.'):
continue
actions = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
for action in actions:
files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster, action))
files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
if len(files) > 1:
indexCompared = []
if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf, cluster)):
os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf, cluster))
if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf)):
os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf))
infexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.index')
if isfile(infexFile):
test = pd.read_csv(infexFile, header=None, index_col=0)
indexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.index')
if isfile(indexFile):
test = pd.read_csv(indexFile, header=None, index_col=0)
test.rename(columns={1: 'filename'}, inplace=True)
newFiles = [i for i in files if i not in test.filename.values.tolist()]
for newFile in newFiles:
test = test.append(pd.DataFrame(columns=['filename'], data=[newFile]), ignore_index=True)
indexCompared = test.index.values.tolist()
test.to_csv(infexFile, header=None)
test.to_csv(indexFile, header=None)
else:
with open(infexFile, 'w') as out:
with open(indexFile, 'w') as out:
# csv_out = csv.writer(out)
for idx, val in enumerate(files):
@@ -176,7 +116,8 @@ def tokenPairs():
indexCompared.append(str(idx))
pairs = list(itertools.combinations(indexCompared, 2))
pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.txt')
pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.txt')
if isfile(pairsFile):
test = pd.read_csv(pairsFile, header=None)
test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
@@ -193,69 +134,12 @@ def tokenPairs():
a, b = row
out.write(a + ',' + b + '\n')
def actionPairs(rootType):
# global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
# shapes = listdir(join(DATA_PATH, 'shapes'))
# shapes = [f for f in shapes if isdir(join(DATA_PATH, 'shapes', f))]
shapes = [rootType]
for shape in shapes:
sizes = listdir(join(DATA_PATH, 'shapes', shape))
sizes = [f for f in sizes if isdir(join(DATA_PATH, 'shapes', shape, f))]
for sf in sizes:
if sf.startswith('.'):
continue
clusters = listdir(join(DATA_PATH, 'shapes', shape, sf))
for cluster in clusters:
if cluster.startswith('.'):
continue
files = listdir(join(DATA_PATH, 'shapes', shape, sf, cluster))
indexCompared = []
if not os.path.exists(join(DATA_PATH, 'pairsAction', shape, sf)):
os.makedirs(join(DATA_PATH, 'pairsAction', shape, sf))
indexFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.index')
if isfile(indexFile):
test = pd.read_csv(indexFile, header=None, index_col=0)
test.rename(columns={1: 'filename'}, inplace=True)
newFiles = [i for i in files if i not in test.filename.values.tolist()]
for newFile in newFiles:
test = test.append(pd.DataFrame(columns=['filename'], data=[newFile]), ignore_index=True)
indexCompared = test.index.values.tolist()
test.to_csv(indexFile, header=None)
else:
with open(indexFile, 'w') as out:
# csv_out = csv.writer(out)
for idx, val in enumerate(files):
out.write(str(idx) + ',' + val + '\n')
indexCompared.append(str(idx))
pairs = list(itertools.combinations(indexCompared, 2))
pairsFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.txt')
if isfile(pairsFile):
test = pd.read_csv(pairsFile, header=None)
test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
newPairs = [i for i in pairs if i not in test['pairs'].values.tolist()]
with open(pairsFile, 'w') as out:
# csv_out = csv.writer(out)
for row in newPairs:
a, b = row
out.write(a + ',' + b + '\n')
else:
with open(pairsFile, 'w') as out:
# csv_out = csv.writer(out)
for row in pairs:
a, b = row
out.write(a + ',' + b + '\n')
def shapePairs():
def actionPairs():
# global dbDir, portInner, redis_db, keys, matches, roots, sizes, sf, files, indexCompared, out, idx, val, pairs, row, a, b
# if not (isfile(join(DATA_PATH, 'studyDataset.pickle'))):
dbDir = join(INNER_DATA_PATH, 'redis')
portInner = '6399'
portInner = REDIS_PORT
startDB(dbDir, portInner, PROJECT_TYPE)
import redis
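tokenPairs and actionPairs share one incremental pattern: keep an .index of files seen so far, extend it with any new files, and emit member-ordinal pairs. A self-contained sketch of that pattern (function name hypothetical; it mirrors the pandas idioms used above, including DataFrame.append, which newer pandas deprecates):

import itertools
import os
import pandas as pd

def writePairs(indexFile, pairsFile, files):
    if os.path.isfile(indexFile):
        test = pd.read_csv(indexFile, header=None, index_col=0)
        test.rename(columns={1: 'filename'}, inplace=True)
        for f in [i for i in files if i not in test.filename.values.tolist()]:
            test = test.append(pd.DataFrame(columns=['filename'], data=[f]),
                               ignore_index=True)
        test.to_csv(indexFile, header=None)
        indexCompared = [str(i) for i in test.index.values.tolist()]
    else:
        indexCompared = []
        with open(indexFile, 'w') as out:
            for idx, val in enumerate(files):
                out.write(str(idx) + ',' + val + '\n')
                indexCompared.append(str(idx))
    with open(pairsFile, 'w') as out:
        for a, b in itertools.combinations(indexCompared, 2):
            out.write(a + ',' + b + '\n')

This is simplified: the script additionally diffs the combinations against an existing pairs .txt so that already-compared pairs are not rewritten.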
+14 -31
@@ -1,12 +1,13 @@
from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
PROJECT_TYPE = os.environ["PROJECT_TYPE"]
REDIS_PORT = os.environ["REDIS_PORT"]
def statsNormal(isFixminer=True):
# tokens = join(DATA_PATH, 'tokens')
# actions = join(DATA_PATH, 'actions')
import redis
redis_db = redis.StrictRedis(host="localhost", port=6399, db=0)
redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
# keys = redis_db.scan(0, match='*', count='1000000')
keys = redis_db.hkeys("dump") # hkeys "dump"
matches = pd.DataFrame(keys, columns=['pairs_key'])
@@ -95,7 +96,7 @@ def statsNormal(isFixminer=True):
# for type in ['tokens', 'actions', 'shapes']:
for type in ['shapes']:
for type in ['actions']:
statsS,clusterDF = stats(type,isFixminer)
if isFixminer:
clusterDF = clusterDF[clusterDF.members.str.len() > 1]
@@ -136,25 +137,7 @@ def statsNormal(isFixminer=True):
matches
if isFixminer:
matches.to_csv(join(DATA_PATH, "stats" + type + ".csv"), index=False)
if type == 'actions':
clusterDF['ms'] = clusterDF.members.str.len()
clusterDF.sort_values(by='ms', ascending=False, inplace=True)
top50 = clusterDF.head(50)
top50['member'] = top50.members.apply(lambda x: x[0])
top50['cid'] = top50.cid.apply(lambda x: x[0])
top50['path'] = top50.apply(lambda x:x['cid'].replace('-','/')+'/'+x['member'],axis=1)
def readFile(x):
with open(join(DATA_PATH,'actions',x), 'r', encoding='utf-8') as writeFile:
lines = writeFile.read()
return lines
# if lines.startswith('UPD'):
# return lines
# else:
# return ''
# return lines
top50['pattern'] = top50.path.apply(lambda x:readFile(x))
top50[['cid','pattern']].to_csv('actionsPattern2verify.csv',index=False,header=None)
@@ -253,7 +236,7 @@ def stats(type,isFixminer=True):
continue
cs = listdir(join(shapesPath, shape, size, cluster))
if shapesPath.endswith('shapes'):
if shapesPath.endswith('actions'):
cs = listdir(join(shapesPath, shape, size, cluster))
statsCore(cs)
else:
@@ -262,14 +245,14 @@ def stats(type,isFixminer=True):
if action.startswith('.'):
continue
tokens = listdir(join(shapesPath, shape, size, cluster, action))
if shapesPath.endswith('actions'):
if shapesPath.endswith('tokens'):
statsCore(tokens)
else:
for token in tokens:
if token.startswith('.'):
continue
cs = listdir(join(shapesPath, shape, size, cluster, action, token))
statsCore(cs)
# else:
# for token in tokens:
# if token.startswith('.'):
# continue
# cs = listdir(join(shapesPath, shape, size, cluster, action, token))
# statsCore(cs)
return statsS,clustersDF
@@ -282,7 +265,7 @@ def defects4jStats(isFixminer=False):
mapping.rename(columns={0: 'repo', 1: "commit", 2: 'defects4jID'}, inplace=True)
dbDir = join(DATA_PATH, 'redis')
portInner = '6399'
portInner = REDIS_PORT
startDB(dbDir, portInner, PROJECT_TYPE )
import redis
@@ -474,8 +457,8 @@ cAst = ["unit","comment","literal","operator","modifier","name","type","conditio
def exportAbstractPatterns():
clusterStats,df = stats('shapes')
port = 6399
clusterStats,df = stats('actions')
port = REDIS_PORT
import redis
redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
isJava = False
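statsNormal now reads everything through the env-configured port as well. A sketch of how its DataFrame is seeded from the comparison dump, taken straight from the hunk above (note that hkeys returns bytes):

import os
import redis
import pandas as pd

redis_db = redis.StrictRedis(host="localhost",
                             port=int(os.environ["REDIS_PORT"]), db=0)
keys = redis_db.hkeys("dump")                        # one entry per compared pair
matches = pd.DataFrame(keys, columns=['pairs_key'])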
+2 -2
@@ -1,5 +1,5 @@
from common.commons import *
REDIS_PORT = os.environ["REDIS_PORT"]
DATA_PATH = os.environ["DATA_PATH"]
ast = ["AnonymousClassDeclaration", "ArrayAccess", "ArrayCreation", "ArrayInitializer", "ArrayType", "AssertStatement",
"Assignment", "Block", "BooleanLiteral", "BreakStatement", "CastExpression", "CatchClause", "CharacterLiteral",
@@ -31,7 +31,7 @@ redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
redis_db1 = redis.StrictRedis(host="localhost", port=port, db=1)
redis_db2 = redis.StrictRedis(host="localhost", port=port, db=2)
redis_out = redis.StrictRedis(host="localhost", port=6399, db=0)
redis_out = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
def getTokens(prefix, i):
dist2load = redis_db1.get(prefix + "-" + i);
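getTokens reads an entry keyed as '<prefix>-<i>' from db 1; redis-py returns bytes, or None when the key is absent, so a decoding guard along these lines is a reasonable sketch of the lookup (the rest of the function is not shown in this diff):

def getTokens(prefix, i):
    raw = redis_db1.get(prefix + "-" + i)
    return raw.decode() if raw is not None else None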