Re-enabled token iteration
@@ -34,6 +34,10 @@ You can cite FixMiner using the following bibtex:
FixMiner is a systematic and automated approach to mine relevant and actionable fix patterns for automated program repair.

** This version of FixMiner has some changes compared to the one published in the paper.

- The iteration that computed the shapes separately has been removed. The operations on the shape trees are now performed together with the action trees. As a result of this change, no shape clusters are generated separately anymore. The initial output of the pattern mining iteration is action trees (a.k.a. patterns).

## II. Environment setup

* OS: macOS Mojave (10.14.3)
@@ -82,8 +86,8 @@ In order to launch FixMiner, execute [fixminer.sh](python/fixminer.sh)

In order to launch FixMiner, execute [fixminer.sh](python/fixminer.sh)

bash fixminer.sh [JOB] [CONFIG_FILE]

e.g. bash fixminer.sh dataset4c /Users/projects/release/fixminer_source/src/main/resources/config.yml

bash fixminer.sh [CONFIG_FILE] [JOB]

e.g. bash fixminer.sh /Users/projects/release/fixminer_source/src/main/resources/config.yml dataset4c

A log file (app.log) is created after every execution of [fixminer.sh](python/fixminer.sh). Please check this log file to access more information.

@@ -91,30 +95,38 @@ A log file (app.log) is created after every execution of the [fixminer.sh]((pyth
#### Job Types

*FixMiner* needs to specify a job to run.

*FixMiner* jobs must be executed **in the order listed below** in order to create clusters of patches (a driver sketch follows the list).

1. __dataset4j__ / __dataset4c__: Creates a Java/C mining dataset from the projects listed in [subjects.csv](python/data/subjects.csv), or in [datasets.csv](python/data/datasets.csv) for C.

2. __richEditScript__: Calls the jar file produced as the results as maven package to compute Rich edit scripts.

2. __richedit__: Calls the jar file produced by the Maven package step to compute rich edit scripts.
This step can be invoked natively from Java or using the [Launcher](src/main/java/edu/lu/uni/serval/richedit/Launcher.java) with appropriate arguments.

```powershell
java -jar FixPatternMiner-1.0.0-jar-with-dependencies.jar /Users/projects/release/fixminer_source/src/main/resources/config.yml RICHEDITSCRIPT
```

3. __shapeSI__: Search index creation for shapes. The output of this step is written to __pairs__ folder which will be generated under __datapath__ in [config file](src/main/resources/config.yml)

3. __actionSI__: Search index creation for actions. The output of this step is written to the __pairs__ folder, which is generated under __datapath__ in the [config file](src/main/resources/config.yml).

4. __compare__: Calls the jar file produced by the Maven package step to compare the trees.
This step can be invoked natively from Java or using the [Launcher](src/main/java/edu/lu/uni/serval/richedit/Launcher.java) with appropriate arguments.

```powershell
java -jar FixPatternMiner-1.0.0-jar-with-dependencies.jar /Users/projects/release/fixminer_source/src/main/resources/config.yml COMPARE
```

5. __cluster__: Forms clusters of identical trees. The output of this step is written to __shapes__ folder which will be generated under __datapath__ in [config file](src/main/resources/config.yml)

5. __cluster__: Forms clusters of identical trees. The output of this step is written to the __actions__ folder, which is generated under __datapath__ in the [config file](src/main/resources/config.yml).

6. __stats__: Calculate frequency statistics of the patterns under statsshapes.csv in datapath. The information is also written in app.log file.

6. __tokenSI__: Search index creation for tokens. The output of this step is written to the __pairsToken__ folder, which is generated under __datapath__ in the [config file](src/main/resources/config.yml).

7. __exportPatterns__: Export FixPatterns of APR integration under patterns folder located in datapath/

7. __compare__: Calls the jar file produced by the Maven package step to compare the trees.
This step can be invoked natively from Java or using the [Launcher](src/main/java/edu/lu/uni/serval/richedit/Launcher.java) with appropriate arguments.

```powershell
java -jar FixPatternMiner-1.0.0-jar-with-dependencies.jar /Users/projects/release/fixminer_source/src/main/resources/config.yml COMPARE
```

8. __stats__: Calculates frequency statistics of the patterns under statsactions.csv in datapath. The information is also written to the app.log file.

9. __patterns__: Exports fix patterns for APR integration under the patterns folder located in datapath/.
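For orientation, the whole sequence can be driven by a small script. This is a hedged sketch only, not part of the repository: it assumes it is run from the python/ directory of a checkout, the config path is illustrative, and job names and their order are taken from the list above.

```python
import subprocess

# Illustrative config path; adjust to your checkout.
CONFIG = "/Users/projects/release/fixminer_source/src/main/resources/config.yml"

# Jobs in the documented order; the second 'compare' is the token-level pass.
JOBS = ["dataset4c", "richedit", "actionSI", "compare",
        "cluster", "tokenSI", "compare", "stats", "patterns"]

for job in JOBS:
    # Equivalent to: bash fixminer.sh CONFIG_FILE JOB
    subprocess.run(["bash", "fixminer.sh", CONFIG, job], check=True)
```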
<!--

@@ -252,10 +264,11 @@ Connect to redis instance
```powershell
redis-cli -p 6399
```

We use 3 databases inside the redis, 0,1,2.

We use 4 databases inside Redis: 0, 1, 2 and 3.

DB 0 stores the richedit dumps and the comparison indices.
DB 1 stores the filenames and their corresponding indices
DB 2 stores the output of comparison, a.k.a same trees.
DB 1 stores the filenames and their indices (used in comparison and stored in DB 2, DB 3)
DB 2 stores the output of the comparison of action trees.
DB 3 stores the output of the comparison of token trees.
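For orientation, a minimal redis-py sketch (assuming a local instance on port 6399, as used above) that opens one handle per database:

```python
import redis

# One client per logical database, following the layout described above.
dumps_db     = redis.StrictRedis(host="localhost", port=6399, db=0)  # richedit dumps, comparison indices
filenames_db = redis.StrictRedis(host="localhost", port=6399, db=1)  # filenames and their indices
actions_db   = redis.StrictRedis(host="localhost", port=6399, db=2)  # comparison output for action trees
tokens_db    = redis.StrictRedis(host="localhost", port=6399, db=3)  # comparison output for token trees

print(dumps_db.ping())  # True when the instance is reachable
```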
In order to switch between these databases, use the following command

@@ -298,15 +311,18 @@ hgetall NAME_OF_THE_EXACT_KEY
```powershell
hgetall MethodDeclaration/40/fuse_67b14b_04e5b1_fabric#fabric-client#src#main#java#org#fusesource#fabric#jolokia#facade#facades#ProfileFacade.java.txt_1

OUTPUT:
1) "targetTree"
2) "[(55@@[(31@@)][(31@@)][(31@@)][(31@@[(44@@)][(44@@)])][(31@@[(44@@[(74@@)][(74@@)][(74@@)])][(44@@)])][(31@@[(60@@[(74@@)][(74@@)][(74@@[(74@@)][(74@@)][(74@@)])])][(60@@[(59@@)][(59@@)])])][(31@@[(25@@[(27@@)][(27@@)][(27@@)])][(25@@[(8@@[(21@@[(32@@)][(32@@[(42@@)][(42@@)])])])][(8@@[(21@@[(32@@[(42@@)])])])])])])]"
3) "actionTree"
4) "[(100@@[(100@@)][(100@@)][(100@@)][(100@@[(100@@)][(100@@)])][(100@@[(100@@[(100@@)][(100@@)][(100@@)])][(100@@)])][(100@@[(100@@[(100@@)][(100@@)][(100@@[(100@@)][(100@@)][(100@@)])])][(100@@[(100@@)][(100@@)])])][(100@@[(100@@[(100@@)][(100@@)][(100@@)])][(100@@[(100@@[(100@@[(100@@)][(100@@[(100@@)][(100@@)])])])][(100@@[(100@@[(100@@[(100@@)])])])])])])]"
5) "shapeTree"
6) "[(31@@[(83@@)][(39@@)][(42@@)][(44@@[(43@@)][(42@@)])][(44@@[(74@@[(43@@)][(43@@)][(43@@)])][(42@@)])][(60@@[(74@@[(43@@)][(43@@)][(74@@[(43@@)][(43@@)][(43@@)])])][(59@@[(42@@)][(32@@)])])][(25@@[(27@@[(42@@)][(-1@@)][(33@@)])][(8@@[(21@@[(32@@[(42@@)][(42@@[(42@@)][(42@@)])])])][(21@@[(32@@[(42@@[(42@@)])])])])])])]"
1) "tokens"
2) "public void MethodName:setConfiguration String pid Map String String configuration Map String Map String String configurations MethodName:getConfigurations:[] configurations != null Name:configurations pid configuration configurations "
3) "targetTree"
4) "[(55@@[(31@@)][(31@@)][(31@@)][(31@@[(44@@)][(44@@)])][(31@@[(44@@[(74@@)][(74@@)][(74@@)])][(44@@)])][(31@@[(60@@[(74@@)][(74@@)][(74@@[(74@@)][(74@@)][(74@@)])])][(60@@[(59@@)][(59@@)])])][(31@@[(25@@[(27@@)][(27@@)][(27@@)])][(25@@[(8@@[(21@@[(32@@)][(32@@[(42@@)][(42@@)])])])][(8@@[(21@@[(32@@[(42@@)])])])])])])]"
5) "actionTree"
6) "[(100@@[(100@@)][(100@@)][(100@@)][(100@@[(100@@)][(100@@)])][(100@@[(100@@[(100@@)][(100@@)][(100@@)])][(100@@)])][(100@@[(100@@[(100@@)][(100@@)][(100@@[(100@@)][(100@@)][(100@@)])])][(100@@[(100@@)][(100@@)])])][(100@@[(100@@[(100@@)][(100@@)][(100@@)])][(100@@[(100@@[(100@@[(100@@)][(100@@[(100@@)][(100@@)])])])][(100@@[(100@@[(100@@[(100@@)])])])])])])]"
7) "shapeTree"
8) "[(31@@[(83@@)][(39@@)][(42@@)][(44@@[(43@@)][(42@@)])][(44@@[(74@@[(43@@)][(43@@)][(43@@)])][(42@@)])][(60@@[(74@@[(43@@)][(43@@)][(74@@[(43@@)][(43@@)][(43@@)])])][(59@@[(42@@)][(32@@)])])][(25@@[(27@@[(42@@)][(-1@@)][(33@@)])][(8@@[(21@@[(32@@[(42@@)][(42@@[(42@@)][(42@@)])])])][(21@@[(32@@[(42@@[(42@@)])])])])])])]"
```

After executing the shapeSI step, the rich edit scripts to be compared are stored in a key in DB 0. Use the following command to verify the number of comparisons to be made.

After executing the actionSI / tokenSI steps, the rich edit scripts to be compared are stored in a key in DB 0. Use the following command to verify the number of comparisons to be made.

The trees that are labelled as identical are stored in DB 2 for action trees and in DB 3 for token trees.
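As a quick sanity check, a minimal redis-py sketch (same local-instance assumption as above) that counts the matches recorded in DB 2 and DB 3:

```python
import redis

for db, label in [(2, "action-tree matches"), (3, "token-tree matches")]:
    r = redis.StrictRedis(host="localhost", port=6399, db=db)
    # DBSIZE returns the number of keys in the selected database,
    # i.e. the number of recorded matches.
    print(label, r.dbsize())
```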
This command can also be used to track the progress of the compare step. When the comparison is completed, the following command will return 0.

```powershell
```

+25 -20
@@ -6,6 +6,8 @@ from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
DATASET = os.environ["dataset"]
jdk8 = os.environ["JDK8"]
REDIS_PORT = os.environ["REDIS_PORT"]

# def localPairCore(aTuple):
#     redis_db = redis.StrictRedis(host="localhost", port=6380, db=1)
#     idx, key = aTuple
@@ -54,11 +56,14 @@ def loadPairMulti(root,clusterPath,level):

    # root = 'BreakStatement'
    logging.info(root)
    port = 6399
    port = REDIS_PORT
    # if isfile(clusterPath + "/" + root + ".pickle"):
    #     return load_zipped_pickle(clusterPath + "/" + root + ".pickle")
    # else:
    # redis_db = redis.StrictRedis(host="localhost", port=port, db=1)  # L1
    if level == 'tokens':
        redis_db = redis.StrictRedis(host="localhost", port=port, db=3)
    else:
        redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
    keys = redis_db.scan(0, match=root + '-*', count='100000000')
    # keys = redis_db.hkeys("dump")
@@ -81,11 +86,11 @@ def loadPairMulti(root,clusterPath,level):
    matches['path2'] = matches['pairs'].apply(lambda x: x[1])
    # matches['sizes'] = matches['pairs_key'].apply(lambda x: x.split('_')[0].split('-')[1])
    matches['sizes'] = matches['pairs_key'].apply(lambda x: x.split(root)[1].split('/')[0].split('-')[1])
    if level == 'actions':
        matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
    if level == 'tokens':
        matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
        matches['tokens'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[3])
        matches['tokens'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
    # if level == 'tokens':
    #     matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
    #     matches['tokens'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[3])

    # save_zipped_pickle(matches, clusterPath + "/" + root + ".pickle")
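To make the key format concrete: judging from the splits above, a pairs key looks like `root-size-action(-token)/i/j`. A tiny standalone sketch (the sample key below is made up for illustration):

```python
# Hypothetical key in the format implied by the parsing above:
# "<root>-<size>-<action>-<token>/<i>/<j>"
pairs_key = "ExpressionStatement-3-0-0/12/34"

head = pairs_key.split('/')[0]   # "ExpressionStatement-3-0-0"
size = head.split('-')[1]        # "3"
action = head.split('-')[2]      # "0"
token = head.split('-')[3]       # "0"
i, j = pairs_key.split('/')[1:]  # indices of the compared pair: "12", "34"
print(size, action, token, i, j)
```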
@@ -119,19 +124,19 @@ def cluster(clusterPath,pairsPath, level):
    for s in sizes:
        match = matches[matches['sizes'] == s]

        if level == 'actions':
            actions = match['actions'].unique().tolist()
        if level == 'tokens':
            actions = match['tokens'].unique().tolist()
        for action in actions:
            match = match[match['actions'] == action]
            match = match[match['tokens'] == action]
            clusterCore(clusterPath, level, match, pairsPath, root, s, action)
        elif level == 'tokens':
            actions = match['actions'].unique().tolist()
            for action in actions:
                match = match[match['actions'] == action]
                tokens = match['tokens'].unique().tolist()
                for token in tokens:
                    match = match[match['tokens'] == token]
                    clusterCore(clusterPath, level, match, pairsPath, root, s, action, token)
        # elif level == 'tokens':
        #     actions = match['actions'].unique().tolist()
        #     for action in actions:
        #         match = match[match['actions'] == action]
        #         tokens = match['tokens'].unique().tolist()
        #         for token in tokens:
        #             match = match[match['tokens'] == token]
        #             clusterCore(clusterPath, level, match, pairsPath, root, s, action, token)
        else:
            clusterCore(clusterPath, level, match, pairsPath, root, s, '')

@@ -158,12 +163,12 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''):
        cluster.append(subgraph.nodes())
    cluster
    pathMapping = dict()
    if level == 'actions':
    if level == 'tokens':
        indexFile = join(pairsPath, root, s, action + '.index')
    elif level == 'shapes':
    elif level == 'actions':
        indexFile = join(pairsPath, root, s + '.index')
    else:
        indexFile = join(pairsPath, root, s, action, token + '.index')
    # else:
    #     indexFile = join(pairsPath, root, s, action, token + '.index')
    df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
    pathMapping = df.to_dict()
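The .index files read here are plain `idx,filename` CSV lines; the pair-generation functions later in this diff write them with `out.write(str(idx) + ',' + val + '\n')`. A small round-trip sketch (file and entry names are illustrative):

```python
import pandas as pd

# Write a toy index file in the "idx,filename" format used above.
with open("ExpressionStatement-3.index", "w") as out:
    for idx, name in enumerate(["fileA.txt", "fileB.txt"]):
        out.write(str(idx) + ',' + name + '\n')

# Read it back exactly the way clusterCore does.
df = pd.read_csv("ExpressionStatement-3.index", header=None, usecols=[0, 1], index_col=[0])
print(df.to_dict())  # {1: {0: 'fileA.txt', 1: 'fileB.txt'}}
```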

+13 -65
@@ -40,9 +40,7 @@ if __name__ == '__main__':
    if job == 'dataset4j':
        from javaDS import createDS
        createDS()
    # elif job == 'linuxDS':
    #     from linuxDataset import collectBugFixPatches
    #     collectBugFixPatches()

    elif job == 'dataset4c':
        from otherDatasets import core
        core()
@@ -53,20 +51,16 @@ if __name__ == '__main__':
        output = shellCallTemplate(cmd)
        logging.info(output)

    # elif job == 'loadRES':
    #     cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " LOAD " + rootType
    #     output = shellCallTemplate(cmd)
    #     logging.info(output)

    elif job == 'shapeSI':
        from pairs import shapePairs
        matches = shapePairs()
    elif job == 'actionSI':
        from pairs import actionPairs
        matches = actionPairs()

        from pairs import createPairs
        createPairs(matches)

        from pairs import importShape
        importShape()
        from pairs import importAction
        importAction()

    elif job == 'compare':
        # cmd = "mvn exec:java -f '/data/fixminer_source/' -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' -Dexec.args='" + " shape " + join(DATA_PATH, "redis") + " ALLdumps-gumInput.rdb " + "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
@@ -74,65 +68,28 @@ if __name__ == '__main__':
        output = shellCallTemplate4jar(cmd)
        logging.info(output)

    # elif job == 'clusterAdditional':
    #     from addNewData import cluster
    #     cluster()

    elif job == 'cluster':
        from abstractPatch import cluster

        dbDir = join(DATA_PATH, 'redis')
        startDB(dbDir, "6399", PROJECT_TYPE)
        cluster(join(DATA_PATH, 'shapes'), join(DATA_PATH, 'pairs'), 'shapes')

        # elif job == 'actionSI':
        #     from pairs import actionPairs
        #     actionPairs(rootType)
        #
        # # elif job == 'importActionPairs':
        #     from pairs import importAction
        #     importAction(rootType)
        #
        # elif job == 'compareActions':
        #     # cmd = "JAVA_HOME='" + jdk8 + "' java -Xmx8096m -Djava.util.concurrent.ForkJoinPool.common.parallelism=64 -jar " + join(DATA_PATH, 'CompareTrees.jar') + " action " + join(DATA_PATH, "redis") + " ALLdumps-gumInput.rdb " + "clusterl1-gumInputALL.rdb"
        #
        #     cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L2'
        #     output = shellCallTemplate(cmd)
        #     logging.info(output)
        #
        # elif job == 'clusterActions':
        #     from abstractPatch import cluster
        #
        #     dbDir = join(DATA_PATH, 'redis')
        #     startDB(dbDir, "6399", PROJECT_TYPE)
        #     cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairsAction'), 'actions', rootType)
        startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
        cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions')

    elif job == 'tokenSI':
        from pairs import tokenPairs
        tokenPairs()
        from pairs import importToken
        importToken()

    elif job == 'compareTokens':
        # cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'CompareTrees.jar') + " token " + join(DATA_PATH, "redis") + " ALLdumps-gumInput.rdb " + "clusterl2-gumInputALL.rdb"
        cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L3'
        output = shellCallTemplate(cmd)
        logging.info(output)
        from pairs import importTokens
        importTokens()

    elif job == 'clusterTokens':
        from abstractPatch import cluster

        dbDir = join(DATA_PATH, 'redis')
        startDB(dbDir, "6399", PROJECT_TYPE)
        startDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")
        startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
        cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')
        stopDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")

    # elif job == 'additional':
    #     from addNewData import core
    #     core()
    #     # from addNewData import checkWrongMembers
    #     # checkWrongMembers()

    elif job == 'codeflaws':
        from otherDatasets import codeflaws
@@ -218,19 +175,10 @@ if __name__ == '__main__':
    elif job == 'defects4j':
        from stats import defects4jStats
        defects4jStats()
    elif job == 'exportPatterns':
    elif job == 'patterns':
        from stats import exportAbstractPatterns
        exportAbstractPatterns()
    elif job == 'export':
        patternPath = join(DATA_PATH, 'actions', 'ExpressionStatement', '3', '0', '0')
        patterns = listdir(patternPath)
        for pattern in patterns:
            repo = pattern.split('_')[0]
            file = pattern.replace(repo + '_', '')
            print(file)
            filename = file.rsplit('_', 1)[0]
            print(join(DATA_PATH, 'gumInput', repo, 'DiffEntries', filename))
            break

    else:
        logging.error('Unknown job %s', job)
+24 -140
@@ -3,50 +3,20 @@ DATA_PATH = os.environ["DATA_PATH"]
ROOT = os.environ["ROOT_DIR"]
INNER_DATA_PATH = join(ROOT, 'data')
PROJECT_TYPE = os.environ["PROJECT_TYPE"]
def importToken():
    # global dbDir, portInner, redis_db, pairs, cluster, action, e, idx, v, key
    dbDir = join(INNER_DATA_PATH, 'redis')
    portInner = '6380'
    dbDir = join(INNER_DATA_PATH, 'redis')
    startDB(dbDir, portInner, "clusterl2-gumInputALL.rdb")
    import redis
    pairsToken = join(DATA_PATH, 'pairsToken')
    redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
    pairs = get_filepaths(pairsToken, '.txt')
    for pair in pairs:
        split = pair.split("/")
REDIS_PORT = os.environ["REDIS_PORT"]

        shapeName = split[-4]
        shapeSize = split[-3]
        cluster = split[-2]
        action = split[-1].replace('.txt', '')

        # cmd = "bash " + join(DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + sizeCluster + "-" + actionCluster
        cmd = "bash " + join(INNER_DATA_PATH,
                             'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + shapeSize + "-" + cluster + "-" + action

        o, e = shellGitCheckout(cmd)
        o
        indexFile = pair.replace('.txt', '.index')
        with open(indexFile, 'r') as iFile:
            idx = iFile.readlines()
        for i in idx:
            k, v = i.split(',')
            key = shapeName + "-" + shapeSize + "-" + cluster + "-" + action + "-" + k
            # redis_db.set(key, v.strip())
            redis_db.hset('filenames', key, v.strip())

def importAction(rootType):
def importTokens():
    # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, shapeSize, cluster, cmd, o, e, indexFile, iFile, idx, i, k, v, key
    dbDir = join(INNER_DATA_PATH, 'redis')
    # portInner = '6380'
    # startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
    portInner = '6399'
    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
    # import pairs
    pairsAction = join(DATA_PATH, 'pairsAction', rootType)
    pairsAction = join(DATA_PATH, 'pairsToken')
    redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
    pairs = get_filepaths(pairsAction, '.txt')
    for pair in pairs:
@@ -55,7 +25,7 @@ def importAction(rootType):
        shapeSize = split[-2]
        cluster = split[-1].replace('.txt', '')
        cmd = "bash " + join(INNER_DATA_PATH,
                             'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + shapeSize + "-" + cluster
                             'redisSingleImport.sh') + " " + pair + " " + REDIS_PORT + " " + shapeName + "-" + shapeSize + "-" + cluster

        o, e = shellGitCheckout(cmd)
        print(o)
@@ -67,49 +37,17 @@ def importAction(rootType):
            key = shapeName + "-" + shapeSize + "-" + cluster + "-" + k
            # redis_db.set(key, v.strip())
            redis_db.hset('filenames', key, v.strip())
    redis_db.set("level", "l2")

# def importShape():
#     # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
#     dbDir = join(DATA_PATH, 'redis')
#     portInner = '6380'
#     startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
#     import redis
#     pairsShapes = join(DATA_PATH, 'pairs')
#     redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
#     pairs = get_filepaths(pairsShapes, '.index')
#     l = []
#     for pair in pairs:
#         # split = pair.split("/")
#         # shapeName = split[-2]
#         # sizeCluster = split[-1].replace('.txt', '')
#         # cmd = "bash " + join(DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + sizeCluster
#         #
#         # o, e = shellGitCheckout(cmd)
#         # print(o)
#         # indexFile = pair.replace('.txt', '.index')
#         with open(pair, 'r') as iFile:
#             idx = iFile.readlines()
#         idx = [i.split(',')[1] for i in idx]
#         l.append(idx)
#     l = list(itertools.chain.from_iterable(l))
#     l = [i for i in l if not (i.startswith('commons-math') or i.startswith('commons-lang') or i.startswith(
#         'closure-compiler') or i.startswith('joda-time') or i.startswith('mockito') or i.startswith('jfreechart'))]
#     l
#
#     # for i in idx:
#     #     k, v = i.split(',')
#     #     key = shapeName + "-" + sizeCluster + "-" + k
#     #     redis_db.set(key, v.strip())

def importShape():
def importAction():
    # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
    dbDir = join(INNER_DATA_PATH, 'redis')
    # portInner = '6380'
    # startDB(dbDir, portInner, "clusterl0-gumInputALL.rdb")

    portInner = '6399'
    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
@@ -120,7 +58,7 @@ def importShape():
        split = pair.split("/")
        shapeName = split[-2]
        sizeCluster = split[-1].replace('.txt', '')
        cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + sizeCluster
        cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " " + REDIS_PORT + " " + shapeName + "-" + sizeCluster

        o, e = shellGitCheckout(cmd)
        print(o)
@@ -132,88 +70,34 @@ def importShape():
            key = shapeName + "-" + sizeCluster + "-" + k
            # redis_db.set(key, v.strip())
            redis_db.hset('filenames', key, v.strip())
    redis_db.set("level", "l1")

def tokenPairs():
    global shapes, shape, sizes, clusters, cluster, actions, action, idx, val, pairs
    # global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
    shapes = listdir(join(DATA_PATH, 'actions'))
    shapes = [f for f in shapes if isdir(join(DATA_PATH, 'actions', f))]

    if os.path.exists(join(DATA_PATH, 'pairsToken')):
        import shutil
        shutil.rmtree(join(DATA_PATH, 'pairsToken'))
    # shapes = [rootType]
    for shape in shapes:
        sizes = listdir(join(DATA_PATH, 'actions', shape))
        sizes = [f for f in sizes if isdir(join(DATA_PATH, 'actions', shape, f))]
        for sf in sizes:
            if sf.startswith('.'):
                continue
            if sf == '1':
                continue
            clusters = listdir(join(DATA_PATH, 'actions', shape, sf))
            for cluster in clusters:
                if cluster.startswith('.'):
                    continue
                actions = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
                for action in actions:

                    files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster, action))
                files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
                if len(files) > 1:
                    indexCompared = []
                    if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf, cluster)):
                        os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf, cluster))
                    if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf)):
                        os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf))

                    infexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.index')
                    if isfile(infexFile):
                        test = pd.read_csv(infexFile, header=None, index_col=0)
                        test.rename(columns={1: 'filename'}, inplace=True)

                        newFiles = [i for i in files if i not in test.filename.values.tolist()]
                        for newFile in newFiles:
                            test = test.append(pd.DataFrame(columns=['filename'], data=[newFile]), ignore_index=True)
                        indexCompared = test.index.values.tolist()
                        test.to_csv(infexFile, header=None)
                    else:
                        with open(infexFile, 'w') as out:
                            # csv_out = csv.writer(out)
                            for idx, val in enumerate(files):
                                out.write(str(idx) + ',' + val + '\n')
                                indexCompared.append(str(idx))

                    pairs = list(itertools.combinations(indexCompared, 2))
                    pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.txt')
                    if isfile(pairsFile):
                        test = pd.read_csv(pairsFile, header=None)
                        test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
                        newPairs = [i for i in pairs if i not in test['pairs'].values.tolist()]
                        with open(pairsFile, 'w') as out:
                            # csv_out = csv.writer(out)
                            for row in newPairs:
                                a, b = row
                                out.write(a + ',' + b + '\n')
                    else:
                        with open(pairsFile, 'w') as out:
                            # csv_out = csv.writer(out)
                            for row in pairs:
                                a, b = row
                                out.write(a + ',' + b + '\n')

def actionPairs(rootType):
    # global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
    # shapes = listdir(join(DATA_PATH, 'shapes'))
    # shapes = [f for f in shapes if isdir(join(DATA_PATH, 'shapes', f))]
    shapes = [rootType]
    for shape in shapes:
        sizes = listdir(join(DATA_PATH, 'shapes', shape))
        sizes = [f for f in sizes if isdir(join(DATA_PATH, 'shapes', shape, f))]
        for sf in sizes:
            if sf.startswith('.'):
                continue
            clusters = listdir(join(DATA_PATH, 'shapes', shape, sf))
            for cluster in clusters:
                if cluster.startswith('.'):
                    continue
                files = listdir(join(DATA_PATH, 'shapes', shape, sf, cluster))
                indexCompared = []
                if not os.path.exists(join(DATA_PATH, 'pairsAction', shape, sf)):
                    os.makedirs(join(DATA_PATH, 'pairsAction', shape, sf))

                indexFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.index')
                indexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.index')
                if isfile(indexFile):
                    test = pd.read_csv(indexFile, header=None, index_col=0)
                    test.rename(columns={1: 'filename'}, inplace=True)
@@ -233,7 +117,7 @@ def actionPairs(rootType):

                pairs = list(itertools.combinations(indexCompared, 2))

                pairsFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.txt')
                pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.txt')
                if isfile(pairsFile):
                    test = pd.read_csv(pairsFile, header=None)
                    test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
@@ -250,12 +134,12 @@ def actionPairs(rootType):
                        a, b = row
                        out.write(a + ',' + b + '\n')

def shapePairs():
def actionPairs():
    # global dbDir, portInner, redis_db, keys, matches, roots, sizes, sf, files, indexCompared, out, idx, val, pairs, row, a, b
    # if not (isfile(join(DATA_PATH, 'studyDataset.pickle'))):
    dbDir = join(INNER_DATA_PATH, 'redis')

    portInner = '6399'
    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis

+14 -31
@@ -1,12 +1,13 @@
from common.commons import *
DATA_PATH = os.environ["DATA_PATH"]
PROJECT_TYPE = os.environ["PROJECT_TYPE"]
REDIS_PORT = os.environ["REDIS_PORT"]

def statsNormal(isFixminer=True):
    # tokens = join(DATA_PATH, 'tokens')
    # actions = join(DATA_PATH, 'actions')
    import redis
    redis_db = redis.StrictRedis(host="localhost", port=6399, db=0)
    redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
    # keys = redis_db.scan(0, match='*', count='1000000')
    keys = redis_db.hkeys("dump")  # hkeys "dump"
    matches = pd.DataFrame(keys, columns=['pairs_key'])
@@ -95,7 +96,7 @@ def statsNormal(isFixminer=True):

    # for type in ['tokens', 'actions', 'shapes']:
    for type in ['shapes']:
    for type in ['actions']:
        statsS, clusterDF = stats(type, isFixminer)
        if isFixminer:
            clusterDF = clusterDF[clusterDF.members.str.len() > 1]
@@ -136,25 +137,7 @@ def statsNormal(isFixminer=True):
        matches
        if isFixminer:
            matches.to_csv(join(DATA_PATH, "stats" + type + ".csv"), index=False)
            if type == 'actions':
                clusterDF['ms'] = clusterDF.members.str.len()
                clusterDF.sort_values(by='ms', ascending=False, inplace=True)
                top50 = clusterDF.head(50)
                top50['member'] = top50.members.apply(lambda x: x[0])
                top50['cid'] = top50.cid.apply(lambda x: x[0])
                top50['path'] = top50.apply(lambda x: x['cid'].replace('-', '/') + '/' + x['member'], axis=1)
                def readFile(x):
                    with open(join(DATA_PATH, 'actions', x), 'r', encoding='utf-8') as writeFile:
                        lines = writeFile.read()
                    return lines
                    # if lines.startswith('UPD'):
                    #     return lines
                    # else:
                    #     return ''
                    # return lines

                top50['pattern'] = top50.path.apply(lambda x: readFile(x))
                top50[['cid', 'pattern']].to_csv('actionsPattern2verify.csv', index=False, header=None)

@@ -253,7 +236,7 @@ def stats(type,isFixminer=True):
                    continue
                cs = listdir(join(shapesPath, shape, size, cluster))

                if shapesPath.endswith('shapes'):
                if shapesPath.endswith('actions'):
                    cs = listdir(join(shapesPath, shape, size, cluster))
                    statsCore(cs)
                else:
@@ -262,14 +245,14 @@ def stats(type,isFixminer=True):
                        if action.startswith('.'):
                            continue
                        tokens = listdir(join(shapesPath, shape, size, cluster, action))
                        if shapesPath.endswith('actions'):
                        if shapesPath.endswith('tokens'):
                            statsCore(tokens)
                        else:
                            for token in tokens:
                                if token.startswith('.'):
                                    continue
                                cs = listdir(join(shapesPath, shape, size, cluster, action, token))
                                statsCore(cs)
                        # else:
                        #     for token in tokens:
                        #         if token.startswith('.'):
                        #             continue
                        #         cs = listdir(join(shapesPath, shape, size, cluster, action, token))
                        #         statsCore(cs)
    return statsS, clustersDF

@@ -282,7 +265,7 @@ def defects4jStats(isFixminer=False):
    mapping.rename(columns={0: 'repo', 1: "commit", 2: 'defects4jID'}, inplace=True)
    dbDir = join(DATA_PATH, 'redis')

    portInner = '6399'
    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
@@ -474,8 +457,8 @@ cAst = ["unit","comment","literal","operator","modifier","name","type","conditio

def exportAbstractPatterns():
    clusterStats, df = stats('shapes')
    port = 6399
    clusterStats, df = stats('actions')
    port = REDIS_PORT
    import redis
    redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
    isJava = False

+2 -2
@@ -1,5 +1,5 @@
from common.commons import *

REDIS_PORT = os.environ["REDIS_PORT"]
DATA_PATH = os.environ["DATA_PATH"]
ast = ["AnonymousClassDeclaration", "ArrayAccess", "ArrayCreation", "ArrayInitializer", "ArrayType", "AssertStatement",
       "Assignment", "Block", "BooleanLiteral", "BreakStatement", "CastExpression", "CatchClause", "CharacterLiteral",
@@ -31,7 +31,7 @@ redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
redis_db1 = redis.StrictRedis(host="localhost", port=port, db=1)
redis_db2 = redis.StrictRedis(host="localhost", port=port, db=2)

redis_out = redis.StrictRedis(host="localhost", port=6399, db=0)
redis_out = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)

def getTokens(prefix, i):
    dist2load = redis_db1.get(prefix + "-" + i)
@@ -73,18 +73,18 @@ public class Launcher {
        String srcMLPath = (String) fixminer.get("srcMLPath");

        // String parameter = args[2];
        String parameter = "L1";
        // String parameter = "L1";
        String jobType = args[1];
        // String jobType = "RICHEDITSCRIPT";
        // String jobType = "COMPARE";
        // jobType = "COMPARE";

        mainLaunch(numOfWorkers, jobType, portDumps, projectType, input, redisPath, parameter, srcMLPath, hunkLimit, projectList, patchSize);
        mainLaunch(numOfWorkers, jobType, portDumps, projectType, input, redisPath, srcMLPath, hunkLimit, projectList, patchSize);

    }

    public static void mainLaunch(String numOfWorkers, String jobType, String portDumps, String projectType, String input, String redisPath, String parameter, String srcMLPath, String hunkLimit, String[] projectList, String patchSize){
    public static void mainLaunch(String numOfWorkers, String jobType, String portDumps, String projectType, String input, String redisPath, String srcMLPath, String hunkLimit, String[] projectList, String patchSize){

        String dbDir;
@@ -100,36 +100,36 @@ public class Launcher {
        try {
            switch (jobType) {
                case "RICHEDITSCRIPT":
                    EnhancedASTDiff.main(gumInput, portDumps, dbDir, dumpsName, srcMLPath, parameter, hunkLimit, projectList, patchSize, projectType);
                    EnhancedASTDiff.main(gumInput, portDumps, dbDir, dumpsName, srcMLPath, hunkLimit, projectList, patchSize, projectType);
                    break;

                case "COMPARE":
                    String job;
                    String compareDBName;
                    switch (parameter){
                        case "L1":
                            // job = "shape";
                            job = "single";
                            compareDBName = "clusterl0-gumInputALL.rdb";
                            break;
                        case "L2":
                            job = "action";
                            compareDBName = "clusterl1-gumInputALL.rdb";
                            break;
                        case "L3":
                            job = "token";
                            compareDBName = "clusterl2-gumInputALL.rdb";
                            break;
                        default:
                            throw new Error("unknown level please specify L1,L2,L3");
                    }
                    // String job;
                    // String compareDBName;
                    // switch (parameter){
                    //     case "L1":
                    //         // job = "shape";
                    //         job = "single";
                    //         compareDBName = "clusterl0-gumInputALL.rdb";
                    //         break;
                    //     case "L2":
                    //         job = "action";
                    //         compareDBName = "clusterl1-gumInputALL.rdb";
                    //         break;
                    //     case "L3":
                    //         job = "token";
                    //         compareDBName = "clusterl2-gumInputALL.rdb";
                    //         break;
                    //     default:
                    //         throw new Error("unknown level please specify L1,L2,L3");
                    // }

                    CompareTrees.main(redisPath, portDumps, dumpsName, job, numOfWorkers);
                    break;
                case "PATTERN":
                    ClusterToPattern.main(portDumps, redisPath, dumpsName, parameter);
                    CompareTrees.main(redisPath, portDumps, dumpsName, numOfWorkers);
                    break;
                // case "PATTERN":
                //     ClusterToPattern.main(portDumps, redisPath, dumpsName, parameter);
                //     break;
                default:
                    throw new Error("unknown Job");
@@ -1,134 +0,0 @@
package edu.lu.uni.serval.richedit;

import edu.lu.uni.serval.richedit.jobs.CompareTrees;
import edu.lu.uni.serval.richedit.jobs.EnhancedASTDiff;
import edu.lu.uni.serval.utils.ClusterToPattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Properties;

/**
 * Created by anilkoyuncu on 14/04/2018.
 */
public class Launcher2 {

    private static Logger log = LoggerFactory.getLogger(Launcher2.class);

    public static void main(String[] args) throws IOException {

        Properties appProps = new Properties();

        String hostname = "Unknown";
        try
        {
            InetAddress addr;
            addr = InetAddress.getLocalHost();
            hostname = addr.getHostName();
        }
        catch (UnknownHostException ex)
        {
            System.out.println("Hostname can not be resolved");
        }
        String appConfigPath;
        if (hostname.equals("Unknown")){
            appConfigPath = "src/main/resource/app.properties";
        }
        else{
            appConfigPath = "src/main/resource/" + hostname + ".app.properties";
        }
        // String appConfigPath = args[0];
        appProps.load(new FileInputStream(appConfigPath));

        String numOfWorkers = appProps.getProperty("numOfWorkers", "10");
        String portDumps = appProps.getProperty("portDumps", "6399");
        String projectType = appProps.getProperty("projectType", "java");

        String hunkLimit = appProps.getProperty("hunkLimit", "10");
        String patchSize = appProps.getProperty("patchSize", "50");
        String projectL = appProps.getProperty("projectList", "");
        String[] projectList = projectL.split(",");
        String input = appProps.getProperty("inputPath", "FORKJOIN");
        String redisPath = appProps.getProperty("redisPath", "FORKJOIN");
        String srcMLPath = appProps.getProperty("srcMLPath", "FORKJOIN");

        // String parameter = args[2];
        String parameter = "L1";
        // String jobType = args[1];
        // String jobType = "RICHEDITSCRIPT";
        String jobType = "COMPARE";

        mainLaunch(numOfWorkers, jobType, portDumps, projectType, input, redisPath, parameter, srcMLPath, hunkLimit, projectList, patchSize);

    }

    public static void mainLaunch(String numOfWorkers, String jobType, String portDumps, String projectType, String input, String redisPath, String parameter, String srcMLPath, String hunkLimit, String[] projectList, String patchSize){

        String dbDir;
        String dumpsName;
        String gumInput;

        dumpsName = "dumps-" + projectType + ".rdb";

        gumInput = input;
        dbDir = redisPath;

        try {
            switch (jobType) {
                case "RICHEDITSCRIPT":
                    EnhancedASTDiff.main(gumInput, portDumps, dbDir, dumpsName, srcMLPath, parameter, hunkLimit, projectList, patchSize, projectType);
                    break;

                case "COMPARE":
                    String job;
                    String compareDBName;
                    switch (parameter){
                        case "L1":
                            // job = "shape";
                            job = "single";
                            compareDBName = "clusterl0-gumInputALL.rdb";
                            break;
                        case "L2":
                            job = "action";
                            compareDBName = "clusterl1-gumInputALL.rdb";
                            break;
                        case "L3":
                            job = "token";
                            compareDBName = "clusterl2-gumInputALL.rdb";
                            break;
                        default:
                            throw new Error("unknown level please specify L1,L2,L3");
                    }

                    CompareTrees.main(redisPath, portDumps, dumpsName, job, numOfWorkers);
                    break;
                case "PATTERN":
                    ClusterToPattern.main(portDumps, redisPath, dumpsName, parameter);
                    break;
                default:
                    throw new Error("unknown Job");

            }
        } catch (Exception e) {
            e.printStackTrace();

        }

    }

}
@@ -62,12 +62,16 @@ public class EDiffHunkParser extends EDiffParser {
        ITree targetTree = EDiffHelper.getTargets(actionSet, isJava);
        ITree actionTree = EDiffHelper.getActionTrees(actionSet);
        ITree shapeTree = EDiffHelper.getShapeTree(actionSet, isJava);
        ITree tokenTree = EDiffHelper.getTokenTree(actionSet, isJava);
        String tokens = EDiffHelper.getNames2(tokenTree);
        // EDiffHelper.getTokenTree(actionSet, parent, children, tc);
        try (Jedis inner = innerPool.getResource()) {

            inner.hset("dump", key, actionSet.toString());
            inner.hset(key, "actionTree", actionTree.toStaticHashString());
            inner.hset(key, "targetTree", targetTree.toStaticHashString());
            inner.hset(key, "shapeTree", shapeTree.toStaticHashString());
            inner.hset(key, "tokens", tokens);
        }
        // File f = new File(root+"dumps/"+astNodeType+"/"+String.valueOf(size)+"/");
        // f.mkdirs();

@@ -4,6 +4,7 @@ import edu.lu.uni.serval.utils.CallShell;
import edu.lu.uni.serval.utils.EDiffHelper;
import edu.lu.uni.serval.utils.PoolBuilder;
import me.tongfei.progressbar.ProgressBar;
import org.apache.commons.text.similarity.JaroWinklerDistance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;
@@ -28,7 +29,7 @@ public class CompareTrees {
    private static Logger log = LoggerFactory.getLogger(CompareTrees.class);

    public static void main(String redisPath, String portDumps, String dumpsName, String job, String numOfWorkers) throws Exception {
    public static void main(String redisPath, String portDumps, String dumpsName, String numOfWorkers) throws Exception {

        // shape /Users/anil.koyuncu/projects/test/richedit-core/python/data/redis ALLdumps-gumInput.rdb clusterl0-gumInputALL.rdb /Users/anil.koyuncu/projects/test/richedit-core/python/data/richEditScript

@@ -55,6 +56,7 @@ public class CompareTrees {

        // List<String> listOfPairs = AkkaTreeParser.getMessages(innerPool, Integer.valueOf(numOfWorkers));
        HashMap<String, String> filenames = getFilenames(outerPool);
        String job = getLevel(outerPool);
        // List<String> listOfPairs = AkkaTreeParser.files2compare(outerPool);

@@ -69,12 +71,13 @@ public class CompareTrees {
        }
        IntStream stream = IntStream.range(0, compare.intValue());

        String finalJob = job;
        ProgressBar.wrap(stream.
                parallel(), "Task").
                forEach(m ->
                {
                    newCoreCompare(job, errorPairs, filenames, outerPool);
                    newCoreCompare(finalJob, errorPairs, filenames, outerPool);
                }
        );

@@ -164,15 +167,17 @@ public class CompareTrees {
        String keyName = split[0];
        matchKey = keyName + "/" + (String.valueOf(i)) + "/" + String.valueOf(j);

        switch (treeType) {
            case "single":

        if (matchKey == null){
            return false;
        }
        Map<String, String> oldTreeString = EDiffHelper.getTreeString(keyName, i, outerPool, filenames);
        Map<String, String> newTreeString = EDiffHelper.getTreeString(keyName, j, outerPool, filenames);

        switch (treeType) {
            case "single":

                String oldShapeTree = oldTreeString.get("shapeTree");
                String newShapeTree = newTreeString.get("shapeTree");

@@ -196,6 +201,35 @@ public class CompareTrees {
                    }
                }
                return true;
            case "token":

                String oldTokens = oldTreeString.get("tokens");
                String newTokens = newTreeString.get("tokens");
                // EDiffHelper.getTokens(keyName, i, outerPool, filenames);
                // newTree = EDiffHelper.getTokens(keyName, j, outerPool, innerPool);

                // String oldTokens = EDiffHelper.getNames2(oldTree);
                // String newTokens = EDiffHelper.getNames2(newTree);
                //
                JaroWinklerDistance jwd = new JaroWinklerDistance();
                //
                //
                Double overallSimi = Double.valueOf(0);
                //
                if (!(oldTokens.trim().isEmpty() || newTokens.trim().isEmpty())) {
                    overallSimi = jwd.apply(oldTokens, newTokens);

                }
                int retval = Double.compare(overallSimi, Double.valueOf(1));

                if (retval >= 0) {
                    try (Jedis jedis = outerPool.getResource()) {
                        jedis.select(3);
                        jedis.set(matchKey, "1");
                    }

                }
                return true;
            default:
                return true;
                // break;
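The token case above gates on an exact Jaro-Winkler similarity of 1.0 and records the match in DB 3. A rough Python analogue of that gate (the Java code uses Apache commons-text's JaroWinklerDistance; the jellyfish package, the sample strings and the matchKey below are assumptions for illustration):

```python
import redis
import jellyfish  # assumption: any Jaro-Winkler implementation would do

old_tokens = "public void MethodName:setConfiguration String pid"
new_tokens = "public void MethodName:setConfiguration String pid"

# Mirror of the Java logic: empty token strings never match.
simi = 0.0
if old_tokens.strip() and new_tokens.strip():
    simi = jellyfish.jaro_winkler_similarity(old_tokens, new_tokens)

# Only an exact similarity of 1.0 is recorded as a match, in DB 3.
if simi >= 1.0:
    r = redis.StrictRedis(host="localhost", port=6399, db=3)
    r.set("MethodDeclaration-40-0-0/12/34", "1")  # hypothetical matchKey
```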
@@ -212,6 +246,35 @@ public class CompareTrees {
    }

    public static String getLevel(JedisPool innerPool){

        HashMap<String, String> fileMap = new HashMap<String, String>();

        try (Jedis inner = innerPool.getResource()) {
            while (!inner.ping().equals("PONG")){
                log.info("wait");
            }

            inner.select(1);
            String level = inner.get("level");

            switch (level){
                case "l1":
                    return "single";
                case "l2":
                    return "token";
                default:
                    return "";
            }

        }
    }

    public static HashMap<String, String> getFilenames(JedisPool innerPool){

@@ -244,4 +307,7 @@ public class CompareTrees {

    }
@@ -26,7 +26,7 @@ public class EnhancedASTDiff {

    private static Logger log = LoggerFactory.getLogger(EnhancedASTDiff.class);

    public static void main(String inputPath, String portInner, String dbDir, String chunkName, String srcMLPath, String parameter, String hunkLimit, String[] projectList, String patchSize, String projectType) throws Exception {
    public static void main(String inputPath, String portInner, String dbDir, String chunkName, String srcMLPath, String hunkLimit, String[] projectList, String patchSize, String projectType) throws Exception {

        String parameters = String.format("\nInput path %s", inputPath);

@@ -71,15 +71,22 @@ public class EDiffHelper {
    }

    public static ITree getTokenTree(HierarchicalActionSet actionSet, ITree parent, ITree children, TreeContext tc){
    public static ITree getTokenTree(HierarchicalActionSet actionSet, ITree parent, ITree children, TreeContext tc, boolean isJava){

        int newType = 0;

        String astNodeType = actionSet.getAstNodeType();

        String label = actionSet.getAction().toString();

        Map<Integer, String> nodeMap;
        if(isJava){
            nodeMap = ASTNodeMap.map;
        }else{
            nodeMap = NodeMap_new.map;
        }
        // List<Integer> keysByValue = getKeysByValue(ASTNodeMap.map, astNodeType);
        List<Integer> keysByValue = getKeysByValue(NodeMap_new.map, astNodeType);
        List<Integer> keysByValue = getKeysByValue(nodeMap, astNodeType);

        if(keysByValue.size() != 1){
            log.error("More than 1");
@@ -102,7 +109,7 @@ public class EDiffHelper {
            if(actionSet.getParent() == null){
                children = parent;
            }
            getTokenTree(subAction, children, null, tc);
            getTokenTree(subAction, children, null, tc, isJava);

        }

@@ -307,6 +314,17 @@ public class EDiffHelper {
        return tree;
    }

    public static ITree getTokenTree(HierarchicalActionSet actionSet, boolean isJava) {
        ITree tree = null;
        ITree parent = null;
        ITree children = null;
        TreeContext tc = new TreeContext();
        tree = EDiffHelper.getTokenTree(actionSet, parent, children, tc, isJava);
        // tree.setParent(null);
        tc.validate();
        return tree;
    }

    public static ITree getTargets(HierarchicalActionSet actionSet, boolean isJava) {

@@ -420,13 +438,15 @@ public class EDiffHelper {
        // String[] split = prefix.split("-");
        // String key = split[0] + "/" + split[1] + "/" + dist2load;
        //
        // byte[] s = outer.hget("dump".getBytes(), key.getBytes());
        // actionSet = (HierarchicalActionSet) EDiffHelper.kryoDeseerialize(s);
        // String s = outer.hget("dump", key);
        // actionSet = (HierarchicalActionSet) EDiffHelper.fromString(s);
        //
        //// actionSet = (HierarchicalActionSet) EDiffHelper.kryoDeseerialize(s);
        //
        // ITree parent = null;
        // ITree children = null;
        // TreeContext tc = new TreeContext();
        // tree = EDiffHelper.getTokenTree(actionSet, parent, children, tc);
        // tree = EDiffHelper.getTokenTree(actionSet, parent, children, tc, is);
        // tree.setParent(null);
        // tc.validate();
        //// getLeaves(tree);