From 131087a3f19f6bfec26b0cbe590b220476fae31a Mon Sep 17 00:00:00 2001 From: mimic Date: Thu, 5 Mar 2020 14:02:32 +0100 Subject: [PATCH] change in richedit --- pom.xml | 6 + .../edu/lu/uni/serval/fixminer/Launcher.java | 18 +- .../fixminer/akka/compare/CompareTrees.java | 12 +- .../fixminer/akka/ediff/EDiffHunkParser.java | 40 +-- .../fixminer/akka/ediff/MessageFile.java | 10 +- .../serval/fixminer/jobs/EnhancedASTDiff.java | 250 +++--------------- src/main/resource/app.properties | 17 +- 7 files changed, 101 insertions(+), 252 deletions(-) diff --git a/pom.xml b/pom.xml index 30b2944..326e4c2 100755 --- a/pom.xml +++ b/pom.xml @@ -29,6 +29,12 @@ javatuples 1.2 + + + me.tongfei + progressbar + 0.8.1 + diff --git a/src/main/java/edu/lu/uni/serval/fixminer/Launcher.java b/src/main/java/edu/lu/uni/serval/fixminer/Launcher.java index 6f5d982..fc1240d 100755 --- a/src/main/java/edu/lu/uni/serval/fixminer/Launcher.java +++ b/src/main/java/edu/lu/uni/serval/fixminer/Launcher.java @@ -26,16 +26,16 @@ public class Launcher { // String appConfigPath = args[0]; appProps.load(new FileInputStream(appConfigPath)); -// String portInner = appProps.getProperty("portInner","6380"); String numOfWorkers = appProps.getProperty("numOfWorkers", "10"); String portDumps = appProps.getProperty("portDumps","6399"); String pjName = appProps.getProperty("pjName","allDataset"); String actionType = appProps.getProperty("actionType","ALL"); - String eDiffTimeout = appProps.getProperty("eDiffTimeout","900"); - String parallelism = appProps.getProperty("parallelism","FORKJOIN"); + String hostname = appProps.getProperty("hostname","localhost"); String hunkLimit = appProps.getProperty("hunkLimit","10"); - + String patchSize = appProps.getProperty("patchSize","50"); + String projectL = appProps.getProperty("projectList",""); + String[] projectList = projectL.split(","); String input = appProps.getProperty("inputPath","FORKJOIN"); String redisPath = appProps.getProperty("redisPath","FORKJOIN"); String srcMLPath = appProps.getProperty("srcMLPath","FORKJOIN"); @@ -58,12 +58,12 @@ public class Launcher { // // log.info(parameters); - mainLaunch( numOfWorkers, jobType, portDumps, pjName,actionType,eDiffTimeout,parallelism,input,redisPath,parameter, srcMLPath,hostname,hunkLimit); + mainLaunch( numOfWorkers, jobType, portDumps, pjName,actionType,input,redisPath,parameter, srcMLPath,hostname,hunkLimit,projectList,patchSize); } - public static void mainLaunch(String numOfWorkers, String jobType, String portDumps, String pjName, String actionType, String eDiffTimeout, String parallelism,String input, String redisPath,String parameter,String srcMLPath,String hostname,String hunkLimit){ + public static void mainLaunch(String numOfWorkers, String jobType, String portDumps, String pjName, String actionType, String input, String redisPath,String parameter,String srcMLPath,String hostname,String hunkLimit,String[] projectList,String patchSize){ String dbDir; @@ -79,11 +79,7 @@ public class Launcher { try { switch (jobType) { case "RICHEDITSCRIPT": - EnhancedASTDiff.main(gumInput, numOfWorkers, pjName, eDiffTimeout,parallelism,portDumps, dbDir, actionType+dumpsName, srcMLPath,parameter,hunkLimit); - break; - - case "LOAD": - EnhancedASTDiff.load(gumInput, numOfWorkers, pjName, eDiffTimeout,parallelism,portDumps, dbDir, actionType+dumpsName, srcMLPath,parameter); + EnhancedASTDiff.main(gumInput, pjName, portDumps, dbDir, actionType+dumpsName, srcMLPath,parameter,hunkLimit,projectList,patchSize); break; case "COMPARE": diff --git a/src/main/java/edu/lu/uni/serval/fixminer/akka/compare/CompareTrees.java b/src/main/java/edu/lu/uni/serval/fixminer/akka/compare/CompareTrees.java index 796b487..c51d1c1 100755 --- a/src/main/java/edu/lu/uni/serval/fixminer/akka/compare/CompareTrees.java +++ b/src/main/java/edu/lu/uni/serval/fixminer/akka/compare/CompareTrees.java @@ -5,6 +5,7 @@ import edu.lu.uni.serval.fixminer.akka.ediff.HierarchicalActionSet; import edu.lu.uni.serval.utils.CallShell; import edu.lu.uni.serval.utils.EDiffHelper; import edu.lu.uni.serval.utils.PoolBuilder; +import me.tongfei.progressbar.ProgressBar; import org.apache.commons.text.similarity.JaroWinklerDistance; import org.javatuples.Pair; import org.slf4j.Logger; @@ -74,8 +75,8 @@ public class CompareTrees { final Future future = executor.submit(new RunnableCompare(job, errorPairs, filenames, outerPool, i)); results.add(future); } - - for (Future future:results){ + for (Future future : ProgressBar.wrap(results, "Comparing")){ +// for (Future future:results){ try { // wait for task to complete future.get(); @@ -86,11 +87,12 @@ public class CompareTrees { } catch (ExecutionException e) { e.printStackTrace(); - } finally { - executor.shutdownNow(); } +// finally { +// executor.shutdownNow(); +// } } - + executor.shutdownNow(); diff --git a/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/EDiffHunkParser.java b/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/EDiffHunkParser.java index bf77187..03a5d41 100755 --- a/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/EDiffHunkParser.java +++ b/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/EDiffHunkParser.java @@ -18,7 +18,7 @@ import java.util.List; /** * Parse fix violations with GumTree in terms of multiple statements. - * + * * @author kui.liu * */ @@ -27,18 +27,27 @@ public class EDiffHunkParser extends EDiffParser { private static Logger logger = LoggerFactory.getLogger(EDiffHunkParser.class); @Override public void parseFixPatterns(File prevFile, File revFile, File diffentryFile, String project, JedisPool innerPool,String srcMLPath,String hunkLimit) { - List actionSets = parseChangedSourceCodeWithGumTree2(prevFile, revFile,srcMLPath); + + String datasetName = project; + String[] split1 = diffentryFile.getParent().split(datasetName); + String root = split1[0]; + String pj = split1[1].split("/")[1]; + + + List actionSets = parseChangedSourceCodeWithGumTree2(prevFile, revFile, srcMLPath); + if (actionSets != null && actionSets.size() != 0) { boolean processActionSet = true; - if (actionSets.size() > Integer.valueOf(hunkLimit)){ + if (actionSets.size() > Integer.valueOf(hunkLimit)) { processActionSet = false; - logger.debug("Skipping {} set size {}",diffentryFile.getName(),hunkLimit); + logger.debug("Skipping {} set size {}", diffentryFile.getName(), hunkLimit); } int hunkSet = 0; - if(processActionSet){ + if (processActionSet) { + for (HierarchicalActionSet actionSet : actionSets) { // FileOutputStream f = null; @@ -51,23 +60,18 @@ public class EDiffHunkParser extends EDiffParser { actionSet.toString(); int size = actionSet.getActionSize(); - String datasetName = project; - String[] split1 = diffentryFile.getParent().split(datasetName); - String root = split1[0]; - String pj = split1[1].split("/")[1]; - - String key = astNodeType+"/"+String.valueOf(size)+"/" + pj +"_" + diffentryFile.getName() + "_" + String.valueOf(hunkSet); + String key = astNodeType + "/" + String.valueOf(size) + "/" + pj + "_" + diffentryFile.getName() + "_" + String.valueOf(hunkSet); ITree targetTree = EDiffHelper.getTargets(actionSet); ITree actionTree = EDiffHelper.getActionTrees(actionSet); ITree shapeTree = EDiffHelper.getShapeTree(actionSet); try (Jedis inner = innerPool.getResource()) { - inner.hset("dump",key,actionSet.toString()); - inner.hset(key,"actionTree",actionTree.toStaticHashString()); - inner.hset(key,"targetTree",targetTree.toStaticHashString()); - inner.hset(key,"shapeTree",shapeTree.toStaticHashString()); + inner.hset("dump", key, actionSet.toString()); + inner.hset(key, "actionTree", actionTree.toStaticHashString()); + inner.hset(key, "targetTree", targetTree.toStaticHashString()); + inner.hset(key, "shapeTree", shapeTree.toStaticHashString()); } // File f = new File(root+"dumps/"+astNodeType+"/"+String.valueOf(size)+"/"); // f.mkdirs(); @@ -83,15 +87,19 @@ public class EDiffHunkParser extends EDiffParser { // oos.close(); } catch (Exception e) { - logger.error("error",e); + logger.error("error", e); // e.printStackTrace(); } hunkSet++; } + try (Jedis inner = innerPool.getResource()) { + inner.hset("diffEntry", pj + "_" + diffentryFile.getName(), "1"); + } } } + } diff --git a/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/MessageFile.java b/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/MessageFile.java index b51e597..7dcec9d 100755 --- a/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/MessageFile.java +++ b/src/main/java/edu/lu/uni/serval/fixminer/akka/ediff/MessageFile.java @@ -8,14 +8,22 @@ public class MessageFile { private File prevFile; private File diffEntryFile; private File positionFile; + + + + private String project; - public MessageFile(File revFile, File prevFile, File diffEntryFile) { + public MessageFile(File revFile, File prevFile, File diffEntryFile,String project) { super(); this.revFile = revFile; this.prevFile = prevFile; this.diffEntryFile = diffEntryFile; + this.project = project; } + public String getProject() { return project;} + + public void setProject(String project) { this.project = project;} public File getRevFile() { return revFile; } diff --git a/src/main/java/edu/lu/uni/serval/fixminer/jobs/EnhancedASTDiff.java b/src/main/java/edu/lu/uni/serval/fixminer/jobs/EnhancedASTDiff.java index e200890..e6a7b0c 100755 --- a/src/main/java/edu/lu/uni/serval/fixminer/jobs/EnhancedASTDiff.java +++ b/src/main/java/edu/lu/uni/serval/fixminer/jobs/EnhancedASTDiff.java @@ -1,26 +1,21 @@ package edu.lu.uni.serval.fixminer.jobs; -import akka.actor.ActorRef; -import akka.actor.ActorSystem; -import edu.lu.uni.serval.fixminer.akka.compare.CompareTrees; -import edu.lu.uni.serval.fixminer.akka.ediff.*; +import edu.lu.uni.serval.fixminer.akka.ediff.EDiffHunkParser; +import edu.lu.uni.serval.fixminer.akka.ediff.FileHelper; +import edu.lu.uni.serval.fixminer.akka.ediff.MessageFile; import edu.lu.uni.serval.utils.CallShell; import edu.lu.uni.serval.utils.PoolBuilder; import me.tongfei.progressbar.ProgressBar; -import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; - -import java.util.concurrent.*; +import java.util.Map; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -31,7 +26,7 @@ public class EnhancedASTDiff { private static Logger log = LoggerFactory.getLogger(EnhancedASTDiff.class); - public static void main(String inputPath, String numOfWorkers, String project, String eDiffTimeout, String parallelism, String portInner, String dbDir, String chunkName,String srcMLPath,String rootType,String hunkLimit) throws Exception { + public static void main(String inputPath, String project, String portInner, String dbDir, String chunkName,String srcMLPath,String parameter,String hunkLimit,String[] projectList,String patchSize) throws Exception { String parameters = String.format("\nInput path %s",inputPath); @@ -39,39 +34,19 @@ public class EnhancedASTDiff { CallShell cs = new CallShell(); String cmd = "bash "+dbDir + "/" + "startServer.sh" +" %s %s %s"; -// if (rootType == null){ + cmd = String.format(cmd, dbDir,chunkName,Integer.valueOf(portInner)); -// }else{ -// cmd = String.format(cmd, dbDir,rootType+chunkName,Integer.valueOf(portInner)); -// } cs.runShell(cmd, portInner); JedisPool innerPool = new JedisPool(PoolBuilder.getPoolConfig(), "127.0.0.1",Integer.valueOf(portInner),20000000); - if (rootType == "add"){ - try (Jedis inner = innerPool.getResource()) { - inner.select(2); - inner.flushDB(); - inner.select(1); - inner.flushDB(); - inner.select(0); - inner.del("compare"); - - } - + List> allPredicates = new ArrayList>(); + for (String s : projectList) { + Predicate predicate = x->x.getName().endsWith(s); + allPredicates.add(predicate); } - - Predicate predicate1 = x->x.getName().endsWith("libtiff"); - Predicate predicate2 = x->x.getName().endsWith("php-src"); - Predicate predicate3 = x->x.getName().endsWith("cpython"); - Predicate predicate4 = x->x.getName().endsWith("wireshark"); - Predicate predicate5 = x->x.getName().endsWith("gzip"); - Predicate predicate6 = x->x.getName().endsWith("gmp"); - Predicate predicate7 = x->x.getName().endsWith("lighttpd1.4"); - Predicate predicate8 = x->x.getName().endsWith("lighttpd2"); - File folder = new File(inputPath); File[] listOfFiles = folder.listFiles(); Stream stream = Arrays.stream(listOfFiles); @@ -79,15 +54,14 @@ public class EnhancedASTDiff { .filter(x -> !x.getName().startsWith(".")) .filter(x -> !x.getName().startsWith("cocci")) .filter(x -> !x.getName().endsWith(".index")) - .filter(predicate1.or(predicate2).or(predicate3).or(predicate4).or(predicate5).or(predicate6).or(predicate7).or(predicate8)) - + .filter(allPredicates.stream().reduce(x->false, Predicate::or)) .collect(Collectors.toList()); List allMessageFiles = new ArrayList<>(); for (File target : folders) { - List msgFiles = getMessageFiles(target.toString() + "/"); //"/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput/Apache/CAMEL/" + List msgFiles = getMessageFiles(target.toString() + "/",project,patchSize); //"/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput/Apache/CAMEL/" // msgFiles = msgFiles.subList(0,3000); if (msgFiles == null) @@ -97,122 +71,31 @@ public class EnhancedASTDiff { } - switch (parallelism){ - case "AKKA": - ActorSystem system = null; - ActorRef parsingActor = null; + Map diffEntry; + try (Jedis inner = innerPool.getResource()) { + diffEntry = inner.hgetAll("diffEntry"); - final EDiffMessage msg = new EDiffMessage(0, allMessageFiles,eDiffTimeout,innerPool,srcMLPath,hunkLimit); - try { - log.info("Akka begins..."); - log.info("{} files to process ...", allMessageFiles.size()); - system = ActorSystem.create("Mining-FixPattern-System"); - - parsingActor = system.actorOf(EDiffActor.props(Integer.valueOf(numOfWorkers), project), "mine-fix-pattern-actor"); - parsingActor.tell(msg, ActorRef.noSender()); - } catch (Exception e) { - system.shutdown(); - e.printStackTrace(); - }finally { - system.awaitTermination(); -// system.shutdown(); - } - break; - case "FORKJOIN": - int counter = new Object() { - int counter = 0; - - { - allMessageFiles.stream(). - parallel(). - peek(x -> counter++). - forEach(m -> - { - EDiffHunkParser parser = new EDiffHunkParser(); - parser.parseFixPatterns(m.getPrevFile(),m.getRevFile(), m.getDiffEntryFile(),project,innerPool,srcMLPath,hunkLimit); - if (counter % 10 == 0) { - log.info("Finalized parsing " + counter + " files... remaining " + (allMessageFiles.size() - counter)); - } - } - ); - } - }.counter; - log.info("Finished parsing {} files",counter); - break; - - - default: -// ProgressBar.wrap(allMessageFiles.stream(). -// parallel(),"Task"). -// forEach(m -> -// { -// EDiffHunkParser parser = new EDiffHunkParser(); -// parser.parseFixPatterns(m.getPrevFile(),m.getRevFile(), m.getDiffEntryFile(),project,innerPool,srcMLPath,hunkLimit); -//// if (counter % 10 == 0) { -//// log.info("Finalized parsing " + counter + " files... remaining " + (allMessageFiles.size() - counter)); -//// } -// } -// ); - - - Integer numberOfWorkers = Integer.valueOf(numOfWorkers); - final ExecutorService executor = Executors.newWorkStealingPool(numberOfWorkers); - ArrayList> results = new ArrayList>(); - for (MessageFile msgFile : allMessageFiles) { - File revFile = msgFile.getRevFile(); - File prevFile = msgFile.getPrevFile(); - File diffentryFile = msgFile.getDiffEntryFile(); - - - - // schedule the work -// log.info("Starting job {}",i); -// final Future future = executor.submit(new CompareTrees.RunnableCompare(job, errorPairs, filenames, outerPool, i)); - EDiffHunkParser parser = new EDiffHunkParser(); - final Future future = executor.submit(new RunnableParser(prevFile, revFile, diffentryFile, parser,project,innerPool,srcMLPath,hunkLimit)); - results.add(future); - } - try(ProgressBar compare = new ProgressBar("Compare", allMessageFiles.size())){ -// for (Future future : ProgressBar.wrap(results, "Comparing")){ - for (Future future:results){ - try { - // wait for task to complete -// future.get(); - future.get(new Long(eDiffTimeout), TimeUnit.SECONDS); - compare.step(); - } catch (TimeoutException e) { - future.cancel(true); - compare.step(); - - } catch (InterruptedException e) { - - e.printStackTrace(); - } catch (ExecutionException e) { - - e.printStackTrace(); - } -// finally { -// executor.shutdownNow(); -// } - } - } - executor.shutdownNow(); - - - - log.error("Unknown parallelism {}", parallelism); - break; } - - - - + log.info("{} files to process ...", allMessageFiles.size()); + if (diffEntry != null) { + log.info("{} files already process ...", diffEntry.size()); + allMessageFiles = allMessageFiles.stream().filter(f -> !diffEntry.containsKey(f.getProject() + "_" + f.getDiffEntryFile().getName())).collect(Collectors.toList()); + log.info("{} files to process ...", allMessageFiles.size()); + } + ProgressBar.wrap(allMessageFiles.stream(). + parallel(),"Task"). + forEach(m -> + { + EDiffHunkParser parser = new EDiffHunkParser(); + parser.parseFixPatterns(m.getPrevFile(),m.getRevFile(), m.getDiffEntryFile(),project,innerPool,srcMLPath,hunkLimit); + } + ); } - private static List getMessageFiles(String gumTreeInput) { + private static List getMessageFiles(String gumTreeInput,String datasetName,String patchSize) { String inputPath = gumTreeInput; // prevFiles revFiles diffentryFile positionsFile File revFilesPath = new File(inputPath + "revFiles/"); log.info(revFilesPath.getPath()); @@ -234,12 +117,18 @@ public class EnhancedASTDiff { int count = 0; while (matcher.find()) count++; - if(count>51) + if(count>= Integer.valueOf(patchSize)) // if(count>201) continue; // if(FileHelper.readFile(diffentryFile).split("@@\\s\\-\\d+,*\\d*\\s\\+\\d+,*\\d*\\s@@").length > 2) // continue; - MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile); + +// String datasetName = project; + String[] split1 = diffentryFile.getParent().split(datasetName); + String root = split1[0]; + String pj = split1[1].split("/")[1]; + + MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile,pj); msgFiles.add(msgFile); @@ -251,69 +140,6 @@ public class EnhancedASTDiff { } } - public static void load(String inputPath, String numOfWorkers, String project, String eDiffTimeout, String parallelism, String portInner, String dbDir, String chunkName,String srcMLPath,String rootType) throws Exception { - - - String parameters = String.format("\nInput path %s",inputPath); - log.info(parameters); - - CallShell cs = new CallShell(); - String cmd = "bash "+dbDir + "/" + "startServer.sh" +" %s %s %s"; - cmd = String.format(cmd, dbDir,chunkName,Integer.valueOf(portInner)); -// if (rootType == null){ -// cmd = String.format(cmd, dbDir,chunkName,Integer.valueOf(portInner)); -// }else{ -// cmd = String.format(cmd, dbDir,rootType+chunkName,Integer.valueOf(portInner)); -// } - - cs.runShell(cmd, portInner); - - JedisPool innerPool = new JedisPool(PoolBuilder.getPoolConfig(), "127.0.0.1",Integer.valueOf(portInner),20000000); - try (Jedis inner = innerPool.getResource()) { - inner.flushAll(); - } - File folder = new File(new File(inputPath).getParent() + "/dumps/" + rootType); - File[] listOfFiles = folder.listFiles(); - Stream stream = Arrays.stream(listOfFiles); - List folders = stream - .filter(x -> !x.getName().startsWith(".")) - .collect(Collectors.toList()); - List allMessageFiles = new ArrayList<>(); - - for (File target : folders) { - if(target.getName().startsWith(".")) - continue; - List files = Arrays.asList(target.listFiles()); - if (files.size() > 1){ - allMessageFiles.addAll(files); - } - } - - log.info("Message size: "+allMessageFiles.size()); - - allMessageFiles.stream(). - parallel(). - forEach(x-> loadCore(x,innerPool)); - - } - - public static void loadCore(File file2load, JedisPool innerPool){ - try (Jedis inner = innerPool.getResource()) { - -// byte[] dump = Files.readAllBytes(Paths.get(file2load.getPath())); - byte[] dump = FileUtils.readFileToByteArray(file2load); -// HierarchicalActionSet actionSet = (HierarchicalActionSet) EDiffHelper.kryoDeseerialize(dump); - String key = file2load.getPath().split("/dumps/")[1]; - inner.hset("dump".getBytes(),key.getBytes(),dump); -// actionSet.toString(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - - } - } diff --git a/src/main/resource/app.properties b/src/main/resource/app.properties index 3970991..06c5cb0 100755 --- a/src/main/resource/app.properties +++ b/src/main/resource/app.properties @@ -1,20 +1,23 @@ -pjName = gumInput -portInner = 6380 +pjName = patches portDumps = 6399 -parallelism = AKKA numOfWorkers = 14 hostname = localhost hunkLimit = 10 +patchSize = 50 + +projectList = libtiff,php-src,cpython,wireshark,gzip,gmp,lighttpd1.4,lighttpd2 #inputPath = /Users/anilkoyuncu/projects/gumInputLinux -inputPath = /Users/anil.koyuncu/projects/fixminer/fixminer-data/gumInputLinux +#inputPath = /Users/anil.koyuncu/projects/fixminer/fixminer-data/gumInputLinux +inputPath = /Users/anil.koyuncu/projects/test/fixminer-data/patches #redisPath = /Users/anil.koyuncu/projects/fixminer/fixminer-core/python/data/redis -redisPath = /Users/anil.koyuncu/projects/fixminer/fixminer-core/python/data/redis -srcMLPath= /Users/anil.koyuncu/Downloads/srcML.0.9.5/bin/srcml +#redisPath = /Users/anil.koyuncu/projects/fixminer/fixminer-core/python/data/redis +redisPath = /Users/anil.koyuncu/projects/test/fixminer-core/python/data/redis +#srcMLPath= /Users/anil.koyuncu/Downloads/srcML.0.9.5/bin/srcml +srcMLPath= /Users/anil.koyuncu/projects/test/srcML/bin/srcml #srcMLPath= /usr/local/bin/srcml actionType = ALL -eDiffTimeout = 900 #ENHANCEDASTDIFF,CACHE,LEVEL1,LEVEL2,LEVEL3