diff --git a/FixPatternMiner.iml b/FixPatternMiner.iml index 1f288a8..2df3bc9 100644 --- a/FixPatternMiner.iml +++ b/FixPatternMiner.iml @@ -42,7 +42,6 @@ - diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java index 29ea314..d7f7dfb 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java @@ -36,7 +36,8 @@ public abstract class Parser implements ParserInterface { protected List parseChangedSourceCodeWithGumTree(File prevFile, File revFile) { List actionSets = new ArrayList<>(); // GumTree results - List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile); + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); +// List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile); if (gumTreeResults != null && gumTreeResults.size() > 0) { // Regroup GumTre results. List allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java index 9ea9bb8..74aa543 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java @@ -49,17 +49,17 @@ public class FixedViolationHunkParser extends FixedViolationParser { // type = "#NoStatementChange:"; // } } else { - List diffentryHunks = new DiffEntryReader().readHunks2(diffentryFile); + List diffentryHunks = new DiffEntryReader().readHunks3(diffentryFile); //Filter out the modify actions, which are not in the DiffEntry hunks. HunkActionFilter hunkFilter = new HunkActionFilter(); - List selectedPatchHunks = hunkFilter.matchActionsByDiffEntryForC(diffentryHunks, actionSets); - + List selectedPatchHunks = hunkFilter.filterActionsByModifiedRange2(diffentryHunks, actionSets, revFile, prevFile); + for (DiffEntryHunk patchHunk : selectedPatchHunks) { - List hunkActionSets = patchHunk.getActionSets(); + List hunkActionSets = patchHunk.getActionSets(); // multiple UPD, and some UPD contain other UPD. removeOverlapperdUPD(hunkActionSets); - + // Range of buggy source code int bugStartLine = 0; int bugEndLine = 0; @@ -69,7 +69,6 @@ public class FixedViolationHunkParser extends FixedViolationParser { int bugEndPosition = 0; int fixEndPosition = 0; for (HierarchicalActionSet hunkActionSet : hunkActionSets) { - //TODO FIX ME int actionBugStart = hunkActionSet.getBugStartLineNum(); int actionBugEnd = hunkActionSet.getBugEndLineNum(); int actionFixStart = hunkActionSet.getFixStartLineNum(); @@ -93,13 +92,18 @@ public class FixedViolationHunkParser extends FixedViolationParser { fixEndPosition = hunkActionSet.getFixEndPosition(); } } - + if (fixStartLine == 0 && bugStartLine == 0) { this.unfixedViolations += "#WRONG: " + revFile.getName() + ":" + patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize() + "\n"; this.nullMappingGumTreeResult ++; continue; } - + + if (fixStartLine == 0 && bugStartLine != 0) {// pure delete actions. + // get the exact buggy code by violation's position. TODO later + } + +// if (children.size() == 0) continue; boolean isPureInsert = false; if (bugStartLine == 0 && patchHunk.getBugLineStartNum() > 0) { bugStartLine = patchHunk.getBugLineStartNum(); @@ -107,13 +111,15 @@ public class FixedViolationHunkParser extends FixedViolationParser { isPureInsert = true; // continue; } - if ((bugEndLine - bugStartLine > Configuration.HUNK_SIZE && !isPureInsert) || fixEndLine - fixStartLine > Configuration.HUNK_SIZE) { -// continue; //TODO filter out the +// if ((bugEndLine - bugStartLine > Configuration.HUNK_SIZE ) || fixEndLine - fixStartLine > Configuration.HUNK_SIZE) { +// continue; //TODO hunk size +// } + if(patchHunk.getBuggyHunkSize() > Configuration.HUNK_SIZE || patchHunk.getFixedHunkSize() > Configuration.HUNK_SIZE){ + continue; } - - + /** - * Select edit scripts for deep learning. + * Select edit scripts for deep learning. * Edit scripts will be used to mine common fix patterns. */ // 1. First level: AST node type. @@ -122,8 +128,8 @@ public class FixedViolationHunkParser extends FixedViolationParser { System.err.println("===+++===: " + revFile.getName() + ":" +patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize()); } // 2. source code: raw tokens - // 3. abstract identifiers: - // 4. semi-source code: + // 3. abstract identifiers: + // 4. semi-source code: String[] editScriptTokens = astEditScripts.split(" "); int size = editScriptTokens.length; if (size == 1) { @@ -131,17 +137,21 @@ public class FixedViolationHunkParser extends FixedViolationParser { this.unfixedViolations += "#NullMatchedGumTreeResult1:" + revFile.getName() + ":" + patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize() + "\n"; continue; } - + String patchPosition = "\n" + revFile.getName() + "\n@@ -" + bugStartLine + ", " + bugEndLine + " +" + fixStartLine + ", " + fixEndLine + "@@\n"; String info = Configuration.PATCH_SIGNAL + "\n" + patchPosition + patchHunk.getHunk() + "\nAST Diff###:\n" + getAstEditScripts(hunkActionSets, bugEndPosition, fixEndPosition) + "\n"; +//TODO uncomment the line below for more detailed gumtree input. + // String info = Configuration.PATCH_SIGNAL + "\n" + patchPosition + patchHunk.getHunk() + "\nAST Diff###:\n" + getAstEditScripts(hunkActionSets) + "\n"; +// if (noUpdate(editScriptTokens)) { +// } this.patchesSourceCode += info; this.sizes += size + "\n"; this.astEditScripts += astEditScripts + "\n"; - -// SimpleTree simpleTree = getBuggyCodeTree(patchHunk, bugEndPosition, prevFile, bugStartLine, bugEndLine); -// String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim(); -// this.tokensOfSourceCode += tokens + "\n"; + + SimpleTree simpleTree = getBuggyCodeTree(patchHunk, bugEndPosition, prevFile, bugStartLine, bugEndLine); + String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim(); + this.tokensOfSourceCode += tokens + "\n"; } } } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java index 272560a..a94c1c0 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java @@ -12,19 +12,18 @@ import com.github.gumtreediff.actions.model.Action; import edu.lu.uni.serval.FixPatternParser.Parser; import edu.lu.uni.serval.gumtree.GumTreeComparer; import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; - -import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouperForC; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; import edu.lu.uni.serval.utils.FileHelper; import edu.lu.uni.serval.utils.ListSorter; /** * Parse fix patterns with GumTree. - * + * * @author kui.liu * */ public class FixedViolationParser extends Parser { - + /* * ResultType: * 0: normal GumTree results. @@ -34,16 +33,16 @@ public class FixedViolationParser extends Parser { * 4: useless violations */ public int resultType = 0; - + protected String violationTypes = ""; - + @Override public void parseFixPatterns(File prevFile, File revFile, File diffentryFile) { } - + /** * Regroup GumTree results without remove the modification of variable names. - * + * * @param prevFile * @param revFile * @return @@ -51,7 +50,7 @@ public class FixedViolationParser extends Parser { protected List parseChangedSourceCodeWithGumTree2(File prevFile, File revFile) { List actionSets = new ArrayList<>(); // GumTree results - List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile); + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); if (gumTreeResults == null) { this.resultType = 1; return null; @@ -60,7 +59,7 @@ public class FixedViolationParser extends Parser { return actionSets; } else { // Regroup GumTre results. - List allActionSets = new HierarchicalRegrouperForC().regroupGumTreeResults(gumTreeResults); + List allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); // for (HierarchicalActionSet actionSet : allActionSets) { // String astNodeType = actionSet.getAstNodeType(); // if (astNodeType.endsWith("Statement") || "FieldDeclaration".equals(astNodeType)) { diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java index 341ce17..9837afb 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java @@ -6,111 +6,132 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Scanner; +import java.util.*; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import java.util.stream.Stream; import edu.lu.uni.serval.FixPatternParser.RunnableParser; import edu.lu.uni.serval.MultipleThreadsParser.MessageFile; import edu.lu.uni.serval.config.Configuration; import edu.lu.uni.serval.utils.FileHelper; +import org.eclipse.jgit.revwalk.RevCommit; public class TestHunkParser { public static void main(String[] args) { // input data - String pjName = "linux"; - final List msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT +pjName +"/"); - System.out.println(msgFiles.size()); + File folder = new File("/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput"); + File[] listOfFiles = folder.listFiles(); + Stream stream = Arrays.stream(listOfFiles); + List folders = stream + .filter(x -> !x.getName().startsWith(".")) + .collect(Collectors.toList()); - // output path - final String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE; - final String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE; - final String buggyTokensFilePath = Configuration.BUGGY_CODE_TOKENS_FILE; - final String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE; - final String alarmTypesFilePath = Configuration.ALARM_TYPES_FILE; + List targetList = new ArrayList(); + for (File f:folders){ + for(File f1 :f.listFiles()){ + if (!f1.getName().startsWith(".")){ + targetList.add(f1); + } + } + } -// FileHelper.deleteDirectory(editScriptsFilePath); -// FileHelper.deleteDirectory(patchesSourceCodeFilePath); -// FileHelper.deleteDirectory(buggyTokensFilePath); -// FileHelper.deleteDirectory(editScriptSizesFilePath); -// FileHelper.deleteDirectory(alarmTypesFilePath); - - StringBuilder astEditScripts = new StringBuilder(); - StringBuilder tokens = new StringBuilder(); - StringBuilder sizes = new StringBuilder(); - StringBuilder patches = new StringBuilder(); - StringBuilder alarmTypes = new StringBuilder(); - - int a = 0; + for (File target : targetList) { + final List msgFiles = getMessageFiles(target.toString() + "/"); //"/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput/Apache/CAMEL/" + System.out.println(msgFiles.size()); + if(msgFiles.size() == 0) + continue; + String pjName = target.getName(); + // output path + String GUM_TREE_OUTPUT = Configuration.ROOT_PATH + pjName + "/"; + final String editScriptsFilePath = GUM_TREE_OUTPUT + "editScripts.list"; + final String patchesSourceCodeFilePath =GUM_TREE_OUTPUT + "patchSourceCode.list"; + final String buggyTokensFilePath = GUM_TREE_OUTPUT + "tokens.list"; + final String editScriptSizesFilePath = GUM_TREE_OUTPUT + "editScriptSizes.csv"; + final String alarmTypesFilePath = GUM_TREE_OUTPUT + "alarmTypes.list"; + + + + FileHelper.deleteDirectory(editScriptsFilePath); + FileHelper.deleteDirectory(patchesSourceCodeFilePath); + FileHelper.deleteDirectory(buggyTokensFilePath); + FileHelper.deleteDirectory(editScriptSizesFilePath); + FileHelper.deleteDirectory(alarmTypesFilePath); + + StringBuilder astEditScripts = new StringBuilder(); + StringBuilder tokens = new StringBuilder(); + StringBuilder sizes = new StringBuilder(); + StringBuilder patches = new StringBuilder(); + StringBuilder alarmTypes = new StringBuilder(); + + int a = 0; // int counter = 0; - for (MessageFile msgFile : msgFiles) { - FixedViolationHunkParser parser = new FixedViolationHunkParser(); - - final ExecutorService executor = Executors.newSingleThreadExecutor(); - // schedule the work - final Future future = executor.submit(new RunnableParser(msgFile.getPrevFile(), - msgFile.getRevFile(), msgFile.getDiffEntryFile(), parser)); - try { - // where we wait for task to complete - future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS); - String editScripts = parser.getAstEditScripts(); - if (!editScripts.equals("")) { - astEditScripts.append(editScripts); - tokens.append(parser.getTokensOfSourceCode()); - sizes.append(parser.getSizes()); - patches.append(parser.getPatchesSourceCode()); - alarmTypes.append(parser.getAlarmTypes()); + for (MessageFile msgFile : msgFiles) { + FixedViolationHunkParser parser = new FixedViolationHunkParser(); + + final ExecutorService executor = Executors.newSingleThreadExecutor(); + // schedule the work + final Future future = executor.submit(new RunnableParser(msgFile.getPrevFile(), + msgFile.getRevFile(), msgFile.getDiffEntryFile(), parser)); + try { + // where we wait for task to complete + future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS); + String editScripts = parser.getAstEditScripts(); + if (!editScripts.equals("")) { + astEditScripts.append(editScripts); + tokens.append(parser.getTokensOfSourceCode()); + sizes.append(parser.getSizes()); + patches.append(parser.getPatchesSourceCode()); + alarmTypes.append(parser.getAlarmTypes()); + + a++; + if (a % 10 == 0) { + FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); + FileHelper.outputToFile(buggyTokensFilePath, tokens, true); + FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); + FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); + FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); + astEditScripts.setLength(0); + tokens.setLength(0); + sizes.setLength(0); + patches.setLength(0); + alarmTypes.setLength(0); + System.out.println("Finish of parsing " + a + " files......"); + } + } + } catch (TimeoutException e) { + err.println("task timed out"); + future.cancel(true /* mayInterruptIfRunning */); + } catch (InterruptedException e) { + err.println("task interrupted"); + } catch (ExecutionException e) { + err.println("task aborted"); + } finally { + executor.shutdownNow(); + } + } + + FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); + FileHelper.outputToFile(buggyTokensFilePath, tokens, true); + FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); + FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); + FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); + astEditScripts.setLength(0); + tokens.setLength(0); + sizes.setLength(0); + patches.setLength(0); + alarmTypes.setLength(0); + System.out.println(a); - a ++; - if (a % 100 == 0) { - FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); - FileHelper.outputToFile(buggyTokensFilePath, tokens, true); - FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); - FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); - FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); - astEditScripts.setLength(0); - tokens.setLength(0); - sizes.setLength(0); - patches.setLength(0); - alarmTypes.setLength(0); - System.out.println("Finish of parsing " + a + " files......"); - } - } - } catch (TimeoutException e) { - err.println("task timed out"); - future.cancel(true /* mayInterruptIfRunning */ ); - } catch (InterruptedException e) { - err.println("task interrupted"); - } catch (ExecutionException e) { - err.println("task aborted"); - } finally { - executor.shutdownNow(); - } - } - - FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); - FileHelper.outputToFile(buggyTokensFilePath, tokens, true); - FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); - FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); - FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); - astEditScripts.setLength(0); - tokens.setLength(0); - sizes.setLength(0); - patches.setLength(0); - alarmTypes.setLength(0); - System.out.println(a); - // classifyByAlarmTypes(); + } } @@ -119,21 +140,25 @@ public class TestHunkParser { File revFilesPath = new File(inputPath + "revFiles/"); File[] revFiles = revFilesPath.listFiles(); // project folders List msgFiles = new ArrayList<>(); - - for (File revFile : revFiles) { + if (revFiles.length >= 0) { + for (File revFile : revFiles) { // if (revFile.getName().endsWith(".java")) { - String fileName = revFile.getName(); - File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file - fileName = fileName.replace(".java", ".txt"); - File diffentryFile = new File(gumTreeInput + "diffentries/" + fileName); // DiffEntry file - File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file - MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile); - msgFile.setPositionFile(positionFile); - msgFiles.add(msgFile); + String fileName = revFile.getName(); + File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file + fileName = fileName.replace(".java", ".txt"); + File diffentryFile = new File(gumTreeInput + "DiffEntries/" + fileName); // DiffEntry file + File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file + MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile); + msgFile.setPositionFile(positionFile); + msgFiles.add(msgFile); // } - } - - return msgFiles; + } + + return msgFiles; + } + else{ + return null; + } } public static void classifyByAlarmTypes() { diff --git a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser2.java b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser2.java index a7df66c..016746a 100644 --- a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser2.java +++ b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser2.java @@ -52,8 +52,8 @@ public class AkkaParser2 { // String GIT_REPOSITORY_PATH = "/Users/anilkoyuncu/bugLocalizationStudy/irblsensitivity/data/" + pjPath + "/" + pjName + "/gitrepo/.git"; log.info(pjName); // input data - log.info("Get the input data..." + Configuration.GUM_TREE_INPUT +pjName +"/"); - final List msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT +pjName +"/"); + log.info("Get the input data..." + Configuration.GUM_TREE_INPUT ); + final List msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT); log.info("MessageFiles: " + msgFiles.size()); // output path diff --git a/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryReader.java b/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryReader.java index c0d9693..0e4806b 100644 --- a/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryReader.java +++ b/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryReader.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; +import java.util.regex.Pattern; import edu.lu.uni.serval.utils.FileHelper; @@ -101,6 +102,7 @@ public class DiffEntryReader { while ((line = reader.readLine()) != null) { if (RegExp.filterSignal(line.trim())) { +// line = Pattern.compile("^@@\\s\\-\\d+,*\\d*\\s\\+\\d+,*\\d*\\s@@").split(line)[1]; sourceCode = true; if (hunk.length() > 0) { if (startLine > 0) { diff --git a/src/main/java/edu/lu/uni/serval/diffentry/RegExp.java b/src/main/java/edu/lu/uni/serval/diffentry/RegExp.java index ebcf9a8..b1d017d 100644 --- a/src/main/java/edu/lu/uni/serval/diffentry/RegExp.java +++ b/src/main/java/edu/lu/uni/serval/diffentry/RegExp.java @@ -9,7 +9,7 @@ public class RegExp { public static boolean filterSignal(String string) { boolean flag = false; - + Matcher res = pattern.matcher(string); if (res.matches()) { flag = true;