diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/CUCreator.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/CUCreator.java new file mode 100644 index 0000000..f495837 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/CUCreator.java @@ -0,0 +1,59 @@ +package edu.lu.uni.serval.FixPatternParser; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; + +import org.eclipse.jdt.core.dom.AST; +import org.eclipse.jdt.core.dom.ASTParser; +import org.eclipse.jdt.core.dom.CompilationUnit; + +/** + * Creator of a CompilationUnit. + * + * @author kui.liu + * + */ +public class CUCreator { + + public CompilationUnit createCompilationUnit(File javaFile) { + CompilationUnit unit = null; + try { + char[] sourceCode = readFileToCharArray(new FileReader(javaFile)); + ASTParser parser = createASTParser(sourceCode); + parser.setKind(ASTParser.K_COMPILATION_UNIT); + unit = (CompilationUnit) parser.createAST(null); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + return unit; + } + + private ASTParser createASTParser(char[] javaCode) { + ASTParser parser = ASTParser.newParser(AST.JLS8); + parser.setSource(javaCode); + + return parser; + } + + private char[] readFileToCharArray(FileReader fileReader) throws IOException { + StringBuilder fileData = new StringBuilder(); + BufferedReader br = new BufferedReader(fileReader); + + char[] buf = new char[10]; + int numRead = 0; + while ((numRead = br.read(buf)) != -1) { + String readData = String.valueOf(buf, 0, numRead); + fileData.append(readData); + buf = new char[1024]; + } + br.close(); + + return fileData.toString().toCharArray(); + } +} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/HunkParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/HunkParser.java new file mode 100644 index 0000000..4cb8628 --- /dev/null 
+++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/HunkParser.java @@ -0,0 +1,340 @@ +package edu.lu.uni.serval.FixPatternParser; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; + +import com.github.gumtreediff.actions.model.Action; +import com.github.gumtreediff.tree.ITree; + +import edu.lu.uni.serval.config.Configuration; +import edu.lu.uni.serval.diffentry.DiffEntryHunk; +import edu.lu.uni.serval.diffentry.DiffEntryReader; +import edu.lu.uni.serval.gumtree.GumTreeComparer; +import edu.lu.uni.serval.gumtree.regroup.ActionFilter; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; +import edu.lu.uni.serval.gumtree.regroup.HunkActionFilter; +import edu.lu.uni.serval.gumtree.regroup.HunkFixPattern; +import edu.lu.uni.serval.gumtree.regroup.SimpleTree; +import edu.lu.uni.serval.gumtree.regroup.SimplifyTree; + +/** + * Parse fix patterns with GumTree. + * + * @author kui.liu + * + */ +public class HunkParser { + + private String astEditScripts = ""; // it will be used for fix patterns mining. + private String patchesSourceCode = ""; // testing + private String buggyTrees = ""; // Compute similarity for bug localization. + private String sizes = ""; // fix patterns' selection before mining. + private String tokensOfSourceCode = ""; // Compute similarity for bug localization. + private String originalTree = ""; // Guide of generating patches. + private String actionSets = ""; // Guide of generating patches. 
+ + public void parseFixPatterns(File prevFile, File revFile, File diffEntryFile) throws FileNotFoundException, IOException { + + // GumTree results + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); + + if (gumTreeResults != null && gumTreeResults.size() > 0) { + List actionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); + + ActionFilter filter = new ActionFilter(); + // Filter out modified actions of changing method names, method parameters, variable names and field names in declaration part. + // TODO: variable effects range, sub-actions are these kinds of modification? + List allActionSets = filter.filterOutUselessActions(actionSets); + + // DiffEntry size: filter out big hunks. + List diffentryHunks = new DiffEntryReader().readHunks(diffEntryFile); + //Filter out the modify actions, which are not in the DiffEntry hunks. + HunkActionFilter hunkFilter = new HunkActionFilter(); + List allHunkFixPatternss = hunkFilter.filterActionsByDiffEntryHunk2(diffentryHunks, allActionSets, revFile, prevFile); + + for (HunkFixPattern hunkFixPattern : allHunkFixPatternss) { + /* + * Convert the ITree of buggy code to a simple tree. + * It will be used to compute the similarity. + */ + List hunkActionSets = hunkFixPattern.getHunkActionSets(); + SimpleTree simpleTree = new SimpleTree(); + simpleTree.setLabel("Block"); + simpleTree.setNodeType("Block"); + List children = new ArrayList<>(); + String astEditScripts = ""; + for (HierarchicalActionSet hunkActionSet : hunkActionSets) { + SimplifyTree abstractIdentifier = new SimplifyTree(); + abstractIdentifier.abstractTree(hunkActionSet); + SimpleTree simpleT = hunkActionSet.getSimpleTree(); + if (simpleTree == null) { // Failed to get the simple tree for INS actions. + continue; + } + children.add(simpleT); + + /** + * Select edit scripts for deep learning. + * Edit scripts will be used to mine common fix patterns. + */ + // 1. First level: AST node type. 
+ astEditScripts += getASTEditScripts(hunkActionSet); + // 2. source code: raw tokens + // 3. abstract identifiers: + // 4. semi-source code: + } + simpleTree.setChildren(children); + simpleTree.setParent(null); + + int size = astEditScripts.split(" ").length; + this.sizes += size + "\n"; + this.astEditScripts += astEditScripts + "\n"; + + this.buggyTrees += Configuration.BUGGY_TREE_TOKEN + "\n" + simpleTree.toString() + "\n"; + this.tokensOfSourceCode += getTokensDeepFirst(simpleTree).trim() + "\n"; +// this.actionSets += Configuration.BUGGY_TREE_TOKEN + "\n" + readActionSet(actionSet, "") + "\n"; +// this.originalTree += Configuration.BUGGY_TREE_TOKEN + "\n" + actionSet.getOriginalTree().toString() + "\n"; + + // Source Code of patches. +// String patchSourceCode = getPatchSourceCode(sourceCode, startLineNum, endLineNum, startLineNum2, +// endLineNum2); +// if (patchSourceCode == null) continue; +// patchesSourceCode += "PATCH###\n" + patchSourceCode; +// patchesSourceCode += actionSet.toString() + "\n"; + } + } + } + + private String readActionSet(HierarchicalActionSet actionSet, String line) { + String str = line + actionSet.getActionString() + "\n"; + List subActions = actionSet.getSubActions(); + for (HierarchicalActionSet subAction : subActions) { + str += readActionSet(subAction, line + "---"); + } + return str; + } + + private String getTokensDeepFirst(SimpleTree simpleTree) { + String tokens = ""; + List children = simpleTree.getChildren(); + String astNodeType = simpleTree.getNodeType(); + if ("AssertStatement".equals(astNodeType) || "DoStatement".equals(astNodeType) + || "ForStatement".equals(astNodeType) || "IfStatement".equals(astNodeType) + || "ReturnStatement".equals(astNodeType) || "SwitchStatement".equals(astNodeType) + || "SynchronizedStatement".equals(astNodeType) || "ThrowStatement".equals(astNodeType) + || "TryStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType)) { + String label = simpleTree.getLabel(); + label = 
label.substring(0, label.indexOf("S")).toLowerCase(); + tokens += label + " "; + } else if ("EnhancedForStatement".equals(astNodeType)) { + tokens += "for "; + } else if ("CatchClause".equals(astNodeType)) { + tokens += "catch "; + } else if ("SwitchCase".equals(astNodeType)) { + tokens += "case "; + } else if ("SuperConstructorInvocation".equals(astNodeType)) { + tokens += "super "; + } else if ("ConstructorInvocation".equals(astNodeType)) { + tokens += "this "; + } else if ("FinallyBody".equals(astNodeType)) { + tokens += "finally "; + } + + if (children.isEmpty()) { + tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " "; + } else { + for (SimpleTree child : children) { + tokens += getTokensDeepFirst(child); + } + } + return tokens; + } + + private String getSemiSourceCodeEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private String getAbstractIdentifiersEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private String getRawTokenEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private int getEndPosition(List children) { + int endPosition = 0; + for (ITree child : children) { + if (child.getLabel().endsWith("Body")) { + endPosition = child.getPos() - 1; + break; + } + } + return endPosition; + } + + private String getPatchSourceCode(String sourceCode, int startLineNum, int endLineNum, int startLineNum2, int endLineNum2) { + String buggyStatements = ""; + String fixedStatements = ""; + BufferedReader reader = null; + try { + reader = new BufferedReader(new StringReader(sourceCode)); + String line = null; + int startLine = 0; + int counter = 0; + int range = 0; + int startLine2 = 0; + int counter2 = 0; + int range2 = 0; + int counter3 = 0; // counter of non-buggy code line. 
+ while ((line = reader.readLine()) != null) { + if (startLine == 0 && line.startsWith("@@ -")) { + // RegExp.filterSignal(line) + int plusIndex = line.indexOf("+"); + String lineNum = line.substring(4, plusIndex); + String[] nums = lineNum.split(","); + if (nums.length != 2) { + continue; + } + startLine = Integer.parseInt(nums[0].trim()); + range = Integer.parseInt(nums[1].trim()); + if (startLine > endLineNum) { + return null; // Wrong Matching. + } + if (startLine + range < startLineNum) { + startLine = 0; + continue; + } + String lineNum2 = line.substring(plusIndex) .trim(); + lineNum2 = lineNum2.substring(1, lineNum2.length() - 2); + String[] nums2 = lineNum2.split(","); + if (nums2.length != 2) { + startLine = 0; + range = 0; + continue; + } + startLine2 = Integer.parseInt(nums2[0].trim()); + range2 = Integer.parseInt(nums2[1].trim()); + continue; + } + + int lineNum1 = counter + counter3; + int lineNum2 = counter2 + counter3; + if (startLine > 0 && startLine2 > 0 && lineNum1 < range && lineNum2 < range2) { + if (line.startsWith("-") && startLine + lineNum1 >= startLineNum && startLine + lineNum1 <= endLineNum) { + buggyStatements += line + "\n"; + } else if (line.startsWith("+") && startLine2 + lineNum2 >= startLineNum2 && startLine2 + lineNum2 <= endLineNum2) { + fixedStatements += line + "\n"; + } + if (line.startsWith("-")) { + counter ++; + } else if (line.startsWith("+")) { + counter2 ++; + } else { + counter3 ++; + } + } + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + if (reader != null) { + reader.close(); + reader = null; + } + } catch (IOException e) { + e.printStackTrace(); + } + } + return buggyStatements + fixedStatements; + } + + /** + * Get the AST node based edit script of patches in terms of breadth first. 
+ * + * @param actionSet + * @return + */ + private String getASTEditScripts(HierarchicalActionSet actionSet) { + String editScript = ""; + + List actionSets = new ArrayList<>(); + actionSets.add(actionSet); + while (actionSets.size() != 0) { + List subSets = new ArrayList<>(); + for (HierarchicalActionSet set : actionSets) { + subSets.addAll(set.getSubActions()); + String actionStr = set.getActionString(); + int index = actionStr.indexOf("@@"); + String singleEdit = actionStr.substring(0, index).replace(" ", ""); + + if (singleEdit.endsWith("SimpleName")) { + actionStr = actionStr.substring(index + 2); + if (actionStr.startsWith("MethodName")) { + singleEdit = singleEdit.replace("SimpleName", "MethodName"); + } else { + if (actionStr.startsWith("Name")) { + actionStr = actionStr.substring(5, 6); + if (!actionStr.equals(actionStr.toLowerCase())) { + singleEdit = singleEdit.replace("SimpleName", "Name"); + } else { + singleEdit = singleEdit.replace("SimpleName", "Variable"); + } + } else { + singleEdit = singleEdit.replace("SimpleName", "Variable"); + } + } + } + + editScript += singleEdit + " "; + } + actionSets.clear(); + actionSets.addAll(subSets); + } + return editScript; + } + + private void clearITree(HierarchicalActionSet actionSet) { + actionSet.getAction().setNode(null); + for (HierarchicalActionSet subActionSet : actionSet.getSubActions()) { + clearITree(subActionSet); + } + } + + public String getAstEditScripts() { + return astEditScripts; + } + + public String getPatchesSourceCode() { + return patchesSourceCode; + } + + public String getBuggyTrees() { + return buggyTrees; + } + + public String getSizes() { + return sizes; + } + + public String getTokensOfSourceCode() { + return tokensOfSourceCode; + } + + public String getOriginalTree() { + return originalTree; + } + + public String getActionSets() { + return actionSets; + } +} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java 
b/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java index f806db8..8ca1192 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/Parser.java @@ -8,20 +8,20 @@ import java.io.StringReader; import java.util.ArrayList; import java.util.List; -import org.eclipse.jdt.core.dom.CompilationUnit; - -import com.github.gumtreediff.actions.model.Move; +import com.github.gumtreediff.actions.model.Action; import com.github.gumtreediff.actions.model.Update; import com.github.gumtreediff.tree.ITree; import edu.lu.uni.serval.config.Configuration; +import edu.lu.uni.serval.diffentry.DiffEntryHunk; +import edu.lu.uni.serval.diffentry.DiffEntryReader; import edu.lu.uni.serval.gumtree.GumTreeComparer; import edu.lu.uni.serval.gumtree.regroup.ActionFilter; import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; +import edu.lu.uni.serval.gumtree.regroup.HunkActionFilter; import edu.lu.uni.serval.gumtree.regroup.SimpleTree; import edu.lu.uni.serval.gumtree.regroup.SimplifyTree; -import edu.lu.uni.serval.gumtree.utils.CUCreator; -import edu.lu.uni.serval.utils.FileHelper; /** * Parse fix patterns with GumTree. @@ -31,24 +31,47 @@ import edu.lu.uni.serval.utils.FileHelper; */ public class Parser { - private String astEditScripts = ""; - private String patchesSourceCode = ""; - private String buggyTrees = ""; - private String sizes = ""; + private String astEditScripts = ""; // it will be used for fix patterns mining. + private String patchesSourceCode = ""; // testing + private String buggyTrees = ""; // Compute similarity for bug localization. + private String sizes = ""; // fix patterns' selection before mining. + private String tokensOfSourceCode = ""; // Compute similarity for bug localization. + private String originalTree = ""; // Guide of generating patches. + private String actionSets = ""; // Guide of generating patches. 
public void parseFixPatterns(File prevFile, File revFile, File diffEntryFile) throws FileNotFoundException, IOException { + // GumTree results - List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); - // Filter out modified actions of changing method names, method parameters, variable names and field names in declaration part. - List hierarchicalActionSets = new ActionFilter().filterOutUselessActions(gumTreeResults); - - if (hierarchicalActionSets.size() > 0) { - CUCreator cuCreator = new CUCreator(); - CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); - CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); - - String sourceCode = FileHelper.readFile(diffEntryFile); + if (gumTreeResults != null && gumTreeResults.size() > 0) { + List actionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); + + /** + * TODO What we need to discuss: + * 3. actions' nodes have the same parent belongs to one fix pattern? + * actions in the same method body. + * field, one by one, + * contains a body block. + */ + ActionFilter filter = new ActionFilter(); + // Filter out modified actions of changing method names, method parameters, variable names and field names in declaration part. + List hierarchicalActionSets = filter.filterOutUselessActions(actionSets); // TODO: variable effects range, sub-actions are these kinds of modification? + + // DiffEntry size: + List diffentryHunks = new DiffEntryReader().readHunks(diffEntryFile); // filter out big hunks. + //Filter out the modify actions, which are not in the DiffEntry hunks. + HunkActionFilter hunkFilter = new HunkActionFilter(); + hierarchicalActionSets = hunkFilter.filterActionsByDiffEntryHunk(diffentryHunks, actionSets, revFile, prevFile); + + /** + * Patch size; + * 1. one hunk is one patch。 + * 2. one statement. 
+ */ + + // + for (HierarchicalActionSet actionSet : hierarchicalActionSets) { // position of buggy statements int startPosition = 0; @@ -106,18 +129,6 @@ public class Parser { // Get the buggy code and fixed code if (startPosition != 0 && startPosition2 != 0) { - // Line numbers of buggy statements - int startLineNum = prevUnit.getLineNumber(startPosition); - int endLineNum = prevUnit.getLineNumber(endPosition); - // Line numbers of fixed statements - int startLineNum2 = revUnit.getLineNumber(startPosition2); - int endLineNum2 = revUnit.getLineNumber(endPosition2); - - // Limit the range of buggy code and fixed code. TODO: - - actionSet.setStartLineNum(startLineNum); - actionSet.setEndLineNum(endLineNum); - /* * Convert the ITree of buggy code to a simple tree. * It will be used to compute the similarity. @@ -125,18 +136,9 @@ public class Parser { SimplifyTree abstractIdentifier = new SimplifyTree(); abstractIdentifier.abstractTree(actionSet); SimpleTree simpleTree = actionSet.getSimpleTree(); - clearITree(actionSet); if (simpleTree == null) { // Failed to get the simple tree for INS actions. continue; } - this.buggyTrees += Configuration.BUGGY_TREE_TOKEN + "\n" + simpleTree.toString() + "\n"; - - // Source Code of patches. - String patchSourceCode = getPatchSourceCode(sourceCode, startLineNum, endLineNum, startLineNum2, - endLineNum2); - if (patchSourceCode == null) continue; - patchesSourceCode += "PATCH###\n" + patchSourceCode; - patchesSourceCode += actionSet.toString() + "\n"; /** * Select edit scripts for deep learning. @@ -145,6 +147,11 @@ public class Parser { // 1. First level: AST node type. String astEditScripts = getASTEditScripts(actionSet); int size = astEditScripts.split(" ").length; + if (size == 1) { + System.out.println(actionSet); + System.out.println(revFile.getPath()); +// continue; + } this.sizes += size + "\n"; this.astEditScripts += astEditScripts + "\n"; // 2. 
source code: raw tokens @@ -153,11 +160,69 @@ public class Parser { String abstractIdentifiersEditScripts = getAbstractIdentifiersEditScripts(actionSet); // 4. semi-source code: String semiSourceCodeEditScripts = getSemiSourceCodeEditScripts(actionSet); + + + this.buggyTrees += Configuration.BUGGY_TREE_TOKEN + "\n" + simpleTree.toString() + "\n"; + this.tokensOfSourceCode += getTokensDeepFirst(simpleTree).trim() + "\n"; + this.actionSets += Configuration.BUGGY_TREE_TOKEN + "\n" + readActionSet(actionSet, "") + "\n"; + this.originalTree += Configuration.BUGGY_TREE_TOKEN + "\n" + actionSet.getOriginalTree().toString() + "\n"; + +// // Source Code of patches. +// String patchSourceCode = getPatchSourceCode(sourceCode, startLineNum, endLineNum, startLineNum2, +// endLineNum2); +// if (patchSourceCode == null) continue; +// patchesSourceCode += "PATCH###\n" + patchSourceCode; +// patchesSourceCode += actionSet.toString() + "\n"; } } } } + private String readActionSet(HierarchicalActionSet actionSet, String line) { + String str = line + actionSet.getActionString() + "\n"; + List subActions = actionSet.getSubActions(); + for (HierarchicalActionSet subAction : subActions) { + str += readActionSet(subAction, line + "---"); + } + return str; + } + + private String getTokensDeepFirst(SimpleTree simpleTree) { + String tokens = ""; + List children = simpleTree.getChildren(); + String astNodeType = simpleTree.getNodeType(); + if ("AssertStatement".equals(astNodeType) || "DoStatement".equals(astNodeType) + || "ForStatement".equals(astNodeType) || "IfStatement".equals(astNodeType) + || "ReturnStatement".equals(astNodeType) || "SwitchStatement".equals(astNodeType) + || "SynchronizedStatement".equals(astNodeType) || "ThrowStatement".equals(astNodeType) + || "TryStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType)) { + String label = simpleTree.getLabel(); + label = label.substring(0, label.indexOf("S")).toLowerCase(); + tokens += label + " "; + } else if 
("EnhancedForStatement".equals(astNodeType)) { + tokens += "for "; + } else if ("CatchClause".equals(astNodeType)) { + tokens += "catch "; + } else if ("SwitchCase".equals(astNodeType)) { + tokens += "case "; + } else if ("SuperConstructorInvocation".equals(astNodeType)) { + tokens += "super "; + } else if ("ConstructorInvocation".equals(astNodeType)) { + tokens += "this "; + } else if ("FinallyBody".equals(astNodeType)) { + tokens += "finally "; + } + + if (children.isEmpty()) { + tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " "; + } else { + for (SimpleTree child : children) { + tokens += getTokensDeepFirst(child); + } + } + return tokens; + } + private String getSemiSourceCodeEditScripts(HierarchicalActionSet actionSet) { // TODO Auto-generated method stub return null; @@ -184,49 +249,6 @@ public class Parser { return endPosition; } - private List getFirstAndLastMoveAction(HierarchicalActionSet gumTreeResult) { - List firstAndLastMoveActions = new ArrayList<>(); - List actions = gumTreeResult.getSubActions(); - if (actions.size() == 0) { - return null; - } - Move firstMoveAction = null; - Move lastMoveAction = null; - while (actions.size() > 0) { - List subActions = new ArrayList<>(); - for (HierarchicalActionSet action : actions) { - subActions.addAll(action.getSubActions()); - if (action.toString().startsWith("MOV")) { - if (firstMoveAction == null) { - firstMoveAction = (Move) action.getAction(); - lastMoveAction = (Move) action.getAction(); - } else { - int startPosition = action.getStartPosition(); - int length = action.getLength(); - int startPositionFirst = firstMoveAction.getPosition(); - int startPositionLast = lastMoveAction.getPosition(); - int lengthLast = lastMoveAction.getNode().getLength(); - if (startPosition < startPositionFirst || (startPosition == startPositionFirst && length > firstMoveAction.getLength())) { - firstMoveAction = (Move) action.getAction(); - } - if ((startPosition + length) > (startPositionLast + 
lengthLast)) { - lastMoveAction = (Move) action.getAction(); - } - } - } - } - - actions.clear(); - actions.addAll(subActions); - } - if (firstMoveAction == null) { - return null; - } - firstAndLastMoveActions.add(firstMoveAction); - firstAndLastMoveActions.add(lastMoveAction); - return firstAndLastMoveActions; - } - private String getPatchSourceCode(String sourceCode, int startLineNum, int endLineNum, int startLineNum2, int endLineNum2) { String buggyStatements = ""; String fixedStatements = ""; @@ -243,6 +265,7 @@ public class Parser { int counter3 = 0; // counter of non-buggy code line. while ((line = reader.readLine()) != null) { if (startLine == 0 && line.startsWith("@@ -")) { + // RegExp.filterSignal(line) int plusIndex = line.indexOf("+"); String lineNum = line.substring(4, plusIndex); String[] nums = lineNum.split(","); @@ -370,4 +393,16 @@ public class Parser { public String getSizes() { return sizes; } + + public String getTokensOfSourceCode() { + return tokensOfSourceCode; + } + + public String getOriginalTree() { + return originalTree; + } + + public String getActionSets() { + return actionSets; + } } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/SingleStatementParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/SingleStatementParser.java new file mode 100644 index 0000000..e1ee958 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/SingleStatementParser.java @@ -0,0 +1,430 @@ +package edu.lu.uni.serval.FixPatternParser; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jdt.core.dom.CompilationUnit; + +import com.github.gumtreediff.actions.model.Action; +import com.github.gumtreediff.actions.model.Move; +import com.github.gumtreediff.actions.model.Update; +import com.github.gumtreediff.tree.ITree; + +import 
edu.lu.uni.serval.config.Configuration; +import edu.lu.uni.serval.diffentry.DiffEntryHunk; +import edu.lu.uni.serval.diffentry.DiffEntryReader; +import edu.lu.uni.serval.gumtree.GumTreeComparer; +import edu.lu.uni.serval.gumtree.regroup.ActionFilter; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; +import edu.lu.uni.serval.gumtree.regroup.SimpleTree; +import edu.lu.uni.serval.gumtree.regroup.SimplifyTree; + +/** + * Parse fix patterns with GumTree. + * + * @author kui.liu + * + */ +public class SingleStatementParser { + + private String astEditScripts = ""; // it will be used for fix patterns mining. + private String patchesSourceCode = ""; // testing + private String buggyTrees = ""; // Compute similarity for bug localization. + private String sizes = ""; // fix patterns' selection before mining. + private String tokensOfSourceCode = ""; // Compute similarity for bug localization. + private String originalTree = ""; // Guide of generating patches. + private String actionSets = ""; // Guide of generating patches. + + public void parseFixPatterns(File prevFile, File revFile, File diffEntryFile) throws FileNotFoundException, IOException { + // GumTree results + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); + + if (gumTreeResults != null && gumTreeResults.size() > 0) { + List allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); + // Filter out modified actions of changing method names, method parameters, variable names and field names in declaration part. + // TODO: variable effects range, sub-actions are these kinds of modification? + List actionSets = new ActionFilter().filterOutUselessActions(allActionSets); + + if (actionSets.size() > 0) { + // DiffEntry Hunks: filter out big hunks. 
+ List diffentryHunks = new DiffEntryReader().readHunks(diffEntryFile); + for (HierarchicalActionSet actionSet : actionSets) { + // position of buggy statements + int startPosition = 0; + int endPosition = 0; + // position of fixed statements + int startPosition2 = 0; + int endPosition2 = 0; + + String actionStr = actionSet.getActionString(); + String astNodeType = actionSet.getAstNodeType(); + if (actionStr.startsWith("INS")) { + startPosition2 = actionSet.getStartPosition(); + endPosition2 = startPosition2 + actionSet.getLength(); + List firstAndLastMov = getFirstAndLastMoveAction(actionSet); + if (firstAndLastMov != null) { + startPosition = firstAndLastMov.get(0).getNode().getPos(); + ITree lastTree = firstAndLastMov.get(1).getNode(); + endPosition = lastTree.getPos() + lastTree.getLength(); + } else { // Ignore the pure insert actions without any move actions. + continue; + } + } else if (actionStr.startsWith("UPD")) { + startPosition = actionSet.getStartPosition(); + endPosition = startPosition + actionSet.getLength(); + Update update = (Update) actionSet.getAction(); + ITree newNode = update.getNewNode(); + startPosition2 = newNode.getPos(); + endPosition2 = startPosition2 + newNode.getLength(); + + if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType) + || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType) + || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType) + || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) { + List children = update.getNode().getChildren(); + endPosition = getEndPosition(children); + List newChildren = newNode.getChildren(); + endPosition2 = getEndPosition(newChildren); + + if (endPosition == 0) { + endPosition = startPosition + actionSet.getLength(); + } + if (endPosition2 == 0) { + endPosition2 = startPosition2 + newNode.getLength(); + } + } + } else {// DEL actions and MOV actions: we don't need these actions, as for 
now. + continue; + } + if (startPosition == 0 || startPosition2 == 0) { + continue; + } + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + continue; + } + + // Get line numbers. + int startLine = prevUnit.getLineNumber(startPosition); + int endLine = prevUnit.getLineNumber(endPosition); + int startLine2 = revUnit.getLineNumber(startPosition2); + int endLine2 = revUnit.getLineNumber(endPosition2); + //Filter out the modify actions, which are not in the DiffEntry hunks. + DiffEntryHunk hunk = matchHunk(startLine, endLine, startLine2, endLine2, actionStr, diffentryHunks); + if (hunk == null) { + continue; + } + + /* + * Convert the ITree of buggy code to a simple tree. + * It will be used to compute the similarity. + */ + SimplifyTree abstractIdentifier = new SimplifyTree(); + abstractIdentifier.abstractTree(actionSet); + SimpleTree simpleTree = actionSet.getSimpleTree(); + if (simpleTree == null) { // Failed to get the simple tree for INS actions. + continue; + } + + /** + * Select edit scripts for deep learning. + * Edit scripts will be used to mine common fix patterns. + */ + // 1. First level: AST node type. + String astEditScripts = getASTEditScripts(actionSet); + int size = astEditScripts.split(" ").length; + if (size == 1) { + System.out.println(actionSet); + continue; + } + this.sizes += size + "\n"; + this.astEditScripts += astEditScripts + "\n"; + // 2. source code: raw tokens + String rawTokenEditScripts = getRawTokenEditScripts(actionSet); + // 3. abstract identifiers: + String abstractIdentifiersEditScripts = getAbstractIdentifiersEditScripts(actionSet); + // 4. 
semi-source code: + String semiSourceCodeEditScripts = getSemiSourceCodeEditScripts(actionSet); + + + this.buggyTrees += Configuration.BUGGY_TREE_TOKEN + "\n" + simpleTree.toString() + "\n"; + this.tokensOfSourceCode += getTokensDeepFirst(simpleTree).trim() + "\n"; + this.actionSets += Configuration.BUGGY_TREE_TOKEN + "\n" + readActionSet(actionSet, "") + "\n"; + this.originalTree += Configuration.BUGGY_TREE_TOKEN + "\n" + actionSet.getOriginalTree().toString() + "\n"; + + // Source Code of patches. + String patchSourceCode = getPatchSourceCode(hunk, startLine, endLine, startLine2, endLine2); + patchesSourceCode += Configuration.PATCH_TOKEN +"\n" + patchSourceCode + "\n"; + } + } + } + } + + private DiffEntryHunk matchHunk(int startLine, int endLine, int startLine2, int endLine2, String actionStr, List hunks) { + for (DiffEntryHunk hunk : hunks) { + int bugStartLine = hunk.getBugLineStartNum(); + int bugRange = hunk.getBugRange(); + int fixStartLine = hunk.getFixLineStartNum(); + int fixRange = hunk.getFixRange(); + + if (actionStr.startsWith("INS")) { + if (fixStartLine + fixRange < startLine2) { + continue; + } + if (endLine2 < fixStartLine ) { + return null; + } + return hunk; + } else { + if (bugStartLine + bugRange < startLine) { + continue; + } + if (endLine < bugStartLine ) { + return null; + } + return hunk; + } + } + return null; + } + + private List getFirstAndLastMoveAction(HierarchicalActionSet gumTreeResult) { + List firstAndLastMoveActions = new ArrayList<>(); + List actions = gumTreeResult.getSubActions(); + if (actions.size() == 0) { + return null; + } + Move firstMoveAction = null; + Move lastMoveAction = null; + while (actions.size() > 0) { + List subActions = new ArrayList<>(); + for (HierarchicalActionSet action : actions) { + subActions.addAll(action.getSubActions()); + if (action.toString().startsWith("MOV")) { + if (firstMoveAction == null) { + firstMoveAction = (Move) action.getAction(); + lastMoveAction = (Move) action.getAction(); + } else 
{ + int startPosition = action.getStartPosition(); + int length = action.getLength(); + int startPositionFirst = firstMoveAction.getPosition(); + int startPositionLast = lastMoveAction.getPosition(); + int lengthLast = lastMoveAction.getNode().getLength(); + if (startPosition < startPositionFirst || (startPosition == startPositionFirst && length > firstMoveAction.getLength())) { + firstMoveAction = (Move) action.getAction(); + } + if ((startPosition + length) > (startPositionLast + lengthLast)) { + lastMoveAction = (Move) action.getAction(); + } + } + } + } + + actions.clear(); + actions.addAll(subActions); + } + if (firstMoveAction == null) { + return null; + } + firstAndLastMoveActions.add(firstMoveAction); + firstAndLastMoveActions.add(lastMoveAction); + return firstAndLastMoveActions; + } + + private String readActionSet(HierarchicalActionSet actionSet, String line) { + String str = line + actionSet.getActionString() + "\n"; + List subActions = actionSet.getSubActions(); + for (HierarchicalActionSet subAction : subActions) { + str += readActionSet(subAction, line + "---"); + } + return str; + } + + private String getTokensDeepFirst(SimpleTree simpleTree) { + String tokens = ""; + List children = simpleTree.getChildren(); + String astNodeType = simpleTree.getNodeType(); + if ("AssertStatement".equals(astNodeType) || "DoStatement".equals(astNodeType) + || "ForStatement".equals(astNodeType) || "IfStatement".equals(astNodeType) + || "ReturnStatement".equals(astNodeType) || "SwitchStatement".equals(astNodeType) + || "SynchronizedStatement".equals(astNodeType) || "ThrowStatement".equals(astNodeType) + || "TryStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType)) { + String label = simpleTree.getLabel(); + label = label.substring(0, label.indexOf("S")).toLowerCase(); + tokens += label + " "; + } else if ("EnhancedForStatement".equals(astNodeType)) { + tokens += "for "; + } else if ("CatchClause".equals(astNodeType)) { + tokens += "catch "; + } else if 
("SwitchCase".equals(astNodeType)) { + tokens += "case "; + } else if ("SuperConstructorInvocation".equals(astNodeType)) { + tokens += "super "; + } else if ("ConstructorInvocation".equals(astNodeType)) { + tokens += "this "; + } else if ("FinallyBody".equals(astNodeType)) { + tokens += "finally "; + } + + if (children.isEmpty()) { + tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " "; + } else { + for (SimpleTree child : children) { + tokens += getTokensDeepFirst(child); + } + } + return tokens; + } + + private String getSemiSourceCodeEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private String getAbstractIdentifiersEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private String getRawTokenEditScripts(HierarchicalActionSet actionSet) { + // TODO Auto-generated method stub + return null; + } + + private int getEndPosition(List children) { + int endPosition = 0; + for (ITree child : children) { + if (child.getLabel().endsWith("Body")) { + endPosition = child.getPos() - 1; + break; + } + } + return endPosition; + } + + private String getPatchSourceCode(DiffEntryHunk hunk, int startLineNum, int endLineNum, int startLineNum2, int endLineNum2) { + String sourceCode = hunk.getHunk(); + int bugStartLine = hunk.getBugLineStartNum(); + int fixStartLine = hunk.getFixLineStartNum(); + String buggyStatements = ""; + String fixedStatements = ""; + BufferedReader reader = null; + try { + reader = new BufferedReader(new StringReader(sourceCode)); + String line = null; + int bugLines = 0; + int fixLines = 0; + int contextLines = 0; // counter of non-buggy code line. 
+ while ((line = reader.readLine()) != null) { + int bugLineIndex = bugLines + contextLines; + int fixLineIndex = fixLines + contextLines; + if (line.startsWith("-")) { + if (bugStartLine + bugLineIndex >= startLineNum && bugStartLine + bugLineIndex <= endLineNum) { + buggyStatements += line + "\n"; + } + bugLines ++; + } else if (line.startsWith("+")) { + if (fixStartLine + fixLineIndex >= startLineNum2 && fixStartLine + fixLineIndex <= endLineNum2) { + fixedStatements += line + "\n"; + } + } else { + contextLines ++; + } + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + if (reader != null) { + reader.close(); + reader = null; + } + } catch (IOException e) { + e.printStackTrace(); + } + } + return buggyStatements + fixedStatements; + } + + /** + * Get the AST node based edit script of patches in terms of breadth first. + * + * @param actionSet + * @return + */ + private String getASTEditScripts(HierarchicalActionSet actionSet) { + String editScript = ""; + + List actionSets = new ArrayList<>(); + actionSets.add(actionSet); + while (actionSets.size() != 0) { + List subSets = new ArrayList<>(); + for (HierarchicalActionSet set : actionSets) { + subSets.addAll(set.getSubActions()); + String actionStr = set.getActionString(); + int index = actionStr.indexOf("@@"); + String singleEdit = actionStr.substring(0, index).replace(" ", ""); + + if (singleEdit.endsWith("SimpleName")) { + actionStr = actionStr.substring(index + 2); + if (actionStr.startsWith("MethodName")) { + singleEdit = singleEdit.replace("SimpleName", "MethodName"); + } else { + if (actionStr.startsWith("Name")) { + actionStr = actionStr.substring(5, 6); + if (!actionStr.equals(actionStr.toLowerCase())) { + singleEdit = singleEdit.replace("SimpleName", "Name"); + } else { + singleEdit = singleEdit.replace("SimpleName", "Variable"); + } + } else { + singleEdit = singleEdit.replace("SimpleName", "Variable"); + } + } + } + + editScript += singleEdit + " "; + } + actionSets.clear(); 
+ actionSets.addAll(subSets); + } + return editScript; + } + + public String getAstEditScripts() { + return astEditScripts; + } + + public String getPatchesSourceCode() { + return patchesSourceCode; + } + + public String getBuggyTrees() { + return buggyTrees; + } + + public String getSizes() { + return sizes; + } + + public String getTokensOfSourceCode() { + return tokensOfSourceCode; + } + + public String getOriginalTree() { + return originalTree; + } + + public String getActionSets() { + return actionSets; + } +} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/TestParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/TestParser.java index 0ffa8c4..3e1885d 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/TestParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/TestParser.java @@ -20,8 +20,12 @@ public class TestParser { FileHelper.deleteDirectory("OUTPUT/GumTreeResults_Exp_ASTNode/"); FileHelper.deleteDirectory("OUTPUT/GumTreeResults_Exp_RawCode/"); - StringBuilder astEditScriptsBuilder = new StringBuilder(); - StringBuilder sourceCodeBuilder = new StringBuilder(); + StringBuilder astEditScripts = new StringBuilder(); + StringBuilder originalTrees = new StringBuilder(); + StringBuilder buggyTrees = new StringBuilder(); + StringBuilder actionSets = new StringBuilder(); + StringBuilder tokens = new StringBuilder(); + StringBuilder sizes = new StringBuilder(); for (File file : files) { String projectFolder = file.getPath(); @@ -36,8 +40,13 @@ public class TestParser { Parser parser = new Parser(); try { parser.parseFixPatterns(prevFile, revFile, diffentryFile); - astEditScriptsBuilder.append(parser.getAstEditScripts()); - sourceCodeBuilder.append(parser.getPatchesSourceCode()); + + astEditScripts.append(parser.getAstEditScripts()); + originalTrees.append(parser.getOriginalTree()); + buggyTrees.append(parser.getBuggyTrees()); + actionSets.append(parser.getActionSets()); + tokens.append(parser.getTokensOfSourceCode()); 
+ sizes.append(parser.getSizes()); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { @@ -46,7 +55,11 @@ public class TestParser { } } } - FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/EditScripts.list", astEditScriptsBuilder, false); - FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/Patches.list", sourceCodeBuilder, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/EditScripts.list", astEditScripts, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/OriginalTrees.list", originalTrees, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/BuggyTrees.list", buggyTrees, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/ActionSets.list", actionSets, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/Tokens.list", tokens, false); + FileHelper.outputToFile("OUTPUT/GumTreeResults_Exp/Sizes.list", sizes, false); } } diff --git a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java index a1c3e3e..bb7e483 100644 --- a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java +++ b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java @@ -10,6 +10,7 @@ import akka.actor.Props; import akka.actor.UntypedActor; import akka.japi.Creator; import edu.lu.uni.serval.FixPatternParser.Parser; +import edu.lu.uni.serval.FixPatternParser.SingleStatementParser; import edu.lu.uni.serval.utils.FileHelper; public class ParseFixPatternWorker extends UntypedActor { @@ -56,13 +57,13 @@ public class ParseFixPatternWorker extends UntypedActor { File revFile = msgFile.getRevFile(); File prevFile = msgFile.getPrevFile(); File diffentryFile = msgFile.getDiffEntryFile(); - Parser miner = new Parser(); + SingleStatementParser parser = new SingleStatementParser(); log.info("Start to parse file: " + revFile.getPath()); - miner.parseFixPatterns(prevFile, 
/**
 * A single hunk of a diff entry: the start line and range on the buggy side,
 * the start line and range on the fixed side, and the hunk text itself.
 */
public class DiffEntryHunk {

    private final int bugLineStartNum;
    private final int bugRange;
    private final int fixLineStartNum;
    private final int fixRange;
    /** Text of the hunk; stays {@code null} until {@link #setHunk(String)} is called. */
    private String hunk;

    /**
     * @param bugLineStartNum start line of the hunk on the buggy side.
     * @param fixLineStartNum start line of the hunk on the fixed side.
     * @param bugRange        range of the hunk on the buggy side.
     * @param fixRange        range of the hunk on the fixed side.
     */
    public DiffEntryHunk(int bugLineStartNum, int fixLineStartNum, int bugRange, int fixRange) {
        this.bugLineStartNum = bugLineStartNum;
        this.bugRange = bugRange;
        this.fixLineStartNum = fixLineStartNum;
        this.fixRange = fixRange;
    }

    public int getBugLineStartNum() {
        return this.bugLineStartNum;
    }

    public int getFixLineStartNum() {
        return this.fixLineStartNum;
    }

    public int getBugRange() {
        return this.bugRange;
    }

    public int getFixRange() {
        return this.fixRange;
    }

    public String getHunk() {
        return this.hunk;
    }

    public void setHunk(String hunk) {
        this.hunk = hunk;
    }
}
/**
 * Recognizes the header line of a unified-diff hunk, e.g. {@code @@ -21,0 +22,2 @@}.
 */
public class RegExp {
    // "@@ -<start>[,<range>] +<start>[,<range>] @@": each range is optional, but
    // when a comma is present it must be followed by at least one digit.
    private static final String REGULAR_EXPRESSION = "^@@\\s-\\d+(,\\d+)?\\s\\+\\d+(,\\d+)?\\s@@$"; //@@ -21,0 +22,2 @@
    private static final Pattern PATTERN = Pattern.compile(REGULAR_EXPRESSION);

    /**
     * Tells whether the given line is a hunk header.
     *
     * @param string the line to test; the whole string must be the header.
     * @return {@code true} if the line is a well-formed hunk header,
     *         {@code false} otherwise (including for {@code null}).
     */
    public static boolean filterSignal(String string) {
        return string != null && PATTERN.matcher(string).matches();
    }
}
changing method names, method parameters, variable names and field names in declaration part. + * + * @param actionSets + * @return + */ + public List filterOutUselessActions(List actionSets) { + // Filter out modifications of variable names and method names. + List uselessActions = findoutUselessActions(actionSets); + actionSets.removeAll(uselessActions); + uselessActions.clear(); + + // Filter out non-UPD modifications, and modifications of variable names and method names. + uselessActions = findoutUselessActionSets(actionSets, true); + actionSets.removeAll(uselessActions); + return actionSets; + } + + private List findoutUselessActionSets(List actionSets, boolean isRoot) { + List uselessActions = new ArrayList<>(); + + FindActionSet: { + for (HierarchicalActionSet actionSet : actionSets) { + if (!isRoot) { + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("UPD MethodInvocation") || actionStr.startsWith("INS MethodInvocation") || actionStr.startsWith("DEL MethodInvocation")) { + String label = actionSet.getAction().getNode().getLabel(); + for (String methodName : methodNames) { + if (actionSet.getActionString().startsWith("UPD MethodInvocation@@" + methodName + "(") + || actionSet.getActionString().startsWith("INS MethodInvocation@@" + methodName + "(") + || actionSet.getActionString().startsWith("DEL MethodInvocation@@" + methodName + "(") + || label.contains("." 
+ methodName + "(")) { + addToUselessActions(actionSet, uselessActions); + break FindActionSet; + } + } + } else if (actionStr.startsWith("UPD SimpleName") || actionStr.startsWith("INS SimpleName") || actionStr.startsWith("DEL SimpleName")) { + String label = actionSet.getAction().getNode().getLabel(); + for (String variableName : variableNames) { + if (label.equals(variableName) || label.equals("Name:" + variableName)) { + addToUselessActions(actionSet, uselessActions); + break FindActionSet; + } + } + } else if (actionStr.startsWith("INS StringLiteral") || actionStr.startsWith("DEL StringLiteral") || actionStr.startsWith("MOV StringLiteral")) { + addToUselessActions(actionSet, uselessActions); + break FindActionSet; + } + + List uselessActionSets = findoutUselessActionSets(actionSet.getSubActions(), false); + if (uselessActionSets.size() > 0) { + uselessActions.addAll(uselessActionSets); + break; + } + } else { + if (!actionSet.getAstNodeType().endsWith("Statement") || !"FieldDeclaration".equals(actionSet.getAstNodeType())) { + uselessActions.add(actionSet); + } else { + uselessActions.addAll(findoutUselessActionSets(actionSet.getSubActions(), false)); + } + } + } + } + + return uselessActions; + } + + private void addToUselessActions(HierarchicalActionSet actionSet, List uselessActions) { + while (actionSet.getParent() != null) { + actionSet = actionSet.getParent(); + } + if (!uselessActions.contains(actionSet)) { + uselessActions.add(actionSet); + } + } + + /** + * Identify the the modify actions of changing method names, method parameters, variable names and field names in declaration part. 
+ * + * @param actionSets + * @return + */ + private List findoutUselessActions(List actionSets) { + List uselessActions = new ArrayList<>(); + + for (HierarchicalActionSet actionSet : actionSets) { + String actionType = actionSet.getAstNodeType(); + if (actionType.equals("MethodDeclaration")) { + addToUselessActions(actionSet, uselessActions);// INS, DEL: useful?, UPD, except the modifier actions + if (!actionSet.getActionString().startsWith("MOV ")) { + String label = actionSet.getNode().getLabel(); + String methodName = label.substring(label.indexOf("MethodName:")); + methodName = methodName.substring(11, methodName.indexOf(",")); + methodNames.add(methodName); // "MethodName:***" + + // UPD, DEL, INS parameters. + List subActionSets = actionSet.getSubActions(); + for (HierarchicalActionSet subActionSet : subActionSets) { + if (subActionSet.getAstNodeType().equals("SingleVariableDeclaration")) { + List subActionSets2 = subActionSet.getSubActions(); // + if (subActionSets2.size() == 0) { + String actSetStr = subActionSet.getActionString(); + int index1 = actSetStr.indexOf("@@"); + int index2 = 0; + if (actSetStr.startsWith("DEL")) { + index2 = actSetStr.indexOf("@AT@"); + } else { + index2 = actSetStr.indexOf("@TO@");; + } + actSetStr = actSetStr.substring(index1, index2).trim(); + String variableName = actSetStr.substring(actSetStr.lastIndexOf(" ")); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + } else { + HierarchicalActionSet actSet = subActionSets2.get(subActionSets2.size() - 1); + String actStr = actSet.getActionString(); + if (actStr.startsWith("UPD SimpleName") || actStr.startsWith("INS SimpleName") || actStr.startsWith("DEL SimpleName")) { + String variableName = actSet.getNode().getLabel(); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + } + } + } + } + } + } else if (actionType.equals("FieldDeclaration") || actionType.equals("VariableDeclarationStatement")) { + // UPD 
VariableDeclarationFragment + if (!actionSet.getActionString().startsWith("MOV ")) { + List subActionSets = actionSet.getSubActions(); + if (subActionSets.size() > 0) { + for (HierarchicalActionSet subActionSet : subActionSets) { // VariableDeclarationFragments + if (identifyUpdateVDF(subActionSet)) { + addToUselessActions(actionSet, uselessActions); + } + } + } + } + } else if (actionType.equals("TryStatement")) { + if (actionSet.getActionString().startsWith("UPD ")) { + List subActionSets = actionSet.getSubActions(); + if (subActionSets.size() > 0) { + for (HierarchicalActionSet subActionSet : subActionSets) { + if (subActionSet.getActionString().startsWith("UPD VariableDeclarationExpression")) { + List subActionSets2 = subActionSet.getSubActions(); // VariableDeclarationFragments + for (HierarchicalActionSet subActionSet2 : subActionSets2) { + if (identifyUpdateVDF(subActionSet2)) { + addToUselessActions(actionSet, uselessActions); + } + } + } else { + break; + } + } + } + } + } else if (actionType.equals("EnhancedForStatement")) { // SingleVariableDeclaration + if (!actionSet.getActionString().startsWith("MOV ")) { + List subActionSets = actionSet.getSubActions(); + if (subActionSets.size() > 0) { + HierarchicalActionSet subActionSet = subActionSets.get(0); + if (subActionSet.getActionString().startsWith("UPD SingleVariableDeclaration")) { + List subActionSets2 = subActionSet.getSubActions(); + for (HierarchicalActionSet subActionSet2 : subActionSets2) { // Type or Identifier + if (subActionSet2.getActionString().startsWith("UPD SimpleName")) { + String variableName = subActionSet2.getNode().getLabel(); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + addToUselessActions(actionSet, uselessActions); + } + } + } + } + } + } else if (actionType.equals("SingleVariableDeclaration")) { + if (!actionSet.getActionString().startsWith("MOV ")) { + List subActionSets2 = actionSet.getSubActions(); // + if (subActionSets2.size() == 0) 
{ + String actSetStr = actionSet.getActionString(); + int index1 = actSetStr.indexOf("@@"); + int index2 = 0; + if (actSetStr.startsWith("DEL")) { + index2 = actSetStr.indexOf("@AT@"); + } else { + index2 = actSetStr.indexOf("@TO@");; + } + actSetStr = actSetStr.substring(index1, index2).trim(); + String variableName = actSetStr.substring(actSetStr.lastIndexOf(" ")); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + addToUselessActions(actionSet, uselessActions); + } else { + HierarchicalActionSet actSet = subActionSets2.get(subActionSets2.size() - 1); + String actStr = actSet.getActionString(); + if (actStr.startsWith("UPD SimpleName") || actStr.startsWith("INS SimpleName") || actStr.startsWith("DEL SimpleName")) { + String variableName = actSet.getNode().getLabel(); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + addToUselessActions(actionSet, uselessActions); + } + } + } + } else { + if (actionSet.getParent() != null) { + while (actionSet.getParent() != null) { + actionSet = actionSet.getParent(); + } + if (uselessActions.contains(actionSet)) { + return uselessActions; + } else { + uselessActions.addAll(findoutUselessActions(actionSet.getSubActions())); + } + } + } + } + return uselessActions; + } + + /** + * Identify the AST node of this ActionSet is VariableDeclarationFragment or not. + * And, whether the action is happened on the Variable name or not. 
+ * + * @param actionSet + */ + private boolean identifyUpdateVDF(HierarchicalActionSet actionSet) { + String actStr = actionSet.getActionString(); + if (actStr.startsWith("UPD VariableDeclarationFragment") + || actStr.startsWith("INS VariableDeclarationFragment") + || actStr.startsWith("DEL VariableDeclarationFragment")) { + List subActionSets = actionSet.getSubActions(); + if (subActionSets == null || subActionSets.size() == 0) { + // modification of Dimension + return true; + } + HierarchicalActionSet actSet = subActionSets.get(0); + String actSetStr = actSet.getActionString(); + if (actSetStr.startsWith("UPD SimpleName") || actSetStr.startsWith("INS SimpleName") || actSetStr.startsWith("DEL SimpleName")) { + String variableName = actSet.getNode().getLabel(); + variableNames.add(variableName); // "SimpleName:" + variableName TODO: effect range + return true; + } + } + return false; + } + +} diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java new file mode 100644 index 0000000..11b5ff9 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java @@ -0,0 +1,227 @@ +package edu.lu.uni.serval.gumtree.regroup; + +import java.util.ArrayList; +import java.util.List; + +import com.github.gumtreediff.actions.model.Action; +import com.github.gumtreediff.tree.ITree; + +/** + * Hierarchical-level results of GumTree results + * + * @author kui.liu + * + */ +public class HierarchicalActionSet implements Comparable { + + private String astNodeType; + private Action action; + private Action parentAction; + private String actionString; + private int startPosition; + private int length; + private int bugStartLineNum; + private int bugEndLineNum; + private int fixStartLineNum; + private int fixEndLineNum; + private HierarchicalActionSet parent = null; + private List subActions = new ArrayList<>(); + + private ITree node; + private 
/**
 * Stores the action string and derives from it the start position (the number
 * after "@AT@"), the length (the number after "@LENGTH@", or 0 when that
 * marker is absent) and the AST node type (the text between the first space
 * and "@@").
 *
 * @param actionString the textual form of the action.
 */
public void setActionString(String actionString) {
    this.actionString = actionString;

    final String atMarker = "@AT@";
    final String lengthMarker = "@LENGTH@";
    int positionFrom = actionString.indexOf(atMarker) + atMarker.length();
    int lengthAt = actionString.indexOf(lengthMarker);
    boolean hasLength = lengthAt != -1;

    String positionText = hasLength
            ? actionString.substring(positionFrom, lengthAt)
            : actionString.substring(positionFrom);
    this.startPosition = Integer.parseInt(positionText.trim());
    this.length = hasLength
            ? Integer.parseInt(actionString.substring(lengthAt + lengthMarker.length()).trim())
            : 0;

    // "<ACTION> <NodeType>@@..." -> "<NodeType>"
    String head = actionString.substring(0, actionString.indexOf("@@"));
    this.astNodeType = head.substring(head.indexOf(" ") + 1);
}
getFixStartLineNum() { + return fixStartLineNum; + } + + public void setFixStartLineNum(int fixStartLineNum) { + this.fixStartLineNum = fixStartLineNum; + } + + public int getFixEndLineNum() { + return fixEndLineNum; + } + + public void setFixEndLineNum(int fixEndLineNum) { + this.fixEndLineNum = fixEndLineNum; + } + + public HierarchicalActionSet getParent() { + return parent; + } + + public void setParent(HierarchicalActionSet parent) { + this.parent = parent; + } + + public List getSubActions() { + return subActions; + } + + public void setSubActions(List subActions) { + this.subActions = subActions; + } + + public SimpleTree getAbstractSimpleTree() { + return abstractSimpleTree; + } + + public void setAbstractSimpleTree(SimpleTree simpleTree) { + this.abstractSimpleTree = simpleTree; + } + + public SimpleTree getAbstractIdentifierTree() { + return abstractIdentifierTree; + } + + public void setAbstractIdentifierTree(SimpleTree abstractIdentifierTree) { + this.abstractIdentifierTree = abstractIdentifierTree; + } + + public SimpleTree getSimpleTree() { + return simpleTree; + } + + public void setSimpleTree(SimpleTree rawTokenTree) { + this.simpleTree = rawTokenTree; + } + + public SimpleTree getOriginalTree() { + return originalTree; + } + + public void setOriginalTree(SimpleTree originalTree) { + this.originalTree = originalTree; + } + + @Override + public int compareTo(HierarchicalActionSet o) { + return this.action.compareTo(o.action); + } + + private List strList = new ArrayList<>(); + + @Override + public String toString() { + String str = actionString; + if (strList.size() == 0) { + strList.add(str); + for (HierarchicalActionSet actionSet : subActions) { + actionSet.toString(); + List strList1 = actionSet.strList; + for (String str1 : strList1) { + strList.add("----" + str1); + } + } + } + + str = ""; + for (String str1 : strList) { + str += str1 + "\n"; + } + + return str; + } + + public String toASTNodeLevelAction() { + if (strList.size() == 0) { + 
toString(); + } + String astNodeStr = ""; + for (String str : strList) { + astNodeStr += str.substring(0, str.indexOf("@@")) + "\n"; + } + return astNodeStr; + } + + public String toRawCodeLevelAction() { + if (strList.size() == 0) { + toString(); + } + String astNodeStr = ""; + for (String str : strList) { + str = str.substring(0, str.indexOf(" @AT@")) + "\n"; + int index1 = str.indexOf(" ") + 1; + int index2 = str.indexOf("@@") + 2; + astNodeStr += str.substring(0, index1) + str.substring(index2); + } + return astNodeStr; + } +} diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java new file mode 100644 index 0000000..caf26d6 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java @@ -0,0 +1,173 @@ +package edu.lu.uni.serval.gumtree.regroup; + +import java.util.ArrayList; +import java.util.List; + +import com.github.gumtreediff.actions.model.Action; +import com.github.gumtreediff.actions.model.Addition; +import com.github.gumtreediff.actions.model.Insert; +import com.github.gumtreediff.actions.model.Move; +import com.github.gumtreediff.tree.ITree; + +import edu.lu.uni.serval.gumtree.utils.ASTNodeMap; +import edu.lu.uni.serval.utils.ListSorter; + +/** + * Regroup GumTree results to a hierarchical construction. + * + * @author kui.liu + * + */ +public class HierarchicalRegrouper { + + public List regroupGumTreeResults(List actionsArgu) { + /* + * First, sort actions by their positions. + */ + List actions = new ListSorter(actionsArgu).sortAscending(); + if (actions == null) { + actions = actionsArgu; + } + + /* + * Second, group actions by their positions. 
+ */ + List actionSets = new ArrayList<>(); + HierarchicalActionSet actionSet = null; + for(Action act : actions){ + Action parentAct = findParentAction(act, actions); + if (parentAct == null) { + actionSet = createActionSet(act, parentAct); + actionSets.add(actionSet); + } else { + if (!addToAactionSet(act, parentAct, actionSets)) { + // The index of the parent action in the actions' list is larger than the index of this action. + actionSet = createActionSet(act, parentAct); + actionSets.add(actionSet); + } + } + } + + /* + * Third, add the subActionSet to its parent ActionSet. + */ + List reActionSets = new ArrayList<>(); + for (HierarchicalActionSet actSet : actionSets) { + Action parentAct = actSet.getParentAction(); + if (parentAct != null) { + addToActionSets(actSet, parentAct, actionSets); + } else { + reActionSets.add(actSet); + } + } + return reActionSets; + } + + private HierarchicalActionSet createActionSet(Action act, Action parentAct) { + HierarchicalActionSet actionSet = new HierarchicalActionSet(); + actionSet.setAction(act); + actionSet.setActionString(parseAction(act.toString())); + actionSet.setParentAction(parentAct); + actionSet.setNode(act.getNode()); + actionSet.setParent(null); + return actionSet; + } + + private String parseAction(String actStr1) { + // UPD 25@@!a from !a to isTrue(a) at 69 + String[] actStrArrays = actStr1.split("@@"); + String actStr = ""; + int length = actStrArrays.length; + for (int i =0; i < length - 1; i ++) { + String actStrFrag = actStrArrays[i]; + int index = actStrFrag.lastIndexOf(" ") + 1; + String nodeType = actStrFrag.substring(index); + if (!"".equals(nodeType)) { + try { + nodeType = ASTNodeMap.map.get(Integer.parseInt(nodeType)); + } catch (NumberFormatException e) { + nodeType = actStrFrag.substring(index); + } + } + actStrFrag = actStrFrag.substring(0, index) + nodeType + "@@"; + actStr += actStrFrag; + } + actStr += actStrArrays[length - 1]; + return actStr; + } + + private void 
addToActionSets(HierarchicalActionSet actionSet, Action parentAct, List actionSets) { + for (HierarchicalActionSet actSet : actionSets) { + if (actSet.equals(actionSet)) continue; + if (actSet.getAction().equals(parentAct)) { // actSet is the parent of actionSet. + actionSet.setParent(actSet); + actSet.getSubActions().add(actionSet); + ListSorter sorter = new ListSorter(actSet.getSubActions()); + actSet.setSubActions(sorter.sortAscending()); + break; + } else { + addToActionSets(actionSet, parentAct, actSet.getSubActions()); + } + } + } + + private boolean addToAactionSet(Action act, Action parentAct, List actionSets) { + ITree parentTree = parentAct.getNode(); + + for(HierarchicalActionSet actionSet : actionSets) { + Action action = actionSet.getAction(); + + ITree tree = action.getNode(); + if (tree.equals(parentTree)) { // actionSet is the parent of actSet. + if (action instanceof Move && !(act instanceof Move)) { + continue; + } + + HierarchicalActionSet actSet = createActionSet(act, actionSet.getAction()); + actSet.setParent(actionSet); + actionSet.getSubActions().add(actSet); + return true; + } else { + if ((!(act instanceof Insert) && !(action instanceof Insert)) + || (act instanceof Insert && action instanceof Insert)) { + int startPosition = act.getPosition(); + int length = act.getLength(); + int startP = action.getPosition(); + int leng = action.getLength(); + if (!(startP <= startPosition) || !(length <= leng)) { + continue; + } else if (startP > startPosition + length) { + break; + } + } + List subActionSets = actionSet.getSubActions(); + if (subActionSets.size() > 0) { + boolean added = addToAactionSet(act, parentAct, subActionSets); + if (added) { + return true; + } else { + continue; + } + } + } + } + return false; + } + + private Action findParentAction(Action action, List actions) { + + ITree parent = action.getNode().getParent(); + if (action instanceof Addition) { + parent = ((Addition) action).getParent(); + } + for (Action act : actions) { + 
if (act.getNode().equals(parent)) { + if (act instanceof Move && !(action instanceof Move)) { + continue; + } + return act; + } + } + return null; + } +} diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java new file mode 100644 index 0000000..ae67b75 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java @@ -0,0 +1,326 @@ +package edu.lu.uni.serval.gumtree.regroup; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jdt.core.dom.CompilationUnit; + +import com.github.gumtreediff.actions.model.Move; +import com.github.gumtreediff.actions.model.Update; +import com.github.gumtreediff.tree.ITree; + +import edu.lu.uni.serval.FixPatternParser.CUCreator; +import edu.lu.uni.serval.diffentry.DiffEntryHunk; + +public class HunkActionFilter { + + /** + * Filter out the modify actions, which are not in the DiffEntry hunks. 
+ * + * @param hunks + * @param actionSets + * @return + */ + public List filterActionsByDiffEntryHunk(List hunks, + List actionSets, File revFile, File prevFile) { + List uselessActions = new ArrayList<>(); + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + return uselessActions; + } + + for (HierarchicalActionSet actionSet : actionSets) { + // position of buggy statements + int startPosition = 0; + int endPosition = 0; + int startLine = 0; + int endLine = 0; + // position of fixed statements + int startPosition2 = 0; + int endPosition2 = 0; + int startLine2 = 0; + int endLine2 = 0; + + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("INS")) { + startPosition2 = actionSet.getStartPosition(); + endPosition2 = startPosition2 + actionSet.getLength(); + + List firstAndLastMov = getFirstAndLastMoveAction(actionSet); + if (firstAndLastMov != null) { + startPosition = firstAndLastMov.get(0).getNode().getPos(); + ITree lastTree = firstAndLastMov.get(1).getNode(); + endPosition = lastTree.getPos() + lastTree.getLength(); + } + } else { + startPosition = actionSet.getStartPosition(); // range of actions + endPosition = startPosition + actionSet.getLength(); + if (actionStr.startsWith("UPD")) { + Update update = (Update) actionSet.getAction(); + ITree newNode = update.getNewNode(); + startPosition2 = newNode.getPos(); + endPosition2 = startPosition2 + newNode.getLength(); + } + } + startLine = prevUnit.getLineNumber(startPosition); + endLine = prevUnit.getLineNumber(endPosition); + startLine2 = revUnit.getLineNumber(startPosition2); + endLine2 = revUnit.getLineNumber(endPosition2); + + for (DiffEntryHunk hunk : hunks) { + int bugStartLine = hunk.getBugLineStartNum(); + int bugRange = hunk.getBugRange(); + int fixStartLine = hunk.getFixLineStartNum(); + int fixRange = 
hunk.getFixRange(); + + if (actionStr.startsWith("INS")) { + if (fixStartLine + fixRange < startLine2) { + continue; + } + if (endLine2 < fixStartLine ) { + uselessActions.add(actionSet); + } + break; + } else { + if (bugStartLine + bugRange < startLine) { + continue; + } + if (endLine < bugStartLine ) { + uselessActions.add(actionSet); + } + break; + } + } + actionSet.setBugStartLineNum(startLine); + actionSet.setBugEndLineNum(endLine); + actionSet.setFixStartLineNum(startLine2); + actionSet.setFixEndLineNum(endLine2); + } + + actionSets.removeAll(uselessActions); + uselessActions.clear(); + return actionSets; + } + + /** + * Filter out the modify actions, which are not in the DiffEntry hunks. + * + * @param hunks + * @param actionSets + * @return + */ + public List filterActionsByDiffEntryHunk2(List hunks, + List actionSets, File revFile, File prevFile) { + List allHunkFixPatterns = new ArrayList<>(); + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + return allHunkFixPatterns; + } + + int i = 0; + int size = actionSets.size(); + for (DiffEntryHunk hunk : hunks) { + int bugStartLine = hunk.getBugLineStartNum(); + int bugRange = hunk.getBugRange(); + int fixStartLine = hunk.getFixLineStartNum(); + int fixRange = hunk.getFixRange(); + + for (; i < size; i ++) { + // position of buggy statements + int startPosition = 0; + int endPosition = 0; + int startLine = 0; + int endLine = 0; + // position of fixed statements + int startPosition2 = 0; + int endPosition2 = 0; + int startLine2 = 0; + int endLine2 = 0; + + HierarchicalActionSet actionSet = actionSets.get(i); + String actionStr = actionSet.getActionString(); + ITree parentITree = null; + List hunkActionSets = new ArrayList<>(); + if (actionStr.startsWith("INS")) { + startPosition2 = actionSet.getStartPosition(); + endPosition2 = 
startPosition2 + actionSet.getLength(); + + List firstAndLastMov = getFirstAndLastMoveAction(actionSet); + if (firstAndLastMov != null) { + startPosition = firstAndLastMov.get(0).getNode().getPos(); + ITree lastTree = firstAndLastMov.get(1).getNode(); + endPosition = lastTree.getPos() + lastTree.getLength(); + } + } else { + startPosition = actionSet.getStartPosition(); // range of actions + endPosition = startPosition + actionSet.getLength(); + if (actionStr.startsWith("UPD")) { + Update update = (Update) actionSet.getAction(); + ITree newNode = update.getNewNode(); + startPosition2 = newNode.getPos(); + endPosition2 = startPosition2 + newNode.getLength(); + + String astNodeType = actionSet.getAstNodeType(); + if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType) + || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType) + || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType) + || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) { + List children = update.getNode().getChildren(); + endPosition = getEndPosition(children); + List newChildren = newNode.getChildren(); + endPosition2 = getEndPosition(newChildren); + + if (endPosition == 0) { + endPosition = startPosition + actionSet.getLength(); + } + if (endPosition2 == 0) { + endPosition2 = startPosition2 + newNode.getLength(); + } + } + } + } + startLine = prevUnit.getLineNumber(startPosition); + endLine = prevUnit.getLineNumber(endPosition); + startLine2 = revUnit.getLineNumber(startPosition2); + endLine2 = revUnit.getLineNumber(endPosition2); + actionSet.setBugStartLineNum(startLine); + actionSet.setBugEndLineNum(endLine); + actionSet.setFixStartLineNum(startLine2); + actionSet.setFixEndLineNum(endLine2); + + if (actionStr.startsWith("INS")) { + if (fixStartLine + fixRange < startLine2) { + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + break; + } + if (endLine2 >= fixStartLine ) { + ITree 
parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, startLine, startLine2, endLine, endLine2, parentITree, hunk); + if (parent != null) { + if (parent != parentITree) { + hunkActionSets = new ArrayList<>(); + } + hunkActionSets.add(actionSet); + } else if (hunkActionSets.size() > 0) { + hunkActionSets = new ArrayList<>(); + } + parentITree = parent; + } + } else { // UPD, DEL, MOV + if (bugStartLine + bugRange < startLine) { + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + break; + } + if (endLine >= bugStartLine ) { + ITree parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, startLine, startLine2, endLine, endLine2, parentITree, hunk); + if (parent != null) { + if (parent != parentITree) { + hunkActionSets = new ArrayList<>(); + } + hunkActionSets.add(actionSet); + } else if (hunkActionSets.size() > 0) { + hunkActionSets = new ArrayList<>(); + } + parentITree = parent; + } + } + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + } + } + + return allHunkFixPatterns; + } + + private int getEndPosition(List children) { + int endPosition = 0; + for (ITree child : children) { + if (child.getLabel().endsWith("Body")) { + endPosition = child.getPos() - 1; + break; + } + } + return endPosition; + } + + private void addHunkActionSets(List hunkActionSets, List allHunkFixPatterns, DiffEntryHunk hunk) { + if (hunkActionSets.size() > 0) { + HunkFixPattern hunkFixPattern = new HunkFixPattern(hunk, hunkActionSets); + allHunkFixPatterns.add(hunkFixPattern); + } + } + + private ITree addToHunkActionSets(HierarchicalActionSet actionSet, List hunkActionSets, List allHunkFixPatterns, + int startLine, int startLine2, int endLine, int endLine2, ITree parentITree, DiffEntryHunk hunk) { + String astNodeType = actionSet.getAstNodeType(); + if ("FieldDeclaration".equals(astNodeType)) { + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + hunkActionSets = new ArrayList<>(); + 
hunkActionSets.add(actionSet); + HunkFixPattern hunkFixPattern = new HunkFixPattern(hunk, hunkActionSets); + allHunkFixPatterns.add(hunkFixPattern); + return null; + } else { + ITree currentParent = actionSet.getNode().getParent(); + if (parentITree == null) { + parentITree = currentParent; + } else { + if (!parentITree.equals(currentParent)) { + HunkFixPattern hunkFixPattern = new HunkFixPattern(hunk, hunkActionSets); + allHunkFixPatterns.add(hunkFixPattern); + parentITree = currentParent; + } + } + return parentITree; + } + } + + private List getFirstAndLastMoveAction(HierarchicalActionSet gumTreeResult) { + List firstAndLastMoveActions = new ArrayList<>(); + List actions = gumTreeResult.getSubActions(); + if (actions.size() == 0) { + return null; + } + Move firstMoveAction = null; + Move lastMoveAction = null; + while (actions.size() > 0) { + List subActions = new ArrayList<>(); + for (HierarchicalActionSet action : actions) { + subActions.addAll(action.getSubActions()); + if (action.toString().startsWith("MOV")) { + if (firstMoveAction == null) { + firstMoveAction = (Move) action.getAction(); + lastMoveAction = (Move) action.getAction(); + } else { + int startPosition = action.getStartPosition(); + int length = action.getLength(); + int startPositionFirst = firstMoveAction.getPosition(); + int startPositionLast = lastMoveAction.getPosition(); + int lengthLast = lastMoveAction.getNode().getLength(); + if (startPosition < startPositionFirst || (startPosition == startPositionFirst && length > firstMoveAction.getLength())) { + firstMoveAction = (Move) action.getAction(); + } + if ((startPosition + length) > (startPositionLast + lengthLast)) { + lastMoveAction = (Move) action.getAction(); + } + } + } + } + + actions.clear(); + actions.addAll(subActions); + } + if (firstMoveAction == null) { + return null; + } + firstAndLastMoveActions.add(firstMoveAction); + firstAndLastMoveActions.add(lastMoveAction); + return firstAndLastMoveActions; + } + +} diff --git 
/**
 * A minimal tree node holding an AST node type, a label, a parent link and
 * an ordered list of children.
 */
public class SimpleTree {

    private String nodeType;
    private String label;
    private SimpleTree parent;
    private List<SimpleTree> children = new ArrayList<>();

    public String getNodeType() {
        return nodeType;
    }

    public void setNodeType(String nodeType) {
        this.nodeType = nodeType;
    }

    public String getLabel() {
        return label;
    }

    public void setLabel(String label) {
        this.label = label;
    }

    public SimpleTree getParent() {
        return parent;
    }

    public void setParent(SimpleTree parent) {
        this.parent = parent;
    }

    public List<SimpleTree> getChildren() {
        return children;
    }

    public void setChildren(List<SimpleTree> children) {
        this.children = children;
    }

    /**
     * Renders the subtree rooted at this node, one node per line in pre-order.
     * Each line reads {@code nodeType@@label} and is prefixed with
     * {@code "------"} once per level below this node.
     * <p>
     * The text is rebuilt on every call, so it always reflects the current
     * node types, labels and children.
     *
     * @return the rendered subtree; every line ends with a newline
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        appendLines(out, "");
        return out.toString();
    }

    /** Appends this node's line and, recursively, its children's lines to {@code out}. */
    private void appendLines(StringBuilder out, String prefix) {
        out.append(prefix).append(nodeType).append("@@").append(label).append('\n');
        String childPrefix = prefix + "------";
        for (SimpleTree child : children) {
            child.appendLines(out, childPrefix);
        }
    }
}
and AST node type tree + SimpleTree simpleTree = null; // source code tree with canonical variable names. + + if (actionSet.getActionString().startsWith("INS")) { + List allMoveActions = getAllMoveActions(actionSet); + if (allMoveActions != null) { + List actions = new ArrayList<>(); + for (Action action : allMoveActions) { + boolean hasParent = false; + ITree parent = action.getNode().getParent(); + for (Action act : allMoveActions) { + if (act == action) continue; + ITree actNode = act.getNode(); + if (actNode.equals(parent)) { + hasParent = true; + break; + } + } + if (!hasParent) { + actions.add(action); + } + } + sourceCodeSimpleTree = sourceCodeTree(actions); + simpleTree = canonicalizeSourceCodeTree(actions, null); + } + } else { + ITree tree = actionSet.getNode(); + String astNodeType = actionSet.getAstNodeType(); + if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType) + || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType) + || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType) + || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) { + // delete the body block. 
+ List children = tree.getChildren(); + List newChildren = new ArrayList<>(); + for (ITree child : children) { + if (!child.getLabel().endsWith("Body")) { + newChildren.add(child); + } + } + tree.setChildren(newChildren); + } + sourceCodeSimpleTree = originalSourceCodeTree(tree, null); +// abstractIdentifierTree = abstractIdentifierTree(actionSet, tree, null); +// abstractSimpleTree = semiSourceCodeTree(actionSet, tree, null); + simpleTree = canonicalizeSourceCodeTree(tree, null); + } + +// actionSet.setAbstractSimpleTree(abstractSimpleTree); +// actionSet.setAbstractIdentifierTree(abstractIdentifierTree); +// actionSet.setSimpleTree(sourceCodeSimpleTree); + actionSet.setSimpleTree(simpleTree); + actionSet.setOriginalTree(sourceCodeSimpleTree); + } + + private SimpleTree canonicalizeSourceCodeTree(List actions, SimpleTree parent) { + SimpleTree simpleTree = new SimpleTree(); + simpleTree.setLabel("Block"); + simpleTree.setNodeType("Block"); + simpleTree.setParent(parent); + List children = new ArrayList<>(); + for (Action action : actions) { + ITree node = action.getNode(); + children.add(canonicalizeSourceCodeTree(node, simpleTree)); + } + simpleTree.setChildren(children); + return simpleTree; + } + + private SimpleTree canonicalizeSourceCodeTree(ITree tree, SimpleTree parent) { + SimpleTree simpleTree = new SimpleTree(); + + String label = tree.getLabel(); + String astNode = ASTNodeMap.map.get(tree.getType()); + + List children = tree.getChildren(); + if (children.size() > 0) { + simpleTree.setNodeType(astNode); + if (astNode.endsWith("Type")) { + simpleTree.setLabel(canonicalizeTypeStr(label).replaceAll(" ", "")); + } else { + List subTrees = new ArrayList<>(); + for (ITree child : children) { + subTrees.add(canonicalizeSourceCodeTree(child, simpleTree)); + } + simpleTree.setChildren(subTrees); + simpleTree.setLabel(astNode); + } + } else { + if (astNode.endsWith("Name")) { + // variableName, methodName, QualifiedName + if (label.startsWith("MethodName:")) { // 
+ label = label.substring(11); + simpleTree.setNodeType("MethodName"); + simpleTree.setLabel(label); + } else if (label.startsWith("Name:")) { + label = label.substring(5); + String firstChar = label.substring(0, 1); + if (firstChar.equals(firstChar.toUpperCase())) { + simpleTree.setNodeType("Name"); + simpleTree.setLabel(label); + } else {// variableName: + simpleTree.setNodeType("VariableName"); + simpleTree.setLabel(canonicalVariableName(label, tree)); + } + } else {// variableName: + simpleTree.setNodeType("VariableName"); + simpleTree.setLabel(canonicalVariableName(label, tree)); + } + } else { + simpleTree.setNodeType(astNode); + simpleTree.setLabel(label.replaceAll(" ", "")); + } + } + + simpleTree.setParent(parent); + return simpleTree; + } + + private String canonicalVariableName(String label, ITree tree) { + ITree parent = tree.getParent(); + if (parent == null) { + return label; + } else { + String matchStr = null; + int parentType = parent.getType(); + if (parentType == 44) { // SingleVariableDeclaration + matchStr = matchSingleVariableDeclaration(parent, label); + } else if (parentType == 23 || parentType == 58 || parentType == 60) { + //FieldDeclaration, VariableDeclarationExpression, VariableDeclarationStatement + matchStr = matchVariableDeclarationExpression(parent, label); + } else if (parentType == 31) { // MethodDeclaration + List children = parent.getChildren(); + int index = children.indexOf(tree); + for (int i = index - 1; i >=0; i --) { + ITree child = children.get(i); + int childType = child.getType(); + if (childType == 60) { // VariableDeclarationStatement + matchStr = matchVariableDeclarationExpression(child, label); + } else if (childType == 44) { // SingleVariableDeclaration + matchStr = matchSingleVariableDeclaration(child, label); + } else if (childType ==70 || childType == 24 ||childType == 12 || childType == 54) { + matchStr = matchStatements(childType, child, label); + } + if (matchStr != null) break; + } + } else if (parentType 
==70 || parentType == 24 ||parentType == 12 || parentType == 54) { + // EnhancedForStatement, ForStatement, CatchClause, TryStatement + matchStr = matchStatements(parentType, parent, label); + } else if (parentType == 55) { // TypeDeclaration: Class Declaration + List children = parent.getChildren(); + for (ITree child : children) { + if (child.getType() == 23) { // FieldDeclaration + matchStr = matchVariableDeclarationExpression(child, label); + } + } + } + + if (matchStr != null) { + return matchStr; + } else { + return canonicalVariableName(label, parent); + } + } + } + + private String matchStatements(int typeInt, ITree tree, String label) { + String matchStr = null; + if (typeInt == 70) { // EnhancedForStatement + matchStr = matchSingleVariableDeclaration(tree.getChild(0), label); + } else if (typeInt == 24) { // ForStatement + List children = tree.getChildren(); + for (ITree child : children) { + if (child.getType() == 58) { + matchStr = matchVariableDeclarationExpression(child, label); + if (matchStr != null) break; + } else { + break; + } + } + } else if (typeInt == 12) { // CatchClause + matchStr = matchSingleVariableDeclaration(tree.getChild(0), label); + } else if (typeInt == 54) { // TryStatement + List children = tree.getChildren(); + for (ITree child : children) { + if (child.getType() == 58) { //VariableDeclarationExpression + matchStr = matchVariableDeclarationExpression(tree, label); + if (matchStr != null) break; + } else { + break; + } + } + } + return null; + } + + private String matchVariableDeclarationExpression(ITree variable, String label) { + List children = variable.getChildren(); + ITree type = null; + for (int i = 0, size = children.size(); i < size; i ++) { + ITree child = children.get(i); + if (child.getType() == 59) {// VariableDeclarationFragment + if (type == null) { + type = children.get(i - 1); + } + ITree simpleName = child.getChild(0); + if (simpleName.getLabel().equals(label)) { + String typeStr = 
canonicalizeTypeStr(type.getLabel()); + label = typeStr.toLowerCase() + "Var"; + return label; + } + } + } + return null; + } + + private String matchSingleVariableDeclaration(ITree singleVariable, String label) { + List children = singleVariable.getChildren(); + for (int i = 0, size = children.size(); i < size; i ++) { + ITree child = children.get(i); + if (child.getType() == 42) { // SimpleName + if (child.getLabel().equals(label)) { + ITree type = children.get(i - 1); + String typeStr = canonicalizeTypeStr(type.getLabel()); + label = typeStr.toLowerCase() + "Var"; + return label; + } + break; + } + } + return null; + } + + private String canonicalizeTypeStr(String label) { + String typeStr = label; + int index1 = typeStr.indexOf("<"); + if (index1 != -1) { + typeStr = typeStr.substring(0, index1); + } + index1 = typeStr.lastIndexOf("."); + if (index1 != -1) { + typeStr = typeStr.substring(index1 + 1); + } + return typeStr; + } + +// public static String addPrefixByType(Type type) { +// String newName = ""; +// if (type instanceof PrimitiveType) { +// // byte,short,char,int,long,float,double,boolean,void +// newName = type.toString().toLowerCase(); +// } else if (type instanceof ArrayType) { +// // Type [ ] +// ArrayType at = (ArrayType) type; +// type = at.getElementType(); +// if (type instanceof SimpleType || type instanceof PrimitiveType) { +// newName = getNewName(type); +// } else { +// newName = addPrefixByType(type); +// } +// } else if (type instanceof SimpleType) { +// // TypeName +// if (type.toString().equals("Integer")) { +// newName = "int"; +// } else { +// newName = getNewName(type); +// } +// } else if (type instanceof QualifiedType) { +// // Type.SimpleName +// newName = ((QualifiedType) type).getName().toString().toLowerCase(); +// } else if (type instanceof ParameterizedType) { +// // Type < Type { , Type } > 泛型 +// ParameterizedType t = (ParameterizedType) type; +// newName = getNewName(t.getType()); +// } else if (type instanceof 
WildcardType) { +// newName = "object"; +// } +// return newName; +// } +// +// private static String getNewName(Type type) { +// String newName = ""; +// String typeName = type.toString(); +// int dot = typeName.lastIndexOf("."); +// if (dot > 0) { +// newName = typeName.substring(dot + 1).toString().toLowerCase(); +// } else { +// newName = typeName.toString().toLowerCase(); +// } +// return newName; +// } + + /** + * Convert the Move actions of an INS action into a simple tree with AST nodes and leaf labels. + * + * @param actions + * @return + */ + private SimpleTree sourceCodeTree(List actions) { + if (actions.size() > 0) { + SimpleTree simpleTree = new SimpleTree(); + simpleTree.setNodeType("Block"); + simpleTree.setLabel("Block"); + simpleTree.setParent(null); + List subTrees = new ArrayList<>(); + for (Action action : actions) { + ITree node = action.getNode(); + subTrees.add(sourceCodeTree(node, simpleTree)); + } + simpleTree.setChildren(subTrees); + + return simpleTree; + } + return null; + } + + /** + * Convert a Move action into a simple tree with AST nodes and leaf labels. + * + * @param tree + * @param parent + * @return + */ + private SimpleTree sourceCodeTree(ITree tree, SimpleTree parent) { + SimpleTree simpleTree = new SimpleTree(); + String astNode = ASTNodeMap.map.get(tree.getType()); + do { + if (astNode.endsWith("Statement") || astNode.equals("FieldDeclaration")) break; + + tree = tree.getParent(); + astNode = ASTNodeMap.map.get(tree.getType());// FIXME if the ASTNode is a method declaration or class declaration? 
+ } while (!astNode.endsWith("Statement") && !astNode.equals("FieldDeclaration")); + + String label = tree.getLabel(); + List children = tree.getChildren(); + if (children.size() > 0) { + List subTrees = new ArrayList<>(); + for (ITree child : children) { + subTrees.add(sourceCodeTree(child, simpleTree)); + } + simpleTree.setChildren(subTrees); + simpleTree.setLabel(astNode); + } else { + simpleTree.setLabel(label); + } + simpleTree.setNodeType(astNode); + simpleTree.setParent(parent); + return simpleTree; + } + + /** + * Convert an UPD/DEL/MOV action into a simple tree with AST nodes and leaf labels. + * + * @param tree + * @param parent + * @return + */ + private SimpleTree originalSourceCodeTree(ITree tree, SimpleTree parent) { + SimpleTree simpleTree = new SimpleTree(); + + String label = tree.getLabel(); + String astNode = ASTNodeMap.map.get(tree.getType()); + + simpleTree.setNodeType(astNode); + List children = tree.getChildren(); + if (children.size() > 0) { + List subTrees = new ArrayList<>(); + for (ITree child : children) { + subTrees.add(originalSourceCodeTree(child, simpleTree)); + } + simpleTree.setChildren(subTrees); + simpleTree.setLabel(astNode); + } else { + simpleTree.setLabel(label); + } + + simpleTree.setParent(parent); + return simpleTree; + } + + /** + * Convert an UPD/DEL/MOV action into a simple tree with abstract identifiers of AST nodes and abstract identifiers of leaf labels. 
+     *
+     * @param tree   the GumTree node to convert; its children are converted recursively.
+     * @param parent the simple tree recorded as the parent of the returned node.
+     * @return the simple tree built for {@code tree}, with identifiers replaced by abstract labels.
+     */
+    private SimpleTree abstractIdentifierTree(ITree tree, SimpleTree parent) {
+        SimpleTree simpleTree = new SimpleTree();
+
+        String label = tree.getLabel();
+        String astNode = ASTNodeMap.map.get(tree.getType());
+
+        simpleTree.setNodeType(astNode);
+        List<ITree> children = tree.getChildren();
+        if (children.size() > 0) {
+            if (astNode.endsWith("Type")) {
+                // A type node is abstracted as a whole; its children are not visited.
+                simpleTree.setNodeType("Type");
+                simpleTree.setLabel(getAbstractLabel(abstractTypeIdentifiers, label, ABSTRACT_TYPE)); // abstract Type identifier
+            } else {
+                List<SimpleTree> subTrees = new ArrayList<>();
+                for (ITree child : children) {
+                    subTrees.add(abstractIdentifierTree(child, simpleTree));
+                }
+                simpleTree.setChildren(subTrees);
+                simpleTree.setLabel(astNode);
+            }
+        } else {
+            if (astNode.endsWith("Type")) {
+                simpleTree.setNodeType("Type");
+                if (astNode.equals("WildcardType")) {
+                    simpleTree.setLabel("?");
+                } else {
+                    simpleTree.setLabel(getAbstractLabel(abstractTypeIdentifiers, label, ABSTRACT_TYPE)); // abstract Type identifier
+                }
+            } else if (astNode.endsWith("Name")) {
+                // variableName, methodName, QualifiedName
+                if (label.startsWith("MethodName:")) {
+                    label = label.substring(11); // drop the "MethodName:" prefix
+                    simpleTree.setNodeType("Method");
+                    simpleTree.setLabel(getAbstractLabel(abstractMethodIdentifiers, label, ABSTRACT_METHOD)); // abstract method identifier
+                } else if (label.startsWith("Name:")) {
+                    label = label.substring(5); // drop the "Name:" prefix
+                    String firstChar = label.substring(0, 1);
+                    // A first character that equals its own upper-case form selects "Name", anything else "Variable".
+                    if (firstChar.equals(firstChar.toUpperCase())) {
+                        simpleTree.setNodeType("Name");
+                        simpleTree.setLabel(getAbstractLabel(abstractNameIdentifiers, label, ABSTRACT_NAME)); // abstract Name identifier
+                    } else {// variableName:
+                        simpleTree.setNodeType("Variable");
+                        simpleTree.setLabel(getAbstractLabel(abstractVariableIdentifiers, label, ABSTRACT_VARIABLE));// abstract Variable identifier
+                    }
+                } else {// variableName:
+                    simpleTree.setNodeType("Variable");
+                    simpleTree.setLabel(getAbstractLabel(abstractVariableIdentifiers, label, ABSTRACT_VARIABLE));// abstract Variable identifier
+                }
+            } else if (astNode.equals("BooleanLiteral") || astNode.equals("CharacterLiteral") || astNode.equals("NullLiteral")
+                    || astNode.equals("NumberLiteral") || astNode.equals("StringLiteral") || astNode.equals("ThisExpression")
+                    || astNode.equals("Modifier") || astNode.equals("Operator")) {
+                // Literals, this, modifiers and operators keep their concrete label.
+                simpleTree.setNodeType(astNode);
+                simpleTree.setLabel(label);
+            }
+        }
+
+        simpleTree.setParent(parent);
+        return simpleTree;
+    }
+
+    /**
+     * Convert an UPD/DEL/MOV action into a semi-source code simple tree by abstracting the non-buggy code.
+     *
+     * @param actionSet the action set searched for an action matching each visited node.
+     * @param tree      the GumTree node to convert.
+     * @param parent    the simple tree recorded as the parent of the returned node.
+     * @return the simple tree built for {@code tree} and its descendants.
+     */
+    private SimpleTree semiSourceCodeTree(HierarchicalActionSet actionSet, ITree tree, SimpleTree parent) {
+        SimpleTree simpleTree = new SimpleTree();
+        simpleTree.setParent(parent);
+        // depth first
+        abstractBuggyTreeDeepFirst(actionSet, tree, simpleTree);
+
+        return simpleTree;
+    }
+
+    /**
+     * Fills {@code simpleTree} with the node type, label and children derived from {@code tree}.
+     *
+     * Expression nodes without a matching non-INS action whose action string contains
+     * "@@" + label are collapsed to the label "EXP". Variable names are replaced by abstract
+     * labels; type, method and capitalized names keep their text. Children of "...Type" nodes
+     * are not visited.
+     *
+     * @param actionSet  the action set searched for an action at the position of {@code tree}.
+     * @param tree       the GumTree node being converted.
+     * @param simpleTree the simple tree to fill; its parent is expected to be set by the caller.
+     */
+    private void abstractBuggyTreeDeepFirst(HierarchicalActionSet actionSet, ITree tree, SimpleTree simpleTree) {
+        List<ITree> children = tree.getChildren();
+        HierarchicalActionSet modifyAction = findHierarchicalActionSet(tree.getPos(), tree.getLength(), actionSet);
+        String label = tree.getLabel();
+        String astNode = ASTNodeMap.map.get(tree.getType());
+
+        if (isExpressionType(astNode)) {
+            // NOTE(review): when the expression itself is matched by an action, node type and
+            // label are left unset here - confirm that this is intended.
+            if (modifyAction == null || !modifyAction.getActionString().contains("@@" + label)) {
+                simpleTree.setNodeType("Expression");
+                simpleTree.setLabel("EXP"); // astNode
+            }
+        } else {
+            if (astNode.endsWith("Type")) { // TODO: sub Type
+                simpleTree.setNodeType("Type");
+                if (astNode.equals("WildcardType")) {
+                    simpleTree.setLabel("?");
+                } else { // ArrayType, PrimitiveType, SimpleType, ParameterizedType, QualifiedType, UnionType, NameQualifiedType, IntersectionType
+                    simpleTree.setLabel(astNode + "@@" + label);
+                }
+            } else if (astNode.endsWith("Name")) { // variableName, methodName, QualifiedName
+                if (label.startsWith("MethodName:")) {
+                    label = label.substring(11); // drop the "MethodName:" prefix
+                    simpleTree.setNodeType("Method");
+                    simpleTree.setLabel(label);
+                } else if (label.startsWith("Name:")) {
+                    label = label.substring(5); // drop the "Name:" prefix
+                    String firstChar = label.substring(0, 1);
+                    if (firstChar.equals(firstChar.toUpperCase())) {
+                        simpleTree.setNodeType("Name");
+                        simpleTree.setLabel(label);
+                    } else {// variableName:
+                        simpleTree.setNodeType("Variable");
+                        simpleTree.setLabel(getAbstractLabel(abstractVariableIdentifiers, label, ABSTRACT_VARIABLE));
+                    }
+                } else {// variableName:
+                    simpleTree.setNodeType("Variable");
+                    simpleTree.setLabel(getAbstractLabel(abstractVariableIdentifiers, label, ABSTRACT_VARIABLE));
+                }
+            } else if (astNode.equals("BooleanLiteral") || astNode.equals("CharacterLiteral") || astNode.equals("ThisExpression")
+                    || astNode.equals("NullLiteral") || astNode.equals("NumberLiteral") || astNode.equals("StringLiteral")
+                    || astNode.equals("Modifier") || astNode.equals("Operator")) {
+                simpleTree.setNodeType(astNode);
+                simpleTree.setLabel(label);
+            } else {
+                simpleTree.setNodeType(astNode);
+                simpleTree.setLabel(astNode);
+            }
+        }
+
+        List<SimpleTree> simpleChildren = new ArrayList<>();
+        if (children != null && !astNode.endsWith("Type")) {
+            for (ITree child : children) {
+                simpleChildren.add(semiSourceCodeTree(actionSet, child, simpleTree));
+            }
+        }
+        simpleTree.setChildren(simpleChildren);
+    }
+
+    /**
+     * Returns the MOV actions nested in a loop, labeled, synchronized, if or try statement,
+     * sorted ascending by ListSorter.
+     *
+     * Variables, non-new and used in the inserted statements, could be selected to localize buggy code.
+     *
+     * NOTE(review): generic type arguments were lost in this copy of the source; the element
+     * type Action is inferred from getAction() being added without a cast - confirm.
+     *
+     * @param actionSet the action set whose sub-actions are searched.
+     * @return the sorted MOV actions, or null when the statement type is not handled or no
+     *         MOV action exists (pure INS actions).
+     */
+    private List<Action> getAllMoveActions(HierarchicalActionSet actionSet) {
+        String astNodeType = actionSet.getAstNodeType();
+        if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType)
+                || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType)
+                || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType)
+                || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) {
+            List<Action> allMoveActions = getAllMoveActions2(actionSet);
+            if (allMoveActions != null && allMoveActions.size() > 0) {
+                ListSorter<Action> sorter = new ListSorter<Action>(allMoveActions);
+                allMoveActions = sorter.sortAscending();
+                return allMoveActions;
+            } else {// FIXME: pure INS actions.
+                return null;
+            }
+        } else {// FIXME: pure INS actions.
+            return null;
+        }
+    }
+
+    /**
+     * Collects the action of every descendant action set whose string form starts with "MOV",
+     * visiting the descendants level by level.
+     *
+     * @param actionSet the action set whose descendants are searched; it is not modified.
+     * @return the MOV actions found (possibly empty), or null when {@code actionSet} has no sub-actions.
+     */
+    private List<Action> getAllMoveActions2(HierarchicalActionSet actionSet) {
+        List<Action> allMoveActions = new ArrayList<>();
+        // Walk a copy: clearing the list returned by getSubActions() would wipe the action set's own children.
+        List<HierarchicalActionSet> actions = new ArrayList<>(actionSet.getSubActions());
+        if (actions.size() == 0) {
+            return null;
+        }
+        while (actions.size() > 0) {
+            List<HierarchicalActionSet> subActions = new ArrayList<>();
+            for (HierarchicalActionSet action : actions) {
+                subActions.addAll(action.getSubActions());
+                if (action.toString().startsWith("MOV")) {
+                    allMoveActions.add(action.getAction());
+                }
+            }
+
+            actions = subActions;
+        }
+        return allMoveActions;
+    }
+
+    /**
+     * Returns the abstract label registered for {@code label}, registering a new one on first use.
+     *
+     * @param map      the identifier-to-abstract-label table to read and update.
+     * @param label    the concrete identifier.
+     * @param nameType the prefix of a new abstract label; the current map size is appended to it.
+     * @return the abstract label for {@code label}.
+     */
+    private String getAbstractLabel(Map<String, String> map, String label, String nameType) {
+        if (map.containsKey(label)) {
+            return map.get(label);
+        } else {
+            String name = nameType + map.size();
+            map.put(label, name);
+            return name;
+        }
+    }
+
+    /**
+     * Tells whether an AST node type name is one of the expression types listed below.
+     *
+     * @param astNode the AST node type name.
+     * @return true when {@code astNode} equals one of the listed expression type names.
+     */
+    private boolean isExpressionType(String astNode) {
+        return astNode.equals("ArrayAccess") || astNode.equals("ArrayCreation")
+                || astNode.equals("ArrayInitializer") || astNode.equals("Assignment") || astNode.equals("CastExpression")
+                || astNode.equals("ClassInstanceCreation") || astNode.equals("ConditionalExpression") || astNode.equals("CreationReference")
+                || astNode.equals("ExpressionMethodReference") || astNode.equals("FieldAccess") || astNode.equals("InfixExpression")
+                || astNode.equals("InstanceofExpression") || astNode.equals("LambdaExpression") || astNode.equals("MethodInvocation")
+                || astNode.equals("MethodReference") || astNode.equals("ParenthesizedExpression") || astNode.equals("PostfixExpression")
+                || astNode.equals("PrefixExpression") || astNode.equals("SuperFieldAccess") || astNode.equals("SuperMethodInvocation")
+                || astNode.equals("SuperMethodReference") || astNode.equals("TypeLiteral") || astNode.equals("TypeMethodReference")
+                || astNode.equals("VariableDeclarationExpression");
+    }
+
+    /**
+     * Searches {@code actionSet} and its descendants, depth first, for a non-INS action set
+     * with the given start position and length.
+     *
+     * @param position  the start position to match.
+     * @param length    the length to match.
+     * @param actionSet the root of the search.
+     * @return the first matching action set, or null when none matches.
+     */
+    private HierarchicalActionSet findHierarchicalActionSet(int position, int length, HierarchicalActionSet actionSet) {
+        if (actionSet.getStartPosition() == position && actionSet.getLength() == length && !actionSet.getActionString().startsWith("INS")) {
+            return actionSet;
+        } else {
+            for (HierarchicalActionSet subActionSet : actionSet.getSubActions()) {
+                HierarchicalActionSet actSet = findHierarchicalActionSet(position, length, subActionSet);
+                if (actSet != null) {
+                    return actSet;
+                }
+            }
+        }
+        return null;
+    }
+
+    /** @return the table from concrete type identifiers to abstract labels. */
+    public Map<String, String> getAbstractTypeIdentifiers() {
+        return abstractTypeIdentifiers;
+    }
+
+    /** @return the table from concrete method identifiers to abstract labels. */
+    public Map<String, String> getAbstractMethodIdentifiers() {
+        return abstractMethodIdentifiers;
+    }
+
+    /** @return the table from concrete name identifiers to abstract labels. */
+    public Map<String, String> getAbstractNameIdentifiers() {
+        return abstractNameIdentifiers;
+    }
+
+    /** @return the table from concrete variable identifiers to abstract labels. */
+    public Map<String, String> getAbstractVariableIdentifiers() {
+        return abstractVariableIdentifiers;
+    }
+
+}
diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/Traveler.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/Traveler.java
new file mode 100644
index 0000000..8c2b85b
--- /dev/null
+++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/Traveler.java
@@ -0,0 +1,148 @@
+package edu.lu.uni.serval.gumtree.regroup;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.github.gumtreediff.tree.ITree;
+
+import edu.lu.uni.serval.gumtree.utils.ASTNodeMap;
+
+/**
+ * A traveler to travel a tree-constructed object.
+ *
+ * @author kui.liu
+ *
+ */
+public class Traveler {
+
+    /** Action paths collected by {@link #travelActionSetDeepFirstToASTNodeQueue}: one list per finished path. */
+    public List<List<String>> list = new ArrayList<>();
+
+    /**
+     * Collects action strings by traveling a HierarchicalActionSet in a depth-first way.
+     *
+     * Each action string is cut at its first "@@" (kept whole when it has none) and appended
+     * to the current path. A path is stored in {@link #list} when it reaches a DEL action or
+     * an action without sub-actions; otherwise every sub-action continues its own copy of the path.
+     *
+     * @param actionSet              the action set to travel; null is reported on stderr and ignored.
+     * @param astNodeTypeActionQueue the path collected so far; null starts a new path.
+     */
+    public void travelActionSetDeepFirstToASTNodeQueue(HierarchicalActionSet actionSet, List<String> astNodeTypeActionQueue) {
+        if (actionSet == null) {
+            System.err.println("Null Action set!");
+            return;
+        }
+        if (astNodeTypeActionQueue == null) {
+            astNodeTypeActionQueue = new ArrayList<>();
+        }
+
+        String actionStr = actionSet.getActionString();
+        int separator = actionStr.indexOf("@@");
+        if (separator >= 0) { // substring(0, -1) would throw when "@@" is absent
+            actionStr = actionStr.substring(0, separator);
+        }
+        astNodeTypeActionQueue.add(actionStr); // RawToken: TODO
+
+        if (actionStr.startsWith("DEL")) {
+            // FIXME BUG: Change AST node type 1 to AST node type 2. Solve method: a list is one pattern.
+            list.add(astNodeTypeActionQueue);
+            return;
+        }
+
+        List<HierarchicalActionSet> subActionSet = actionSet.getSubActions();
+        if (subActionSet.isEmpty()) {
+            list.add(astNodeTypeActionQueue);
+            return;
+        }
+        for (HierarchicalActionSet subAction : subActionSet) {
+            // each branch extends its own copy so sibling paths do not share elements
+            travelActionSetDeepFirstToASTNodeQueue(subAction, new ArrayList<>(astNodeTypeActionQueue));
+        }
+    }
+
+    /**
+     * Get all AST node types of a root tree by traveling the root tree in a depth-first way.
+     *
+     * @param root the tree to travel.
+     * @return the node type names in pre-order, or null when {@code root} is null.
+     */
+    public static List<String> travelTreeDeepFirstToASTNodeQueue(ITree root) {
+        if (root == null) {
+            System.err.println("Null tree!");
+            return null;
+        }
+
+        List<String> astNodeTypeQueue = new ArrayList<>();
+        astNodeTypeQueue.add(ASTNodeMap.map.get(root.getType())); // RawToken: root.getLabel();
+
+        List<ITree> childrenTreeList = root.getChildren();
+        if (childrenTreeList != null) {
+            for (ITree childTree : childrenTreeList) {
+                astNodeTypeQueue.addAll(travelTreeDeepFirstToASTNodeQueue(childTree));
+            }
+        }
+        return astNodeTypeQueue;
+    }
+
+    /**
+     * Get all AST node types of a root tree by traveling the root tree in a breadth-first way.
+     *
+     * @param root the tree to travel.
+     * @return the node type names level by level, or null when {@code root} is null.
+     */
+    public static List<String> travelTreeBreadthFirstToASTNodeQueue(ITree root) {
+        if (root == null) {
+            System.err.println("Null tree.");
+            return null;
+        }
+
+        List<String> astNodeTypeQueue = new ArrayList<>();
+        List<ITree> currentLevel = new ArrayList<>();
+        currentLevel.add(root);
+        // Iterative level-order walk: every tree is visited exactly once and its children are
+        // queued for the next pass, so the loop ends when a level has no children left.
+        while (!currentLevel.isEmpty()) {
+            List<ITree> nextLevel = new ArrayList<>();
+            for (ITree tree : currentLevel) {
+                astNodeTypeQueue.add(ASTNodeMap.map.get(tree.getType())); // RawToken: tree.getLabel();
+                List<ITree> children = tree.getChildren();
+                if (children != null) {
+                    nextLevel.addAll(children);
+                }
+            }
+            currentLevel = nextLevel;
+        }
+        return astNodeTypeQueue;
+    }
+
+    /**
+     * Convert a root ITree into a SimpleTree by traveling the root tree in a depth-first way.
+     *
+     * SimpleTree node label is root.toShortString().
+     *
+     * @param root   the tree to convert.
+     * @param parent the simple tree set as parent of the returned node.
+     * @return the converted simple tree, or null when {@code root} is null.
+     */
+    public static SimpleTree travelITreeDeepFirstToSimpleTree(ITree root, SimpleTree parent) {
+        if (root == null) {
+            System.err.println("Null tree!");
+            return null;
+        }
+        SimpleTree simpleTree = new SimpleTree();
+        simpleTree.setLabel(root.toShortString());
+        simpleTree.setParent(parent);
+
+        List<SimpleTree> children = new ArrayList<>();
+        List<ITree> childrenTreeList = root.getChildren();
+        if (childrenTreeList != null) {
+            for (ITree childTree : childrenTreeList) {
+                children.add(travelITreeDeepFirstToSimpleTree(childTree, simpleTree));
+            }
+        }
+        simpleTree.setChildren(children);
+        return simpleTree;
+    }
+
+}
diff --git a/src/main/java/edu/lu/uni/serval/gumtree/utils/ASTNodeMap.java b/src/main/java/edu/lu/uni/serval/gumtree/utils/ASTNodeMap.java
new file mode 100644
index 0000000..fe56da4
--- /dev/null
+++ b/src/main/java/edu/lu/uni/serval/gumtree/utils/ASTNodeMap.java
@@ -0,0 +1,116 @@
+package edu.lu.uni.serval.gumtree.utils;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Lookup table from integer AST node type codes to AST node type names.
+ *
+ * Keys 0 to 92 carry AST node class names; keys -3 to -1 carry the extra
+ * entries "Instanceof", "New" and "Operator".
+ */
+public class ASTNodeMap {
+
+    /** Node type code to node type name. Filled once by the static initializer below. */
+    public static Map<Integer, String> map;
+
+    static {
+        map = new HashMap<Integer, String>();
+        map.put(-3, "Instanceof");
+        map.put(-2, "New");
+        map.put(-1, "Operator");
+        map.put(0, "ASTNode");
+        map.put(1, "AnonymousClassDeclaration");
+        map.put(2, "ArrayAccess");
+        map.put(3, "ArrayCreation");
+        map.put(4, "ArrayInitializer");
+        map.put(5, "ArrayType");
+        map.put(6, "AssertStatement");
+        map.put(7, "Assignment");
+        map.put(8, "Block");
+        map.put(9, "BooleanLiteral");
+        map.put(10, "BreakStatement");
+        map.put(11, "CastExpression");
+        map.put(12, "CatchClause");
+        map.put(13, "CharacterLiteral");
+        map.put(14, "ClassInstanceCreation");
+        map.put(15, "CompilationUnit");
+        map.put(16, "ConditionalExpression");
+        map.put(17, "ConstructorInvocation");
+        map.put(18, "ContinueStatement");
+        map.put(19, "DoStatement");
+        map.put(20, "EmptyStatement");
+        map.put(21, "ExpressionStatement");
+        map.put(22, "FieldAccess");
+        map.put(23, "FieldDeclaration");
+        map.put(24, "ForStatement");
+        map.put(25, "IfStatement");
+        map.put(26, "ImportDeclaration");
+        map.put(27, "InfixExpression");
+        map.put(28, "Initializer");
+        map.put(29, "Javadoc");
+        map.put(30, "LabeledStatement");
+        map.put(31, "MethodDeclaration");
+        map.put(32, "MethodInvocation");
+        map.put(33, "NullLiteral");
+        map.put(34, "NumberLiteral");
+        map.put(35, "PackageDeclaration");
+        map.put(36, "ParenthesizedExpression");
+        map.put(37, "PostfixExpression");
+        map.put(38, "PrefixExpression");
+        map.put(39, "PrimitiveType");
+        map.put(40, "QualifiedName");
+        map.put(41, "ReturnStatement");
+        map.put(42, "SimpleName");
+        map.put(43, "SimpleType");
+        map.put(44, "SingleVariableDeclaration");
+        map.put(45, "StringLiteral");
+        map.put(46, "SuperConstructorInvocation");
+        map.put(47, "SuperFieldAccess");
+        map.put(48, "SuperMethodInvocation");
+        map.put(49, "SwitchCase");
+        map.put(50, "SwitchStatement");
+        map.put(51, "SynchronizedStatement");
+        map.put(52, "ThisExpression");
+        map.put(53, "ThrowStatement");
+        map.put(54, "TryStatement");
+        map.put(55, "TypeDeclaration");
+        map.put(56, "TypeDeclarationStatement");
+        map.put(57, "TypeLiteral");
+        map.put(58, "VariableDeclarationExpression");
+        map.put(59, "VariableDeclarationFragment");
+        map.put(60, "VariableDeclarationStatement");
+        map.put(61, "WhileStatement");
+        map.put(62, "InstanceofExpression");
+        map.put(63, "LineComment");
+        map.put(64, "BlockComment");
+        map.put(65, "TagElement");
+        map.put(66, "TextElement");
+        map.put(67, "MemberRef");
+        map.put(68, "MethodRef");
+        map.put(69, "MethodRefParameter");
+        map.put(70, "EnhancedForStatement");
+        map.put(71, "EnumDeclaration");
+        map.put(72, "EnumConstantDeclaration");
+        map.put(73, "TypeParameter");
+        map.put(74, "ParameterizedType");
+        map.put(75, "QualifiedType");
+        map.put(76, "WildcardType");
+        map.put(77, "NormalAnnotation");
+        map.put(78, "MarkerAnnotation");
+        map.put(79, "SingleMemberAnnotation");
+        map.put(80, "MemberValuePair");
+        map.put(81, "AnnotationTypeDeclaration");
+        map.put(82, "AnnotationTypeMemberDeclaration");
+        map.put(83, "Modifier");
+        map.put(84, "UnionType");
+        map.put(85, "Dimension");
+        map.put(86, "LambdaExpression");
+        map.put(87, "IntersectionType");
+        map.put(88, "NameQualifiedType");
+        map.put(89, "CreationReference");
+        map.put(90, "ExpressionMethodReference");
+        map.put(91, "SuperMethodReference");
+        map.put(92, "TypeMethodReference");
+    }
+}