From 32ee70699505a8c72b987cd5189c0c4fa2d5d780 Mon Sep 17 00:00:00 2001 From: Kui LIU Date: Wed, 9 Aug 2017 18:11:01 +0200 Subject: [PATCH] Update. --- pom.xml | 11 - .../serval/FixPattern/info/FixPattern.java | 24 - .../FixPattern/info/GumTreeAnalysis.java | 163 ----- .../FixPatternMining/App/Specifications.txt | 47 -- .../serval/FixPatternMining/App/Step1.java | 35 - .../serval/FixPatternMining/App/Step10.java | 33 - .../serval/FixPatternMining/App/Step11.java | 37 - .../serval/FixPatternMining/App/Step12.java | 40 -- .../serval/FixPatternMining/App/Step13.java | 165 ----- .../serval/FixPatternMining/App/Step14.java | 225 ------ .../serval/FixPatternMining/App/Step2.java | 25 - .../serval/FixPatternMining/App/Step3.java | 24 - .../serval/FixPatternMining/App/Step4.java | 25 - .../serval/FixPatternMining/App/Step5.java | 24 - .../serval/FixPatternMining/App/Step6.java | 23 - .../serval/FixPatternMining/App/Step7.java | 47 -- .../serval/FixPatternMining/App/Step9.java | 33 - .../uni/serval/FixPatternMining/Cluster.java | 45 -- .../FixPatternMining/ClusterAnalyser.java | 146 ---- .../FixPatternMining/ClusterResults.java | 35 - .../FixPatternMining/CommonPatterns.java | 86 --- .../DataPrepare/DataPreparation.java | 661 ------------------ .../DataPrepare/MaxSizeSelector.java | 66 -- .../FixPatternMining/FeatureLearner.java | 121 ---- .../FixPatternMining/TokenEmbedder.java | 65 -- .../serval/FixPatternParser/Tokenizer.java | 2 + .../CommitPatchSingleStatementParser.java | 5 +- .../violations/FixedViolationHunkParser.java | 185 +++-- .../violations/FixedViolationParser.java | 56 +- .../FixedViolationSingleStatementParser.java | 20 +- .../violations/TestHunkParser.java | 269 +++++++ .../violations/Violation.java | 95 +++ .../MultipleThreadsParser/AkkaParser.java | 66 +- .../ParseFixPatternActor.java | 18 +- .../ParseFixPatternWorker.java | 78 ++- .../App => bugLocalization}/Step8.java | 3 +- .../lu/uni/serval/config/Configuration.java | 8 +- .../uni/serval/diffentry/DiffEntryHunk.java | 5 + .../regroup/HierarchicalActionSet.java | 2 +- .../regroup/HierarchicalRegrouper.java | 5 +- .../gumtree/regroup/HunkActionFilter.java | 500 ++++++++++--- .../serval/gumtree/regroup/SimplifyTree.java | 16 +- .../edu/lu/uni/serval/violation/Alarm.java | 29 +- .../lu/uni/serval/violation/Violation.java | 6 + .../serval/violation/parse/AlarmsReader.java | 38 +- .../violation/parse/TestViolationParser.java | 2 +- .../violation/parse/ViolationParser.java | 138 +++- 47 files changed, 1242 insertions(+), 2510 deletions(-) delete mode 100644 src/main/java/edu/lu/uni/serval/FixPattern/info/FixPattern.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPattern/info/GumTreeAnalysis.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Specifications.txt delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step1.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step10.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step11.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step12.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step13.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step14.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step2.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step3.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step4.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step5.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step6.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step7.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step9.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/Cluster.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterAnalyser.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterResults.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/CommonPatterns.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/DataPreparation.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/MaxSizeSelector.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/FeatureLearner.java delete mode 100644 src/main/java/edu/lu/uni/serval/FixPatternMining/TokenEmbedder.java create mode 100644 src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java create mode 100644 src/main/java/edu/lu/uni/serval/FixPatternParser/violations/Violation.java rename src/main/java/edu/lu/uni/serval/{FixPatternMining/App => bugLocalization}/Step8.java (89%) diff --git a/pom.xml b/pom.xml index 412a205..6cb33af 100644 --- a/pom.xml +++ b/pom.xml @@ -16,17 +16,6 @@ - edu.lu.uni diff --git a/src/main/java/edu/lu/uni/serval/FixPattern/info/FixPattern.java b/src/main/java/edu/lu/uni/serval/FixPattern/info/FixPattern.java deleted file mode 100644 index 2e9ae88..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPattern/info/FixPattern.java +++ /dev/null @@ -1,24 +0,0 @@ -package edu.lu.uni.serval.FixPattern.info; - -import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; -import edu.lu.uni.serval.gumtree.regroup.SimpleTree; - -public class FixPattern { - private SimpleTree buggyCodeTree; // it will be used to compute the similarity. - private HierarchicalActionSet editScripts; // it will be used to generate new patches. - - public SimpleTree getBuggyCodeTree() { - return buggyCodeTree; - } - - public HierarchicalActionSet getEditScripts() { - return editScripts; - } - - public FixPattern(SimpleTree buggyCodeTree, HierarchicalActionSet editScripts) { - super(); - this.buggyCodeTree = buggyCodeTree; - this.editScripts = editScripts; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPattern/info/GumTreeAnalysis.java b/src/main/java/edu/lu/uni/serval/FixPattern/info/GumTreeAnalysis.java deleted file mode 100644 index 30eaea5..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPattern/info/GumTreeAnalysis.java +++ /dev/null @@ -1,163 +0,0 @@ -package edu.lu.uni.serval.FixPattern.info; - -//import java.io.File; -//import java.io.IOException; -//import java.util.ArrayList; -//import java.util.HashMap; -//import java.util.List; -//import java.util.Map; - -import org.eclipse.jdt.core.dom.ASTParser; -//import org.slf4j.Logger; -//import org.slf4j.LoggerFactory; - -//import com.github.gumtreediff.actions.ActionGenerator; -//import com.github.gumtreediff.actions.model.Action; -//import com.github.gumtreediff.gen.jdt.JdtTreeGenerator; -//import com.github.gumtreediff.gen.jdt.cd.CdJdtTreeGenerator; -//import com.github.gumtreediff.matchers.Matcher; -//import com.github.gumtreediff.matchers.Matchers; -import com.github.gumtreediff.tree.ITree; -import com.github.gumtreediff.tree.TreeContext; - -import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap; -import edu.lu.uni.serval.gen.jdt.exp.ExpJdtTreeGenerator; - -@Deprecated -public class GumTreeAnalysis { - - -// private static void analyzeBugFixes(String gitRepoPath, String outputPath) { -// log.info("Repo: " + gitRepoPath); -// -// GitTraveller gitTraveller = new GitTraveller(gitRepoPath, outputPath); -// gitTraveller.travelGitRepo(); -//// Map> commitFiles = gitTraveller.getCommitFiles(); -// -// List allDiffEntries = gitTraveller.getAllDiffEntries(); -// String previousFilesPath = gitTraveller.getPreviousFilesPath(); -// String revisedFilesPath = gitTraveller.getRevisedFilesPath(); -// for (MyDiffEntry diff : allDiffEntries) { -// String fileA = previousFilesPath + diff.getPrevFile(); -// String fileB = revisedFilesPath + diff.getRevFile(); -// List gumTreeResults = GumTreeAnalysis.compareTwoFilesWithGumTree(fileA, fileB); -// if (gumTreeResults.size() == 0) { -// continue; -// } -// StringBuilder builder = new StringBuilder(); -// builder.append("Previous File: " + fileA + "\n"); -// builder.append("Revised File: " + fileB + "\n"); -// String diffs = ""; -// for (ModifiedDetails md : diff.getModifiedDetails()) { -// diffs += md.getLineNumber() + "\n"; -// diffs += md.getFragment() + "\n"; -// } -// builder.append("DiffEntry: " + diffs); -// for (String gumTreeResult : gumTreeResults) { -// builder.append(gumTreeResult.toString() + "\n"); -// } -// FileHelper.outputToFile("OUTPUT/GumTreeResults/" + FileHelper.getRepositoryName(gitRepoPath) + "/" + diff.getRevFile().replace(".java", ".txt"), builder, false); -// } -// -//// DiffEntryParser diffEntryParser = new DiffEntryParser(allDiffEntries); -//// diffEntryParser.parseDiffEntries(); -//// -//// // : String ==> revisedFileName. -//// Map> parsedDiffEntries = diffEntryParser.getParsedDiffEntries(); -//// diffEntryParser = null; -//// allDiffEntries = null; -//// -//// gitTraveller = null; -//// -//// for (Map.Entry> entry : parsedDiffEntries.entrySet()) { -//// String revisedFileName = entry.getKey(); -//// String fileA = previousFilesPath + "prev_" + revisedFileName; -//// String fileB = revisedFilesPath + revisedFileName; -//// System.err.println("FileName" + fileA); -//// List gumTreeResults = GumTreeAnalysis.compareTwoFilesWithGumTree(fileA, fileB); -//// StringBuilder builder = new StringBuilder(); -//// builder.append("Previous File: " + fileA + "\n"); -//// builder.append("Revised File: " + fileB + "\n"); -//// builder.append("DiffEntry: "); -//// for (String gumTreeResult : gumTreeResults) { -//// builder.append(gumTreeResult + "\n"); -//// } -//// FileHelper.outputToFile("OUTPUT/GumTreeResults/" + FileHelper.getRepositoryName(gitRepoPath) + "/" + revisedFileName.replace(".java", ".txt"), builder, false); -//// } -// -// } -// -// public static List compareTwoFilesWithGumTree(String prevFile, String revFile) { -// List gumTreeResults = new ArrayList(); -// -// try { -//// TreeContext tc1 = new ExpJdtTreeGenerator().generateFromFile(prevFile); -//// TreeContext tc2 = new ExpJdtTreeGenerator().generateFromFile(revFile); -//// TreeContext tc1 = new JdtTreeGenerator().generateFromFile(prevFile); -//// TreeContext tc2 = new JdtTreeGenerator().generateFromFile(revFile); -// TreeContext tc1 = new RowTokenJdtTreeGenerator().generateFromFile(prevFile); -// TreeContext tc2 = new RowTokenJdtTreeGenerator().generateFromFile(revFile); -//// TreeContext tc1 = new CdJdtTreeGenerator().generateFromFile(prevFile); -//// TreeContext tc2 = new CdJdtTreeGenerator().generateFromFile(revFile); -// ITree t1 = tc1.getRoot(); -// ITree t2 = tc2.getRoot(); -// -// Matcher m = Matchers.getInstance().getMatcher(t1, t2); -// m.match(); -// -// ActionGenerator ag = new ActionGenerator(t1, t2, m.getMappings()); -// ag.generate(); -// -// List actions = ag.getActions(); -// for(Action ac : actions){ -// String actionStr = parseAction(ac.toString()); -// gumTreeResults.add(actionStr); -// } -// -// } catch (IOException e) { -// e.printStackTrace(); -// } -// return gumTreeResults; -// } - -// private static String parseAction(String actStr) { -// // UPD 25@@!a from !a to isTrue(a) at 69 -// String[] actStrArrays = actStr.split("@@"); -// actStr = ""; -// int length = actStrArrays.length; -// for (int i = 0; i < length - 1; i++) { -// String actStrFrag = actStrArrays[i]; -// int index = actStrFrag.lastIndexOf(" ") + 1; -// String nodeType = actStrFrag.substring(index); -// String backup = nodeType; -// try { -// nodeType = ASTNodeMap.map.get(Integer.parseInt(nodeType)); -// } catch (NumberFormatException e) { -// nodeType = backup; -// log.info(actStr); -// } -// actStrFrag = actStrFrag.substring(0, index) + nodeType + "@@"; -// actStr += actStrFrag; -// } -// actStr += actStrArrays[length - 1]; -// return actStr; -// } - - private static String parseAction(String actStr) { - // UPD 25@@!a from !a to isTrue(a) at 69 - String[] actStrArrays = actStr.split("@@"); - actStr = ""; - int length = actStrArrays.length; - for (int i =0; i < length - 1; i ++) { - String actStrFrag = actStrArrays[i]; - int index = actStrFrag.lastIndexOf(" ") + 1; - String nodeType = actStrFrag.substring(index); - nodeType = ASTNodeMap.map.get(Integer.parseInt(nodeType)); - actStrFrag = actStrFrag.substring(0, index) + nodeType + "@@"; - actStr += actStrFrag; - } - actStr += actStrArrays[length - 1]; - return actStr; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Specifications.txt b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Specifications.txt deleted file mode 100644 index c434cb6..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Specifications.txt +++ /dev/null @@ -1,47 +0,0 @@ -Step 1: - Prepare data for tokens embedding of edit scripts. - Input data: parsed results of patches with GumTree. - Select token vectors of edit scripts by the value of upper whisker. - -Step 2: - Embed tokens of all selected edit scripts. - -Step 3: - Prepare data for features learning of selected edit scripts. - Vectorize edit scripts with embedded tokens of edit scripts. - -Step 4: - Learn features of all selected edit scripts with CNN algorithm. - Input data: vectorized edit scripts. - -Step 5: - Prepare data for clustering of edit scripts. - Input data: learned features of edit scripts by CNN. - -Step 6: - Clustering of edit scripts with extracted features of edit scripts. - -Step 7: - Analyze cluster results to obtain common fix patterns. - -Step 8: - Prepare testing data for evaluation. - Parse java projects to get the token vectors of all statements. - -Step 9: - Prepare data for evaluation. - Merge token vectors of source code of training data and testing data. - -Step 10: - Prepare data for evaluation. - Embed tokens of source code vectors of training data and testing data. - -Step 11: - Prepare data for evaluation. - Vectorize data (token vectors of source code) for deep learning. - -Step 12: - Evaluation: extract features of testing data and predict their labels. - -Step 13: - \ No newline at end of file diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step1.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step1.java deleted file mode 100644 index e41cd3d..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step1.java +++ /dev/null @@ -1,35 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Prepare data for tokens embedding of edit scripts. - * - * Input data: parsed results of patches with GumTree. - * - * @author kui.liu - * - */ -public class Step1 { - public static void main(String[] args) { - String editScriptsFile = Configuration.EDITSCRIPTS_FILE; - String patchesSourceCodeFile = Configuration.PATCH_SOURCECODE_FILE; - String buggyTokensFile = Configuration.BUGGY_CODE_TOKENS_FILE; - String editScriptSizesFile = Configuration.EDITSCRIPT_SIZES_FILE; - FileHelper.deleteFile(editScriptsFile); - FileHelper.deleteFile(patchesSourceCodeFile); - FileHelper.deleteFile(buggyTokensFile); - FileHelper.deleteFile(editScriptSizesFile); - - String selectedEditScripts = Configuration.SELECTED_EDITSCRIPTES_FILE; - String selectedPatches = Configuration.SELECTED_PATCHES_SOURE_CODE_FILE; - String selectedBuggyTokens = Configuration.SELECTED_BUGGY_TOKEN_FILE; - FileHelper.deleteFile(selectedEditScripts); - FileHelper.deleteFile(selectedPatches); - FileHelper.deleteFile(selectedBuggyTokens); - - DataPreparation.prepareDataForTokenEmbedding(); - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step10.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step10.java deleted file mode 100644 index a6b4132..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step10.java +++ /dev/null @@ -1,33 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import edu.lu.uni.serval.FixPatternMining.TokenEmbedder; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Prepare data for evaluation. - * - * Embed tokens of source code vectors of training data and testing data. - * - * @author kui.liu - * - */ -public class Step10 { - - public static void main(String[] args) { - boolean isSupervisedLearning = true; - if (isSupervisedLearning) {// supervised learning - String outputFileName = Configuration.EMBEDDED_ALL_TOKENS2; - FileHelper.deleteFile(outputFileName); - // Data pre-processing - TokenEmbedder embedder2 = new TokenEmbedder(); - embedder2.embedTokensOfSourceCodeForSupervisedTesting(); - } else { // un-supervised learning - String outputFileName = Configuration.EMBEDDED_ALL_TOKENS1; - FileHelper.deleteFile(outputFileName); - // Data pre-processing - TokenEmbedder embedder2 = new TokenEmbedder(); - embedder2.embedTokensOfSourceCodeForUnsupervisedTesting(); - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step11.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step11.java deleted file mode 100644 index 229ed78..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step11.java +++ /dev/null @@ -1,37 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import java.util.Map; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Prepare data for evaluation. - * - * Vectorize data for deep learning. - * - * @author kui.liu - * - */ -public class Step11 { - - public static void main(String[] args) { - boolean isSupervisedLearning = true; - if (isSupervisedLearning) {// supervised learning - String trainingDataPath = Configuration.TRAINING_DATA; - FileHelper.deleteFile(trainingDataPath); - String testingDataPath = Configuration.TESTING_DATA; - FileHelper.deleteDirectory(testingDataPath); - - Map commonClustersMappingLabel = DataPreparation.readCommonCLusters(); - DataPreparation.prepareDataForFeatureLearningOfEvaluation2(commonClustersMappingLabel); - } else { // un-supervised learning - String outputData = Configuration.VECTORIED_ALL_SOURCE_CODE1; - FileHelper.deleteFile(outputData); - // Before embedding tokens. - // List files = FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list"); - DataPreparation.prepareDataForFeatureLearningOfEvaluation1(); - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step12.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step12.java deleted file mode 100644 index 7abe95e..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step12.java +++ /dev/null @@ -1,40 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import java.io.File; -import java.util.List; - -import edu.lu.uni.serval.FixPatternMining.FeatureLearner; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Evaluation: extract features of testing data and predict their labels. - * - * @author kui.liu - * - */ -public class Step12 { - - public static void main(String[] args) { - boolean isSupervisedLearning = true; - if (isSupervisedLearning) {// supervised learning - List testingDataFiles = FileHelper.getAllFilesInCurrentDiectory(Configuration.TESTING_DATA, ".csv"); - for (int i = 0, size = testingDataFiles.size(); i < size; i ++) { - if (i == 0) { - // TODO: we can test this model by our clustered resutls. - FeatureLearner learner2 = new FeatureLearner(); - learner2.learnFeaturesOfSourceCode2(testingDataFiles.get(i)); - } else { - FeatureLearner learner2 = new FeatureLearner(); - learner2.learnFeaturesOfSourceCode3(testingDataFiles.get(i)); - } - } - } else { // un-supervised learning - - FeatureLearner learner2 = new FeatureLearner(); - learner2.learnFeaturesOfSourceCode(); - // Extracted Features: Configuration.EXTRACTED_FEATURES_TESTING; - // Compute the similarity: cosin similarity - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step13.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step13.java deleted file mode 100644 index 61b997e..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step13.java +++ /dev/null @@ -1,165 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Scanner; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Separate features of training data cluster by cluster. - * - * Classify testing data by possibilities: 90%, 80%, 70%, and 60%, ignore others. - * - * @author kui.liu - * - */ -public class Step13 { - - public static void main(String[] args) { - boolean isSupervisedLearning = true; - if (isSupervisedLearning) {// supervised learning - // features of training data - - DataPreparation.separateTrainingDataFeatures(); - - String positionFiles = Configuration.TEST_POSITION_FILE; - String featureFiles = Configuration.FEATURES_OF_TESTING_DATA; - String labelFiles = Configuration.PREDICTED_RESULTS_OF_TESTING_DATA; - String possibilitiesFilePath = Configuration.POSSIBILITIES_OF_TESTING_DATA; - List possibilitiesFiles = FileHelper.getAllFilesInCurrentDiectory(possibilitiesFilePath, ".csv"); - - String bugs90 = Configuration.TESTING_DATA_BUGS90; - String bugs80 = Configuration.TESTING_DATA_BUGS80; - String bugs70 = Configuration.TESTING_DATA_BUGS70; - String bugs60 = Configuration.TESTING_DATA_BUGS60; - - StringBuilder builder90 = new StringBuilder(); - int counter90 = 0; - StringBuilder builder80 = new StringBuilder(); - int counter80 = 0; - StringBuilder builder70 = new StringBuilder(); - int counter70 = 0; - StringBuilder builder60 = new StringBuilder(); - int counter60 = 0; - for (File possibilitiesFile : possibilitiesFiles) { - String fileName = possibilitiesFile.getName(); - String positionFile = positionFiles + "Positions" + fileName.substring(fileName.lastIndexOf("_"), fileName.lastIndexOf(".")) + ".list"; - String featureFile = featureFiles + fileName; - String labelFile = labelFiles + fileName; - - List possibilities = readData(fileName); - List positions = readData(positionFile); - List features = readData(featureFile); - List labels = readLabel(labelFile); - for (int index = 0, size = possibilities.size(); index < size; index ++) { - String possibilityStr = possibilities.get(index); - String[] array = possibilityStr.split(", "); - int label = labels.get(index); - double possibility = Double.parseDouble(array[label]); - - String position = positions.get(index); - String feature = features.get(index); - // And Label - if (possibility >= 0.9) { - builder90.append("LABEL:" + label + "Feature:" + feature + "Position:" + position + "\n"); - counter90 ++; - if (counter90 % 1000 == 0) { - FileHelper.outputToFile(bugs90, builder90, true); - builder90.setLength(0); - } - } else if (possibility >= 0.8) { - builder80.append("LABEL:" + label + "Feature:" + feature + "Position:" + position + "\n"); - counter80 ++; - if (counter80 % 1000 == 0) { - FileHelper.outputToFile(bugs80, builder80, true); - builder80.setLength(0); - } - } else if (possibility >= 0.7) { - builder70.append("LABEL:" + label + "Feature:" + feature + "Position:" + position + "\n"); - counter70 ++; - if (counter70 % 1000 == 0) { - FileHelper.outputToFile(bugs70, builder70, true); - builder70.setLength(0); - } - } else if (possibility >= 0.6) { - builder60.append("LABEL:" + label + "Feature:" + feature + "Position:" + position + "\n"); - counter60 ++; - if (counter60 % 1000 == 0) { - FileHelper.outputToFile(bugs60, builder60, true); - builder60.setLength(0); - } - } - } - } - FileHelper.outputToFile(bugs90, builder90, true); - FileHelper.outputToFile(bugs80, builder80, true); - FileHelper.outputToFile(bugs70, builder70, true); - FileHelper.outputToFile(bugs60, builder60, true); - // label: clusterNum, re-compute similarity with each element. 90, 80, 70, 60. - // similarity: patches --> fixing bug. - } else { // un-supervised learning - - // Extracted Features: Configuration.EXTRACTED_FEATURES_TESTING; - // Compute the similarity: cosin similarity - } - } - - private static List readLabel(String labelFile) { - List labels = new ArrayList<>(); - String fileContent = FileHelper.readFile(labelFile); - BufferedReader reader = null; - try { - reader = new BufferedReader(new StringReader(fileContent)); - String line = null; - while ((line = reader.readLine()) != null) { - String[] labelsStr = line.split(", "); - for (int i = 0, length = labelsStr.length; i < length; i ++) { - Double d = Double.parseDouble(labelsStr[i]); - labels.add(d.intValue()); - } - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return labels; - } - - private static List readData(String positionFile) { - List positions = new ArrayList<>(); - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(positionFile); - scanner = new Scanner(fis); - while (scanner.hasNextLine()) { - positions.add(scanner.nextLine()); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return null; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step14.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step14.java deleted file mode 100644 index e2e6713..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step14.java +++ /dev/null @@ -1,225 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Scanner; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; -import edu.lu.uni.serval.utils.ListSorter; -import edu.lu.uni.serval.utils.MapSorter; - -/** - * Compute similarities for each potential bug instance by computing the similarities with all instances in related cluster. - * - * List top 5 most similar instances? - * - * @author kui.liu - * - */ -public class Step14 { - - public static void main(String[] args) { - String featuresOfTrainingDataPath = Configuration.FEATURES_OF_COMMON_CLUSTERS; - List featuresOfTrainingDataFiles = FileHelper.getAllFiles(featuresOfTrainingDataPath, ".csv");// TODO: type - Map> features = readFeaturesOfTrainingData(featuresOfTrainingDataFiles); - Map> patches = readPatchesOfTraingData(Configuration.CLUSTERED_PATCHES_FILE); - Map labelMapClusterNum = DataPreparation.readLabelMapClusterNum(); - - // potential bugs' information - String bugs90 = Configuration.TESTING_DATA_BUGS90; - String bugs80 = Configuration.TESTING_DATA_BUGS80; - String bugs70 = Configuration.TESTING_DATA_BUGS70; - String bugs60 = Configuration.TESTING_DATA_BUGS60; - List bugsList = new ArrayList<>(); - bugsList.add(bugs90); - bugsList.add(bugs80); - bugsList.add(bugs70); - bugsList.add(bugs60); - - for (String bugs : bugsList) { - List bugsInfo = readData(bugs); - String filePath = bugs.substring(0, bugs.lastIndexOf(".")) + "/"; - StringBuilder builder = new StringBuilder(); - for (String singleBugInfo : bugsInfo) { - String[] infoArray = singleBugInfo.split(":"); - String label = infoArray[0];// TODO - String feature = infoArray[1]; // TODO - String position = infoArray[2]; // TODO - - int labelInt = Integer.parseInt(label); - int clusterNum = labelMapClusterNum.get(labelInt); - Map mostSimilarIndex = computeSimilarities(feature, features.get(clusterNum)); - List patchesIndex = new ArrayList<>(); - if (mostSimilarIndex.size() > 0) { - for (Map.Entry entry : mostSimilarIndex.entrySet()) { - patchesIndex.add(entry.getValue()); - } - - String bug = "BUG####" + position + "\n"; - String patchesStr = readPatches(patchesIndex, patches.get(Integer.parseInt(label))); - // output: bug + patchesStr; - builder.append(bug).append(patchesStr); - } - } - FileHelper.outputToFile(filePath + "patches.list", builder, false); - } - - } - - private static Map> readPatchesOfTraingData(String clusteredPatchesFile) { - Map> map = new HashMap<>(); - List files = FileHelper.getAllFiles(clusteredPatchesFile, ".list"); - for (File file : files) { - String fileName = file.getName(); - int clusterNum = Integer.parseInt(fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf("."))); - List patches = readPatches(file); - map.put(clusterNum, patches); - } - return map; - } - - private static List readPatches(File file) { - List patches = new ArrayList<>(); - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(file); - scanner = new Scanner(fis); - String singlePatch = ""; - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if (Configuration.PATCH_SIGNAL.equals(line)) { - if (!"".equals(singlePatch)) { - patches.add(singlePatch); - singlePatch = ""; - } - } - singlePatch += line + "\n"; - } - patches.add(singlePatch); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return patches; - } - - private static String readPatches(List patchesIndex, List patchesTrainingData) { - String patches = ""; - for(Integer index : patchesIndex) { - patches += patchesTrainingData.get(index) + "\n"; - } - return patches; - } - - private static Map computeSimilarities(String feature, List trainingFeatures) { - Map mostSimilarIndex = new HashMap<>(); - List similarities = new ArrayList<>(); - for (int i = 0; i < 10; i ++) similarities.add(0.0); - - for (int index = 0, size = trainingFeatures.size(); index < size; index ++) { - String trainingFeature = trainingFeatures.get(index); - double similarity = computeSimilarity(feature, trainingFeature); - double aborted = addToSimilarityies(similarity, similarities); - if (aborted > 0.0) { - mostSimilarIndex.put(similarity, index); - if (aborted == 0.1) mostSimilarIndex.remove(aborted); - } - } - - if (mostSimilarIndex.size() > 0) { - MapSorter sorter = new MapSorter<>(); - mostSimilarIndex = sorter.sortByKeyDescending(mostSimilarIndex); - } - return mostSimilarIndex; - } - - private static double addToSimilarityies(double similarity, List similarities) { - double lastSimilarity = similarities.get(9); - if (similarity >= 0.8 && similarity > lastSimilarity) { // TODO : 9 ? - similarities.set(9, similarity); - ListSorter sorter = new ListSorter(similarities); - similarities = sorter.sortDescending(); - return lastSimilarity == 0.0 ? 0.1 : lastSimilarity; - } - return 0.0; - } - - private static double computeSimilarity(String feature, String trainingFeature) { - // TODO Auto-generated method stub - return 0; - } - - private static Map> readFeaturesOfTrainingData(List featureFiles) { - Map> features = new HashMap<>(); - for (File file : featureFiles) { - String fileName = file.getName(); - String label = fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".")); - int clusterNum = Integer.parseInt(label); - List featuresList = readData(file.getPath()); - features.put(clusterNum, featuresList); - } - return features; - } - - private static List readData(String positionFile) { - List positions = new ArrayList<>(); - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(positionFile); - scanner = new Scanner(fis); - while (scanner.hasNextLine()) { - positions.add(scanner.nextLine()); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return null; - } - - public static void quickSort(List arr){ - qsort(arr, 0, arr.size() - 1); - } - private static void qsort(List arr, int low, int high){ - if (low < high){ - int pivot=partition(arr, low, high); //将数组分为两部分 - qsort(arr, low, pivot-1); //递归排序左子数组 - qsort(arr, pivot+1, high); //递归排序右子数组 - } - } - private static int partition(List arr, int low, int high){ - double pivot = arr.get(low); //枢轴记录 - while (low=pivot) --high; - arr.set(low, arr.get(high)); //交换比枢轴小的记录到左端 - while (low clusterResults = analyser.getClusterResults(); - - // Common patterns. - CommonPatterns commonPatterns = new CommonPatterns(); // Metrics TODO - // : - Map commonClustersMappingLabel = commonPatterns.identifyCommonPatterns(clusterResults); - String clusterMappingLabel = "Label : ClusterNum\n"; - for (Map.Entry entry : commonClustersMappingLabel.entrySet()) { - clusterMappingLabel += entry.getValue() + " : " + entry.getKey() + "\n"; - } - FileHelper.outputToFile(Configuration.CLUSTERNUMBER_LABEL_MAP, clusterMappingLabel, false); - - int totalNumberOfTrainingData = commonPatterns.getTotalNumberofTrainingData(); - FileHelper.outputToFile(Configuration.NUMBER_OF_TRAINING_DATA, "" + totalNumberOfTrainingData, false); - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step9.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step9.java deleted file mode 100644 index 474e9ca..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step9.java +++ /dev/null @@ -1,33 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.App; - -import java.util.Map; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Prepare data for evaluation. - * - * Merge token vectors of source code of training data and testing data. - * - * @author kui.liu - * - */ -public class Step9 { - - public static void main(String[] args) { - boolean isSupervisedLearning = true; - if (isSupervisedLearning) {// supervised learning - Map commonClustersMappingLabel = DataPreparation.readCommonCLusters(); - - String outputFile = Configuration.EMBEDDING_DATA_TOKENS2; - FileHelper.deleteFile(outputFile); - // Data merge - DataPreparation.prepareTokensForEvaluation2(commonClustersMappingLabel); - } else { // un-supervised learning - // Data merge - DataPreparation.prepareTokensForEvaluation1(); - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/Cluster.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/Cluster.java deleted file mode 100644 index 55beafa..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/Cluster.java +++ /dev/null @@ -1,45 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import edu.lu.uni.serval.Clusters.XMeansCluster; -import edu.lu.uni.serval.config.Configuration; -import weka.core.EuclideanDistance; - -/** - * Cluster features with X-means clustering algorithm. - * - * @author kui.liu - * - */ -public class Cluster { - - public void cluster() { - String arffFile = Configuration.CLUSTER_INPUT; - String clusterResults = Configuration.CLUSTER_OUTPUT; - - XMeansCluster cluster = new XMeansCluster(); - try { - /* - * The below 5 parameters have default values. - */ - cluster.setDistanceF(new EuclideanDistance()); - cluster.setUseKDTree(true); - cluster.setMaxNumberOfIterations(1000); - // The below 2 parameters are recommended to be the same. - cluster.setMaxKMeans(200); - cluster.setMaxKMeansForChildren(200); - - /* - * The values of the below 3 parameters should be set by developers. - */ - cluster.setSeed(200); - cluster.setMaxNumClusters(100); - cluster.setMinNumClusters(1); - - // X-means clustering is beginning. - cluster.cluster(arffFile, clusterResults); - // X-means clustering is finished. - } catch (Exception e) { - e.printStackTrace(); - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterAnalyser.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterAnalyser.java deleted file mode 100644 index bd9061b..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterAnalyser.java +++ /dev/null @@ -1,146 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Scanner; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; - -public class ClusterAnalyser { - - private List clusterResults; // each element is a cluster number. - - public void readClusterResutls() { - clusterResults = DataPreparation.readClusterResults(); - } - - public void clusterBuggyCodeTokens() { - String selectedTokens = Configuration.SELECTED_BUGGY_TOKEN_FILE; - String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE; - - FileInputStream fis = null; - Scanner scanner = null; - - Map builderMap = new HashMap<>(); - Map countersMap = new HashMap<>(); - try { - fis = new FileInputStream(selectedTokens); - scanner = new Scanner(fis); - int index = 0; - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - int clusterNum = clusterResults.get(index); - StringBuilder builder = getBuilder(builderMap, clusterNum); - builder.append(line).append("\n"); - int counter = getCounter(countersMap, clusterNum); - if (counter % 1000 == 0) { - FileHelper.outputToFile(clusteredTokens + "Tokens_" + clusterNum + ".list", builder, true); - builder.setLength(0); - builderMap.put(clusterNum, builder); - } - index ++; - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - for (Map.Entry entry : builderMap.entrySet()) { - int clusterNum = entry.getKey(); - StringBuilder builder = entry.getValue(); - FileHelper.outputToFile(clusteredTokens + "Tokens_" + clusterNum + ".list", builder, true); - builder.setLength(0); - } - } - - public void clusterPatchSourceCode() { - String selectedPatches = Configuration.SELECTED_PATCHES_SOURE_CODE_FILE; - String clusteredPatches = Configuration.CLUSTERED_PATCHES_FILE; - - FileInputStream fis = null; - Scanner scanner = null; - - Map builderMap = new HashMap<>(); - Map countersMap = new HashMap<>(); - try { - fis = new FileInputStream(selectedPatches); - scanner = new Scanner(fis); - String singlePatch = ""; - int index = -1; - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if ("".equals(line)) continue; - if ("PATCH###".equals(line)) { - if (!"".equals(singlePatch)) { - int clusterNum = clusterResults.get(index); - StringBuilder builder = getBuilder(builderMap, clusterNum); - builder.append(singlePatch); - int counter = getCounter(countersMap, clusterNum); - if (counter % 1000 == 0) { - FileHelper.outputToFile(clusteredPatches + "PatchesCluster_" + clusterNum + ".list", builder, true); - builder.setLength(0); - builderMap.put(clusterNum, builder); - } - } - singlePatch = ""; - index ++; - } - singlePatch += line + "\n"; - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - for (Map.Entry entry : builderMap.entrySet()) { - int clusterNum = entry.getKey(); - StringBuilder builder = entry.getValue(); - FileHelper.outputToFile(clusteredPatches + "PatchesCluster_" + clusterNum + ".list", builder, true); - builder.setLength(0); - } - } - - private int getCounter(Map countersMap, int clusterNum) { - int counter = 1; - if (countersMap.containsKey(clusterNum)) { - counter += countersMap.get(clusterNum); - } - countersMap.put(clusterNum, counter); - return counter; - } - - private StringBuilder getBuilder(Map builderMap, int clusterNum) { - if (builderMap.containsKey(clusterNum)) { - return builderMap.get(clusterNum); - } else { - StringBuilder builder = new StringBuilder(); - builderMap.put(clusterNum, builder); - return builder; - } - } - - public List getClusterResults() { - return clusterResults; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterResults.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterResults.java deleted file mode 100644 index 481c89b..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/ClusterResults.java +++ /dev/null @@ -1,35 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; - -import edu.lu.uni.serval.utils.FileHelper; - -public class ClusterResults { - - /** - * Read the cluster results from the file of cluster results. - * - * @param clusterResultsFile, the file of cluster results. - * @return List, each integer is a cluster number. - * @throws IOException - */ - public static List readClusterResults(File clusterResultsFile) throws IOException { - List clusterResultsList = new ArrayList<>(); - String clusterResults = FileHelper.readFile(clusterResultsFile); - BufferedReader reader = new BufferedReader(new StringReader(clusterResults)); - - String line = null; - while ((line = reader.readLine()) != null) { - int cluster = Integer.parseInt(line); - clusterResultsList.add(cluster); - } - - reader.close(); - return clusterResultsList; - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/CommonPatterns.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/CommonPatterns.java deleted file mode 100644 index 3df6d3b..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/CommonPatterns.java +++ /dev/null @@ -1,86 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.utils.FileHelper; -import edu.lu.uni.serval.utils.MapSorter; - -public class CommonPatterns { - - private static final int LEAST_NUMBER = 100; - private int totalNumberofTrainingData = 0; - - public Map identifyCommonPatterns(List clusterResults) { - Map> clusterMap = DataPreparation.readClusterResult(clusterResults); - // TODO how to select the common patterns, number or ratio? - List commonClusterNum = getCommonClustersByNumber(clusterMap); // Integer: clusterNum. - - Map clusterNumMapLabel = new HashMap<>(); // - for (int i = 0, size = commonClusterNum.size(); i < size; i ++) { - clusterNumMapLabel.put(commonClusterNum.get(i), i); - } - - return clusterNumMapLabel; - } - - private List getCommonClustersByNumber(Map> clusterMap) { - List commonClusterNum = new ArrayList<>(); - String numbersMapStr = "";// numbers of instances in each common cluster. - - for (Map.Entry> entry : clusterMap.entrySet()) { - List elements = entry.getValue(); - int size = elements.size(); - if (size >= LEAST_NUMBER) { // TODO how to set this threshold? - int key = entry.getKey(); - commonClusterNum.add(key); - totalNumberofTrainingData += size; - numbersMapStr += key + ":" + size + "\n"; - } - } - - FileHelper.outputToFile(Configuration.COMMON_CLUSTERS_SIZES, numbersMapStr, false); - - return commonClusterNum; - } - - private List getCommonClustersByRatio(Map> clusterMap, List clusterResults) { - List commonClusterNum = new ArrayList<>(); - - double sizes = (double) clusterResults.size(); - Map ratios = new HashMap<>(); - for (Map.Entry> entry : clusterMap.entrySet()) { - List elements = entry.getValue(); - ratios.put(entry.getKey(), (double) elements.size() / sizes); - } - - String numbersMapStr = "";// numbers of instances in each common cluster. - - MapSorter sorter = new MapSorter(); - ratios = sorter.sortByValueDescending(ratios); - double counterRatio = 0.0; - for (Map.Entry entry : ratios.entrySet()) { - counterRatio += entry.getValue(); - int key = entry.getKey(); - commonClusterNum.add(key); - numbersMapStr += key + ":" + clusterMap.get(key).size() + "\n"; - totalNumberofTrainingData += clusterMap.get(entry.getKey()).size(); - if (counterRatio >= 0.8) { // TODO: how to set the value of this threshold? - break; - } - } - - FileHelper.outputToFile(Configuration.COMMON_CLUSTERS_SIZES, numbersMapStr, false); - - return commonClusterNum; - } - - public int getTotalNumberofTrainingData() { - return totalNumberofTrainingData; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/DataPreparation.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/DataPreparation.java deleted file mode 100644 index 7eac160..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/DataPreparation.java +++ /dev/null @@ -1,661 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.DataPrepare; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Scanner; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.MaxSizeSelector.MaxSizeType; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.data.DataPreparer; -import edu.lu.uni.serval.utils.FileHelper; - -/** - * Prepare data for fix patterns mining and evaluation. - * - * @author kui.liu - * - */ -public class DataPreparation { - - /** - * Prepare data for token embedding in the process of fix patterns mining. - */ - public static void prepareDataForTokenEmbedding() { - // Collect all data into one file. - String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE_PATH; - String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH; - String buggyTokensFilePath = Configuration.BUGGYTREE_FILE_PATH; - String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH; - - String editScriptsFile = Configuration.EDITSCRIPT_SIZES_FILE; - String patchesSourceCodeFile = Configuration.PATCH_SOURCECODE_FILE; - String buggyTokensFile = Configuration.BUGGY_CODE_TOKENS_FILE; - String editScriptSizesFile = Configuration.EDITSCRIPT_SIZES_FILE; - File file = new File(editScriptsFilePath); - File[] subFiles = file.listFiles(); - - // Merge results of parsed patches. - for (File subFile : subFiles) { - String fileName = subFile.getName(); // edistScripts file - String id = fileName.substring(fileName.lastIndexOf("_")); - FileHelper.outputToFile(editScriptsFile, FileHelper.readFile(subFile), true); - String patchesSourceCode = patchesSourceCodeFilePath + "patches" + id; - FileHelper.outputToFile(patchesSourceCodeFile, FileHelper.readFile(patchesSourceCode), true); - String sizes = editScriptSizesFile + "sizes" + id; - FileHelper.outputToFile(editScriptSizesFilePath, FileHelper.readFile(sizes), true); - String buggyTokens = buggyTokensFilePath + "tokens" + id; - FileHelper.outputToFile(buggyTokensFile, FileHelper.readFile(buggyTokens), true); - } - - - // Select data by the size of edit script vectors. - List sizesList; - try { - sizesList = MaxSizeSelector.readSizes(editScriptSizesFile); - int maxSize = MaxSizeSelector.selectMaxSize(MaxSizeType.ThirdQuartile, sizesList); - List outlierIndexes = new ArrayList<>(); - for (int i = 0, size = sizesList.size(); i < size; i ++) { - if (sizesList.get(i) > maxSize) { - outlierIndexes.add(i); - } - } - FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS, "" + maxSize, false); - - selectData(editScriptsFile, outlierIndexes, Configuration.SELECTED_EDITSCRIPTES_FILE); - selectData(patchesSourceCodeFile, outlierIndexes, Configuration.PATCH_SIGNAL, Configuration.SELECTED_PATCHES_SOURE_CODE_FILE); - int maxTokenVectorSize = selectDataOfSourceCodeTokens(buggyTokensFile, outlierIndexes, Configuration.SELECTED_BUGGY_TOKEN_FILE); - FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE, "" + maxTokenVectorSize, false); - } catch (IOException e) { - e.printStackTrace(); - } - } - - private static void selectData(String intputFile, List outlierIndexList, String outputFile) { - List outlierIndexes = new ArrayList<>(); - outlierIndexes.addAll(outlierIndexList); - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(intputFile); - scanner = new Scanner(fis); - int index = 0; - StringBuilder builder = new StringBuilder(); - int counter = 0; - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if (outlierIndexes.contains(index)) { - outlierIndexes.remove(new Integer(index)); - } else { - builder.append(line + "\n"); - if (++ counter % 100000 == 0) { - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - } - index ++; - } - - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - if (scanner != null) { - scanner.close(); - scanner = null; - } - if (fis != null) { - fis.close(); - fis = null; - } - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - private static void selectData(String inputFile, List outlierIndexes, String startingSignal, String outputFile) { - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(inputFile); - scanner = new Scanner(fis); - int index = -1; - StringBuilder builder = new StringBuilder(); - int counter = 0; - String singleEntity = ""; - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if (line.equals(startingSignal)) { - if (!"".equals(singleEntity)) { - if (outlierIndexes.contains(index)) { - outlierIndexes.remove(new Integer(index)); - } else { - builder.append(singleEntity + "\n"); - if (++ counter % 100000 == 0) { - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - } - singleEntity = ""; - } - index ++; - } - singleEntity += line + "\n"; - } - - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - if (scanner != null) { - scanner.close(); - scanner = null; - } - if (fis != null) { - fis.close(); - fis = null; - } - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - private static int selectDataOfSourceCodeTokens(String inputFile, List outlierIndexList, String outputFile) { - List outlierIndexes = new ArrayList<>(); - outlierIndexes.addAll(outlierIndexList); - FileInputStream fis = null; - Scanner scanner = null; - int size = 0; - try { - fis = new FileInputStream(inputFile); - scanner = new Scanner(fis); - int index = 0; - StringBuilder builder = new StringBuilder(); - int counter = 0; - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if (outlierIndexes.contains(index)) { - outlierIndexes.remove(new Integer(index)); - } else { - builder.append(line + "\n"); - if (++ counter % 100000 == 0) { - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - String[] tokens = line.split(" "); - if (tokens.length > size) size = tokens.length; - } - index ++; - } - - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - if (scanner != null) { - scanner.close(); - scanner = null; - } - if (fis != null) { - fis.close(); - fis = null; - } - } catch (IOException e) { - e.printStackTrace(); - } - } - - return size; - } - - /** - * Prepare data for feature learning. - */ - public static void prepareDataForFeatureLearning() { - String zeroVector = ""; - for (int i =0, length = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1 - 1; i < length; i ++) { - zeroVector += "0, "; - } - zeroVector += "0"; - int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim()); - - String embeddedTokensFile = Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS; - Map embeddedTokens = readEmbeddedTokens(embeddedTokensFile); - - String editScriptsFile = Configuration.SELECTED_EDITSCRIPTES_FILE; - String outputFile = Configuration.VECTORIED_EDIT_SCRIPTS; - dataPrepare(editScriptsFile, maxSize, outputFile, embeddedTokens, zeroVector); - } - - private static Map readEmbeddedTokens(String embeddedTokensFile) { - Map embeddedTokens = new HashMap<>(); - File file = new File(embeddedTokensFile); - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(file); - scanner = new Scanner(fis); - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - int firstBlankIndex = line.indexOf(" "); - String token = line.substring(0, firstBlankIndex); - String value = line.substring(firstBlankIndex + 1).replaceAll(" ", ", "); - embeddedTokens.put(token, value); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - return embeddedTokens; - } - - private static void dataPrepare(String inputFile, int maxSize, String outputFile, Map embeddedTokens, String zeroVector) { - File file = new File(inputFile); - FileInputStream fis = null; - Scanner scanner = null; - StringBuilder builder = new StringBuilder(); - int counter = 0; - - try { - fis = new FileInputStream(file); - scanner = new Scanner(fis); - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - StringBuilder vectorStr = convertToVector(embeddedTokens, line, maxSize, zeroVector); - builder.append(vectorStr); - if (++ counter % 10000 == 0) { - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - - private static StringBuilder convertToVector(Map embeddedTokens, String line, int maxSize, String zeroVector) { - String[] tokens = line.split(" "); - StringBuilder vectorStr = new StringBuilder(); - int length = tokens.length; - if (length == maxSize) { - for (int i = 0; i < length - 1; i ++) { - String token = tokens[i]; - vectorStr.append(embeddedTokens.get(token) + ", "); - } - vectorStr.append(embeddedTokens.get(tokens[length - 1]) + "\n"); - } else { - for (int i = 0; i < length; i ++) { - String token = tokens[i]; - vectorStr.append(embeddedTokens.get(token) + ", "); - } - for (int i = length; i < maxSize - 1; i ++) { - vectorStr.append(zeroVector + ", "); - } - vectorStr.append(zeroVector + "\n"); - } - - return vectorStr; - } - - /** - * Prepare data for clustering. - */ - public static void prepareDataForClustering() { - String featureFile = Configuration.EXTRACTED_FEATURES + "vectorizedEditScripts.csv"; - String arffFile = Configuration.CLUSTER_INPUT; - DataPreparer.prepareData(featureFile, arffFile); - } - - /** - * Read cluster results. - */ - public static List readClusterResults() { - List clusterResults = new ArrayList<>(); - String clusterResultsFile = Configuration.CLUSTER_OUTPUT; - String results = FileHelper.readFile(clusterResultsFile); - BufferedReader reader = null; - try { - reader = new BufferedReader(new StringReader(results)); - String line = null; - while ((line = reader.readLine()) != null) { - clusterResults.add(Integer.parseInt(line)); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return clusterResults; - } - - public static Map> readClusterResult(List clusterResults) { - Map> clusters = new HashMap<>(); - - for (int i = 0, size = clusterResults.size(); i < size; i ++) { - int clusterNo = clusterResults.get(i); - if (clusters.containsKey(clusterNo)) { - clusters.get(clusterNo).add(i + 1); - } else { - List newCLuster = new ArrayList<>(); - newCLuster.add(i + 1); - clusters.put(clusterNo, newCLuster); - } - } - - return clusters; - } - - /** - * Data for un-supervised learning. - */ - public static void prepareTokensForEvaluation1() { - String outputFile = Configuration.EMBEDDING_DATA_TOKENS1; - FileHelper.outputToFile(outputFile, FileHelper.readFile(Configuration.SELECTED_BUGGY_TOKEN_FILE), false); - List files = FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list"); - for (File file : files) { - FileHelper.outputToFile(outputFile, FileHelper.readFile(file), true); - } - } - - public static void prepareDataForFeatureLearningOfEvaluation1() { - String zeroVector = ""; - for (int i =0, length = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2 - 1; i < length; i ++) { - zeroVector += "0, "; - } - zeroVector += "0"; - int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE)); - - String allEmbeddedTokens = Configuration.EMBEDDED_ALL_TOKENS1; - Map embeddedTokens = readEmbeddedTokens(allEmbeddedTokens); - - // Testing data - String clusteredTokens = Configuration.TEST_DATA_FILE; - List files = FileHelper.getAllFilesInCurrentDiectory(clusteredTokens, ".list"); - for (File file : files) { - - } - String allTokensOfSourceCode = Configuration.EMBEDDING_DATA_TOKENS1; // TODO testing data should be separated. - dataPrepare(allTokensOfSourceCode, maxSize, Configuration.VECTORIED_ALL_SOURCE_CODE1, embeddedTokens, zeroVector); - } - - /** - * Data for supervised learning. - */ - public static void prepareTokensForEvaluation2(Map commonClustersMappingLabel) { - String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE; - String outputFile = Configuration.EMBEDDING_DATA_TOKENS2; - - List files = FileHelper.getAllFilesInCurrentDiectory(clusteredTokens, ".list"); - for (File file : files) { - String fileName = file.getName(); - String clusterNumStr = fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")); - int clusterNum = Integer.parseInt(clusterNumStr); - if (commonClustersMappingLabel.containsKey(clusterNum)) { - String content = FileHelper.readFile(file); - FileHelper.outputToFile(outputFile, content, true); - } - } - files.clear(); - files = FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list"); - for (File file : files) { - FileHelper.outputToFile(outputFile, FileHelper.readFile(file), true); - } - } - - public static void prepareDataForFeatureLearningOfEvaluation2(Map commonClustersMappingLabel) { - String zeroVector = ""; - for (int i =0, length = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2 - 1; i < length; i ++) { - zeroVector += "0, "; - } - zeroVector += "0"; - - String allEmbeddedTokensOfEvaluation = Configuration.EMBEDDED_ALL_TOKENS2; - Map embeddedTokens = readEmbeddedTokens(allEmbeddedTokensOfEvaluation); - - int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE)); - // Training data - String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE; - List files = FileHelper.getAllFilesInCurrentDiectory(clusteredTokens, ".list"); - for (File file : files) { - String fileName = file.getName(); - String clusterNumStr = fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")); - int clusterNum = Integer.parseInt(clusterNumStr); - if (commonClustersMappingLabel.containsKey(clusterNum)) { - dataPrepare(file.getPath(), maxSize, Configuration.TRAINING_DATA, embeddedTokens, zeroVector, clusterNum); - } - } - // Testing data - files.clear(); - String testingData = Configuration.TEST_DATA_FILE; - files = FileHelper.getAllFilesInCurrentDiectory(testingData, ".list"); - String testingDataPath = Configuration.TESTING_DATA; - for (File file : files) { - String fileName = file.getName(); - fileName.replace(".list", ".csv"); - dataPrepare(file.getPath(), maxSize, testingDataPath + fileName, embeddedTokens, zeroVector, 0); - } - } - - private static void dataPrepare(String inputFile, int maxSize, String outputFile, Map embeddedTokens, - String zeroVector, int clusterNum) { - FileInputStream fis = null; - Scanner scanner = null; - StringBuilder builder = new StringBuilder(); - int counter = 0; - - try { - fis = new FileInputStream(inputFile); - scanner = new Scanner(fis); - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - StringBuilder vectorStr = convertToVector(embeddedTokens, line, maxSize, zeroVector, clusterNum); - builder.append(vectorStr); - if (++ counter % 10000 == 0) { - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - FileHelper.outputToFile(outputFile, builder, true); - builder.setLength(0); - } - - private static StringBuilder convertToVector(Map embeddedTokens, String line, int maxSize, String zeroVector, int clusterNum) { - String[] tokens = line.split(" "); - StringBuilder vectorStr = new StringBuilder(); - int length = tokens.length; - if (length == maxSize) { - for (int i = 0; i < length; i ++) { - String token = tokens[i]; - vectorStr.append(embeddedTokens.get(token) + ", "); - } - } else { - for (int i = 0; i < length; i ++) { - String token = tokens[i]; - vectorStr.append(embeddedTokens.get(token) + ", "); - } - for (int i = length; i < maxSize; i ++) { - vectorStr.append(zeroVector + ", "); - } - } - - vectorStr.append(clusterNum + "\n"); - - return vectorStr; - } - - public static Map readCommonCLusters() { - Map commonClustersMappingLabel = new HashMap<>(); - String commonClusters = FileHelper.readFile(Configuration.CLUSTERNUMBER_LABEL_MAP); - BufferedReader reader = null; - try { - reader = new BufferedReader(new StringReader(commonClusters)); - String line = reader.readLine(); - while ((line = reader.readLine()) != null) { - String[] strArray = line.split(" : "); - int key = Integer.parseInt(strArray[1]); - int value = Integer.parseInt(strArray[0]); - commonClustersMappingLabel.put(key, value); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return commonClustersMappingLabel; - } - - public static void separateTrainingDataFeatures() { - String trainingDataFeatures = Configuration.FEATURES_OF_TRAINING_DATA; - List featureFiles = FileHelper.getAllFilesInCurrentDiectory(trainingDataFeatures, ".csv"); - File featureFile = featureFiles.get(0); - // File featureFile = new File(Configuration.FEATURES_OF_TRAINING_DATA + ""); - - Map numbersMap = readNumberOfInstances(); // : - Map orders = new HashMap<>(); // : - Map fileNames = new HashMap<>(); - String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE; - List files = FileHelper.getAllFilesInCurrentDiectory(clusteredTokens, ".list"); - int order = 1; - for (File file : files) { - String fileName = file.getName(); - String clusterNumStr = fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")); - int clusterNum = Integer.parseInt(clusterNumStr); - if (numbersMap.containsKey(clusterNum)) { - orders.put(order, clusterNum); - fileNames.put(order, fileName); - order ++; - } - } - - String featuresOfClusterPath = Configuration.FEATURES_OF_COMMON_CLUSTERS; - order = 1; - FileInputStream fis = null; - Scanner scanner = null; - try { - fis = new FileInputStream(featureFile); - scanner = new Scanner(fis); - int counter = 0; - StringBuilder features = new StringBuilder(); - while (scanner.hasNextLine()) { - features.append(scanner.nextLine() + "\n"); - counter ++; - if (counter == numbersMap.get(orders.get(order))) { - FileHelper.outputToFile(featuresOfClusterPath + fileNames.get(order), features, false); - features.setLength(0); - counter = 0; - order ++; - } - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } finally { - try { - scanner.close(); - fis.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - private static Map readNumberOfInstances() { - Map numbersMap = new HashMap<>(); - String fileContent = FileHelper.readFile(Configuration.COMMON_CLUSTERS_SIZES); - BufferedReader reader = null; - try { - reader = new BufferedReader(new StringReader(fileContent)); - String line = null; - while ((line = reader.readLine()) != null) { - String[] numbers = line.split(":"); - numbersMap.put(Integer.parseInt(numbers[0]), Integer.parseInt(numbers[1])); - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return numbersMap; - } - - public static Map readLabelMapClusterNum() { - Map labelMapClusterNumMap = new HashMap<>(); - String fileContent = FileHelper.readFile(Configuration.CLUSTERNUMBER_LABEL_MAP); - BufferedReader reader = null; - reader = new BufferedReader(new StringReader(fileContent)); - String line = null; - try { - while ((line = reader.readLine()) != null) { - String[] labelMapClusterNum = line.split(":"); - labelMapClusterNumMap.put(Integer.parseInt(labelMapClusterNum[0]), Integer.parseInt(labelMapClusterNum[1])); - } - } catch (IOException e) { - e.printStackTrace(); - }finally { - try { - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - return null; - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/MaxSizeSelector.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/MaxSizeSelector.java deleted file mode 100644 index 1724dde..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/DataPrepare/MaxSizeSelector.java +++ /dev/null @@ -1,66 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining.DataPrepare; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; - -import edu.lu.uni.serval.utils.FileHelper; -import edu.lu.uni.serval.utils.ListSorter; - -public class MaxSizeSelector { - - public enum MaxSizeType { - UpperWhisker, ThirdQuartile - } - - public static List readSizes(String sizeFilePath) throws IOException { - List sizes = new ArrayList<>(); - String sizesStr = FileHelper.readFile(sizeFilePath); - BufferedReader br = new BufferedReader(new StringReader(sizesStr)); - String line = null; - - while ((line = br.readLine()) != null) { - sizes.add(Integer.parseInt(line.trim())); - } - - return sizes; - } - - public static int selectMaxSize(MaxSizeType maxSizeType, List sizesDistribution) { - int maxSize = 0; - switch (maxSizeType) { - case UpperWhisker: - maxSize = upperWhisker(sizesDistribution); - break; - case ThirdQuartile: - maxSize = thirdQuarter(sizesDistribution); - break; - } - return maxSize; - } - - private static int upperWhisker(List sizesDistribution) { - List sizes = new ArrayList<>(); - sizes.addAll(sizesDistribution); - ListSorter sorter = new ListSorter(sizes); - sizesDistribution = sorter.sortAscending(); - int firstQuarterIndex = sizesDistribution.size() * 25 / 100; - int firstQuarter = sizesDistribution.get(firstQuarterIndex); - int thirdQuarterIndex = sizesDistribution.size() * 75 / 100; - int thirdQuarter = sizesDistribution.get(thirdQuarterIndex); - int upperWhisker = thirdQuarter + (int) (1.5 * (thirdQuarter - firstQuarter)); - return upperWhisker; - } - - private static int thirdQuarter(List sizesDistribution) { - List sizes = new ArrayList<>(); - sizes.addAll(sizesDistribution); - ListSorter sorter = new ListSorter(sizes); - sizesDistribution = sorter.sortAscending(); - int thirdQuarterIndex = sizesDistribution.size() * 75 / 100; - int thirdQuarter = sizesDistribution.get(thirdQuarterIndex); - return thirdQuarter; - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/FeatureLearner.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/FeatureLearner.java deleted file mode 100644 index 6d717ec..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/FeatureLearner.java +++ /dev/null @@ -1,121 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; - -import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation; -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.deeplearner.CNNFeatureExtractor2; -import edu.lu.uni.serval.deeplearner.CNNSupervisedLearning; -import edu.lu.uni.serval.utils.FileHelper; - -public class FeatureLearner { - - /** - * Learn features of edit scripts for fix patterns mining. - */ - public void learnFeatures() { - String editScriptsVectorFile = Configuration.VECTORIED_EDIT_SCRIPTS; // input - int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim()); - int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1; - int batchSize = 1000; - int sizeOfFeatureVector = 200; - - try { - CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(new File(editScriptsVectorFile), sizeOfVector, sizeOfTokenVec, batchSize, sizeOfFeatureVector); - learner.setNumberOfEpochs(20); - learner.setSeed(123); - learner.setNumOfOutOfLayer1(20); - learner.setNumOfOutOfLayer2(50); - learner.setOutputPath(Configuration.EXTRACTED_FEATURES); - - learner.extracteFeaturesWithCNN(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - public void learnFeaturesOfSourceCode() { - int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE)); - int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; - int batchSize = 1000; - int sizeOfExtractedFeatureVector = 200; - - try { - CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(new File(Configuration.VECTORIED_ALL_SOURCE_CODE1), sizeOfVector, sizeOfTokenVec, batchSize, sizeOfExtractedFeatureVector); - learner.setNumberOfEpochs(20); - learner.setSeed(123); - learner.setNumOfOutOfLayer1(20); - learner.setNumOfOutOfLayer2(50); - learner.setOutputPath(Configuration.EXTRACTED_FEATURES_EVALUATION); - - learner.extracteFeaturesWithCNN(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - /** - * Supervised learning. - */ - public void learnFeaturesOfSourceCode2(File testingData) { - int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE)); - int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; - int batchSize = 1000; - int sizeOfExtractedFeatureVector = 200; - - try { - int clusterNum = DataPreparation.readCommonCLusters().size(); - File trainingData = new File(Configuration.TRAINING_DATA); - CNNSupervisedLearning learner = new CNNSupervisedLearning(trainingData, sizeOfVector, - sizeOfTokenVec, batchSize, sizeOfExtractedFeatureVector, clusterNum, testingData); - learner.setNumberOfEpochs(20); - learner.setSeed(123); - learner.setNumOfOutOfLayer1(20); - learner.setNumOfOutOfLayer2(50); - learner.setOutputPath(Configuration.FEATURES_OF_TRAINING_DATA); - learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA); - learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA); - learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA); - learner.setModelFile(Configuration.SUPERVISED_LEARNING_MODEL); - learner.extracteFeaturesWithCNN(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - /** - * Supervised learning by loading a model. - */ - public void learnFeaturesOfSourceCode3(File testingData) { - int batchSize = 1000; - - try { - String modelFile = Configuration.SUPERVISED_LEARNING_MODEL; - CNNSupervisedLearning learner = new CNNSupervisedLearning(batchSize, testingData, modelFile); - learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA); - learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA); - learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA); - learner.extracteFeaturesWithCNNByLoadingModel(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/TokenEmbedder.java b/src/main/java/edu/lu/uni/serval/FixPatternMining/TokenEmbedder.java deleted file mode 100644 index b980ded..0000000 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/TokenEmbedder.java +++ /dev/null @@ -1,65 +0,0 @@ -package edu.lu.uni.serval.FixPatternMining; - -import java.io.File; -import java.io.IOException; - -import edu.lu.uni.serval.config.Configuration; -import edu.lu.uni.serval.deeplearner.Word2VecEncoder; - -/** - * Encode tokens of edit scripts with Word2Vec. - * - * @author kui.liu - * - */ -public class TokenEmbedder { - - /** - * Embed tokens for fix patterns mining. - */ - public void embedTokensOfEditScripts() { - Word2VecEncoder encoder = new Word2VecEncoder(); - int windowSize = 2; - encoder.setWindowSize(windowSize); - try { - File inputFile = new File(Configuration.SELECTED_EDITSCRIPTES_FILE); - int minWordFrequency = 1; - int layerSize = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1; - String outputFileName = Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS; - encoder.embedTokens(inputFile, minWordFrequency, layerSize, outputFileName); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public void embedTokensOfSourceCodeForSupervisedTesting() { - Word2VecEncoder encoder = new Word2VecEncoder(); - int windowSize = 2; - encoder.setWindowSize(windowSize); - try { - File inputFile = new File(Configuration.EMBEDDING_DATA_TOKENS2); - int minWordFrequency = 1; - int layerSize = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; - String outputFileName = Configuration.EMBEDDED_ALL_TOKENS2; - encoder.embedTokens(inputFile, minWordFrequency, layerSize, outputFileName); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public void embedTokensOfSourceCodeForUnsupervisedTesting() { - Word2VecEncoder encoder = new Word2VecEncoder(); - int windowSize = 2; - encoder.setWindowSize(windowSize); - try { - File inputFile = new File(Configuration.EMBEDDING_DATA_TOKENS1); - int minWordFrequency = 1; - int layerSize = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; - String outputFileName = Configuration.EMBEDDED_ALL_TOKENS1; - encoder.embedTokens(inputFile, minWordFrequency, layerSize, outputFileName); - } catch (IOException e) { - e.printStackTrace(); - } - } - -} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/Tokenizer.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/Tokenizer.java index 87d6911..daa978a 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/Tokenizer.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/Tokenizer.java @@ -26,6 +26,8 @@ public class Tokenizer { tokens += astNodeType + " charLiteral "; } else if ("ArrayInitializer".equals(astNodeType)) { tokens += astNodeType + " arrayInitializer "; + } else if ("LambdaExpression".equals(astNodeType)) { + tokens += astNodeType + " lambda "; } else { tokens += astNodeType + " " + simpleTree.getLabel() + " "; } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/patch/CommitPatchSingleStatementParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/patch/CommitPatchSingleStatementParser.java index 795d628..a4595e2 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/patch/CommitPatchSingleStatementParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/patch/CommitPatchSingleStatementParser.java @@ -61,8 +61,7 @@ public class CommitPatchSingleStatementParser extends CommitPatchParser { startPosition = firstAndLastMov.get(0).getNode().getPos(); ITree lastTree = firstAndLastMov.get(1).getNode(); endPosition = lastTree.getPos() + lastTree.getLength(); - } else { // Ignore the pure insert actions without any move - // actions. + } else { // Ignore the pure insert actions without any move actions. continue; } } else if (actionStr.startsWith("UPD")) { @@ -124,7 +123,7 @@ public class CommitPatchSingleStatementParser extends CommitPatchParser { // 1. First level: AST node type. String astEditScripts = getASTEditScripts(actionSet); int size = astEditScripts.split(" ").length; - if (size == 1) { + if (size < 2) { // System.out.println(actionSet); continue; } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java index b497088..18dbbd5 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationHunkParser.java @@ -1,12 +1,8 @@ package edu.lu.uni.serval.FixPatternParser.violations; -import java.io.BufferedReader; import java.io.File; -import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.List; -import java.util.Map; import edu.lu.uni.serval.FixPatternParser.Tokenizer; import edu.lu.uni.serval.config.Configuration; @@ -14,75 +10,106 @@ import edu.lu.uni.serval.diffentry.DiffEntryHunk; import edu.lu.uni.serval.diffentry.DiffEntryReader; import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; import edu.lu.uni.serval.gumtree.regroup.HunkActionFilter; -import edu.lu.uni.serval.gumtree.regroup.HunkFixPattern; import edu.lu.uni.serval.gumtree.regroup.SimpleTree; import edu.lu.uni.serval.gumtree.regroup.SimplifyTree; -import edu.lu.uni.serval.utils.MapSorter; +import edu.lu.uni.serval.utils.ListSorter; /** - * Parse fix violations with GumTree. + * Parse fix violations with GumTree in terms of multiple statements. * * @author kui.liu * */ public class FixedViolationHunkParser extends FixedViolationParser { + int counter; @Override public void parseFixPatterns(File prevFile, File revFile, File diffentryFile) { - - // GumTree results - List actionSets = parseChangedSourceCodeWithGumTree(prevFile, revFile); + // GumTree results + // TODO remove the modification of variable names or not? + List actionSets = parseChangedSourceCodeWithGumTree2(prevFile, revFile); // only remove non-statement source code, eg. method declaration if (actionSets.size() > 0) { - Map positions = readPositions(); - if (positions.size() > 1) { - MapSorter sorter = new MapSorter<>(); - positions = sorter.sortByKeyAscending(positions); + List violations = readPositionsAndAlarmTypes(); + if (violations.size() > 1) { + ListSorter sorter = new ListSorter<>(violations); + violations = sorter.sortAscending(); } - List diffentryHunks1 = new DiffEntryReader().readHunks(diffentryFile); - int index = 0; - int hunkListSize = diffentryHunks1.size(); -// Map diffentryHunks = new HashMap<>(); + List diffentryHunks1 = new DiffEntryReader().readHunks2(diffentryFile); // Select hunks by positions of violations. - List diffentryHunks = new ArrayList<>(); - for (Map.Entry entry : positions.entrySet()) { - int startRange = entry.getKey(); - int endRange = entry.getValue(); - for (; index < hunkListSize; index ++) { + for (Violation violation : violations) { + int startLineNum = violation.getStartLineNum(); + int endLineNum = violation.getEndLineNum(); + for (int index = 0, hunkListSize = diffentryHunks1.size(); index < hunkListSize; index ++) { DiffEntryHunk hunk = diffentryHunks1.get(index); int startLine = hunk.getBugLineStartNum(); int range = hunk.getBugRange(); - if (startRange > startLine + range) continue; - if (endRange < startLine) break; - // startRange and endRange -// diffentryHunks.put(startRange, hunk); - diffentryHunks.add(hunk); + if (startLineNum > startLine + range) continue; + if (endLineNum < startLine) break; + + if (violation.getBugStartLineNum() == 0) { + violation.setBugStartLineNum(startLine); + violation.setFixStartLineNum(hunk.getFixLineStartNum()); + } + violation.setBugEndLineNum(startLine + range); + violation.setFixEndLineNum(hunk.getFixLineStartNum() + hunk.getFixRange()); + violation.getHunks().add(hunk); } } //Filter out the modify actions, which are not in the DiffEntry hunks. HunkActionFilter hunkFilter = new HunkActionFilter(); - List allHunkFixPatterns = hunkFilter.filterActionsByDiffEntryHunk2(diffentryHunks, actionSets, revFile, prevFile); + List selectedViolations = hunkFilter.filterActionsByModifiedRange2(violations, actionSets, revFile, prevFile); - for (HunkFixPattern hunkFixPattern : allHunkFixPatterns) { + for (Violation violation : selectedViolations) { + List hunkActionSets = violation.getActionSets(); + // Remove overlapped UPD and INS + List addActions = new ArrayList<>(); + List insertActions = new ArrayList<>(); + for (HierarchicalActionSet hunkActionSet : hunkActionSets) { + if (hunkActionSet.getActionString().startsWith("INS")) insertActions.add(hunkActionSet); + if (hunkActionSet.getActionString().startsWith("UPD")) addActions.add(hunkActionSet); + } + List selectedActionSets = new ArrayList<>(); + for (HierarchicalActionSet hunkActionSet : hunkActionSets) { + if (insertActions.contains(hunkActionSet)) { + boolean isIntersection = false; + int bugStartL1 = hunkActionSet.getBugStartLineNum(); + int bugEndL1 = hunkActionSet.getBugEndLineNum(); + for (HierarchicalActionSet addAction : addActions) { + int bugStartL = addAction.getBugStartLineNum(); + int bugEndL = addAction.getBugEndLineNum(); + if (bugEndL1 < bugStartL || bugStartL1 > bugEndL) { + continue; + } + isIntersection = true; + break; + } + if (!isIntersection) { + selectedActionSets.add(hunkActionSet); + } + continue; + } + selectedActionSets.add(hunkActionSet); + } + // Range of buggy source code - int startLine = 0; - int endLine = 0; + int bugStartLine = 0; + int bugEndLine = 0; // Range of fixing source code - int startLine2 = 0; - int endLine2 = 0; + int fixStartLine = 0; + int fixEndLine = 0; /* * Convert the ITree of buggy code to a simple tree. * It will be used to compute the similarity. */ - List hunkActionSets = hunkFixPattern.getHunkActionSets(); SimpleTree simpleTree = new SimpleTree(); simpleTree.setLabel("Block"); simpleTree.setNodeType("Block"); List children = new ArrayList<>(); String astEditScripts = ""; - for (HierarchicalActionSet hunkActionSet : hunkActionSets) { + for (HierarchicalActionSet hunkActionSet : selectedActionSets) { SimplifyTree abstractIdentifier = new SimplifyTree(); abstractIdentifier.abstractTree(hunkActionSet); SimpleTree simpleT = hunkActionSet.getSimpleTree(); @@ -101,33 +128,41 @@ public class FixedViolationHunkParser extends FixedViolationParser { // 3. abstract identifiers: // 4. semi-source code: - if (startLine == 0) { - startLine = hunkActionSet.getBugStartLineNum(); - endLine = hunkActionSet.getBugEndLineNum(); - startLine2 = hunkActionSet.getFixStartLineNum(); - endLine2 = hunkActionSet.getFixEndLineNum(); - } else { - if (startLine > hunkActionSet.getBugStartLineNum()) startLine = hunkActionSet.getBugStartLineNum(); - if (startLine2 > hunkActionSet.getFixStartLineNum()) startLine2 = hunkActionSet.getFixStartLineNum(); - if (endLine < hunkActionSet.getBugEndLineNum()) endLine = hunkActionSet.getBugEndLineNum(); - if (endLine2 < hunkActionSet.getFixEndLineNum()) endLine2 = hunkActionSet.getFixEndLineNum(); + int actionBugStart = hunkActionSet.getBugStartLineNum(); + int actionBugEnd = hunkActionSet.getBugEndLineNum(); + int actionFixStart = hunkActionSet.getFixStartLineNum(); + int actionFixEnd = hunkActionSet.getFixEndLineNum(); + if (bugStartLine == 0) { + bugStartLine = actionBugStart; } + if (fixStartLine == 0) { + fixStartLine = actionFixStart; + } + if (bugEndLine < actionBugEnd) bugEndLine = actionBugEnd; + if (fixEndLine < actionFixEnd) fixEndLine = actionFixEnd; } + if (children.size() == 0) continue; - if (endLine - startLine >= Configuration.HUNK_SIZE - 2 || endLine2 - startLine2 >= Configuration.HUNK_SIZE - 2 ) continue; + + if (bugStartLine == 0) { + bugStartLine = violation.getStartLineNum(); + if (bugEndLine < bugStartLine) bugEndLine = violation.getEndLineNum(); + } + if (bugEndLine - bugStartLine >= Configuration.HUNK_SIZE || fixEndLine - fixStartLine >= Configuration.HUNK_SIZE) continue; simpleTree.setChildren(children); simpleTree.setParent(null); // Source Code of patches. - String patchSourceCode = getPatchSourceCode(hunkFixPattern.getHunk(), startLine, endLine, startLine2, endLine2); + String patchSourceCode = getPatchSourceCode(prevFile, revFile, bugStartLine, bugEndLine, fixStartLine, fixEndLine); if ("".equals(patchSourceCode)) continue; - - this.patchesSourceCode += "PATCH###\n" + patchSourceCode + "\n"; + counter ++; + String patchPosition = "";//"###:" + counter + "\n" + revFile.getName() + "\nPosition: " + violation.getStartLineNum() + " --> " + violation.getEndLineNum() + "\n@@ -" + bugStartLine + ", " + bugEndLine + " +" + fixStartLine + ", " + fixEndLine + "@@\n"; + this.patchesSourceCode += Configuration.PATCH_SIGNAL + "\n" + patchPosition + patchSourceCode + "\n"; int size = astEditScripts.split(" ").length; this.sizes += size + "\n"; this.astEditScripts += astEditScripts + "\n"; - + this.alarmTypes += violation.getAlarmType() + "\n"; // this.buggyTrees += Configuration.BUGGY_TREE_TOKEN + "\n" + simpleTree.toString() + "\n"; this.tokensOfSourceCode += Tokenizer.getTokensDeepFirst(simpleTree).trim() + "\n"; // this.actionSets += Configuration.BUGGY_TREE_TOKEN + "\n" + readActionSet(actionSet, "") + "\n"; @@ -137,52 +172,4 @@ public class FixedViolationHunkParser extends FixedViolationParser { } } - private String getPatchSourceCode(DiffEntryHunk hunk, int startLineNum, int endLineNum, int startLineNum2, int endLineNum2) { - String sourceCode = hunk.getHunk(); - int bugStartLine = hunk.getBugLineStartNum(); - int fixStartLine = hunk.getFixLineStartNum();String buggyStatements = ""; - String fixedStatements = ""; - BufferedReader reader = null; - try { - reader = new BufferedReader(new StringReader(sourceCode)); - String line = null; - int bugLines = 0; - int fixLines = 0; - int contextLines = 0; // counter of non-buggy code line. - while ((line = reader.readLine()) != null) { - int bugLineIndex = bugLines + contextLines; - int fixLineIndex = fixLines + contextLines; - if (line.startsWith("-")) { - if (bugStartLine + bugLineIndex >= startLineNum && bugStartLine + bugLineIndex <= endLineNum) { - buggyStatements += line + "\n"; - } - bugLines ++; - } else if (line.startsWith("+")) { - if (fixStartLine + fixLineIndex >= startLineNum2 && fixStartLine + fixLineIndex <= endLineNum2) { - fixedStatements += line + "\n"; - } - fixLines ++; - } else { - contextLines ++; - } - - if (bugStartLine + bugLineIndex > endLineNum && fixStartLine + fixLineIndex > endLineNum2) { - break; - } - } - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - if (reader != null) { - reader.close(); - reader = null; - } - } catch (IOException e) { - e.printStackTrace(); - } - } - return buggyStatements + fixedStatements; - } - } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java index 56c98f9..7335de8 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationParser.java @@ -4,10 +4,15 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.StringReader; -import java.util.HashMap; -import java.util.Map; +import java.util.ArrayList; +import java.util.List; + +import com.github.gumtreediff.actions.model.Action; import edu.lu.uni.serval.FixPatternParser.Parser; +import edu.lu.uni.serval.gumtree.GumTreeComparer; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper; import edu.lu.uni.serval.utils.FileHelper; /** @@ -18,7 +23,8 @@ import edu.lu.uni.serval.utils.FileHelper; */ public class FixedViolationParser extends Parser { - File positionFile = null; + private File positionFile = null; + protected String alarmTypes = ""; public void setPositionFile(File positionFile) { this.positionFile = positionFile; @@ -28,17 +34,33 @@ public class FixedViolationParser extends Parser { public void parseFixPatterns(File prevFile, File revFile, File diffentryFile) { } - protected boolean inPositions(int startLine, int endLine, Map positions) { - for (Map.Entry entry : positions.entrySet()) { - int startPosi = entry.getKey(); - int endPosi = entry.getValue(); - if (endLine >= startPosi && startLine <= endPosi) return true; + /** + * Regroup GumTree results without remove the modification of variable names. + * + * @param prevFile + * @param revFile + * @return + */ + protected List parseChangedSourceCodeWithGumTree2(File prevFile, File revFile) { + List actionSets = new ArrayList<>(); + // GumTree results + List gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile); + if (gumTreeResults != null && gumTreeResults.size() > 0) { + // Regroup GumTre results. + List allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults); + for (HierarchicalActionSet actionSet : allActionSets) { + String astNodeType = actionSet.getAstNodeType(); + if (astNodeType.endsWith("Statement") || "FieldDeclaration".equals(astNodeType)) { + actionSets.add(actionSet); + } + } } - return false; + + return actionSets; } - protected Map readPositions() { - Map positions = new HashMap<>(); + protected List readPositionsAndAlarmTypes() { + List violations = new ArrayList<>(); String fileContent = FileHelper.readFile(positionFile); BufferedReader reader = null; reader = new BufferedReader(new StringReader(fileContent)); @@ -48,7 +70,10 @@ public class FixedViolationParser extends Parser { String[] positionStr = line.split(":"); int startLine = Integer.parseInt(positionStr[0]); int endLine = Integer.parseInt(positionStr[1]); - positions.put(startLine, endLine); + String alarmType = positionStr[2]; + + Violation violation = new Violation(startLine, endLine, alarmType); + violations.add(violation); } } catch (IOException e) { e.printStackTrace(); @@ -59,10 +84,9 @@ public class FixedViolationParser extends Parser { e.printStackTrace(); } } - return positions; + return violations; } - protected String getPatchSourceCode(File prevFile, File revFile, int startLineNum, int endLineNum, int startLineNum2, int endLineNum2) { String buggyStatements = readSourceCode(prevFile, startLineNum, endLineNum, "-"); String fixedStatements = readSourceCode(revFile, startLineNum2, endLineNum2, "+"); @@ -96,4 +120,8 @@ public class FixedViolationParser extends Parser { return sourceCode; } + public String getAlarmTypes() { + return alarmTypes; + } + } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationSingleStatementParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationSingleStatementParser.java index c4d6fa5..370c950 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationSingleStatementParser.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/FixedViolationSingleStatementParser.java @@ -2,7 +2,6 @@ package edu.lu.uni.serval.FixPatternParser.violations; import java.io.File; import java.util.List; -import java.util.Map; import org.eclipse.jdt.core.dom.CompilationUnit; @@ -19,7 +18,7 @@ import edu.lu.uni.serval.gumtree.regroup.SimpleTree; import edu.lu.uni.serval.gumtree.regroup.SimplifyTree; /** - * Parse fixed violations with GumTree. + * Parse fixed violations with GumTree in terms of single statement. * * @author kui.liu * @@ -40,7 +39,7 @@ public class FixedViolationSingleStatementParser extends FixedViolationParser { } // Read the positions of checked violations - Map positions = readPositions(); + List violations = readPositionsAndAlarmTypes(); for (HierarchicalActionSet actionSet : actionSets) { // position of buggy statements int startPosition = 0; @@ -96,9 +95,9 @@ public class FixedViolationSingleStatementParser extends FixedViolationParser { int startLine2 = revUnit.getLineNumber(startPosition2); int endLine2 = revUnit.getLineNumber(endPosition2); - if (!inPositions(startLine, endLine, positions)) { - continue; - } + Violation violation = findViolation(startLine, endLine, violations); + if (violation == null) continue; + if (endLine - startLine >= Configuration.HUNK_SIZE - 2 || endLine2 - startLine2 >= Configuration.HUNK_SIZE - 2 ) continue; /* @@ -128,6 +127,7 @@ public class FixedViolationSingleStatementParser extends FixedViolationParser { this.patchesSourceCode += Configuration.PATCH_SIGNAL + "\n" + revFile.getName() + "\n" + patchSourceCode + "\n"; this.sizes += size + "\n"; this.astEditScripts += astEditScripts + "\n"; + this.alarmTypes += violation.getAlarmType() + "\n"; // 2. source code: raw tokens // String rawTokenEditScripts = getRawTokenEditScripts(actionSet); // // 3. abstract identifiers: @@ -146,4 +146,12 @@ public class FixedViolationSingleStatementParser extends FixedViolationParser { } } + protected Violation findViolation(int startLine, int endLine, List violations) { + for (Violation violation : violations) { + int vStartLine = violation.getStartLineNum(); + int vEndLine = violation.getEndLineNum(); + if (!(startLine > vEndLine && endLine< vStartLine)) return violation; + } + return null; + } } diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java new file mode 100644 index 0000000..e759e93 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/TestHunkParser.java @@ -0,0 +1,269 @@ +package edu.lu.uni.serval.FixPatternParser.violations; + +import static java.lang.System.err; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import edu.lu.uni.serval.FixPatternParser.RunnableParser; +import edu.lu.uni.serval.MultipleThreadsParser.MessageFile; +import edu.lu.uni.serval.config.Configuration; +import edu.lu.uni.serval.utils.FileHelper; + +public class TestHunkParser { + + + public static void main(String[] args) { + // input data + final List msgFiles = getMessageFiles("GumTreeInput/"); + System.out.println(msgFiles.size()); + + // output path + final String editScriptsFilePath = "GumTreeResults/editScripts.list"; + final String patchesSourceCodeFilePath = "GumTreeResults/patchSourceCode.list"; + final String buggyTokensFilePath = "GumTreeResults/tokens.list"; + final String editScriptSizesFilePath = "GumTreeResults/editScriptSizes.list"; + final String alarmTypesFilePath = "GumTreeResults/alarmTypes.list"; + + FileHelper.deleteDirectory(editScriptsFilePath); + FileHelper.deleteDirectory(patchesSourceCodeFilePath); + FileHelper.deleteDirectory(buggyTokensFilePath); + FileHelper.deleteDirectory(editScriptSizesFilePath); + FileHelper.deleteDirectory(alarmTypesFilePath); + + StringBuilder astEditScripts = new StringBuilder(); + StringBuilder tokens = new StringBuilder(); + StringBuilder sizes = new StringBuilder(); + StringBuilder patches = new StringBuilder(); + StringBuilder alarmTypes = new StringBuilder(); + + int a = 0; + int counter = 0; + for (MessageFile msgFile : msgFiles) { + FixedViolationHunkParser parser = new FixedViolationHunkParser(); + parser.counter = counter; + parser.setPositionFile(msgFile.getPositionFile()); + + final ExecutorService executor = Executors.newSingleThreadExecutor(); + // schedule the work + final Future future = executor.submit(new RunnableParser(msgFile.getPrevFile(), + msgFile.getRevFile(), msgFile.getDiffEntryFile(), parser)); + try { + // where we wait for task to complete +// future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS); + future.get(20L, TimeUnit.SECONDS); + String editScripts = parser.getAstEditScripts(); + if (!editScripts.equals("")) { + astEditScripts.append(editScripts); + tokens.append(parser.getTokensOfSourceCode()); + sizes.append(parser.getSizes()); + patches.append(parser.getPatchesSourceCode()); + alarmTypes.append(parser.getAlarmTypes()); + counter = parser.counter; + + a ++; + if (a % 100 == 0) { + FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); + FileHelper.outputToFile(buggyTokensFilePath, tokens, true); + FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); + FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); + FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); + astEditScripts.setLength(0); + tokens.setLength(0); + sizes.setLength(0); + patches.setLength(0); + alarmTypes.setLength(0); + System.out.println("Finish of parsing " + a + " files......"); + } + } + } catch (TimeoutException e) { + err.println("task timed out"); + future.cancel(true /* mayInterruptIfRunning */ ); + } catch (InterruptedException e) { + err.println("task interrupted"); + } catch (ExecutionException e) { + err.println("task aborted"); + } finally { + executor.shutdownNow(); + } + } + + FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true); + FileHelper.outputToFile(buggyTokensFilePath, tokens, true); + FileHelper.outputToFile(editScriptSizesFilePath, sizes, true); + FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true); + FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true); + astEditScripts.setLength(0); + tokens.setLength(0); + sizes.setLength(0); + patches.setLength(0); + alarmTypes.setLength(0); + System.out.println(a); + +// classifyByAlarmTypes(); + } + + + private static List getMessageFiles(String gumTreeInput) { + String inputPath = gumTreeInput; // prevFiles revFiles diffentryFile positionsFile + File revFilesPath = new File(inputPath + "revFiles/"); + File[] revFiles = revFilesPath.listFiles(); // project folders + List msgFiles = new ArrayList<>(); + + for (File revFile : revFiles) { + if (revFile.getName().endsWith(".java")) { + String fileName = revFile.getName(); + File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file + fileName = fileName.replace(".java", ".txt"); + File diffentryFile = new File(gumTreeInput + "diffentries/" + fileName); // DiffEntry file + File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file + MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile); + msgFile.setPositionFile(positionFile); + msgFiles.add(msgFile); + } + } + + return msgFiles; + } + + public static void classifyByAlarmTypes() { + + final String alarmTypesFilePath = Configuration.ALARM_TYPES_FILE; + List alarmTypes = readStringList(alarmTypesFilePath); + //edit scripts, sizes of edit scripts, buggy tokens, patches. + classifyByAlarmTypes(alarmTypes, Configuration.EDITSCRIPT_SIZES_FILE); + classifyByAlarmTypes(alarmTypes, Configuration.EDITSCRIPTS_FILE); + classifyByAlarmTypes(alarmTypes, Configuration.BUGGY_CODE_TOKENS_FILE); + classifyByAlarmTypes2(alarmTypes, Configuration.PATCH_SOURCECODE_FILE); + } + + private static void classifyByAlarmTypes(List alarmTypes, String file) { + Map buildersMap = new HashMap<>(); + FileInputStream fis = null; + Scanner scanner = null; + try { + fis = new FileInputStream(file); + scanner = new Scanner(fis); + int counter = 0; + while (scanner.hasNextLine()) { + String alarmType = alarmTypes.get(counter); + StringBuilder builder = getBuilder(buildersMap, alarmType); + builder.append(scanner.nextLine() + "\n"); + counter ++; + if (counter % 1000 == 0) { + outputBuilders(buildersMap, file); + } + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } finally { + try { + scanner.close(); + fis.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + outputBuilders(buildersMap, file); + } + + private static void classifyByAlarmTypes2(List alarmTypes, String patchSourcecodeFile) { + Map buildersMap = new HashMap<>(); + FileInputStream fis = null; + Scanner scanner = null; + try { + fis = new FileInputStream(patchSourcecodeFile); + scanner = new Scanner(fis); + int counter = 0; + String singlePatch = ""; + while (scanner.hasNextLine()) { + String line = scanner.nextLine(); + if (Configuration.PATCH_SIGNAL.equals(line)) { + if (!"".equals(singlePatch)) { + String alarmType = alarmTypes.get(counter); + StringBuilder builder = getBuilder(buildersMap, alarmType); + builder.append(scanner.nextLine() + "\n"); + counter ++; + if (counter % 2000 == 0) { + outputBuilders(buildersMap, patchSourcecodeFile); + } + } + singlePatch = line + "\n"; + } + singlePatch += line + "\n"; + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } finally { + try { + scanner.close(); + fis.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + outputBuilders(buildersMap, patchSourcecodeFile); + } + + private static void outputBuilders(Map map, String fileNameStr) { + File file = new File(fileNameStr); + String fileName = file.getName(); + String parentPath = file.getParent(); + for (Map.Entry entry : map.entrySet()) { + String alarmType = entry.getKey(); + StringBuilder builder = entry.getValue(); + + FileHelper.outputToFile(parentPath + "/" + alarmType + "/" + fileName, builder, true); + + builder.setLength(0); + entry.setValue(builder); + } + } + + public static List readStringList(String inputFile) { + List list = new ArrayList<>(); + FileInputStream fis = null; + Scanner scanner = null; + try { + fis = new FileInputStream(inputFile); + scanner = new Scanner(fis); + while(scanner.hasNextLine()) { + list.add(scanner.nextLine()); + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } finally { + try { + scanner.close(); + fis.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + return list; + } + + private static StringBuilder getBuilder(Map buildersMap, String alarmType) { + if (buildersMap.containsKey(alarmType)) { + return buildersMap.get(alarmType); + } else { + StringBuilder builder = new StringBuilder(); + buildersMap.put(alarmType, builder); + return builder; + } + } +} diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/Violation.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/Violation.java new file mode 100644 index 0000000..409af52 --- /dev/null +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/Violation.java @@ -0,0 +1,95 @@ +package edu.lu.uni.serval.FixPatternParser.violations; + +import java.util.ArrayList; +import java.util.List; + +import edu.lu.uni.serval.diffentry.DiffEntryHunk; +import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet; + +public class Violation implements Comparable { + + private Integer startLineNum; + private int endLineNum; + private int bugStartLineNum; + private int bugEndLineNum; + private int fixStartLineNum; + private int fixEndLineNum; + private String alarmType; + private List hunks = new ArrayList<>(); + private List actionSets; + + public Violation(Integer startLineNum, int endLineNum, String alarmType) { + super(); + this.startLineNum = startLineNum; + this.endLineNum = endLineNum; + this.alarmType = alarmType; + this.actionSets = new ArrayList<>(); + } + + public Integer getStartLineNum() { + return startLineNum; + } + + public int getEndLineNum() { + return endLineNum; + } + + public int getBugStartLineNum() { + return bugStartLineNum; + } + + public void setBugStartLineNum(int bugStartLineNum) { + this.bugStartLineNum = bugStartLineNum; + } + + public int getBugEndLineNum() { + return bugEndLineNum; + } + + public void setBugEndLineNum(int bugEndLineNum) { + this.bugEndLineNum = bugEndLineNum; + } + + public int getFixStartLineNum() { + return fixStartLineNum; + } + + public void setFixStartLineNum(int fixStartLineNum) { + this.fixStartLineNum = fixStartLineNum; + } + + public int getFixEndLineNum() { + return fixEndLineNum; + } + + public void setFixEndLineNum(int fixEndLineNum) { + this.fixEndLineNum = fixEndLineNum; + } + + public List getHunks() { + return hunks; + } + + public void setHunks(List hunks) { + this.hunks = hunks; + } + + public String getAlarmType() { + return alarmType; + } + + public List getActionSets() { + return actionSets; + } + + @Override + public int compareTo(Violation v) { + return this.startLineNum.compareTo(v.startLineNum); + } + + @Override + public String toString() { + return this.startLineNum + " : " + this.endLineNum + " : " + this.alarmType; + } + +} diff --git a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser.java b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser.java index aec1eb3..a585385 100644 --- a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser.java +++ b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/AkkaParser.java @@ -12,14 +12,22 @@ import akka.actor.ActorSystem; import edu.lu.uni.serval.config.Configuration; import edu.lu.uni.serval.utils.FileHelper; +/** + * Multi-thread parser of parsing the difference between buggy code file and fixed code file. + * + * @author kui.liu + * + */ public class AkkaParser { private static Logger log = LoggerFactory.getLogger(AkkaParser.class); + @SuppressWarnings("deprecation") public static void main(String[] args) { // input data log.info("Get the input data..."); - final List msgFiles = getMessageFiles(); +// final List msgFiles = getMessageFiles(); + final List msgFiles = getMessageFiles("GumTreeInput/"); log.info("MessageFiles: " + msgFiles.size()); // output path @@ -27,27 +35,37 @@ public class AkkaParser { final String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH; final String buggyTokensFilePath = Configuration.BUGGY_CODE_TOKEN_FILE_PATH; final String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH; + final String alarmTypesFilePath = Configuration.ALARM_TYPES_FILE_PATH; FileHelper.deleteDirectory(editScriptsFilePath); FileHelper.deleteDirectory(patchesSourceCodeFilePath); FileHelper.deleteDirectory(buggyTokensFilePath); FileHelper.deleteDirectory(editScriptSizesFilePath); - + FileHelper.deleteDirectory(alarmTypesFilePath); + ActorSystem system = null; ActorRef parsingActor = null; - final int numberOfWorkers = 200; + int numberOfWorkers = 20; final WorkMessage msg = new WorkMessage(0, msgFiles); try { log.info("Akka begins..."); system = ActorSystem.create("Mining-FixPattern-System"); - parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath), "mine-fix-pattern-actor"); + parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, + patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath, alarmTypesFilePath), "mine-fix-pattern-actor"); parsingActor.tell(msg, ActorRef.noSender()); } catch (Exception e) { system.shutdown(); e.printStackTrace(); } + } + - private static List getMessageFiles() { + /** + * Get bug commit-related files. + * + * @return + */ + public static List getMessageFiles() { String inputPath = Configuration.GUM_TREE_INPUT; //DiffEntries prevFiles revFiles File inputFileDirector = new File(inputPath); File[] files = inputFileDirector.listFiles(); // project folders @@ -56,6 +74,16 @@ public class AkkaParser { for (File file : files) { if (!file.isDirectory()) continue; +// if (!(file.getName().startsWith("k") || file.getName().startsWith("l"))) continue; + if (file.getName().startsWith("a") || file.getName().startsWith("b") + || file.getName().startsWith("c") || file.getName().startsWith("d") + || file.getName().startsWith("e") || file.getName().startsWith("f") + || file.getName().startsWith("g") || file.getName().startsWith("h") + ||file.getName().startsWith("h") || file.getName().startsWith("i") + || file.getName().startsWith("k") || file.getName().startsWith("l") + || file.getName().startsWith("j") || file.getName().startsWith("t")) continue; +// if (!file.getName().startsWith("j")) continue; + log.info("Project name: " + file.getName()); String projectFolder = file.getPath(); File revFileFolder = new File(projectFolder + "/revFiles/");// revised file folder File[] revFiles = revFileFolder.listFiles(); @@ -71,4 +99,32 @@ public class AkkaParser { return msgFiles; } + + /** + * Get violation-related files. + * + * @param gumTreeInput + * @return + */ + public static List getMessageFiles(String gumTreeInput) { + String inputPath = gumTreeInput; // prevFiles revFiles diffentryFile positionsFile + File revFilesPath = new File(inputPath + "revFiles/"); + File[] revFiles = revFilesPath.listFiles(); // project folders + List msgFiles = new ArrayList<>(); + + for (File revFile : revFiles) { + if (revFile.getName().endsWith(".java")) { + String fileName = revFile.getName(); + File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file + fileName = fileName.replace(".java", ".txt"); + File diffentryFile = new File(gumTreeInput + "diffentries/" + fileName); // DiffEntry file + File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file + MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile); + msgFile.setPositionFile(positionFile); + msgFiles.add(msgFile); + } + } + + return msgFiles; + } } diff --git a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternActor.java b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternActor.java index 866e52b..3f7703a 100644 --- a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternActor.java +++ b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternActor.java @@ -20,13 +20,16 @@ public class ParseFixPatternActor extends UntypedActor { private final int numberOfWorkers; private int counter = 0; - public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) { + public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, + String buggyTokensFilePath, String editScriptSizesFilePath, String alarmTypesFilePath) { mineRouter = this.getContext().actorOf(new RoundRobinPool(numberOfWorkers) - .props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath)), "mine-fix-pattern-router"); + .props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, + buggyTokensFilePath, editScriptSizesFilePath, alarmTypesFilePath)), "mine-fix-pattern-router"); this.numberOfWorkers = numberOfWorkers; } - public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, final String buggyTokensFilePath, final String editScriptSizesFilePath) { + public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, + final String buggyTokensFilePath, final String editScriptSizesFilePath, final String alarmTypesFilePath) { return Props.create(new Creator() { @@ -34,12 +37,14 @@ public class ParseFixPatternActor extends UntypedActor { @Override public ParseFixPatternActor create() throws Exception { - return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath); + return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, + buggyTokensFilePath, editScriptSizesFilePath, alarmTypesFilePath); } }); } + @SuppressWarnings("deprecation") @Override public void onReceive(Object message) throws Exception { if (message instanceof WorkMessage) { @@ -61,7 +66,10 @@ public class ParseFixPatternActor extends UntypedActor { logger.info("Assign a task to worker #" + (i + 1) + "..."); } } else if ("STOP".equals(message.toString())) { - if (++ counter >= numberOfWorkers) { + counter ++; + logger.info(counter + " workers finished their work..."); + if (counter >= numberOfWorkers) { + logger.info("All workers finished their work..."); this.getContext().stop(mineRouter); this.getContext().stop(getSelf()); this.getContext().system().shutdown(); diff --git a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java index 33da0e9..01bbbce 100644 --- a/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java +++ b/src/main/java/edu/lu/uni/serval/MultipleThreadsParser/ParseFixPatternWorker.java @@ -17,8 +17,10 @@ import org.slf4j.LoggerFactory; import akka.actor.Props; import akka.actor.UntypedActor; import akka.japi.Creator; +import edu.lu.uni.serval.FixPatternParser.Parser; import edu.lu.uni.serval.FixPatternParser.RunnableParser; import edu.lu.uni.serval.FixPatternParser.patch.CommitPatchSingleStatementParser; +import edu.lu.uni.serval.FixPatternParser.violations.FixedViolationHunkParser; import edu.lu.uni.serval.config.Configuration; import edu.lu.uni.serval.utils.FileHelper; @@ -29,22 +31,27 @@ public class ParseFixPatternWorker extends UntypedActor { private String patchesSourceCodeFilePath; private String editScriptSizesFilePath; private String buggyTokensFilePath; + private String alarmTypesFilePath; - public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) { + public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, + String buggyTokensFilePath, String editScriptSizesFilePath, String alarmTypesFilePath) { this.editScriptsFilePath = editScriptsFilePath; this.patchesSourceCodeFilePath = patchesSourceCodeFilePath; this.editScriptSizesFilePath = editScriptSizesFilePath; this.buggyTokensFilePath = buggyTokensFilePath; + this.alarmTypesFilePath = alarmTypesFilePath; } - public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTokensFilePath, final String editScriptSizesFilePath) { + public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTokensFilePath, + final String editScriptSizesFilePath, final String alarmTypesFilePath) { return Props.create(new Creator() { private static final long serialVersionUID = -7615153844097275009L; @Override public ParseFixPatternWorker create() throws Exception { - return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, buggyTokensFilePath, editScriptSizesFilePath); + return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, + buggyTokensFilePath, editScriptSizesFilePath, alarmTypesFilePath); } }); @@ -58,16 +65,25 @@ public class ParseFixPatternWorker extends UntypedActor { StringBuilder editScripts = new StringBuilder(); StringBuilder patchesSourceCode = new StringBuilder(); StringBuilder sizes = new StringBuilder(); -// StringBuilder buggyTrees = new StringBuilder(); StringBuilder tokens = new StringBuilder(); + StringBuilder alarmTypes = new StringBuilder(); int id = msg.getId(); int counter = 0; + boolean containsAlarmTypes = false; for (MessageFile msgFile : files) { + counter ++; File revFile = msgFile.getRevFile(); File prevFile = msgFile.getPrevFile(); File diffentryFile = msgFile.getDiffEntryFile(); - CommitPatchSingleStatementParser parser = new CommitPatchSingleStatementParser(); + File positionFile = msgFile.getPositionFile(); + Parser parser = null; + if (positionFile == null) { + parser = new CommitPatchSingleStatementParser(); + } else { + parser = new FixedViolationHunkParser(); + containsAlarmTypes = true; + } final ExecutorService executor = Executors.newSingleThreadExecutor(); // schedule the work @@ -76,28 +92,33 @@ public class ParseFixPatternWorker extends UntypedActor { // wait for task to complete future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS); - editScripts.append(parser.getAstEditScripts()); - patchesSourceCode.append(parser.getPatchesSourceCode()); - sizes.append(parser.getSizes()); -// buggyTrees.append(parser.getBuggyTrees()); - tokens.append(parser.getTokensOfSourceCode()); - counter ++; - if (counter % 100 == 0) { - FileHelper.outputToFile(editScriptsFilePath + "edistScripts_" + id + ".list", editScripts, true); - FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true); - FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true); -// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true); - FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true); - editScripts.setLength(0); - patchesSourceCode.setLength(0); - sizes.setLength(0); -// buggyTrees.setLength(0); - tokens.setLength(0); - log.info("Worker #" + id +"Finish of parsing " + counter + " files..."); + String editScript = parser.getAstEditScripts(); + if (!"".equals(editScript)) { + editScripts.append(editScript); + patchesSourceCode.append(parser.getPatchesSourceCode()); + sizes.append(parser.getSizes()); + tokens.append(parser.getTokensOfSourceCode()); + if (positionFile == null) alarmTypes.append(((FixedViolationHunkParser) parser).getAlarmTypes()); + + if (counter % 100 == 0) { + FileHelper.outputToFile(editScriptsFilePath + "edistScripts_" + id + ".list", editScripts, true); + FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true); + FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true); + FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true); + editScripts.setLength(0); + patchesSourceCode.setLength(0); + sizes.setLength(0); + tokens.setLength(0); + if (containsAlarmTypes) { + FileHelper.outputToFile(alarmTypesFilePath + "alarmTypes_" + id + ".list", alarmTypes, true); + alarmTypes.setLength(0); + } + log.info("Worker #" + id +"Finish of parsing " + counter + " files..."); + } } } catch (TimeoutException e) { err.println("task timed out"); - future.cancel(true /* mayInterruptIfRunning */ ); + future.cancel(true); } catch (InterruptedException e) { err.println("task interrupted"); } catch (ExecutionException e) { @@ -111,8 +132,15 @@ public class ParseFixPatternWorker extends UntypedActor { FileHelper.outputToFile(editScriptsFilePath + "edistScripts_" + id + ".list", editScripts, true); FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true); FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true); -// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true); FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true); + editScripts.setLength(0); + patchesSourceCode.setLength(0); + sizes.setLength(0); + tokens.setLength(0); + if (containsAlarmTypes) { + FileHelper.outputToFile(alarmTypesFilePath + "alarmTypes_" + id + ".list", alarmTypes, true); + alarmTypes.setLength(0); + } } log.info("Worker #" + id +"Finish of parsing " + counter + " files..."); diff --git a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step8.java b/src/main/java/edu/lu/uni/serval/bugLocalization/Step8.java similarity index 89% rename from src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step8.java rename to src/main/java/edu/lu/uni/serval/bugLocalization/Step8.java index 4f7664d..586d6c3 100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternMining/App/Step8.java +++ b/src/main/java/edu/lu/uni/serval/bugLocalization/Step8.java @@ -1,8 +1,7 @@ -package edu.lu.uni.serval.FixPatternMining.App; +package edu.lu.uni.serval.bugLocalization; import java.io.File; -import edu.lu.uni.serval.bugLocalization.ProjectScanner; import edu.lu.uni.serval.config.Configuration; import edu.lu.uni.serval.utils.FileHelper; diff --git a/src/main/java/edu/lu/uni/serval/config/Configuration.java b/src/main/java/edu/lu/uni/serval/config/Configuration.java index 2030495..e86083e 100644 --- a/src/main/java/edu/lu/uni/serval/config/Configuration.java +++ b/src/main/java/edu/lu/uni/serval/config/Configuration.java @@ -2,9 +2,9 @@ package edu.lu.uni.serval.config; public class Configuration { - public static final long SECONDS_TO_WAIT = 60L; + public static final long SECONDS_TO_WAIT = 20L; - private static final String ROOT_PATH = "../"; // The root path of all output data. + private static final String ROOT_PATH = "../FPM_Violations/"; // The root path of all output data. public static final int HUNK_SIZE = 7; // The limitation of source code lines of each DiffEntry, which will be selected as training data. public static final String BUGGY_TREE_SIGNAL = "BUGGY_TREE###"; // The starting signal of the tree of buggy source code . @@ -13,6 +13,7 @@ public class Configuration { // input path of GumTree. (i.e., Fix patterns parser) public static final String GUM_TREE_INPUT = ROOT_PATH + "GumTreeInput/";// Buggy version file VS. Fixing version file, (DiffEntry File) + // the output path of GumTree results. private static final String GUM_TREE_OUTPUT = ROOT_PATH + "GumTreeResults/"; public static final String EDITSCRIPTS_FILE_PATH = GUM_TREE_OUTPUT + "editScripts/"; @@ -20,12 +21,14 @@ public class Configuration { public static final String BUGGYTREE_FILE_PATH = GUM_TREE_OUTPUT + "buggyTrees/"; public static final String BUGGY_CODE_TOKEN_FILE_PATH = GUM_TREE_OUTPUT + "tokens/"; public static final String EDITSCRIPT_SIZES_FILE_PATH = GUM_TREE_OUTPUT + "editScriptSizes/"; + public static final String ALARM_TYPES_FILE_PATH = GUM_TREE_OUTPUT + "alarmTypes/"; public static final String EDITSCRIPTS_FILE = GUM_TREE_OUTPUT + "editScripts.list"; public static final String PATCH_SOURCECODE_FILE = GUM_TREE_OUTPUT + "patchSourceCode.list"; public static final String BUGGYTREES_FILE = GUM_TREE_OUTPUT + "buggyTrees.list"; public static final String BUGGY_CODE_TOKENS_FILE = GUM_TREE_OUTPUT + "tokens.list"; public static final String EDITSCRIPT_SIZES_FILE = GUM_TREE_OUTPUT + "editScriptSizes.list"; + public static final String ALARM_TYPES_FILE = GUM_TREE_OUTPUT + "alarmTypes.list"; public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN1 = 100; // tokens of edit scripts. public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN2 = 200; // tokens of source code @@ -40,6 +43,7 @@ public class Configuration { public static final String SELECTED_BUGGY_TREE_FILE = EMBEDDING_INPUT + "buggyTrees.list"; public static final String SELECTED_BUGGY_TOKEN_FILE = EMBEDDING_INPUT + "tokens.list"; // Selected token vectors of buggy source code. public static final String SELECTED_EDITSCRIPTES_FILE = EMBEDDING_INPUT + "editScripts.list"; // Selected edit script vectors. + public static final String SELECTED_ALARM_TYPES_FILE = EMBEDDING_INPUT + "alarmTypes.list"; // Selected edit script vectors. // the input path of feature learning. public static final String FEATURE_LEARNING_INPUT = MINING_INPUT + "FeatureLearning/"; public static final String EMBEDDED_EDIT_SCRIPT_TOKENS = FEATURE_LEARNING_INPUT + "embeddedEditScriptTokens.list"; // All embedded tokens of selected edit scripts. diff --git a/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryHunk.java b/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryHunk.java index f0c85c2..e1497a1 100644 --- a/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryHunk.java +++ b/src/main/java/edu/lu/uni/serval/diffentry/DiffEntryHunk.java @@ -40,4 +40,9 @@ public class DiffEntryHunk { this.hunk = hunk; } + @Override + public String toString() { + return "@@ -" + this.bugLineStartNum + ", " + this.bugRange + " +" + this.fixLineStartNum + ", " + this.fixRange + "\n" + this.hunk; + } + } diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java index 11b5ff9..5a55c22 100644 --- a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalActionSet.java @@ -20,7 +20,7 @@ public class HierarchicalActionSet implements Comparable private String actionString; private int startPosition; private int length; - private int bugStartLineNum; + private int bugStartLineNum = 0; private int bugEndLineNum; private int fixStartLineNum; private int fixEndLineNum; diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java index 536a233..680d50a 100644 --- a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HierarchicalRegrouper.java @@ -114,7 +114,10 @@ public class HierarchicalRegrouper { actionSet.setParent(actSet); actSet.getSubActions().add(actionSet); ListSorter sorter = new ListSorter(actSet.getSubActions()); - actSet.setSubActions(sorter.sortAscending()); + List subActions = sorter.sortAscending(); + if (subActions != null) { + actSet.setSubActions(subActions); + } break; } else { if ((!(action instanceof Insert) && !(act instanceof Insert)) diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java index ae67b75..6efe2ae 100644 --- a/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/HunkActionFilter.java @@ -1,6 +1,9 @@ package edu.lu.uni.serval.gumtree.regroup; +import java.io.BufferedReader; import java.io.File; +import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.List; @@ -11,12 +14,13 @@ import com.github.gumtreediff.actions.model.Update; import com.github.gumtreediff.tree.ITree; import edu.lu.uni.serval.FixPatternParser.CUCreator; +import edu.lu.uni.serval.FixPatternParser.violations.Violation; import edu.lu.uni.serval.diffentry.DiffEntryHunk; public class HunkActionFilter { /** - * Filter out the modify actions, which are not in the DiffEntry hunks. + * Filter out the modify actions, which are not in the DiffEntry hunks, without considering the same parent node. * * @param hunks * @param actionSets @@ -66,10 +70,10 @@ public class HunkActionFilter { endPosition2 = startPosition2 + newNode.getLength(); } } - startLine = prevUnit.getLineNumber(startPosition); - endLine = prevUnit.getLineNumber(endPosition); - startLine2 = revUnit.getLineNumber(startPosition2); - endLine2 = revUnit.getLineNumber(endPosition2); + startLine = startPosition == 0 ? 0 : prevUnit.getLineNumber(startPosition); + endLine = endPosition == 0 ? 0 : prevUnit.getLineNumber(endPosition); + startLine2 = startPosition2 == 0 ? 0 : revUnit.getLineNumber(startPosition2); + endLine2 = endPosition2 == 0 ? 0 : revUnit.getLineNumber(endPosition2); for (DiffEntryHunk hunk : hunks) { int bugStartLine = hunk.getBugLineStartNum(); @@ -84,7 +88,6 @@ public class HunkActionFilter { if (endLine2 < fixStartLine ) { uselessActions.add(actionSet); } - break; } else { if (bugStartLine + bugRange < startLine) { continue; @@ -107,7 +110,7 @@ public class HunkActionFilter { } /** - * Filter out the modify actions, which are not in the DiffEntry hunks. + * Filter out the modify actions, which are not in the DiffEntry hunks, with considering the same parent node. * * @param hunks * @param actionSets @@ -127,107 +130,58 @@ public class HunkActionFilter { int i = 0; int size = actionSets.size(); for (DiffEntryHunk hunk : hunks) { - int bugStartLine = hunk.getBugLineStartNum(); - int bugRange = hunk.getBugRange(); - int fixStartLine = hunk.getFixLineStartNum(); - int fixRange = hunk.getFixRange(); + int hunkBugStartLine = hunk.getBugLineStartNum(); + int hunkBugRange = hunk.getBugRange(); + int hunkFixStartLine = hunk.getFixLineStartNum(); + int hunkFixRange = hunk.getFixRange(); for (; i < size; i ++) { - // position of buggy statements - int startPosition = 0; - int endPosition = 0; - int startLine = 0; - int endLine = 0; - // position of fixed statements - int startPosition2 = 0; - int endPosition2 = 0; - int startLine2 = 0; - int endLine2 = 0; - HierarchicalActionSet actionSet = actionSets.get(i); + int actionBugStartLine = actionSet.getBugStartLineNum(); + if (actionBugStartLine == 0) { + actionBugStartLine = setLineNumbers(actionSet, prevUnit, revUnit); + } + int actionBugEndLine = actionSet.getBugEndLineNum(); + int actionFixStartLine = actionSet.getFixStartLineNum(); + int actionFixEndLine = actionSet.getFixEndLineNum(); + String actionStr = actionSet.getActionString(); - ITree parentITree = null; + ITree previousParent = null; List hunkActionSets = new ArrayList<>(); - if (actionStr.startsWith("INS")) { - startPosition2 = actionSet.getStartPosition(); - endPosition2 = startPosition2 + actionSet.getLength(); - - List firstAndLastMov = getFirstAndLastMoveAction(actionSet); - if (firstAndLastMov != null) { - startPosition = firstAndLastMov.get(0).getNode().getPos(); - ITree lastTree = firstAndLastMov.get(1).getNode(); - endPosition = lastTree.getPos() + lastTree.getLength(); - } - } else { - startPosition = actionSet.getStartPosition(); // range of actions - endPosition = startPosition + actionSet.getLength(); - if (actionStr.startsWith("UPD")) { - Update update = (Update) actionSet.getAction(); - ITree newNode = update.getNewNode(); - startPosition2 = newNode.getPos(); - endPosition2 = startPosition2 + newNode.getLength(); - - String astNodeType = actionSet.getAstNodeType(); - if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType) - || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType) - || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType) - || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) { - List children = update.getNode().getChildren(); - endPosition = getEndPosition(children); - List newChildren = newNode.getChildren(); - endPosition2 = getEndPosition(newChildren); - - if (endPosition == 0) { - endPosition = startPosition + actionSet.getLength(); - } - if (endPosition2 == 0) { - endPosition2 = startPosition2 + newNode.getLength(); - } - } - } - } - startLine = prevUnit.getLineNumber(startPosition); - endLine = prevUnit.getLineNumber(endPosition); - startLine2 = revUnit.getLineNumber(startPosition2); - endLine2 = revUnit.getLineNumber(endPosition2); - actionSet.setBugStartLineNum(startLine); - actionSet.setBugEndLineNum(endLine); - actionSet.setFixStartLineNum(startLine2); - actionSet.setFixEndLineNum(endLine2); if (actionStr.startsWith("INS")) { - if (fixStartLine + fixRange < startLine2) { - addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + if (hunkFixStartLine + hunkFixRange < actionFixStartLine) { + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk);// save the previous non-null hunkFixPattern. break; } - if (endLine2 >= fixStartLine ) { - ITree parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, startLine, startLine2, endLine, endLine2, parentITree, hunk); + if (actionFixEndLine >= hunkFixStartLine ) { + ITree parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, previousParent, hunk); if (parent != null) { - if (parent != parentITree) { + if (parent != previousParent) { hunkActionSets = new ArrayList<>(); } hunkActionSets.add(actionSet); } else if (hunkActionSets.size() > 0) { hunkActionSets = new ArrayList<>(); } - parentITree = parent; + previousParent = parent; } } else { // UPD, DEL, MOV - if (bugStartLine + bugRange < startLine) { - addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); + if (hunkBugStartLine + hunkBugRange < actionBugStartLine) { + addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk);// save the previous non-null hunkFixPattern. break; } - if (endLine >= bugStartLine ) { - ITree parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, startLine, startLine2, endLine, endLine2, parentITree, hunk); - if (parent != null) { - if (parent != parentITree) { + if (actionBugEndLine >= hunkBugStartLine ) { + ITree parent = addToHunkActionSets(actionSet, hunkActionSets, allHunkFixPatterns, previousParent, hunk); + if (parent != null) { // same parent + if (parent != previousParent) { hunkActionSets = new ArrayList<>(); } hunkActionSets.add(actionSet); } else if (hunkActionSets.size() > 0) { hunkActionSets = new ArrayList<>(); } - parentITree = parent; + previousParent = parent; } } addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); @@ -255,8 +209,8 @@ public class HunkActionFilter { } } - private ITree addToHunkActionSets(HierarchicalActionSet actionSet, List hunkActionSets, List allHunkFixPatterns, - int startLine, int startLine2, int endLine, int endLine2, ITree parentITree, DiffEntryHunk hunk) { + private ITree addToHunkActionSets(HierarchicalActionSet actionSet, List hunkActionSets, + List allHunkFixPatterns, ITree previousParent, DiffEntryHunk hunk) { String astNodeType = actionSet.getAstNodeType(); if ("FieldDeclaration".equals(astNodeType)) { addHunkActionSets(hunkActionSets, allHunkFixPatterns, hunk); @@ -267,16 +221,16 @@ public class HunkActionFilter { return null; } else { ITree currentParent = actionSet.getNode().getParent(); - if (parentITree == null) { - parentITree = currentParent; + if (previousParent == null) { + previousParent = currentParent; } else { - if (!parentITree.equals(currentParent)) { + if (!previousParent.equals(currentParent)) { HunkFixPattern hunkFixPattern = new HunkFixPattern(hunk, hunkActionSets); allHunkFixPatterns.add(hunkFixPattern); - parentITree = currentParent; + previousParent = currentParent; } } - return parentITree; + return previousParent; } } @@ -323,4 +277,372 @@ public class HunkActionFilter { return firstAndLastMoveActions; } + /** + * Filter out the modify actions, which are not in the DiffEntry hunks, without considering the same parent node. + * + * @param violations + * @param actionSets + * @param revFile + * @param prevFile + * @return + */ + public List filterActionsByModifiedRange(List violations, + List actionSets, File revFile, File prevFile) { + + List selectedViolations = new ArrayList<>(); + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + return selectedViolations; + } + + for (Violation violation : violations) { + int startLine = violation.getStartLineNum(); + int endLine = violation.getEndLineNum(); + int bugStartLine = violation.getBugStartLineNum(); + int bugEndLine = violation.getBugEndLineNum(); + int fixStartLine = violation.getFixStartLineNum(); + int fixEndLine = violation.getFixEndLineNum(); + + for (HierarchicalActionSet actionSet : actionSets) { + int actionBugStartLine = actionSet.getBugStartLineNum(); + if (actionBugStartLine == 0) { + actionBugStartLine = setLineNumbers(actionSet, prevUnit, revUnit); + } + int actionBugEndLine = actionSet.getBugEndLineNum(); + int actionFixStartLine = actionSet.getFixStartLineNum(); + int actionFixEndLine = actionSet.getFixEndLineNum(); + + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("INS")) { + if (fixStartLine <= actionFixStartLine && actionFixEndLine <= fixEndLine) { + if (actionBugStartLine != 0) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } else { + + violation.getActionSets().add(actionSet); + } + } + } else { + if (bugEndLine < actionBugStartLine) { + break; + } + if (bugStartLine <= actionBugStartLine && actionBugEndLine <= bugEndLine) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } + } + } + + if (violation.getActionSets().size() > 0) { + selectedViolations.add(violation); + } + } + return selectedViolations; + } + + /** + * Filter out the modify actions, which are not in the DiffEntry hunks, with considering the same parent node. + * + * @param violations + * @param actionSets + * @param revFile + * @param prevFile + * @return + */ + public List filterActionsByModifiedRange2(List violations, + List actionSets, File revFile, File prevFile) { + + List selectedViolations = new ArrayList<>(); + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + return selectedViolations; + } + + for (Violation violation : violations) { + int startLine = violation.getStartLineNum(); + int endLine = violation.getEndLineNum(); + int bugStartLine = violation.getBugStartLineNum(); + int bugEndLine = violation.getBugEndLineNum(); + int fixStartLine = violation.getFixStartLineNum(); + int fixEndLine = violation.getFixEndLineNum(); + + for (HierarchicalActionSet actionSet : actionSets) { + int actionBugStartLine = actionSet.getBugStartLineNum(); + if (actionBugStartLine == 0) { + actionBugStartLine = setLineNumbers(actionSet, prevUnit, revUnit); + } + int actionBugEndLine = actionSet.getBugEndLineNum(); + int actionFixStartLine = actionSet.getFixStartLineNum(); + int actionFixEndLine = actionSet.getFixEndLineNum(); + + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("INS")) { + if (fixStartLine <= actionFixStartLine && actionFixEndLine <= fixEndLine) { + if (actionBugStartLine != 0) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } else { + if (isRanged(actionSet, violation)) violation.getActionSets().add(actionSet); + } + } + } else { + if (bugEndLine < actionBugStartLine) { + break; + } + if (bugStartLine <= actionBugStartLine && actionBugEndLine <= bugEndLine) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } + } + } + + if (violation.getActionSets().size() > 0) { + selectedViolations.add(violation); + } + } + return selectedViolations; + } + + private boolean isRanged(HierarchicalActionSet actionSet, Violation violation) { + int actionStartLine = actionSet.getFixStartLineNum(); + int actionEndLine = actionSet.getFixEndLineNum(); + int violationStartLine = violation.getStartLineNum(); + int violationEndLine = violation.getEndLineNum(); + List hunks = violation.getHunks(); + + for (DiffEntryHunk hunk : hunks) { + int bugStartLine = hunk.getBugLineStartNum(); + int bugEndLine = bugStartLine + hunk.getBugRange(); + int fixStartLine = hunk.getFixLineStartNum(); + int fixEndLine = fixStartLine + hunk.getFixRange(); + if (fixStartLine > actionEndLine || bugStartLine > violationEndLine) break; + if (fixEndLine < actionStartLine || bugEndLine < violationStartLine) continue; + + String hunkContent = hunk.getHunk(); + BufferedReader reader = null; + int counterOfContext = 0; + int counterOfDeletedLines = 0; + int counterOfAddedLines = 0; + int bugStarts = 0; + int bugEnds = 0; + int fixStarts = 0; + int fixEnds = 0; + int contextStarts = 0; + try { + reader = new BufferedReader(new StringReader(hunkContent)); + String line = null; + while ((line = reader.readLine()) != null) { + if (line.startsWith("-")) { + counterOfDeletedLines ++; + if (bugStarts == 0) { + bugStarts = bugStartLine + counterOfContext + counterOfDeletedLines - 1; + } + contextStarts = 0; + + if (fixStarts != 0) { + fixEnds = fixStartLine + counterOfContext + counterOfAddedLines - 1; + if (fixStarts > actionEndLine) break; + if (fixEnds < actionStartLine) { + fixStarts = 0; + continue; + } + return true; + } + } + else if (line.startsWith("+")) { + counterOfAddedLines ++; + if (bugStarts == 0) { + bugStarts = contextStarts; + } + bugEnds = bugStartLine + counterOfContext + counterOfDeletedLines - 1; + if (violationEndLine < bugStarts) break; + if (violationStartLine > bugEnds) { + bugStarts = 0; + } + if (bugStarts != 0 && fixStarts == 0) { + fixStarts = fixStartLine + counterOfContext + counterOfAddedLines - 1; + } + } + else { + counterOfContext ++; + bugStarts = 0; + if (contextStarts == 0) { + contextStarts = bugStartLine + counterOfContext + counterOfDeletedLines - 1; + } + + if (fixStarts != 0) { + fixEnds = fixStartLine + counterOfContext + counterOfAddedLines - 1; + if (fixStarts > actionEndLine) break; + if (fixEnds < actionStartLine) { + fixStarts = 0; + continue; + } + return true; + } + } + } + + if (fixStarts != 0) { + fixEnds = fixStartLine + counterOfContext + counterOfAddedLines - 1; + if (fixStarts > actionEndLine) break; + if (fixEnds < actionStartLine) { + fixStarts = 0; + continue; + } + return true; + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + reader.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + return false; + } + + /** + * Filter out the modify actions, which are not in the DiffEntry hunks, without considering DiffEntry hunks. + * + * @param violations + * @param actionSets + * @param revFile + * @param prevFile + * @return + */ + public List filterActionsByModifiedRange3(List violations, + List actionSets, File revFile, File prevFile) { + + List selectedViolations = new ArrayList<>(); + + CUCreator cuCreator = new CUCreator(); + CompilationUnit prevUnit = cuCreator.createCompilationUnit(prevFile); + CompilationUnit revUnit = cuCreator.createCompilationUnit(revFile); + if (prevUnit == null || revUnit == null) { + return selectedViolations; + } + + for (Violation violation : violations) { + int startLine = violation.getStartLineNum(); + int endLine = violation.getEndLineNum(); + +// ITree parent = null; +// List actionSetsWithSameParent = new ArrayList<>(); //TODO + for (HierarchicalActionSet actionSet : actionSets) { + int actionBugStartLine = actionSet.getBugStartLineNum(); + if (actionBugStartLine == 0) { + actionBugStartLine = setLineNumbers(actionSet, prevUnit, revUnit); + } + int actionBugEndLine = actionSet.getBugEndLineNum(); + int actionFixStartLine = actionSet.getFixStartLineNum(); + int actionFixEndLine = actionSet.getFixEndLineNum(); + + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("INS")) { // FIXME It is impossible to locate the INS action by the buggy line range. + if (startLine <= actionFixStartLine && actionFixEndLine <= endLine) { + if (actionBugStartLine != 0) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } else { + violation.getActionSets().add(actionSet); + } + } + } else { + if (endLine < actionBugStartLine) { + break; + } + if (startLine <= actionBugStartLine && actionBugEndLine <= endLine) { + if (startLine <= actionBugEndLine && endLine >= actionBugStartLine) { + violation.getActionSets().add(actionSet); + } + } + } + } + + if (violation.getActionSets().size() > 0) { + selectedViolations.add(violation); + } + } + return selectedViolations; + } + + private int setLineNumbers(HierarchicalActionSet actionSet, CompilationUnit prevUnit, CompilationUnit revUnit) { + int actionBugStartLine = actionSet.getBugStartLineNum(); + int actionBugEndLine; + int actionFixStartLine; + int actionFixEndLine; + + // position of buggy statements + int bugStartPosition = 0; + int bugEndPosition = 0; + // position of fixed statements + int fixStartPosition = 0; + int fixEndPosition = 0; + + String actionStr = actionSet.getActionString(); + if (actionStr.startsWith("INS")) { + fixStartPosition = actionSet.getStartPosition(); + fixEndPosition = fixStartPosition + actionSet.getLength(); + + List firstAndLastMov = getFirstAndLastMoveAction(actionSet); + if (firstAndLastMov != null) { + bugStartPosition = firstAndLastMov.get(0).getNode().getPos(); + ITree lastTree = firstAndLastMov.get(1).getNode(); + bugEndPosition = lastTree.getPos() + lastTree.getLength(); + } + } else { + bugStartPosition = actionSet.getStartPosition(); // range of actions + bugEndPosition = bugStartPosition + actionSet.getLength(); + if (actionStr.startsWith("UPD")) { + Update update = (Update) actionSet.getAction(); + ITree newNode = update.getNewNode(); + fixStartPosition = newNode.getPos(); + fixEndPosition = fixStartPosition + newNode.getLength(); + + String astNodeType = actionSet.getAstNodeType(); + if ("EnhancedForStatement".equals(astNodeType) || "ForStatement".equals(astNodeType) + || "DoStatement".equals(astNodeType) || "WhileStatement".equals(astNodeType) + || "LabeledStatement".equals(astNodeType) || "SynchronizedStatement".equals(astNodeType) + || "IfStatement".equals(astNodeType) || "TryStatement".equals(astNodeType)) { + List children = update.getNode().getChildren(); + bugEndPosition = getEndPosition(children); + List newChildren = newNode.getChildren(); + fixEndPosition = getEndPosition(newChildren); + + if (bugEndPosition == 0) { + bugEndPosition = bugStartPosition + actionSet.getLength(); + } + if (fixEndPosition == 0) { + fixEndPosition = fixStartPosition + newNode.getLength(); + } + } + } + } + actionBugStartLine = bugStartPosition == 0 ? 0 : prevUnit.getLineNumber(bugStartPosition); + actionBugEndLine = bugEndPosition == 0 ? 0 : prevUnit.getLineNumber(bugEndPosition); + actionFixStartLine = fixStartPosition == 0 ? 0 : revUnit.getLineNumber(fixStartPosition); + actionFixEndLine = fixEndPosition == 0 ? 0 : revUnit.getLineNumber(fixEndPosition); + actionSet.setBugStartLineNum(actionBugStartLine); + actionSet.setBugEndLineNum(actionBugEndLine); + actionSet.setFixStartLineNum(actionFixStartLine); + actionSet.setFixEndLineNum(actionFixEndLine); + + return actionBugStartLine; + } + } diff --git a/src/main/java/edu/lu/uni/serval/gumtree/regroup/SimplifyTree.java b/src/main/java/edu/lu/uni/serval/gumtree/regroup/SimplifyTree.java index bb3ce6c..7567097 100644 --- a/src/main/java/edu/lu/uni/serval/gumtree/regroup/SimplifyTree.java +++ b/src/main/java/edu/lu/uni/serval/gumtree/regroup/SimplifyTree.java @@ -116,8 +116,9 @@ public class SimplifyTree { simpleTree.setLabel(canonicalizeTypeStr(label).replaceAll(" ", "")); } else { if ((astNode.equals("SimpleName") || astNode.equals("MethodInvocation")) && label.startsWith("MethodName:")) { - label = label.substring(11); simpleTree.setNodeType("MethodName"); + label = label.substring(11); + label = label.substring(0, label.indexOf(":[")); simpleTree.setLabel(label); } else { simpleTree.setLabel(astNode); @@ -132,8 +133,9 @@ public class SimplifyTree { if (astNode.endsWith("Name")) { // variableName, methodName, QualifiedName if (label.startsWith("MethodName:")) { // - label = label.substring(11); simpleTree.setNodeType("MethodName"); + label = label.substring(11); + label = label.substring(0, label.indexOf(":[")); simpleTree.setLabel(label); } else if (label.startsWith("Name:")) { label = label.substring(5); @@ -155,9 +157,10 @@ public class SimplifyTree { simpleTree.setLabel(canonicalizeTypeStr(label).replaceAll(" ", "")); } else if (astNode.startsWith("Type")) { simpleTree.setLabel(canonicalizeTypeStr(label).replaceAll(" ", "")); - } else if (astNode.equals("MethodInvocation") && label.startsWith("MethodName:")) { - label = label.substring(11); + } else if ((astNode.equals("SimpleName") || astNode.equals("MethodInvocation")) && label.startsWith("MethodName:")) { simpleTree.setNodeType("MethodName"); + label = label.substring(11); + label = label.substring(0, label.indexOf(":[")); simpleTree.setLabel(label); } else { simpleTree.setLabel(label.replaceAll(" ", "")); @@ -591,7 +594,10 @@ public class SimplifyTree { List allMoveActions = getAllMoveActions2(actionSet); if (allMoveActions != null && allMoveActions.size() > 0) { ListSorter sorter = new ListSorter(allMoveActions); - allMoveActions = sorter.sortAscending(); + List moveActions = sorter.sortAscending(); + if (moveActions != null) { + allMoveActions = moveActions; + } return allMoveActions; } else {// FIXME: pure INS actions. return null; diff --git a/src/main/java/edu/lu/uni/serval/violation/Alarm.java b/src/main/java/edu/lu/uni/serval/violation/Alarm.java index 2fe759c..8c6427f 100644 --- a/src/main/java/edu/lu/uni/serval/violation/Alarm.java +++ b/src/main/java/edu/lu/uni/serval/violation/Alarm.java @@ -1,24 +1,21 @@ package edu.lu.uni.serval.violation; -import java.util.HashMap; import java.util.Map; -public class Alarm implements Comparable { +public class Alarm { private String buggyCommitId; private String buggyFileName; - private Map positions; // : private String fixedCommitId; private String fixedFileName; + private Map positions; - public Alarm(String buggyCommitId, String buggyFileName, String fixedCommitId, - String fixedFileName) { + public Alarm(String buggyCommitId, String buggyFileName, String fixedCommitId, String fixedFileName) { super(); this.buggyCommitId = buggyCommitId; this.buggyFileName = buggyFileName; this.fixedCommitId = fixedCommitId; this.fixedFileName = fixedFileName; - this.positions = new HashMap<>(); } public String getBuggyCommitId() { @@ -29,10 +26,6 @@ public class Alarm implements Comparable { return buggyFileName; } - public Map getPositions() { - return positions; - } - public String getFixedCommitId() { return fixedCommitId; } @@ -41,13 +34,12 @@ public class Alarm implements Comparable { return fixedFileName; } - @Override - public int compareTo(Alarm a) { - int compareResult = this.buggyCommitId.compareTo(a.buggyCommitId); - if (compareResult == 0) { - compareResult = this.buggyFileName.compareTo(a.buggyFileName); - } - return compareResult; + public Map getPositions() { + return positions; + } + + public void setPositions(Map positions) { + this.positions = positions; } @Override @@ -61,6 +53,5 @@ public class Alarm implements Comparable { } return false; } - - + } diff --git a/src/main/java/edu/lu/uni/serval/violation/Violation.java b/src/main/java/edu/lu/uni/serval/violation/Violation.java index 0701034..27fb771 100644 --- a/src/main/java/edu/lu/uni/serval/violation/Violation.java +++ b/src/main/java/edu/lu/uni/serval/violation/Violation.java @@ -3,6 +3,12 @@ package edu.lu.uni.serval.violation; import java.util.ArrayList; import java.util.List; +/** + * Violation: all alarms identified by FindBugs in one java project. + * + * @author kui.liu + * + */ public class Violation { private String project; diff --git a/src/main/java/edu/lu/uni/serval/violation/parse/AlarmsReader.java b/src/main/java/edu/lu/uni/serval/violation/parse/AlarmsReader.java index 3ea81fb..820b17f 100644 --- a/src/main/java/edu/lu/uni/serval/violation/parse/AlarmsReader.java +++ b/src/main/java/edu/lu/uni/serval/violation/parse/AlarmsReader.java @@ -14,7 +14,7 @@ import edu.lu.uni.serval.violation.Violation; public class AlarmsReader { public Map readAlarmsList(String fileName) { - Map violations = new HashMap<>(); + Map violationsMap = new HashMap<>(); FileInputStream fis = null; Scanner scanner = null; try { @@ -29,36 +29,35 @@ public class AlarmsReader { String[] buggyElements = buggyInfo.split(":"); String[] fixedElements = fixedInfo.split(":"); - String projectName = buggyElements[0]; - if (!projectName.equals(fixedElements[0])) continue; - - String commitId = buggyElements[1]; - String buggyFile = buggyElements[2]; - int startLine = Integer.parseInt(buggyElements[3]); - int endLine = Integer.parseInt(buggyElements[4]); + String projectName = buggyElements[1]; + String buggyCommitId = buggyElements[2]; + String buggyFile = buggyElements[3]; + int startLine = Integer.parseInt(buggyElements[4]); + String endLineAndAlarmType = buggyElements[5] + ":" + buggyElements[0]; String fixCommitId = fixedElements[1]; String fixedFile = fixedElements[2]; - Alarm alarm = new Alarm(commitId, buggyFile, fixCommitId, fixedFile); + Alarm alarm = new Alarm(buggyCommitId, buggyFile, fixCommitId, fixedFile); Violation violation; - if (violations.containsKey(projectName)) { - violation = violations.get(projectName); + if (violationsMap.containsKey(projectName)) { + violation = violationsMap.get(projectName); } else { violation = new Violation(projectName); - violations.put(projectName, violation); + violationsMap.put(projectName, violation); } List alarms = violation.getAlarms(); int index = alarms.indexOf(alarm); - if (index != -1) { - Alarm tempA = alarms.get(index); - Map positions = tempA.getPositions(); - positions.put(startLine, endLine); + if (index >= 0) { + Alarm tempAlarm = alarms.get(index); + Map positions = tempAlarm.getPositions(); + positions.put(startLine, endLineAndAlarmType); } else { - alarm.getPositions().put(startLine, endLine); + Map positions = new HashMap<>(); + positions.put(startLine, endLineAndAlarmType); + alarm.setPositions(positions); alarms.add(alarm); } - } } catch (FileNotFoundException e) { e.printStackTrace(); @@ -70,6 +69,7 @@ public class AlarmsReader { e.printStackTrace(); } } - return violations; + return violationsMap; } + } diff --git a/src/main/java/edu/lu/uni/serval/violation/parse/TestViolationParser.java b/src/main/java/edu/lu/uni/serval/violation/parse/TestViolationParser.java index 0d04337..7a8c3f2 100644 --- a/src/main/java/edu/lu/uni/serval/violation/parse/TestViolationParser.java +++ b/src/main/java/edu/lu/uni/serval/violation/parse/TestViolationParser.java @@ -8,7 +8,7 @@ import edu.lu.uni.serval.utils.FileHelper; public class TestViolationParser { - private static final String REPO_PATH = "/Volumes/MacBook/repositories/"; + private static final String REPO_PATH = "/Volumes/MacBook/repos/"; private static final String previousFilesPath = "GumTreeInput/prevFiles/"; private static final String revisedFilesPath = "GumTreeInput/revFiles/"; private static final String positionsFilePath = "GumTreeInput/positions/"; diff --git a/src/main/java/edu/lu/uni/serval/violation/parse/ViolationParser.java b/src/main/java/edu/lu/uni/serval/violation/parse/ViolationParser.java index a38bbd0..13d2235 100644 --- a/src/main/java/edu/lu/uni/serval/violation/parse/ViolationParser.java +++ b/src/main/java/edu/lu/uni/serval/violation/parse/ViolationParser.java @@ -2,10 +2,10 @@ package edu.lu.uni.serval.violation.parse; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.errors.RevisionSyntaxException; import edu.lu.uni.serval.git.exception.GitRepositoryNotFoundException; @@ -23,6 +23,116 @@ import edu.lu.uni.serval.violation.Violation; public class ViolationParser { public void parseViolations(String fixedAlarmFile, List repos, String previousFilesPath, String revisedFilesPath, String positionsFilePath, String diffentryFilePath) { + AlarmsReader reader = new AlarmsReader(); + Map violations = reader.readAlarmsList(fixedAlarmFile); + List throwExpProjs = new ArrayList<>(); + int a = 0; + int exceptionsCounter = 0; + for (Map.Entry entry : violations.entrySet()) { + String projectName = entry.getKey(); + String repoName = ""; + for (File repo : repos) { + if (repo.getName().equals(projectName)) { + repoName = repo.getPath() + "/"; + break; + } + } + if ("".equals(repoName)) { + a ++; + System.out.println(projectName); + continue; + } + Violation violation = entry.getValue(); + List alarms = violation.getAlarms(); + + String repoPath = repoName + "/.git"; + GitRepository gitRepo = new GitRepository(repoPath, revisedFilesPath, previousFilesPath); + try { + gitRepo.open(); + for (Alarm alarm : alarms) { + String buggyCommitId = alarm.getBuggyCommitId(); + String buggyFileName = alarm.getBuggyFileName(); + String buggyFileContent = gitRepo.getFileContentByCommitIdAndFileName(buggyCommitId, buggyFileName); + if (buggyFileContent == null || "".equals(buggyFileContent)) { +// System.out.println(projectName); + throwExpProjs.add(projectName); + exceptionsCounter ++; + continue; + } + + String fixedCommitId = alarm.getFixedCommitId(); + String fixedFileName = alarm.getFixedFileName(); + String fixedFileContent = gitRepo.getFileContentByCommitIdAndFileName(fixedCommitId, fixedFileName); + if (fixedFileContent == null || "".equals(fixedFileContent)) { +// System.out.println(projectName); + throwExpProjs.add(projectName); + exceptionsCounter ++; + continue; + } + + String diffentry = gitRepo.getDiffentryByTwoCommitIds(buggyCommitId, fixedCommitId, fixedFileName); + if (diffentry == null) { +// System.out.println(projectName); + throwExpProjs.add(projectName); + exceptionsCounter ++; + continue; + } + + String commitId = buggyCommitId.substring(0, 6) + "_" + fixedCommitId.substring(0, 6); + String fileName = fixedFileName.replaceAll("/", "#"); + fileName = projectName + "_" + commitId + fileName; + if (fileName.length() > 240) { + List files = FileHelper.getAllFilesInCurrentDiectory(revisedFilesPath, ".java"); + fileName = files.size() + "TooLongFileName.java"; + } + String buggyFile = previousFilesPath + "prev_" + fileName; + String fixedFile = revisedFilesPath + fileName; + fileName = fileName.replace(".java", ".txt"); + String positionFile = positionsFilePath + fileName; + String diffentryFile = diffentryFilePath + fileName; + FileHelper.outputToFile(buggyFile, buggyFileContent, false); + FileHelper.outputToFile(fixedFile, fixedFileContent, false); + FileHelper.outputToFile(positionFile, readPosition(alarm.getPositions()), false); + FileHelper.outputToFile(diffentryFile, diffentry, false); + } + } catch (GitRepositoryNotFoundException e) { + System.out.println("Exception: " + projectName); + exceptionsCounter ++; + e.printStackTrace(); + } catch (NotValidGitRepositoryException e) { + System.out.println("Exception: " + projectName); + exceptionsCounter ++; + e.printStackTrace(); + } catch (IOException e) { + System.out.println("Exception: " + projectName); + exceptionsCounter ++; + e.printStackTrace(); + } catch (RevisionSyntaxException e) { + System.out.println("Exception: " + projectName); + exceptionsCounter ++; + e.printStackTrace(); + } finally { + gitRepo.close(); + } + } + System.out.println(a); + System.out.println(exceptionsCounter); + System.out.println(throwExpProjs.size()); + System.out.println(throwExpProjs); + + } + + /** + * Output data in terms of alarm types. + * + * @param fixedAlarmFile + * @param repos + * @param previousFilesPath + * @param revisedFilesPath + * @param positionsFilePath + * @param diffentryFilePath + */ + public void parseViolations2(String fixedAlarmFile, List repos, String previousFilesPath, String revisedFilesPath, String positionsFilePath, String diffentryFilePath) { AlarmsReader reader = new AlarmsReader(); Map violations = reader.readAlarmsList(fixedAlarmFile); int a = 0; @@ -51,15 +161,24 @@ public class ViolationParser { String buggyCommitId = alarm.getBuggyCommitId(); String buggyFileName = alarm.getBuggyFileName(); String buggyFileContent = gitRepo.getFileContentByCommitIdAndFileName(buggyCommitId, buggyFileName); - if (buggyFileContent == null || "".equals(buggyFileContent)) continue; + if (buggyFileContent == null || "".equals(buggyFileContent)) { + System.out.println(projectName); + continue; + } String fixedCommitId = alarm.getFixedCommitId(); String fixedFileName = alarm.getFixedFileName(); String fixedFileContent = gitRepo.getFileContentByCommitIdAndFileName(fixedCommitId, fixedFileName); - if (fixedFileContent == null || "".equals(fixedFileContent)) continue; + if (fixedFileContent == null || "".equals(fixedFileContent)) { + System.out.println(projectName); + continue; + } String diffentry = gitRepo.getDiffentryByTwoCommitIds(buggyCommitId, fixedCommitId, fixedFileName); - if (diffentry == null) continue; + if (diffentry == null) { + System.out.println(projectName); + continue; + } String commitId = buggyCommitId.substring(0, 6) + "_" + fixedCommitId.substring(0, 6); String fileName = fixedFileName.replaceAll("/", "#"); @@ -79,14 +198,16 @@ public class ViolationParser { FileHelper.outputToFile(diffentryFile, diffentry, false); } } catch (GitRepositoryNotFoundException e) { + System.out.println("Exception: " + projectName); e.printStackTrace(); } catch (NotValidGitRepositoryException e) { + System.out.println("Exception: " + projectName); e.printStackTrace(); } catch (IOException e) { + System.out.println("Exception: " + projectName); e.printStackTrace(); } catch (RevisionSyntaxException e) { - e.printStackTrace(); - } catch (GitAPIException e) { + System.out.println("Exception: " + projectName); e.printStackTrace(); } finally { gitRepo.close(); @@ -95,11 +216,12 @@ public class ViolationParser { System.out.println(a); } - private String readPosition(Map positions) { + private String readPosition(Map positions) { String positionsStr = ""; - for (Map.Entry entry : positions.entrySet()) { + for (Map.Entry entry : positions.entrySet()) { positionsStr += entry.getKey() + ":" + entry.getValue() + "\n"; } return positionsStr; } + }