Add deep learning models.
This commit is contained in:
@@ -16,6 +16,18 @@
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>edu.lu.uni.serval</groupId>
|
||||
<artifactId>MyCluster</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>edu.lu.uni.serval</groupId>
|
||||
<artifactId>MyFeatureLearner</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>edu.lu.uni</groupId>
|
||||
<artifactId>simple-utils</artifactId>
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
package edu.lu.uni.serval.FixPattern.info;
|
||||
|
||||
import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet;
|
||||
import edu.lu.uni.serval.gumtree.regroup.SimpleTree;
|
||||
|
||||
public class FixPattern {
|
||||
private SimpleTree buggyCodeTree; // it will be used to compute the similarity.
|
||||
private HierarchicalActionSet editScripts; // it will be used to generate new patches.
|
||||
|
||||
public SimpleTree getBuggyCodeTree() {
|
||||
return buggyCodeTree;
|
||||
}
|
||||
|
||||
public HierarchicalActionSet getEditScripts() {
|
||||
return editScripts;
|
||||
}
|
||||
|
||||
public FixPattern(SimpleTree buggyCodeTree, HierarchicalActionSet editScripts) {
|
||||
super();
|
||||
this.buggyCodeTree = buggyCodeTree;
|
||||
this.editScripts = editScripts;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
package edu.lu.uni.serval.FixPattern.info;
|
||||
|
||||
//import java.io.File;
|
||||
//import java.io.IOException;
|
||||
//import java.util.ArrayList;
|
||||
//import java.util.HashMap;
|
||||
//import java.util.List;
|
||||
//import java.util.Map;
|
||||
|
||||
import org.eclipse.jdt.core.dom.ASTParser;
|
||||
//import org.slf4j.Logger;
|
||||
//import org.slf4j.LoggerFactory;
|
||||
|
||||
//import com.github.gumtreediff.actions.ActionGenerator;
|
||||
//import com.github.gumtreediff.actions.model.Action;
|
||||
//import com.github.gumtreediff.gen.jdt.JdtTreeGenerator;
|
||||
//import com.github.gumtreediff.gen.jdt.cd.CdJdtTreeGenerator;
|
||||
//import com.github.gumtreediff.matchers.Matcher;
|
||||
//import com.github.gumtreediff.matchers.Matchers;
|
||||
import com.github.gumtreediff.tree.ITree;
|
||||
import com.github.gumtreediff.tree.TreeContext;
|
||||
|
||||
import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap;
|
||||
import edu.lu.uni.serval.gen.jdt.exp.ExpJdtTreeGenerator;
|
||||
|
||||
@Deprecated
|
||||
public class GumTreeAnalysis {
|
||||
|
||||
|
||||
// private static void analyzeBugFixes(String gitRepoPath, String outputPath) {
|
||||
// log.info("Repo: " + gitRepoPath);
|
||||
//
|
||||
// GitTraveller gitTraveller = new GitTraveller(gitRepoPath, outputPath);
|
||||
// gitTraveller.travelGitRepo();
|
||||
//// Map<String, List<CommitFile>> commitFiles = gitTraveller.getCommitFiles();
|
||||
//
|
||||
// List<MyDiffEntry> allDiffEntries = gitTraveller.getAllDiffEntries();
|
||||
// String previousFilesPath = gitTraveller.getPreviousFilesPath();
|
||||
// String revisedFilesPath = gitTraveller.getRevisedFilesPath();
|
||||
// for (MyDiffEntry diff : allDiffEntries) {
|
||||
// String fileA = previousFilesPath + diff.getPrevFile();
|
||||
// String fileB = revisedFilesPath + diff.getRevFile();
|
||||
// List<String> gumTreeResults = GumTreeAnalysis.compareTwoFilesWithGumTree(fileA, fileB);
|
||||
// if (gumTreeResults.size() == 0) {
|
||||
// continue;
|
||||
// }
|
||||
// StringBuilder builder = new StringBuilder();
|
||||
// builder.append("Previous File: " + fileA + "\n");
|
||||
// builder.append("Revised File: " + fileB + "\n");
|
||||
// String diffs = "";
|
||||
// for (ModifiedDetails md : diff.getModifiedDetails()) {
|
||||
// diffs += md.getLineNumber() + "\n";
|
||||
// diffs += md.getFragment() + "\n";
|
||||
// }
|
||||
// builder.append("DiffEntry: " + diffs);
|
||||
// for (String gumTreeResult : gumTreeResults) {
|
||||
// builder.append(gumTreeResult.toString() + "\n");
|
||||
// }
|
||||
// FileHelper.outputToFile("OUTPUT/GumTreeResults/" + FileHelper.getRepositoryName(gitRepoPath) + "/" + diff.getRevFile().replace(".java", ".txt"), builder, false);
|
||||
// }
|
||||
//
|
||||
//// DiffEntryParser diffEntryParser = new DiffEntryParser(allDiffEntries);
|
||||
//// diffEntryParser.parseDiffEntries();
|
||||
////
|
||||
//// // <String, List>: String ==> revisedFileName.
|
||||
//// Map<String, List<ModifiedFragment>> parsedDiffEntries = diffEntryParser.getParsedDiffEntries();
|
||||
//// diffEntryParser = null;
|
||||
//// allDiffEntries = null;
|
||||
////
|
||||
//// gitTraveller = null;
|
||||
////
|
||||
//// for (Map.Entry<String, List<ModifiedFragment>> entry : parsedDiffEntries.entrySet()) {
|
||||
//// String revisedFileName = entry.getKey();
|
||||
//// String fileA = previousFilesPath + "prev_" + revisedFileName;
|
||||
//// String fileB = revisedFilesPath + revisedFileName;
|
||||
//// System.err.println("FileName" + fileA);
|
||||
//// List<String> gumTreeResults = GumTreeAnalysis.compareTwoFilesWithGumTree(fileA, fileB);
|
||||
//// StringBuilder builder = new StringBuilder();
|
||||
//// builder.append("Previous File: " + fileA + "\n");
|
||||
//// builder.append("Revised File: " + fileB + "\n");
|
||||
//// builder.append("DiffEntry: ");
|
||||
//// for (String gumTreeResult : gumTreeResults) {
|
||||
//// builder.append(gumTreeResult + "\n");
|
||||
//// }
|
||||
//// FileHelper.outputToFile("OUTPUT/GumTreeResults/" + FileHelper.getRepositoryName(gitRepoPath) + "/" + revisedFileName.replace(".java", ".txt"), builder, false);
|
||||
//// }
|
||||
//
|
||||
// }
|
||||
//
|
||||
// public static List<String> compareTwoFilesWithGumTree(String prevFile, String revFile) {
|
||||
// List<String> gumTreeResults = new ArrayList<String>();
|
||||
//
|
||||
// try {
|
||||
//// TreeContext tc1 = new ExpJdtTreeGenerator().generateFromFile(prevFile);
|
||||
//// TreeContext tc2 = new ExpJdtTreeGenerator().generateFromFile(revFile);
|
||||
//// TreeContext tc1 = new JdtTreeGenerator().generateFromFile(prevFile);
|
||||
//// TreeContext tc2 = new JdtTreeGenerator().generateFromFile(revFile);
|
||||
// TreeContext tc1 = new RowTokenJdtTreeGenerator().generateFromFile(prevFile);
|
||||
// TreeContext tc2 = new RowTokenJdtTreeGenerator().generateFromFile(revFile);
|
||||
//// TreeContext tc1 = new CdJdtTreeGenerator().generateFromFile(prevFile);
|
||||
//// TreeContext tc2 = new CdJdtTreeGenerator().generateFromFile(revFile);
|
||||
// ITree t1 = tc1.getRoot();
|
||||
// ITree t2 = tc2.getRoot();
|
||||
//
|
||||
// Matcher m = Matchers.getInstance().getMatcher(t1, t2);
|
||||
// m.match();
|
||||
//
|
||||
// ActionGenerator ag = new ActionGenerator(t1, t2, m.getMappings());
|
||||
// ag.generate();
|
||||
//
|
||||
// List<Action> actions = ag.getActions();
|
||||
// for(Action ac : actions){
|
||||
// String actionStr = parseAction(ac.toString());
|
||||
// gumTreeResults.add(actionStr);
|
||||
// }
|
||||
//
|
||||
// } catch (IOException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// return gumTreeResults;
|
||||
// }
|
||||
|
||||
// private static String parseAction(String actStr) {
|
||||
// // UPD 25@@!a from !a to isTrue(a) at 69
|
||||
// String[] actStrArrays = actStr.split("@@");
|
||||
// actStr = "";
|
||||
// int length = actStrArrays.length;
|
||||
// for (int i = 0; i < length - 1; i++) {
|
||||
// String actStrFrag = actStrArrays[i];
|
||||
// int index = actStrFrag.lastIndexOf(" ") + 1;
|
||||
// String nodeType = actStrFrag.substring(index);
|
||||
// String backup = nodeType;
|
||||
// try {
|
||||
// nodeType = ASTNodeMap.map.get(Integer.parseInt(nodeType));
|
||||
// } catch (NumberFormatException e) {
|
||||
// nodeType = backup;
|
||||
// log.info(actStr);
|
||||
// }
|
||||
// actStrFrag = actStrFrag.substring(0, index) + nodeType + "@@";
|
||||
// actStr += actStrFrag;
|
||||
// }
|
||||
// actStr += actStrArrays[length - 1];
|
||||
// return actStr;
|
||||
// }
|
||||
|
||||
private static String parseAction(String actStr) {
|
||||
// UPD 25@@!a from !a to isTrue(a) at 69
|
||||
String[] actStrArrays = actStr.split("@@");
|
||||
actStr = "";
|
||||
int length = actStrArrays.length;
|
||||
for (int i =0; i < length - 1; i ++) {
|
||||
String actStrFrag = actStrArrays[i];
|
||||
int index = actStrFrag.lastIndexOf(" ") + 1;
|
||||
String nodeType = actStrFrag.substring(index);
|
||||
nodeType = ASTNodeMap.map.get(Integer.parseInt(nodeType));
|
||||
actStrFrag = actStrFrag.substring(0, index) + nodeType + "@@";
|
||||
actStr += actStrFrag;
|
||||
}
|
||||
actStr += actStrArrays[length - 1];
|
||||
return actStr;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for tokens embedding of edit scripts.
|
||||
*
|
||||
* Input data: parsed results of patches with GumTree.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step1 {
|
||||
public static void main(String[] args) {
|
||||
String editScriptsFile = Configuration.EDITSCRIPT_SIZES_FILE;
|
||||
String patchesSourceCodeFile = Configuration.PATCH_SOURCECODE_FILE;
|
||||
String buggyTokensFile = Configuration.BUGGY_CODY_TOKENS_FILE;
|
||||
String editScriptSizesFile = Configuration.EDITSCRIPT_SIZES_FILE;
|
||||
FileHelper.deleteFile(editScriptsFile);
|
||||
FileHelper.deleteFile(patchesSourceCodeFile);
|
||||
FileHelper.deleteFile(buggyTokensFile);
|
||||
FileHelper.deleteFile(editScriptSizesFile);
|
||||
|
||||
String selectedEditScripts = Configuration.SELECTED_EDITSCRIPTES_FILE;
|
||||
String selectedPatches = Configuration.SELECTED_PATCHES_SOURE_CODE_FILE;
|
||||
String selectedBuggyTokens = Configuration.SELECTED_BUGGY_TOKEN_FILE;
|
||||
FileHelper.deleteFile(selectedEditScripts);
|
||||
FileHelper.deleteFile(selectedPatches);
|
||||
FileHelper.deleteFile(selectedBuggyTokens);
|
||||
|
||||
DataPreparation.prepareDataForTokenEmbedding();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.TokenEmbedder;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for evaluation.
|
||||
*
|
||||
* Embed tokens of source code vectors of training data and testing data.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step10 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean isSupervisedLearning = true;
|
||||
if (isSupervisedLearning) {// supervised learning
|
||||
String outputFileName = Configuration.EMBEDDED_ALL_TOKENS2;
|
||||
FileHelper.deleteFile(outputFileName);
|
||||
// Data pre-processing
|
||||
TokenEmbedder embedder2 = new TokenEmbedder();
|
||||
embedder2.embedTokensOfSourceCodeForSupervisedTesting();
|
||||
} else { // un-supervised learning
|
||||
String outputFileName = Configuration.EMBEDDED_ALL_TOKENS1;
|
||||
FileHelper.deleteFile(outputFileName);
|
||||
// Data pre-processing
|
||||
TokenEmbedder embedder2 = new TokenEmbedder();
|
||||
embedder2.embedTokensOfSourceCodeForUnsupervisedTesting();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for evaluation.
|
||||
*
|
||||
* Vectorize data for deep learning.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step11 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean isSupervisedLearning = true;
|
||||
if (isSupervisedLearning) {// supervised learning
|
||||
String trainingDataPath = Configuration.TRAINING_DATA;
|
||||
FileHelper.deleteFile(trainingDataPath);
|
||||
String testingDataPath = Configuration.TESTING_DATA;
|
||||
FileHelper.deleteDirectory(testingDataPath);
|
||||
|
||||
Map<Integer, Integer> commonClustersMappingLabel = DataPreparation.readCommonCLusters();
|
||||
DataPreparation.prepareDataForFeatureLearningOfEvaluation2(commonClustersMappingLabel);
|
||||
} else { // un-supervised learning
|
||||
String outputData = Configuration.VECTORIED_ALL_SOURCE_CODE1;
|
||||
FileHelper.deleteFile(outputData);
|
||||
// Before embedding tokens.
|
||||
// List<File> files = FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list");
|
||||
DataPreparation.prepareDataForFeatureLearningOfEvaluation1();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Evaluation: extract features of testing data and predict their labels.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step12 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean isSupervisedLearning = true;
|
||||
if (isSupervisedLearning) {// supervised learning
|
||||
List<File> testingDataFiles = FileHelper.getAllFilesInCurrentDiectory(Configuration.TESTING_DATA, ".csv");
|
||||
for (int i = 0, size = testingDataFiles.size(); i < size; i ++) {
|
||||
if (i == 0) {
|
||||
// TODO: we can test this model by our clustered resutls.
|
||||
FeatureLearner learner2 = new FeatureLearner();
|
||||
learner2.learnFeaturesOfSourceCode2(testingDataFiles.get(i));
|
||||
} else {
|
||||
FeatureLearner learner2 = new FeatureLearner();
|
||||
learner2.learnFeaturesOfSourceCode3(testingDataFiles.get(i));
|
||||
}
|
||||
}
|
||||
} else { // un-supervised learning
|
||||
|
||||
FeatureLearner learner2 = new FeatureLearner();
|
||||
learner2.learnFeaturesOfSourceCode();
|
||||
// Extracted Features: Configuration.EXTRACTED_FEATURES_TESTING;
|
||||
// Compute the similarity: cosin similarity
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Evaluation: extract features of testing data and predict their labels.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step13 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean isSupervisedLearning = true;
|
||||
if (isSupervisedLearning) {// supervised learning
|
||||
// label --> possibility --> 90, 80, 70, 60 others ignored, level one localization
|
||||
// label: clusterNum, re-compute similarity with each element. 90, 80, 70, 60.
|
||||
// similarity: patches --> fixing bug.
|
||||
List<File> testingDataFiles = FileHelper.getAllFilesInCurrentDiectory(Configuration.TESTING_DATA, ".csv");
|
||||
for (int i = 0, size = testingDataFiles.size(); i < size; i ++) {
|
||||
}
|
||||
} else { // un-supervised learning
|
||||
|
||||
// Extracted Features: Configuration.EXTRACTED_FEATURES_TESTING;
|
||||
// Compute the similarity: cosin similarity
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.TokenEmbedder;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Embed tokens of all selected edit scripts.
|
||||
*
|
||||
* Input data: all tokens of selected edit scripts.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step2 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String outputFileName = Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS;
|
||||
FileHelper.deleteFile(outputFileName);
|
||||
|
||||
TokenEmbedder embedder = new TokenEmbedder();
|
||||
embedder.embedTokensOfEditScripts();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for features learning of selected edit scripts.
|
||||
*
|
||||
* Vectorize edit scripts with embedded tokens of edit scripts.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step3 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String vectorizedEditScripts = Configuration.VECTORIED_EDIT_SCRIPTS;
|
||||
FileHelper.deleteFile(vectorizedEditScripts);
|
||||
|
||||
DataPreparation.prepareDataForFeatureLearning();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Learn features of all selected edit scripts with CNN algorithm.
|
||||
*
|
||||
* Input data: vectorized edit scripts.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step4 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String extractedFeatures = Configuration.EXTRACTED_FEATURES;
|
||||
FileHelper.deleteDirectory(extractedFeatures);
|
||||
|
||||
FeatureLearner learner = new FeatureLearner();
|
||||
learner.learnFeatures();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for clustering of edit scripts.
|
||||
*
|
||||
* Input data: learned features of edit scripts by CNN.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step5 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String clusterInput = Configuration.CLUSTER_INPUT;
|
||||
FileHelper.deleteFile(clusterInput);
|
||||
|
||||
DataPreparation.prepareDataForClustering();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.Cluster;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Clustering of edit scripts with extracted features of edit scripts.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step6 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String clusterOutput = Configuration.CLUSTER_OUTPUT;
|
||||
FileHelper.deleteFile(clusterOutput);
|
||||
|
||||
Cluster cluster = new Cluster();
|
||||
cluster.cluster();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.ClusterAnalyser;
|
||||
import edu.lu.uni.serval.FixPatternMining.CommonPatterns;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Analyze cluster results to obtain common fix patterns.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step7 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String clusteredPatches = Configuration.CLUSTERED_PATCHES_FILE;
|
||||
String clusteredBuggyTokens = Configuration.CLUSTERED_TOKENSS_FILE;
|
||||
FileHelper.deleteDirectory(clusteredPatches);
|
||||
FileHelper.deleteDirectory(clusteredBuggyTokens);
|
||||
|
||||
// analyze cluster results.
|
||||
ClusterAnalyser analyser = new ClusterAnalyser();
|
||||
analyser.readClusterResutls();
|
||||
analyser.clusterPatchSourceCode();
|
||||
analyser.clusterBuggyCodeTokens(); // the results will be used to compute similarity with target java code to localize bugs.
|
||||
|
||||
List<Integer> clusterResults = analyser.getClusterResults();
|
||||
|
||||
// Common patterns.
|
||||
CommonPatterns commonPatterns = new CommonPatterns(); // Metrics TODO
|
||||
// <Integer, Integer>: <ClusterNum, Label for supervised learning>
|
||||
Map<Integer, Integer> commonClustersMappingLabel = commonPatterns.identifyCommonPatterns(clusterResults);
|
||||
String clusterMappingLabel = "Label : ClusterNum\n";
|
||||
for (Map.Entry<Integer, Integer> entry : commonClustersMappingLabel.entrySet()) {
|
||||
clusterMappingLabel += entry.getValue() + " : " + entry.getKey() + "\n";
|
||||
}
|
||||
FileHelper.outputToFile(Configuration.CLUSTERNUMBER_LABEL_MAP, clusterMappingLabel, false);
|
||||
|
||||
int totalNumberOfTrainingData = commonPatterns.getTotalNumberofTrainingData();
|
||||
FileHelper.outputToFile(Configuration.NUMBER_OF_TRAINING_DATA, "" + totalNumberOfTrainingData, false);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.evaluation.ProjectScanner;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare testing data for evaluation.
|
||||
*
|
||||
* Parse java projects to get the token vectors of all statements.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step8 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String outputLocalizeFile = Configuration.TEST_POSITION_FILE;
|
||||
String outputTokensFile = Configuration.TEST_DATA_FILE;
|
||||
FileHelper.deleteDirectory(outputLocalizeFile);
|
||||
FileHelper.deleteDirectory(outputTokensFile);
|
||||
|
||||
int limitationOfTestingInstances = Integer.parseInt(FileHelper.readFile(Configuration.NUMBER_OF_TRAINING_DATA).trim()) / 10;
|
||||
|
||||
File testProjects = new File(Configuration.TEST_INPUT);
|
||||
File[] projects = testProjects.listFiles();
|
||||
ProjectScanner scanner = new ProjectScanner();
|
||||
scanner.scanJavaProject(projects, outputLocalizeFile, outputTokensFile, limitationOfTestingInstances);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.App;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
/**
|
||||
* Prepare data for evaluation.
|
||||
*
|
||||
* Merge token vectors of source code of training data and testing data.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Step9 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean isSupervisedLearning = true;
|
||||
if (isSupervisedLearning) {// supervised learning
|
||||
Map<Integer, Integer> commonClustersMappingLabel = DataPreparation.readCommonCLusters();
|
||||
|
||||
String outputFile = Configuration.EMBEDDING_DATA_TOKENS2;
|
||||
FileHelper.deleteFile(outputFile);
|
||||
// Data merge
|
||||
DataPreparation.prepareTokensForEvaluation2(commonClustersMappingLabel);
|
||||
} else { // un-supervised learning
|
||||
// Data merge
|
||||
DataPreparation.prepareTokensForEvaluation1();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;
|
||||
|
||||
import edu.lu.uni.serval.Clusters.XMeansCluster;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import weka.core.EuclideanDistance;
|
||||
|
||||
/**
|
||||
* Cluster features with X-means clustering algorithm.
|
||||
*
|
||||
* @author kui.liu
|
||||
*
|
||||
*/
|
||||
public class Cluster {
|
||||
|
||||
public void cluster() {
|
||||
String arffFile = Configuration.CLUSTER_INPUT;
|
||||
String clusterResults = Configuration.CLUSTER_OUTPUT;
|
||||
|
||||
XMeansCluster cluster = new XMeansCluster();
|
||||
try {
|
||||
/*
|
||||
* The below 5 parameters have default values.
|
||||
*/
|
||||
cluster.setDistanceF(new EuclideanDistance());
|
||||
cluster.setUseKDTree(true);
|
||||
cluster.setMaxNumberOfIterations(1000);
|
||||
// The below 2 parameters are recommended to be the same.
|
||||
cluster.setMaxKMeans(200);
|
||||
cluster.setMaxKMeansForChildren(200);
|
||||
|
||||
/*
|
||||
* The values of the below 3 parameters should be set by developers.
|
||||
*/
|
||||
cluster.setSeed(200);
|
||||
cluster.setMaxNumClusters(100);
|
||||
cluster.setMinNumClusters(1);
|
||||
|
||||
// X-means clustering is beginning.
|
||||
cluster.cluster(arffFile, clusterResults);
|
||||
// X-means clustering is finished.
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,146 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
|
||||
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
|
||||
import edu.lu.uni.serval.config.Configuration;
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
public class ClusterAnalyser {
|
||||
|
||||
private List<Integer> clusterResults; // each element is a cluster number.
|
||||
|
||||
public void readClusterResutls() {
|
||||
clusterResults = DataPreparation.readClusterResults();
|
||||
}
|
||||
|
||||
public void clusterBuggyCodeTokens() {
|
||||
String selectedTokens = Configuration.SELECTED_BUGGY_TOKEN_FILE;
|
||||
String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE;
|
||||
|
||||
FileInputStream fis = null;
|
||||
Scanner scanner = null;
|
||||
|
||||
Map<Integer, StringBuilder> builderMap = new HashMap<>();
|
||||
Map<Integer, Integer> countersMap = new HashMap<>();
|
||||
try {
|
||||
fis = new FileInputStream(selectedTokens);
|
||||
scanner = new Scanner(fis);
|
||||
int index = 0;
|
||||
|
||||
while (scanner.hasNextLine()) {
|
||||
String line = scanner.nextLine();
|
||||
int clusterNum = clusterResults.get(index);
|
||||
StringBuilder builder = getBuilder(builderMap, clusterNum);
|
||||
builder.append(line).append("\n");
|
||||
int counter = getCounter(countersMap, clusterNum);
|
||||
if (counter % 1000 == 0) {
|
||||
FileHelper.outputToFile(clusteredTokens + "Tokens_" + clusterNum + ".list", builder, true);
|
||||
builder.setLength(0);
|
||||
builderMap.put(clusterNum, builder);
|
||||
}
|
||||
index ++;
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
scanner.close();
|
||||
fis.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, StringBuilder> entry : builderMap.entrySet()) {
|
||||
int clusterNum = entry.getKey();
|
||||
StringBuilder builder = entry.getValue();
|
||||
FileHelper.outputToFile(clusteredTokens + "Tokens_" + clusterNum + ".list", builder, true);
|
||||
builder.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
public void clusterPatchSourceCode() {
|
||||
String selectedPatches = Configuration.SELECTED_PATCHES_SOURE_CODE_FILE;
|
||||
String clusteredPatches = Configuration.CLUSTERED_PATCHES_FILE;
|
||||
|
||||
FileInputStream fis = null;
|
||||
Scanner scanner = null;
|
||||
|
||||
Map<Integer, StringBuilder> builderMap = new HashMap<>();
|
||||
Map<Integer, Integer> countersMap = new HashMap<>();
|
||||
try {
|
||||
fis = new FileInputStream(selectedPatches);
|
||||
scanner = new Scanner(fis);
|
||||
String singlePatch = "";
|
||||
int index = -1;
|
||||
|
||||
while (scanner.hasNextLine()) {
|
||||
String line = scanner.nextLine();
|
||||
if ("".equals(line)) continue;
|
||||
if ("PATCH###".equals(line)) {
|
||||
if (!"".equals(singlePatch)) {
|
||||
int clusterNum = clusterResults.get(index);
|
||||
StringBuilder builder = getBuilder(builderMap, clusterNum);
|
||||
builder.append(singlePatch);
|
||||
int counter = getCounter(countersMap, clusterNum);
|
||||
if (counter % 1000 == 0) {
|
||||
FileHelper.outputToFile(clusteredPatches + "PatchesCluster_" + clusterNum + ".list", builder, true);
|
||||
builder.setLength(0);
|
||||
builderMap.put(clusterNum, builder);
|
||||
}
|
||||
}
|
||||
singlePatch = "";
|
||||
index ++;
|
||||
}
|
||||
singlePatch += line + "\n";
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
scanner.close();
|
||||
fis.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, StringBuilder> entry : builderMap.entrySet()) {
|
||||
int clusterNum = entry.getKey();
|
||||
StringBuilder builder = entry.getValue();
|
||||
FileHelper.outputToFile(clusteredPatches + "PatchesCluster_" + clusterNum + ".list", builder, true);
|
||||
builder.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
private int getCounter(Map<Integer, Integer> countersMap, int clusterNum) {
|
||||
int counter = 1;
|
||||
if (countersMap.containsKey(clusterNum)) {
|
||||
counter += countersMap.get(clusterNum);
|
||||
}
|
||||
countersMap.put(clusterNum, counter);
|
||||
return counter;
|
||||
}
|
||||
|
||||
private StringBuilder getBuilder(Map<Integer, StringBuilder> builderMap, int clusterNum) {
|
||||
if (builderMap.containsKey(clusterNum)) {
|
||||
return builderMap.get(clusterNum);
|
||||
} else {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builderMap.put(clusterNum, builder);
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
public List<Integer> getClusterResults() {
|
||||
return clusterResults;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import edu.lu.uni.serval.utils.FileHelper;
|
||||
|
||||
public class ClusterResults {
|
||||
|
||||
/**
|
||||
* Read the cluster results from the file of cluster results.
|
||||
*
|
||||
* @param clusterResultsFile, the file of cluster results.
|
||||
* @return List<Integer>, each integer is a cluster number.
|
||||
* @throws IOException
|
||||
*/
|
||||
public static List<Integer> readClusterResults(File clusterResultsFile) throws IOException {
|
||||
List<Integer> clusterResultsList = new ArrayList<>();
|
||||
String clusterResults = FileHelper.readFile(clusterResultsFile);
|
||||
BufferedReader reader = new BufferedReader(new StringReader(clusterResults));
|
||||
|
||||
String line = null;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
int cluster = Integer.parseInt(line);
|
||||
clusterResultsList.add(cluster);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
return clusterResultsList;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.utils.MapSorter;

/**
 * Identifies the "common" fix-pattern clusters — clusters large enough to be
 * used as labelled classes for supervised learning.
 */
public class CommonPatterns {

    private static final int LEAST_NUMBER = 100;
    private int totalNumberofTrainingData = 0;

    /**
     * Selects common clusters and assigns each one a dense label for
     * supervised learning.
     *
     * @param clusterResults one cluster number per sample, in sample order
     * @return map from cluster number to supervised-learning label (0-based,
     *         in selection order)
     */
    public Map<Integer, Integer> identifyCommonPatterns(List<Integer> clusterResults) {
        Map<Integer, List<Integer>> clusterMap = DataPreparation.readClusterResult(clusterResults);
        // TODO how to select the common patterns, number or ratio?
        List<Integer> commonClusters = getCommonClustersByNumber(clusterMap);

        Map<Integer, Integer> clusterNumToLabel = new HashMap<>(); // <ClusterNum, Label>
        int label = 0;
        for (Integer clusterNum : commonClusters) {
            clusterNumToLabel.put(clusterNum, label++);
        }
        return clusterNumToLabel;
    }

    /**
     * Keeps every cluster with at least {@link #LEAST_NUMBER} members, and
     * accumulates the member counts into {@link #totalNumberofTrainingData}.
     */
    private List<Integer> getCommonClustersByNumber(Map<Integer, List<Integer>> clusterMap) {
        List<Integer> commonClusters = new ArrayList<>();
        for (Map.Entry<Integer, List<Integer>> entry : clusterMap.entrySet()) {
            int memberCount = entry.getValue().size();
            if (memberCount >= LEAST_NUMBER) { // TODO how to set this threshold?
                commonClusters.add(entry.getKey());
                totalNumberofTrainingData += memberCount;
            }
        }
        return commonClusters;
    }

    /**
     * Alternative selection strategy (currently unused): takes clusters in
     * descending size-ratio order until they cover 80% of all samples.
     */
    private List<Integer> getCommonClustersByRatio(Map<Integer, List<Integer>> clusterMap, List<Integer> clusterResults) {
        List<Integer> commonClusters = new ArrayList<>();

        double total = (double) clusterResults.size();
        Map<Integer, Double> ratios = new HashMap<>();
        for (Map.Entry<Integer, List<Integer>> entry : clusterMap.entrySet()) {
            ratios.put(entry.getKey(), (double) entry.getValue().size() / total);
        }

        MapSorter<Integer, Double> sorter = new MapSorter<Integer, Double>();
        ratios = sorter.sortByValueDescending(ratios);

        double coveredRatio = 0.0;
        for (Map.Entry<Integer, Double> entry : ratios.entrySet()) {
            coveredRatio += entry.getValue();
            commonClusters.add(entry.getKey());
            totalNumberofTrainingData += clusterMap.get(entry.getKey()).size();
            if (coveredRatio >= 0.8) { // TODO: how to set the value of this threshold?
                break;
            }
        }
        return commonClusters;
    }

    /** Number of samples belonging to the selected common clusters. */
    public int getTotalNumberofTrainingData() {
        return totalNumberofTrainingData;
    }
}
|
||||
@@ -0,0 +1,560 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.DataPrepare;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

import edu.lu.uni.serval.FixPatternMining.DataPrepare.MaxSizeSelector.MaxSizeType;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.data.DataPreparer;
import edu.lu.uni.serval.utils.FileHelper;

/**
 * Prepare data for fix patterns mining and evaluation.
 *
 * @author kui.liu
 */
public class DataPreparation {

    // Output is flushed in batches to avoid unbounded StringBuilder growth.
    private static final int LINE_FLUSH_THRESHOLD = 100000;
    private static final int VECTOR_FLUSH_THRESHOLD = 10000;

    /**
     * Prepare data for token embedding in the process of fix patterns mining:
     * merges per-chunk GumTree outputs into single files, then filters out
     * samples whose edit-script token-vector size is an outlier.
     */
    public static void prepareDataForTokenEmbedding() {
        // Source directories of the per-chunk results.
        String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE_PATH;
        String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH;
        String buggyTokensFilePath = Configuration.BUGGYTREE_FILE_PATH;
        String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH;

        // Merged destination files.
        // FIXME(review): this looks like a copy-paste slip -- the merged edit
        // scripts go to the same file as the edit-script sizes. Confirm whether
        // Configuration declares a dedicated edit-scripts file constant.
        String editScriptsFile = Configuration.EDITSCRIPT_SIZES_FILE;
        String patchesSourceCodeFile = Configuration.PATCH_SOURCECODE_FILE;
        String buggyTokensFile = Configuration.BUGGY_CODY_TOKENS_FILE;
        String editScriptSizesFile = Configuration.EDITSCRIPT_SIZES_FILE;

        File file = new File(editScriptsFilePath);
        File[] subFiles = file.listFiles();
        if (subFiles == null) {
            return; // directory missing or not a directory
        }

        // Merge results of parsed patches. Each chunk file name ends with
        // "_<id>"; sibling files in the other directories share that suffix.
        for (File subFile : subFiles) {
            String fileName = subFile.getName(); // edit-scripts chunk file
            String id = fileName.substring(fileName.lastIndexOf("_")); // includes the "_"
            FileHelper.outputToFile(editScriptsFile, FileHelper.readFile(subFile), true);
            String patchesSourceCode = patchesSourceCodeFilePath + "patches" + id;
            FileHelper.outputToFile(patchesSourceCodeFile, FileHelper.readFile(patchesSourceCode), true);
            // BUGFIX: source and destination were swapped in the original code
            // (it read "<mergedFile>sizes<id>" and wrote into the directory path).
            String sizes = editScriptSizesFilePath + "sizes" + id;
            FileHelper.outputToFile(editScriptSizesFile, FileHelper.readFile(sizes), true);
            String buggyTokens = buggyTokensFilePath + "tokens" + id;
            FileHelper.outputToFile(buggyTokensFile, FileHelper.readFile(buggyTokens), true);
        }

        // Select data by the size of edit script vectors.
        try {
            List<Integer> sizesList = MaxSizeSelector.readSizes(editScriptSizesFile);
            int maxSize = MaxSizeSelector.selectMaxSize(MaxSizeType.ThirdQuartile, sizesList);
            List<Integer> outlierIndexes = new ArrayList<>();
            for (int i = 0, size = sizesList.size(); i < size; i++) {
                if (sizesList.get(i) > maxSize) {
                    outlierIndexes.add(i);
                }
            }
            FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS, "" + maxSize, false);

            selectData(editScriptsFile, outlierIndexes, Configuration.SELECTED_EDITSCRIPTES_FILE);
            selectData(patchesSourceCodeFile, outlierIndexes, Configuration.PATCH_SIGNAL, Configuration.SELECTED_PATCHES_SOURE_CODE_FILE);
            int maxTokenVectorSize = selectDataOfSourceCodeTokens(buggyTokensFile, outlierIndexes, Configuration.SELECTED_BUGGY_TOKEN_FILE);
            FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE, "" + maxTokenVectorSize, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies {@code inputFile} to {@code outputFile} line by line, dropping the
     * lines whose 0-based index appears in {@code outlierIndexList}.
     */
    private static void selectData(String inputFile, List<Integer> outlierIndexList, String outputFile) {
        // Work on a copy: this method consumes the index list.
        List<Integer> outlierIndexes = new ArrayList<>(outlierIndexList);
        try (FileInputStream fis = new FileInputStream(inputFile);
                Scanner scanner = new Scanner(fis)) {
            int index = 0;
            int counter = 0;
            StringBuilder builder = new StringBuilder();
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (outlierIndexes.contains(index)) {
                    // Remove by value (not position) so later contains() checks shrink.
                    outlierIndexes.remove(Integer.valueOf(index));
                } else {
                    builder.append(line).append("\n");
                    if (++counter % LINE_FLUSH_THRESHOLD == 0) {
                        FileHelper.outputToFile(outputFile, builder, true);
                        builder.setLength(0);
                    }
                }
                index++;
            }
            FileHelper.outputToFile(outputFile, builder, true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies multi-line entities (each started by a {@code startingSignal}
     * line) from {@code inputFile} to {@code outputFile}, dropping entities
     * whose 0-based index appears in {@code outlierIndexes}.
     */
    private static void selectData(String inputFile, List<Integer> outlierIndexes, String startingSignal, String outputFile) {
        // BUGFIX: the original mutated the caller's list, emptying the shared
        // outlier list before the next selection step ran.
        List<Integer> remaining = new ArrayList<>(outlierIndexes);
        try (FileInputStream fis = new FileInputStream(inputFile);
                Scanner scanner = new Scanner(fis)) {
            int index = -1; // becomes 0 at the first startingSignal
            int counter = 0;
            StringBuilder builder = new StringBuilder();
            StringBuilder singleEntity = new StringBuilder();
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.equals(startingSignal)) {
                    if (singleEntity.length() > 0) {
                        counter = appendEntity(singleEntity.toString(), index, remaining, builder, counter, outputFile);
                        singleEntity.setLength(0);
                    }
                    index++;
                }
                singleEntity.append(line).append("\n");
            }
            // BUGFIX: the original silently dropped the last entity of the file
            // because entities were only flushed when the NEXT signal appeared.
            if (singleEntity.length() > 0) {
                counter = appendEntity(singleEntity.toString(), index, remaining, builder, counter, outputFile);
            }
            FileHelper.outputToFile(outputFile, builder, true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends one entity to the batch buffer unless its index is an outlier;
     * flushes the buffer in batches. Returns the updated kept-entity count.
     */
    private static int appendEntity(String entity, int index, List<Integer> outlierIndexes,
            StringBuilder builder, int counter, String outputFile) {
        if (outlierIndexes.contains(index)) {
            outlierIndexes.remove(Integer.valueOf(index));
        } else {
            builder.append(entity).append("\n");
            if (++counter % LINE_FLUSH_THRESHOLD == 0) {
                FileHelper.outputToFile(outputFile, builder, true);
                builder.setLength(0);
            }
        }
        return counter;
    }

    /**
     * Same line-level filtering as {@link #selectData(String, List, String)},
     * and additionally returns the maximum number of space-separated tokens
     * found on any kept line.
     */
    private static int selectDataOfSourceCodeTokens(String inputFile, List<Integer> outlierIndexList, String outputFile) {
        List<Integer> outlierIndexes = new ArrayList<>(outlierIndexList);
        int maxTokens = 0;
        try (FileInputStream fis = new FileInputStream(inputFile);
                Scanner scanner = new Scanner(fis)) {
            int index = 0;
            int counter = 0;
            StringBuilder builder = new StringBuilder();
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (outlierIndexes.contains(index)) {
                    outlierIndexes.remove(Integer.valueOf(index));
                } else {
                    builder.append(line).append("\n");
                    if (++counter % LINE_FLUSH_THRESHOLD == 0) {
                        FileHelper.outputToFile(outputFile, builder, true);
                        builder.setLength(0);
                    }
                    int tokenCount = line.split(" ").length;
                    if (tokenCount > maxTokens) {
                        maxTokens = tokenCount;
                    }
                }
                index++;
            }
            FileHelper.outputToFile(outputFile, builder, true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return maxTokens;
    }

    /**
     * Prepare data for feature learning: converts every selected edit script
     * into a fixed-width line of embedded-token vectors, padded with zero
     * vectors up to the recorded maximum size.
     */
    public static void prepareDataForFeatureLearning() {
        String zeroVector = buildZeroVector(Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1);
        int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim());

        Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS);

        dataPrepare(Configuration.SELECTED_EDITSCRIPTES_FILE, maxSize,
                Configuration.VECTORIED_EDIT_SCRIPTS, embeddedTokens, zeroVector);
    }

    /** Builds "0, 0, ..., 0" with {@code dimension} entries (one per embedding component). */
    private static String buildZeroVector(int dimension) {
        StringBuilder zero = new StringBuilder();
        for (int i = 0; i < dimension - 1; i++) {
            zero.append("0, ");
        }
        zero.append("0");
        return zero.toString();
    }

    /**
     * Reads a Word2Vec output file into a map from token to its comma-separated
     * vector string. Each input line is "&lt;token&gt; &lt;v1&gt; &lt;v2&gt; ...".
     */
    private static Map<String, String> readEmbeddedTokens(String embeddedTokensFile) {
        Map<String, String> embeddedTokens = new HashMap<>();
        // try-with-resources: the original NPE'd in its finally block when the
        // FileInputStream constructor threw (scanner was still null).
        try (FileInputStream fis = new FileInputStream(new File(embeddedTokensFile));
                Scanner scanner = new Scanner(fis)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                int firstBlankIndex = line.indexOf(" ");
                String token = line.substring(0, firstBlankIndex);
                String value = line.substring(firstBlankIndex + 1).replaceAll(" ", ", ");
                embeddedTokens.put(token, value);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return embeddedTokens;
    }

    /**
     * Converts every token line of {@code inputFile} into one comma-separated
     * vector line in {@code outputFile}, using the embedded-token lookup table.
     */
    private static void dataPrepare(String inputFile, int maxSize, String outputFile,
            Map<String, String> embeddedTokens, String zeroVector) {
        StringBuilder builder = new StringBuilder();
        int counter = 0;
        try (FileInputStream fis = new FileInputStream(new File(inputFile));
                Scanner scanner = new Scanner(fis)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                builder.append(convertToVector(embeddedTokens, line, maxSize, zeroVector));
                if (++counter % VECTOR_FLUSH_THRESHOLD == 0) {
                    FileHelper.outputToFile(outputFile, builder, true);
                    builder.setLength(0);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileHelper.outputToFile(outputFile, builder, true);
    }

    /**
     * Maps each token of {@code line} to its embedding vector and pads with
     * zero vectors so the output line always holds {@code maxSize} embeddings.
     * Lines longer than maxSize are assumed to have been filtered out earlier.
     */
    private static StringBuilder convertToVector(Map<String, String> embeddedTokens, String line,
            int maxSize, String zeroVector) {
        String[] tokens = line.split(" ");
        StringBuilder vectorStr = new StringBuilder();
        int length = tokens.length;
        // NOTE(review): a token missing from the lookup table is emitted as the
        // literal "null" -- confirm every token was embedded (minWordFrequency = 1).
        if (length == maxSize) {
            for (int i = 0; i < length - 1; i++) {
                vectorStr.append(embeddedTokens.get(tokens[i])).append(", ");
            }
            vectorStr.append(embeddedTokens.get(tokens[length - 1])).append("\n");
        } else {
            for (int i = 0; i < length; i++) {
                vectorStr.append(embeddedTokens.get(tokens[i])).append(", ");
            }
            for (int i = length; i < maxSize - 1; i++) {
                vectorStr.append(zeroVector).append(", ");
            }
            vectorStr.append(zeroVector).append("\n");
        }
        return vectorStr;
    }

    /**
     * Prepare data for clustering: converts the extracted feature CSV into the
     * clustering tool's input format.
     */
    public static void prepareDataForClustering() {
        String featureFile = Configuration.EXTRACTED_FEATURES + "vectorizedEditScripts.csv";
        DataPreparer.prepareData(featureFile, Configuration.CLUSTER_INPUT);
    }

    /**
     * Read cluster results: one integer cluster number per line, in sample order.
     */
    public static List<Integer> readClusterResults() {
        List<Integer> clusterResults = new ArrayList<>();
        String results = FileHelper.readFile(Configuration.CLUSTER_OUTPUT);
        try (BufferedReader reader = new BufferedReader(new StringReader(results))) {
            String line;
            while ((line = reader.readLine()) != null) {
                clusterResults.add(Integer.parseInt(line));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return clusterResults;
    }

    /**
     * Groups sample indexes by cluster number. Sample indexes are stored
     * 1-based: element i of {@code clusterResults} is recorded as i + 1.
     */
    public static Map<Integer, List<Integer>> readClusterResult(List<Integer> clusterResults) {
        Map<Integer, List<Integer>> clusters = new HashMap<>();
        for (int i = 0, size = clusterResults.size(); i < size; i++) {
            clusters.computeIfAbsent(clusterResults.get(i), key -> new ArrayList<>()).add(i + 1);
        }
        return clusters;
    }

    /**
     * Data for un-supervised learning: concatenates the selected buggy tokens
     * with every testing-data token list into one embedding-input file.
     */
    public static void prepareTokensForEvaluation1() {
        String outputFile = Configuration.EMBEDDING_DATA_TOKENS1;
        FileHelper.outputToFile(outputFile, FileHelper.readFile(Configuration.SELECTED_BUGGY_TOKEN_FILE), false);
        for (File file : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
            FileHelper.outputToFile(outputFile, FileHelper.readFile(file), true);
        }
    }

    /**
     * Vectorizes all source-code tokens (training + testing) for the
     * unsupervised evaluation.
     */
    public static void prepareDataForFeatureLearningOfEvaluation1() {
        String zeroVector = buildZeroVector(Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2);
        int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());

        Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_ALL_TOKENS1);

        // NOTE: the original listed the testing-data files here in an empty
        // loop that did nothing; the dead code was removed.
        String allTokensOfSourceCode = Configuration.EMBEDDING_DATA_TOKENS1; // TODO testing data should be separated.
        dataPrepare(allTokensOfSourceCode, maxSize, Configuration.VECTORIED_ALL_SOURCE_CODE1, embeddedTokens, zeroVector);
    }

    /**
     * Data for supervised learning: concatenates the token lists of all common
     * clusters plus all testing data into one embedding-input file.
     */
    public static void prepareTokensForEvaluation2(Map<Integer, Integer> commonClustersMappingLabel) {
        String outputFile = Configuration.EMBEDDING_DATA_TOKENS2;
        for (File file : FileHelper.getAllFilesInCurrentDiectory(Configuration.CLUSTERED_TOKENSS_FILE, ".list")) {
            int clusterNum = parseClusterNumber(file.getName());
            if (commonClustersMappingLabel.containsKey(clusterNum)) {
                FileHelper.outputToFile(outputFile, FileHelper.readFile(file), true);
            }
        }
        for (File file : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
            FileHelper.outputToFile(outputFile, FileHelper.readFile(file), true);
        }
    }

    /** Extracts the trailing cluster number from a "&lt;prefix&gt;_&lt;clusterNum&gt;.list" file name. */
    private static int parseClusterNumber(String fileName) {
        return Integer.parseInt(fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")));
    }

    /**
     * Vectorizes training data (labelled with cluster numbers) and testing
     * data (labelled 0) for the supervised evaluation.
     */
    public static void prepareDataForFeatureLearningOfEvaluation2(Map<Integer, Integer> commonClustersMappingLabel) {
        String zeroVector = buildZeroVector(Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2);

        Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_ALL_TOKENS2);

        int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
        // Training data: one labelled CSV accumulating every common cluster.
        for (File file : FileHelper.getAllFilesInCurrentDiectory(Configuration.CLUSTERED_TOKENSS_FILE, ".list")) {
            int clusterNum = parseClusterNumber(file.getName());
            if (commonClustersMappingLabel.containsKey(clusterNum)) {
                dataPrepare(file.getPath(), maxSize, Configuration.TRAINING_DATA, embeddedTokens, zeroVector, clusterNum);
            }
        }
        // Testing data: one CSV per input file, labelled 0 (unknown cluster).
        String testingDataPath = Configuration.TESTING_DATA;
        for (File file : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
            // BUGFIX: the original called fileName.replace(...) and discarded
            // the result, so output files kept the ".list" extension.
            String fileName = file.getName().replace(".list", ".csv");
            dataPrepare(file.getPath(), maxSize, testingDataPath + fileName, embeddedTokens, zeroVector, 0);
        }
    }

    /**
     * Same as {@link #dataPrepare(String, int, String, Map, String)} but
     * appends {@code clusterNum} as the label column of every output line.
     */
    private static void dataPrepare(String inputFile, int maxSize, String outputFile,
            Map<String, String> embeddedTokens, String zeroVector, int clusterNum) {
        StringBuilder builder = new StringBuilder();
        int counter = 0;
        try (FileInputStream fis = new FileInputStream(inputFile);
                Scanner scanner = new Scanner(fis)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                builder.append(convertToVector(embeddedTokens, line, maxSize, zeroVector, clusterNum));
                if (++counter % VECTOR_FLUSH_THRESHOLD == 0) {
                    FileHelper.outputToFile(outputFile, builder, true);
                    builder.setLength(0);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileHelper.outputToFile(outputFile, builder, true);
    }

    /**
     * Labelled variant of convertToVector: every output line ends with the
     * cluster number. The two branches of the original were identical when
     * length == maxSize (the padding loop simply did not run), so they are
     * merged here; the emitted text is unchanged.
     */
    private static StringBuilder convertToVector(Map<String, String> embeddedTokens, String line,
            int maxSize, String zeroVector, int clusterNum) {
        String[] tokens = line.split(" ");
        StringBuilder vectorStr = new StringBuilder();
        int length = tokens.length;
        for (int i = 0; i < length; i++) {
            vectorStr.append(embeddedTokens.get(tokens[i])).append(", ");
        }
        for (int i = length; i < maxSize; i++) {
            vectorStr.append(zeroVector).append(", ");
        }
        vectorStr.append(clusterNum).append("\n");
        return vectorStr;
    }

    /**
     * Reads the "label : clusterNum" mapping file back into a map from cluster
     * number to supervised-learning label. The first line is treated as a
     * header and skipped (as in the original).
     */
    public static Map<Integer, Integer> readCommonCLusters() {
        Map<Integer, Integer> commonClustersMappingLabel = new HashMap<>();
        String commonClusters = FileHelper.readFile(Configuration.CLUSTERNUMBER_LABEL_MAP);
        try (BufferedReader reader = new BufferedReader(new StringReader(commonClusters))) {
            String line = reader.readLine(); // skip the header line
            while ((line = reader.readLine()) != null) {
                String[] strArray = line.split(" : ");
                int clusterNum = Integer.parseInt(strArray[1]);
                int label = Integer.parseInt(strArray[0]);
                commonClustersMappingLabel.put(clusterNum, label);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return commonClustersMappingLabel;
    }
}
|
||||
@@ -0,0 +1,66 @@
|
||||
package edu.lu.uni.serval.FixPatternMining.DataPrepare;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import edu.lu.uni.serval.utils.FileHelper;
import edu.lu.uni.serval.utils.ListSorter;

/**
 * Selects the maximum token-vector size used to filter outlier samples,
 * based on the distribution of observed sizes.
 */
public class MaxSizeSelector {

    /** Strategy for picking the cut-off size from the distribution. */
    public enum MaxSizeType {
        UpperWhisker, ThirdQuartile
    }

    /**
     * Reads one integer size per line from the given file.
     *
     * @param sizeFilePath path of the sizes file
     * @return the sizes, in file order
     * @throws IOException if the content cannot be read
     */
    public static List<Integer> readSizes(String sizeFilePath) throws IOException {
        List<Integer> sizes = new ArrayList<>();
        String sizesStr = FileHelper.readFile(sizeFilePath);
        // try-with-resources: the original never closed the reader.
        try (BufferedReader br = new BufferedReader(new StringReader(sizesStr))) {
            String line;
            while ((line = br.readLine()) != null) {
                sizes.add(Integer.parseInt(line.trim()));
            }
        }
        return sizes;
    }

    /**
     * Picks the cut-off size according to the requested strategy.
     *
     * @param maxSizeType       selection strategy
     * @param sizesDistribution observed sizes (left unmodified)
     * @return the selected cut-off size
     */
    public static int selectMaxSize(MaxSizeType maxSizeType, List<Integer> sizesDistribution) {
        int maxSize = 0;
        switch (maxSizeType) {
        case UpperWhisker:
            maxSize = upperWhisker(sizesDistribution);
            break;
        case ThirdQuartile:
            maxSize = thirdQuartile(sizesDistribution);
            break;
        }
        return maxSize;
    }

    /**
     * Box-plot upper whisker: Q3 + 1.5 * IQR, with the quartiles taken at the
     * 25% and 75% positions of the sorted list.
     */
    private static int upperWhisker(List<Integer> sizesDistribution) {
        List<Integer> sorted = sortedCopy(sizesDistribution);
        int firstQuartile = sorted.get(sorted.size() * 25 / 100);
        int thirdQuartile = sorted.get(sorted.size() * 75 / 100);
        return thirdQuartile + (int) (1.5 * (thirdQuartile - firstQuartile));
    }

    /** Third quartile: the value at the 75% position of the sorted list. */
    private static int thirdQuartile(List<Integer> sizesDistribution) {
        List<Integer> sorted = sortedCopy(sizesDistribution);
        return sorted.get(sorted.size() * 75 / 100);
    }

    /**
     * Ascending sorted copy; the caller's list is left untouched.
     * Collections.sort replaces the project ListSorter helper, removing the
     * duplicated copy-and-sort dance from both quartile methods.
     */
    private static List<Integer> sortedCopy(List<Integer> sizesDistribution) {
        List<Integer> sorted = new ArrayList<>(sizesDistribution);
        Collections.sort(sorted);
        return sorted;
    }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.deeplearner.CNNFeatureExtractor2;
import edu.lu.uni.serval.deeplearner.CNNSupervisedLearning;
import edu.lu.uni.serval.utils.FileHelper;

/**
 * Drives the CNN-based feature learning used for fix-pattern mining and its
 * (un)supervised evaluations.
 */
public class FeatureLearner {

    // Hyper-parameters shared by every learner configuration below.
    private static final int NUMBER_OF_EPOCHS = 20;
    private static final int SEED = 123;
    private static final int NUM_OUT_OF_LAYER_1 = 20;
    private static final int NUM_OUT_OF_LAYER_2 = 50;
    private static final int BATCH_SIZE = 1000;
    private static final int FEATURE_VECTOR_SIZE = 200;

    /**
     * Learn features of edit scripts for fix patterns mining (unsupervised).
     */
    public void learnFeatures() {
        String editScriptsVectorFile = Configuration.VECTORIED_EDIT_SCRIPTS; // input
        int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1;

        try {
            CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(new File(editScriptsVectorFile),
                    sizeOfVector, sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.EXTRACTED_FEATURES);

            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            // FileNotFoundException is a subclass of IOException, so the three
            // original catch blocks collapse into this multi-catch.
            e.printStackTrace();
        }
    }

    /**
     * Learn features of buggy source-code tokens (unsupervised evaluation).
     */
    public void learnFeaturesOfSourceCode() {
        // .trim() added for consistency with the other readers of this file --
        // a trailing newline would make parseInt throw.
        int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2;

        try {
            CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(new File(Configuration.VECTORIED_ALL_SOURCE_CODE1),
                    sizeOfVector, sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.EXTRACTED_FEATURES_EVALUATION);

            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Supervised learning: trains a CNN on the labelled common-cluster data
     * and predicts labels for the given testing data.
     *
     * @param testingData the vectorized testing-data file
     */
    public void learnFeaturesOfSourceCode2(File testingData) {
        int sizeOfVector = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2;

        try {
            // Number of output classes = number of common clusters.
            int clusterNum = DataPreparation.readCommonCLusters().size();
            File trainingData = new File(Configuration.TRAINING_DATA);
            CNNSupervisedLearning learner = new CNNSupervisedLearning(trainingData, sizeOfVector,
                    sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE, clusterNum, testingData);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.FEATURES_OF_TRAINING_DATA);
            learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA);
            learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA);
            learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA);
            learner.setModelFile(Configuration.SUPERVISED_LEARNING_MODEL);
            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Supervised learning by loading a previously trained model instead of
     * retraining.
     *
     * @param testingData the vectorized testing-data file
     */
    public void learnFeaturesOfSourceCode3(File testingData) {
        try {
            String modelFile = Configuration.SUPERVISED_LEARNING_MODEL;
            CNNSupervisedLearning learner = new CNNSupervisedLearning(BATCH_SIZE, testingData, modelFile);
            learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA);
            learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA);
            learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA);
            learner.extracteFeaturesWithCNNByLoadingModel();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
package edu.lu.uni.serval.FixPatternMining;

import java.io.File;
import java.io.IOException;

import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.deeplearner.Word2VecEncoder;

/**
 * Encode tokens of edit scripts with Word2Vec.
 *
 * @author kui.liu
 */
public class TokenEmbedder {

    // Word2Vec settings shared by every embedding run.
    private static final int WINDOW_SIZE = 2;
    private static final int MIN_WORD_FREQUENCY = 1;

    /**
     * Embed tokens for fix patterns mining.
     */
    public void embedTokensOfEditScripts() {
        embed(new File(Configuration.SELECTED_EDITSCRIPTES_FILE),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1,
                Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS);
    }

    /**
     * Embed source-code tokens used by the supervised-learning evaluation.
     */
    public void embedTokensOfSourceCodeForSupervisedTesting() {
        embed(new File(Configuration.EMBEDDING_DATA_TOKENS2),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2,
                Configuration.EMBEDDED_ALL_TOKENS2);
    }

    /**
     * Embed source-code tokens used by the unsupervised-learning evaluation.
     */
    public void embedTokensOfSourceCodeForUnsupervisedTesting() {
        embed(new File(Configuration.EMBEDDING_DATA_TOKENS1),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2,
                Configuration.EMBEDDED_ALL_TOKENS1);
    }

    /**
     * Runs Word2Vec over one token file. Extracted because the three public
     * methods above were identical except for their input/output/layer-size
     * configuration.
     *
     * @param inputFile      file of whitespace-separated tokens, one sample per line
     * @param layerSize      dimensionality of the learned token vectors
     * @param outputFileName destination of the learned embeddings
     */
    private void embed(File inputFile, int layerSize, String outputFileName) {
        Word2VecEncoder encoder = new Word2VecEncoder();
        encoder.setWindowSize(WINDOW_SIZE);
        try {
            encoder.embedTokens(inputFile, MIN_WORD_FREQUENCY, layerSize, outputFileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
|
||||
@@ -24,13 +24,13 @@ public class Configuration {
|
||||
public static final String BUGGY_CODY_TOKENS_FILE = GUM_TREE_OUTPUT + "tokens.list";
|
||||
public static final String EDITSCRIPT_SIZES_FILE = GUM_TREE_OUTPUT + "editScriptSizes.list";
|
||||
|
||||
public static int MAX_EDIT_SCRIPT_VECTOR_SIZE = 0; // The max size of edit script vectors.
|
||||
public static int MAX_SOURCE_CODE_TOKEN_VECTOR_SIZE = 0; // The max size of all buggy source code token vectors.
|
||||
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN1 = 100; // tokens of edit scripts.
|
||||
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN2 = 200; // tokens of source code
|
||||
|
||||
// the input path of fix patterns mining.
|
||||
private static final String MINING_INPUT = ROOT_PATH + "MiningInput/";
|
||||
public static final String MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS = MINING_INPUT + "/MaxTokenVectorSizeOfEditScripts.list"; // The max size of edit scripts: upper limitation of max size.
|
||||
public static final String MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE = MINING_INPUT + "/MaxTokenVectorSizeOfBuggySourceCode.list"; // The max size of all buggy source code token vectors.
|
||||
// the input path of token embedding.
|
||||
public static final String EMBEDDING_INPUT = MINING_INPUT + "Embedding/";
|
||||
public static final String SELECTED_PATCHES_SOURE_CODE_FILE = EMBEDDING_INPUT + "patchSourceCode.list";// Selected patches.
|
||||
@@ -42,7 +42,7 @@ public class Configuration {
|
||||
public static final String EMBEDDED_EDIT_SCRIPT_TOKENS = FEATURE_LEARNING_INPUT + "embeddedEditScriptTokens.list"; // All embedded tokens of selected edit scripts.
|
||||
public static final String VECTORIED_EDIT_SCRIPTS = FEATURE_LEARNING_INPUT + "vectorizedEditScripts.csv"; // Embedded and vectorized edit script vectors.
|
||||
// the input path of clustering.
|
||||
public static final String EXTRACTED_FEATURES = MINING_INPUT + "ExtractedFeatures/"; // Extracted features of edit scripts.
|
||||
public static final String EXTRACTED_FEATURES = MINING_INPUT + "ExtractedFeatures/"; // Extracted features of all edit scripts.
|
||||
public static final String CLUSTER_INPUT = MINING_INPUT + "ClusteringInput/input.arff";
|
||||
|
||||
// the output path of fix patterns mining.
|
||||
@@ -53,20 +53,28 @@ public class Configuration {
|
||||
|
||||
// evaluation data
|
||||
public static final String TEST_INPUT = ROOT_PATH + "TestProjects/";
|
||||
public static final String TEST_LOCALIZATION_FILE = ROOT_PATH + "TestData/Localization.list"; // Positions of all test statements.
|
||||
public static final String TEST_DATA_FILE = ROOT_PATH + "TestData/TestStatements.list"; // Token vectors of all test statements.
|
||||
public static final String TEST_POSITION_FILE = ROOT_PATH + "TestData/Positions/"; // Positions of all test statements.
|
||||
public static final String TEST_DATA_FILE = ROOT_PATH + "TestData/TestStatements/"; // Token vectors of all test statements.
|
||||
|
||||
public static final String NUMBER_OF_TRAINING_DATA = ROOT_PATH + "TestData/NumberOfTrainingData.list";;
|
||||
|
||||
// data of unsupervised learning
|
||||
public static final String EMBEDDING_DATA_TOKENS1 = ROOT_PATH + "TestData/AllTokenVectorsForEvaluation.list";
|
||||
public static final String EMBEDDED_ALL_TOKENS1 = ROOT_PATH + "TestData/AllEmbeddedTokens.list";
|
||||
public static final String VECTORIED_ALL_SOURCE_CODE1 = ROOT_PATH + "TestData/AllVectorizedSourceCode.list";
|
||||
public static final String EXTRACTED_FEATURES_TESTING = ROOT_PATH + "TestDataExtractedFeatures/";
|
||||
public static final String VECTORIED_ALL_SOURCE_CODE1 = ROOT_PATH + "TestData/AllVectorizedSourceCode/";
|
||||
public static final String EXTRACTED_FEATURES_EVALUATION = ROOT_PATH + "TestDataExtractedFeatures/"; // extracted features of all source code (training data and testing data)
|
||||
|
||||
// Data of supervised learning
|
||||
public static final String CLUSTERNUMBER_LABEL_MAP = ROOT_PATH + "TestData/clusterMappingLabel.list";
|
||||
public static final String EMBEDDING_DATA_TOKENS2 = ROOT_PATH + "TestData/AllTokenVectorsForSupervisedEvaluation.list";
|
||||
public static final String EMBEDDED_ALL_TOKENS2 = ROOT_PATH + "TestData/AllEmbeddedTokensForSuperVisedEvaluation.list";
|
||||
public static final String TRAINING_DATA = ROOT_PATH + "TestData/TrainingData.csv"; // Training data of supervised learning
|
||||
public static final String TESTING_DATA = ROOT_PATH + "TestData/TestingData.csv"; // testing data of supervised learning
|
||||
|
||||
public static final String TESTING_DATA = ROOT_PATH + "TestData/SupervisedLearning/"; // testing data of supervised learning
|
||||
public static final String FEATURES_OF_TRAINING_DATA = ROOT_PATH + "TestingOutput/TraingFeatures/";
|
||||
public static final String FEATURES_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/TestingFeatures/";
|
||||
public static final String POSSIBILITIES_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/Posibilities/";
|
||||
public static final String PREDICTED_RESULTS_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/Prediction/";
|
||||
|
||||
public static final String SUPERVISED_LEARNING_MODEL = ROOT_PATH + "TestingOutput/SupervisedLearningModel.zip";
|
||||
|
||||
}
|
||||
|
||||
@@ -28,23 +28,17 @@ import edu.lu.uni.serval.utils.FileHelper;
|
||||
*/
|
||||
public class ProjectScanner {
|
||||
|
||||
public static void main(String[] args) {
|
||||
String inputPath = Configuration.TEST_INPUT; //test java projects
|
||||
File inputFileDirector = new File(inputPath);
|
||||
File[] projects = inputFileDirector.listFiles(); // project folders
|
||||
|
||||
String outputLocalizeFile = Configuration.TEST_LOCALIZATION_FILE;
|
||||
String outputTokensFile = Configuration.TEST_DATA_FILE;
|
||||
|
||||
private int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE));
|
||||
private int numberOfFiles = 0;
|
||||
private List<SimpleTree> allSimpleTrees = new ArrayList<>();
|
||||
|
||||
public void scanJavaProject(File[] projects, String outputLocalizeFile, String outputTokensFile, int limitation) {
|
||||
for (File project : projects) {
|
||||
ProjectScanner scanner = new ProjectScanner();
|
||||
scanner.scanJavaProject(project, outputLocalizeFile, outputTokensFile);
|
||||
scanJavaProject(project, outputLocalizeFile, outputTokensFile, limitation);
|
||||
}
|
||||
}
|
||||
|
||||
List<SimpleTree> allSimpleTrees = new ArrayList<>();
|
||||
|
||||
public void scanJavaProject(File javaProject, String outputLocalizeFile, String outputTokensFile) {
|
||||
public void scanJavaProject(File javaProject, String outputLocalizeFile, String outputTokensFile, int limitation) {
|
||||
List<File> files = new ArrayList<>();
|
||||
files.addAll(FileHelper.getAllFiles(javaProject.getPath(), ".java"));
|
||||
|
||||
@@ -60,19 +54,24 @@ public class ProjectScanner {
|
||||
CUCreator cuCreator = new CUCreator();
|
||||
CompilationUnit cUnit = cuCreator.createCompilationUnit(file);
|
||||
getTokenVectorOfAllStatements(tree, cUnit, tokensBuilder, localizationsBuilder, javaProject.getPath(), file.getPath());
|
||||
|
||||
if (++ counter % 1000 == 0) {
|
||||
FileHelper.outputToFile(outputLocalizeFile, localizationsBuilder, true);
|
||||
FileHelper.outputToFile(outputTokensFile, tokensBuilder, true);
|
||||
++ counter;
|
||||
|
||||
if ( counter % limitation == 0) {
|
||||
numberOfFiles ++;
|
||||
FileHelper.outputToFile(outputLocalizeFile + "Positions" + numberOfFiles + ".list", localizationsBuilder, true);
|
||||
FileHelper.outputToFile(outputTokensFile + "Tokens" + numberOfFiles + ".list", tokensBuilder, true);
|
||||
localizationsBuilder.setLength(0);
|
||||
tokensBuilder.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
FileHelper.outputToFile(outputLocalizeFile, localizationsBuilder, true);
|
||||
FileHelper.outputToFile(outputTokensFile, tokensBuilder, true);
|
||||
localizationsBuilder.setLength(0);
|
||||
tokensBuilder.setLength(0);
|
||||
if (localizationsBuilder.length() > 0) {
|
||||
numberOfFiles ++;
|
||||
FileHelper.outputToFile(outputLocalizeFile + "Positions" + numberOfFiles + ".list", localizationsBuilder, true);
|
||||
FileHelper.outputToFile(outputTokensFile + "Tokens" + numberOfFiles + ".list", tokensBuilder, true);
|
||||
localizationsBuilder.setLength(0);
|
||||
tokensBuilder.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
private void getTokenVectorOfAllStatements(ITree tree, CompilationUnit unit, StringBuilder tokensBuilder, StringBuilder localizationsBuilder, String projectName, String filePath) {
|
||||
@@ -98,7 +97,8 @@ public class ProjectScanner {
|
||||
// project name: file name: line number
|
||||
String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim();
|
||||
String[] tokensArray = tokens.split(" ");
|
||||
if (tokensArray.length <= Configuration.MAX_SOURCE_CODE_TOKEN_VECTOR_SIZE) {
|
||||
|
||||
if (tokensArray.length <= maxSize) {
|
||||
int position = tree.getPos();
|
||||
int lineNum = unit.getLineNumber(position);
|
||||
tokensBuilder.append(tokens).append("\n");
|
||||
@@ -183,4 +183,5 @@ public class ProjectScanner {
|
||||
simpleTree.setParent(parent);
|
||||
return simpleTree;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user