Add deep learning models.

This commit is contained in:
Kui LIU
2017-08-02 23:49:27 +02:00
parent cbddfdec34
commit baf6b06eed
26 changed files with 1761 additions and 31 deletions
+12
View File
@@ -16,6 +16,18 @@
</properties>
<dependencies>
<dependency>
<groupId>edu.lu.uni.serval</groupId>
<artifactId>MyCluster</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.lu.uni.serval</groupId>
<artifactId>MyFeatureLearner</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.lu.uni</groupId>
<artifactId>simple-utils</artifactId>
@@ -0,0 +1,24 @@
package edu.lu.uni.serval.FixPattern.info;
import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet;
import edu.lu.uni.serval.gumtree.regroup.SimpleTree;
/**
 * A mined fix pattern: the tree of the buggy code paired with the edit
 * scripts of its fix. Instances are immutable.
 */
public class FixPattern {

    // Tree of the buggy code; used to compute similarity with target code.
    private final SimpleTree buggyCodeTree;
    // Edit scripts of the fix; used to generate new patches.
    private final HierarchicalActionSet editScripts;

    public FixPattern(SimpleTree buggyCodeTree, HierarchicalActionSet editScripts) {
        // Removed the redundant super() call; fields are final since the
        // class exposes no mutators.
        this.buggyCodeTree = buggyCodeTree;
        this.editScripts = editScripts;
    }

    public SimpleTree getBuggyCodeTree() {
        return buggyCodeTree;
    }

    public HierarchicalActionSet getEditScripts() {
        return editScripts;
    }
}
@@ -0,0 +1,163 @@
package edu.lu.uni.serval.FixPattern.info;
//import java.io.File;
//import java.io.IOException;
//import java.util.ArrayList;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
import org.eclipse.jdt.core.dom.ASTParser;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
//import com.github.gumtreediff.actions.ActionGenerator;
//import com.github.gumtreediff.actions.model.Action;
//import com.github.gumtreediff.gen.jdt.JdtTreeGenerator;
//import com.github.gumtreediff.gen.jdt.cd.CdJdtTreeGenerator;
//import com.github.gumtreediff.matchers.Matcher;
//import com.github.gumtreediff.matchers.Matchers;
import com.github.gumtreediff.tree.ITree;
import com.github.gumtreediff.tree.TreeContext;
import edu.lu.uni.serval.FixPattern.utils.ASTNodeMap;
import edu.lu.uni.serval.gen.jdt.exp.ExpJdtTreeGenerator;
/**
 * Legacy GumTree-based analysis of bug fixes.
 *
 * <p>The old repository-traversal and file-diffing implementation was dead
 * (fully commented-out) code and has been removed; see version control
 * history if it is needed again. Only {@link #parseAction(String)} remains:
 * it rewrites the numeric JDT node-type ids embedded in a GumTree action
 * string into readable node-type names.
 */
@Deprecated
public class GumTreeAnalysis {

    /**
     * Replaces numeric AST node-type ids in a GumTree action string, e.g.
     * "UPD 25@@!a from !a to isTrue(a) at 69", with their names from
     * {@link ASTNodeMap}. Fragments are delimited by "@@"; the token just
     * before each delimiter is the candidate node-type id.
     *
     * @param actStr the raw GumTree action string
     * @return the action string with node-type ids replaced where possible
     */
    private static String parseAction(String actStr) {
        String[] fragments = actStr.split("@@");
        int length = fragments.length;
        // StringBuilder instead of repeated String concatenation in the loop.
        StringBuilder parsed = new StringBuilder();
        for (int i = 0; i < length - 1; i++) {
            String fragment = fragments[i];
            int index = fragment.lastIndexOf(" ") + 1;
            String nodeType = fragment.substring(index);
            try {
                // BUGFIX: keep the raw token when it is not a numeric id or
                // when the id is unknown to the map. The original let
                // NumberFormatException escape and could substitute "null".
                String mapped = ASTNodeMap.map.get(Integer.parseInt(nodeType));
                if (mapped != null) {
                    nodeType = mapped;
                }
            } catch (NumberFormatException e) {
                // non-numeric token: leave it unchanged
            }
            parsed.append(fragment, 0, index).append(nodeType).append("@@");
        }
        parsed.append(fragments[length - 1]);
        return parsed.toString();
    }
}
@@ -0,0 +1,35 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 1: prepare data for token embedding of edit scripts.
 *
 * <p>Input data: parsed results of patches with GumTree.
 *
 * @author kui.liu
 */
public class Step1 {

    public static void main(String[] args) {
        // Remove stale outputs of a previous run before regenerating them.
        // NOTE(review): EDITSCRIPT_SIZES_FILE was assigned to both the
        // "edit scripts" and the "edit script sizes" variables in the
        // original — presumably one of them should be a distinct
        // EDITSCRIPTS constant; confirm against Configuration.
        FileHelper.deleteFile(Configuration.EDITSCRIPT_SIZES_FILE);
        FileHelper.deleteFile(Configuration.PATCH_SOURCECODE_FILE);
        FileHelper.deleteFile(Configuration.BUGGY_CODY_TOKENS_FILE);
        FileHelper.deleteFile(Configuration.EDITSCRIPT_SIZES_FILE);

        FileHelper.deleteFile(Configuration.SELECTED_EDITSCRIPTES_FILE);
        FileHelper.deleteFile(Configuration.SELECTED_PATCHES_SOURE_CODE_FILE);
        FileHelper.deleteFile(Configuration.SELECTED_BUGGY_TOKEN_FILE);

        DataPreparation.prepareDataForTokenEmbedding();
    }
}
@@ -0,0 +1,33 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.TokenEmbedder;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 10: prepare data for evaluation by embedding the tokens of the
 * source-code vectors of the training data and testing data.
 *
 * @author kui.liu
 */
public class Step10 {

    // Toggle between the supervised and the un-supervised pipeline.
    private static final boolean SUPERVISED_LEARNING = true;

    public static void main(String[] args) {
        if (SUPERVISED_LEARNING) {
            // Remove the previous output, then re-embed.
            FileHelper.deleteFile(Configuration.EMBEDDED_ALL_TOKENS2);
            new TokenEmbedder().embedTokensOfSourceCodeForSupervisedTesting();
        } else {
            FileHelper.deleteFile(Configuration.EMBEDDED_ALL_TOKENS1);
            new TokenEmbedder().embedTokensOfSourceCodeForUnsupervisedTesting();
        }
    }
}
@@ -0,0 +1,37 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.util.Map;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 11: prepare data for evaluation — vectorize data for deep learning.
 *
 * @author kui.liu
 */
public class Step11 {

    // Toggle between the supervised and the un-supervised pipeline.
    private static final boolean SUPERVISED_LEARNING = true;

    public static void main(String[] args) {
        if (SUPERVISED_LEARNING) {
            FileHelper.deleteFile(Configuration.TRAINING_DATA);
            FileHelper.deleteDirectory(Configuration.TESTING_DATA);
            // <ClusterNum, Label for supervised learning>
            Map<Integer, Integer> labelByCluster = DataPreparation.readCommonCLusters();
            DataPreparation.prepareDataForFeatureLearningOfEvaluation2(labelByCluster);
        } else {
            FileHelper.deleteFile(Configuration.VECTORIED_ALL_SOURCE_CODE1);
            // Runs before embedding tokens.
            DataPreparation.prepareDataForFeatureLearningOfEvaluation1();
        }
    }
}
@@ -0,0 +1,40 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.io.File;
import java.util.List;
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 12: evaluation — extract features of testing data and predict their
 * labels.
 *
 * @author kui.liu
 */
public class Step12 {

    // Toggle between the supervised and the un-supervised pipeline.
    private static final boolean SUPERVISED_LEARNING = true;

    public static void main(String[] args) {
        if (SUPERVISED_LEARNING) {
            List<File> testingDataFiles =
                    FileHelper.getAllFilesInCurrentDiectory(Configuration.TESTING_DATA, ".csv");
            boolean first = true;
            for (File testingDataFile : testingDataFiles) {
                FeatureLearner learner = new FeatureLearner();
                if (first) {
                    // TODO: we can test this model with our clustered results.
                    learner.learnFeaturesOfSourceCode2(testingDataFile);
                    first = false;
                } else {
                    learner.learnFeaturesOfSourceCode3(testingDataFile);
                }
            }
        } else {
            new FeatureLearner().learnFeaturesOfSourceCode();
            // Extracted features: Configuration.EXTRACTED_FEATURES_TESTING;
            // similarity is computed with cosine similarity.
        }
    }
}
@@ -0,0 +1,33 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.io.File;
import java.util.List;
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 13: evaluation — extract features of testing data and predict their
 * labels. (Implementation pending.)
 *
 * @author kui.liu
 */
public class Step13 {

    // Toggle between the supervised and the un-supervised pipeline.
    private static final boolean SUPERVISED_LEARNING = true;

    public static void main(String[] args) {
        if (SUPERVISED_LEARNING) {
            // Planned: label -> possibility (90/80/70/60, others ignored) for
            // level-one localization; label = clusterNum, then re-compute the
            // similarity with each element; similarity: patches -> fixing bug.
            List<File> testingDataFiles =
                    FileHelper.getAllFilesInCurrentDiectory(Configuration.TESTING_DATA, ".csv");
            for (File testingDataFile : testingDataFiles) {
                // TODO: the per-file evaluation logic is not implemented yet.
            }
        } else {
            // Planned: Configuration.EXTRACTED_FEATURES_TESTING features,
            // compared with cosine similarity.
        }
    }
}
@@ -0,0 +1,25 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.TokenEmbedder;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 2: embed tokens of all selected edit scripts.
 *
 * <p>Input data: all tokens of the selected edit scripts.
 *
 * @author kui.liu
 */
public class Step2 {

    public static void main(String[] args) {
        // Remove the output of a previous run before re-embedding.
        FileHelper.deleteFile(Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS);
        new TokenEmbedder().embedTokensOfEditScripts();
    }
}
@@ -0,0 +1,24 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 3: prepare data for feature learning of the selected edit scripts —
 * vectorize edit scripts with their embedded tokens.
 *
 * @author kui.liu
 */
public class Step3 {

    public static void main(String[] args) {
        // Remove the output of a previous run before re-vectorizing.
        FileHelper.deleteFile(Configuration.VECTORIED_EDIT_SCRIPTS);
        DataPreparation.prepareDataForFeatureLearning();
    }
}
@@ -0,0 +1,25 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.FeatureLearner;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 4: learn features of all selected edit scripts with a CNN.
 *
 * <p>Input data: the vectorized edit scripts.
 *
 * @author kui.liu
 */
public class Step4 {

    public static void main(String[] args) {
        // Remove previously extracted features before re-learning.
        FileHelper.deleteDirectory(Configuration.EXTRACTED_FEATURES);
        new FeatureLearner().learnFeatures();
    }
}
@@ -0,0 +1,24 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 5: prepare data for clustering of edit scripts.
 *
 * <p>Input data: the features of edit scripts learned by the CNN.
 *
 * @author kui.liu
 */
public class Step5 {

    public static void main(String[] args) {
        // Remove the previous cluster input before regenerating it.
        FileHelper.deleteFile(Configuration.CLUSTER_INPUT);
        DataPreparation.prepareDataForClustering();
    }
}
@@ -0,0 +1,23 @@
package edu.lu.uni.serval.FixPatternMining.App;
import edu.lu.uni.serval.FixPatternMining.Cluster;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 6: cluster edit scripts using their extracted features.
 *
 * @author kui.liu
 */
public class Step6 {

    public static void main(String[] args) {
        // Remove the previous cluster output before re-clustering.
        FileHelper.deleteFile(Configuration.CLUSTER_OUTPUT);
        new Cluster().cluster();
    }
}
@@ -0,0 +1,47 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.util.List;
import java.util.Map;
import edu.lu.uni.serval.FixPatternMining.ClusterAnalyser;
import edu.lu.uni.serval.FixPatternMining.CommonPatterns;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 7: analyze cluster results to obtain common fix patterns.
 *
 * <p>Outputs the per-cluster patch and token files, the cluster-number to
 * label mapping, and the total number of training data items.
 *
 * @author kui.liu
 */
public class Step7 {

    public static void main(String[] args) {
        // Remove outputs of a previous run.
        FileHelper.deleteDirectory(Configuration.CLUSTERED_PATCHES_FILE);
        FileHelper.deleteDirectory(Configuration.CLUSTERED_TOKENSS_FILE);

        // Analyze cluster results.
        ClusterAnalyser analyser = new ClusterAnalyser();
        analyser.readClusterResutls();
        analyser.clusterPatchSourceCode();
        // These results are later used to compute similarity with target java
        // code to localize bugs.
        analyser.clusterBuggyCodeTokens();
        List<Integer> clusterResults = analyser.getClusterResults();

        // Identify common patterns and map each common cluster to a label.
        CommonPatterns commonPatterns = new CommonPatterns(); // Metrics TODO
        // <Integer, Integer>: <ClusterNum, Label for supervised learning>
        Map<Integer, Integer> commonClustersMappingLabel = commonPatterns.identifyCommonPatterns(clusterResults);
        // FIX: build the report with StringBuilder instead of O(n^2) String
        // concatenation in the loop.
        StringBuilder clusterMappingLabel = new StringBuilder("Label : ClusterNum\n");
        for (Map.Entry<Integer, Integer> entry : commonClustersMappingLabel.entrySet()) {
            clusterMappingLabel.append(entry.getValue()).append(" : ").append(entry.getKey()).append("\n");
        }
        FileHelper.outputToFile(Configuration.CLUSTERNUMBER_LABEL_MAP, clusterMappingLabel.toString(), false);

        int totalNumberOfTrainingData = commonPatterns.getTotalNumberofTrainingData();
        FileHelper.outputToFile(Configuration.NUMBER_OF_TRAINING_DATA, "" + totalNumberOfTrainingData, false);
    }
}
@@ -0,0 +1,33 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.io.File;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.evaluation.ProjectScanner;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 8: prepare testing data for evaluation — parse java projects to get
 * the token vectors of all statements.
 *
 * @author kui.liu
 */
public class Step8 {

    public static void main(String[] args) {
        String outputLocalizeFile = Configuration.TEST_POSITION_FILE;
        String outputTokensFile = Configuration.TEST_DATA_FILE;
        FileHelper.deleteDirectory(outputLocalizeFile);
        FileHelper.deleteDirectory(outputTokensFile);

        // Cap the number of testing instances at one tenth of the
        // training-data size recorded by Step 7.
        String trainingDataSize = FileHelper.readFile(Configuration.NUMBER_OF_TRAINING_DATA).trim();
        int limitationOfTestingInstances = Integer.parseInt(trainingDataSize) / 10;

        File[] projects = new File(Configuration.TEST_INPUT).listFiles();
        new ProjectScanner().scanJavaProject(projects, outputLocalizeFile, outputTokensFile, limitationOfTestingInstances);
    }
}
@@ -0,0 +1,33 @@
package edu.lu.uni.serval.FixPatternMining.App;
import java.util.Map;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Step 9: prepare data for evaluation — merge the token vectors of the
 * source code of the training data and testing data.
 *
 * @author kui.liu
 */
public class Step9 {

    // Toggle between the supervised and the un-supervised pipeline.
    private static final boolean SUPERVISED_LEARNING = true;

    public static void main(String[] args) {
        if (SUPERVISED_LEARNING) {
            // <ClusterNum, Label for supervised learning>
            Map<Integer, Integer> labelByCluster = DataPreparation.readCommonCLusters();
            FileHelper.deleteFile(Configuration.EMBEDDING_DATA_TOKENS2);
            DataPreparation.prepareTokensForEvaluation2(labelByCluster);
        } else {
            DataPreparation.prepareTokensForEvaluation1();
        }
    }
}
@@ -0,0 +1,45 @@
package edu.lu.uni.serval.FixPatternMining;
import edu.lu.uni.serval.Clusters.XMeansCluster;
import edu.lu.uni.serval.config.Configuration;
import weka.core.EuclideanDistance;
/**
 * Clusters features with the X-means clustering algorithm.
 *
 * @author kui.liu
 */
public class Cluster {

    /**
     * Runs X-means over the ARFF feature file (Configuration.CLUSTER_INPUT)
     * and writes the cluster assignments to Configuration.CLUSTER_OUTPUT.
     */
    public void cluster() {
        XMeansCluster xMeans = new XMeansCluster();
        try {
            // The following five parameters have default values.
            xMeans.setDistanceF(new EuclideanDistance());
            xMeans.setUseKDTree(true);
            xMeans.setMaxNumberOfIterations(1000);
            // These two parameters are recommended to be identical.
            xMeans.setMaxKMeans(200);
            xMeans.setMaxKMeansForChildren(200);
            // The following three parameters should be chosen by developers.
            xMeans.setSeed(200);
            xMeans.setMaxNumClusters(100);
            xMeans.setMinNumClusters(1);
            // Run the X-means clustering.
            xMeans.cluster(Configuration.CLUSTER_INPUT, Configuration.CLUSTER_OUTPUT);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
@@ -0,0 +1,146 @@
package edu.lu.uni.serval.FixPatternMining;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Splits the selected patches and buggy-code tokens into per-cluster files,
 * according to the cluster number assigned to each data item by the
 * clustering step.
 */
public class ClusterAnalyser {

    // Cluster assignment per data item: element i is the cluster number of item i.
    private List<Integer> clusterResults;

    /** Loads the cluster assignments produced by the clustering step. */
    public void readClusterResutls() {
        clusterResults = DataPreparation.readClusterResults();
    }

    /**
     * Groups buggy-code token lines by cluster number and appends each group
     * to "Tokens_&lt;clusterNum&gt;.list". The results are later used to compute
     * similarity with target java code to localize bugs.
     */
    public void clusterBuggyCodeTokens() {
        String selectedTokens = Configuration.SELECTED_BUGGY_TOKEN_FILE;
        String clusteredTokens = Configuration.CLUSTERED_TOKENSS_FILE;
        Map<Integer, StringBuilder> builderMap = new HashMap<>();
        Map<Integer, Integer> countersMap = new HashMap<>();
        // BUGFIX: try-with-resources. The original closed scanner/fis in a
        // finally block without null checks, so a missing input file caused
        // an NPE inside finally instead of a clean error report.
        try (FileInputStream fis = new FileInputStream(selectedTokens);
                Scanner scanner = new Scanner(fis)) {
            int index = 0;
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                int clusterNum = clusterResults.get(index);
                StringBuilder builder = getBuilder(builderMap, clusterNum);
                builder.append(line).append("\n");
                // Flush every 1000 items per cluster to bound memory usage.
                if (getCounter(countersMap, clusterNum) % 1000 == 0) {
                    FileHelper.outputToFile(clusteredTokens + "Tokens_" + clusterNum + ".list", builder, true);
                    builder.setLength(0);
                }
                index++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        flushBuilders(builderMap, clusteredTokens, "Tokens_");
    }

    /**
     * Groups patch source code (entries delimited by the "PATCH###" marker)
     * by cluster number and appends each group to
     * "PatchesCluster_&lt;clusterNum&gt;.list".
     */
    public void clusterPatchSourceCode() {
        String selectedPatches = Configuration.SELECTED_PATCHES_SOURE_CODE_FILE;
        String clusteredPatches = Configuration.CLUSTERED_PATCHES_FILE;
        Map<Integer, StringBuilder> builderMap = new HashMap<>();
        Map<Integer, Integer> countersMap = new HashMap<>();
        try (FileInputStream fis = new FileInputStream(selectedPatches);
                Scanner scanner = new Scanner(fis)) {
            String singlePatch = "";
            int index = -1; // becomes 0 at the first "PATCH###" marker
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if ("".equals(line)) {
                    continue;
                }
                if ("PATCH###".equals(line)) {
                    if (!"".equals(singlePatch)) {
                        appendPatchToCluster(builderMap, countersMap, clusteredPatches, index, singlePatch);
                    }
                    singlePatch = "";
                    index++;
                }
                singlePatch += line + "\n";
            }
            // BUGFIX: the original silently dropped the last patch of the
            // file, because a patch was only emitted when the *next*
            // "PATCH###" marker appeared.
            if (!"".equals(singlePatch) && index >= 0) {
                appendPatchToCluster(builderMap, countersMap, clusteredPatches, index, singlePatch);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        flushBuilders(builderMap, clusteredPatches, "PatchesCluster_");
    }

    // Adds one patch to its cluster's buffer, flushing every 1000 patches per cluster.
    private void appendPatchToCluster(Map<Integer, StringBuilder> builderMap, Map<Integer, Integer> countersMap,
            String clusteredPatches, int index, String singlePatch) {
        int clusterNum = clusterResults.get(index);
        StringBuilder builder = getBuilder(builderMap, clusterNum);
        builder.append(singlePatch);
        if (getCounter(countersMap, clusterNum) % 1000 == 0) {
            FileHelper.outputToFile(clusteredPatches + "PatchesCluster_" + clusterNum + ".list", builder, true);
            builder.setLength(0);
        }
    }

    // Appends any remaining buffered content of every cluster and clears the buffers.
    private void flushBuilders(Map<Integer, StringBuilder> builderMap, String outputPath, String filePrefix) {
        for (Map.Entry<Integer, StringBuilder> entry : builderMap.entrySet()) {
            FileHelper.outputToFile(outputPath + filePrefix + entry.getKey() + ".list", entry.getValue(), true);
            entry.getValue().setLength(0);
        }
    }

    // Increments and returns the number of items seen so far for this cluster.
    private int getCounter(Map<Integer, Integer> countersMap, int clusterNum) {
        int counter = 1;
        if (countersMap.containsKey(clusterNum)) {
            counter += countersMap.get(clusterNum);
        }
        countersMap.put(clusterNum, counter);
        return counter;
    }

    // Returns the buffer of this cluster, creating it on first use.
    private StringBuilder getBuilder(Map<Integer, StringBuilder> builderMap, int clusterNum) {
        if (builderMap.containsKey(clusterNum)) {
            return builderMap.get(clusterNum);
        }
        StringBuilder builder = new StringBuilder();
        builderMap.put(clusterNum, builder);
        return builder;
    }

    public List<Integer> getClusterResults() {
        return clusterResults;
    }
}
@@ -0,0 +1,35 @@
package edu.lu.uni.serval.FixPatternMining;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import edu.lu.uni.serval.utils.FileHelper;
public class ClusterResults {

    /**
     * Read the cluster results from the file of cluster results: one integer
     * cluster number per line.
     *
     * @param clusterResultsFile the file of cluster results
     * @return one cluster number per input line, in file order
     * @throws IOException if the content cannot be read
     */
    public static List<Integer> readClusterResults(File clusterResultsFile) throws IOException {
        List<Integer> clusterResultsList = new ArrayList<>();
        String clusterResults = FileHelper.readFile(clusterResultsFile);
        // BUGFIX: try-with-resources — the original leaked the reader
        // whenever Integer.parseInt threw on a malformed line, because
        // close() was not in a finally block.
        try (BufferedReader reader = new BufferedReader(new StringReader(clusterResults))) {
            String line;
            while ((line = reader.readLine()) != null) {
                clusterResultsList.add(Integer.parseInt(line));
            }
        }
        return clusterResultsList;
    }
}
@@ -0,0 +1,72 @@
package edu.lu.uni.serval.FixPatternMining;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.utils.MapSorter;
/**
 * Identifies the "common" clusters (common fix patterns) among the cluster
 * results and maps each one to a supervised-learning label.
 */
public class CommonPatterns {

    // A cluster is considered common when it holds at least this many elements.
    private static final int LEAST_NUMBER = 100;

    // Sum of the sizes of all selected common clusters.
    private int totalNumberofTrainingData = 0;

    /**
     * Selects the common clusters and assigns each a consecutive label.
     *
     * @param clusterResults one cluster number per data item
     * @return map from cluster number to supervised-learning label
     */
    public Map<Integer, Integer> identifyCommonPatterns(List<Integer> clusterResults) {
        Map<Integer, List<Integer>> clusterMap = DataPreparation.readClusterResult(clusterResults);
        // TODO how to select the common patterns, number or ratio?
        List<Integer> commonClusters = getCommonClustersByNumber(clusterMap);
        Map<Integer, Integer> labelByCluster = new HashMap<>(); // <ClusterNum, Label>
        int label = 0;
        for (Integer clusterNum : commonClusters) {
            labelByCluster.put(clusterNum, label);
            label++;
        }
        return labelByCluster;
    }

    // Selection by absolute size: every cluster with >= LEAST_NUMBER elements.
    private List<Integer> getCommonClustersByNumber(Map<Integer, List<Integer>> clusterMap) {
        List<Integer> commonClusters = new ArrayList<>();
        for (Map.Entry<Integer, List<Integer>> entry : clusterMap.entrySet()) {
            int size = entry.getValue().size();
            if (size >= LEAST_NUMBER) { // TODO how to set this threshold?
                commonClusters.add(entry.getKey());
                totalNumberofTrainingData += size;
            }
        }
        return commonClusters;
    }

    // Alternative selection: take the largest clusters (by share of all data)
    // until together they cover at least 80% of the items. Currently unused.
    private List<Integer> getCommonClustersByRatio(Map<Integer, List<Integer>> clusterMap, List<Integer> clusterResults) {
        List<Integer> commonClusters = new ArrayList<>();
        double total = (double) clusterResults.size();
        Map<Integer, Double> ratios = new HashMap<>();
        for (Map.Entry<Integer, List<Integer>> entry : clusterMap.entrySet()) {
            ratios.put(entry.getKey(), (double) entry.getValue().size() / total);
        }
        MapSorter<Integer, Double> sorter = new MapSorter<Integer, Double>();
        ratios = sorter.sortByValueDescending(ratios);
        double coveredRatio = 0.0;
        for (Map.Entry<Integer, Double> entry : ratios.entrySet()) {
            coveredRatio += entry.getValue();
            commonClusters.add(entry.getKey());
            totalNumberofTrainingData += clusterMap.get(entry.getKey()).size();
            if (coveredRatio >= 0.8) { // TODO: how to set the value of this threshold?
                break;
            }
        }
        return commonClusters;
    }

    public int getTotalNumberofTrainingData() {
        return totalNumberofTrainingData;
    }
}
@@ -0,0 +1,560 @@
package edu.lu.uni.serval.FixPatternMining.DataPrepare;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.MaxSizeSelector.MaxSizeType;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.data.DataPreparer;
import edu.lu.uni.serval.utils.FileHelper;
/**
* Prepare data for fix patterns mining and evaluation.
*
* @author kui.liu
*
*/
public class DataPreparation {
/**
 * Prepare data for token embedding in the process of fix patterns mining:
 * merges the per-run parsing results into single files, then drops data
 * items whose edit-script vector size is an outlier (above the selected
 * maximum size).
 */
public static void prepareDataForTokenEmbedding() {
    // Collect all data into one file.
    String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE_PATH;
    String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH;
    String buggyTokensFilePath = Configuration.BUGGYTREE_FILE_PATH;
    String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH;
    // NOTE(review): editScriptsFile is assigned EDITSCRIPT_SIZES_FILE, the
    // same constant as editScriptSizesFile below — this looks like a
    // copy-paste slip (a distinct EDITSCRIPTS file constant was likely
    // intended); confirm against Configuration.
    String editScriptsFile = Configuration.EDITSCRIPT_SIZES_FILE;
    String patchesSourceCodeFile = Configuration.PATCH_SOURCECODE_FILE;
    String buggyTokensFile = Configuration.BUGGY_CODY_TOKENS_FILE;
    String editScriptSizesFile = Configuration.EDITSCRIPT_SIZES_FILE;
    File file = new File(editScriptsFilePath);
    File[] subFiles = file.listFiles();
    // Merge results of parsed patches; companion files share the "_<id>"
    // suffix of each edit-scripts file.
    for (File subFile : subFiles) {
        String fileName = subFile.getName(); // edit-scripts file name
        String id = fileName.substring(fileName.lastIndexOf("_"));
        FileHelper.outputToFile(editScriptsFile, FileHelper.readFile(subFile), true);
        String patchesSourceCode = patchesSourceCodeFilePath + "patches" + id;
        FileHelper.outputToFile(patchesSourceCodeFile, FileHelper.readFile(patchesSourceCode), true);
        // BUGFIX: the sizes input must be read from the sizes directory
        // (editScriptSizesFilePath) and appended to the merged sizes file
        // (editScriptSizesFile); the original swapped the two, building the
        // input name from the merged file and writing into the path constant.
        String sizes = editScriptSizesFilePath + "sizes" + id;
        FileHelper.outputToFile(editScriptSizesFile, FileHelper.readFile(sizes), true);
        String buggyTokens = buggyTokensFilePath + "tokens" + id;
        FileHelper.outputToFile(buggyTokensFile, FileHelper.readFile(buggyTokens), true);
    }
    // Select data by the size of edit script vectors.
    List<Integer> sizesList;
    try {
        sizesList = MaxSizeSelector.readSizes(editScriptSizesFile);
        int maxSize = MaxSizeSelector.selectMaxSize(MaxSizeType.ThirdQuartile, sizesList);
        // Indexes of data items whose edit-script vector is too large.
        List<Integer> outlierIndexes = new ArrayList<>();
        for (int i = 0, size = sizesList.size(); i < size; i++) {
            if (sizesList.get(i) > maxSize) {
                outlierIndexes.add(i);
            }
        }
        FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS, "" + maxSize, false);
        selectData(editScriptsFile, outlierIndexes, Configuration.SELECTED_EDITSCRIPTES_FILE);
        selectData(patchesSourceCodeFile, outlierIndexes, Configuration.PATCH_SIGNAL, Configuration.SELECTED_PATCHES_SOURE_CODE_FILE);
        int maxTokenVectorSize = selectDataOfSourceCodeTokens(buggyTokensFile, outlierIndexes, Configuration.SELECTED_BUGGY_TOKEN_FILE);
        FileHelper.outputToFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE, "" + maxTokenVectorSize, false);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Copies intputFile to outputFile line by line, skipping lines whose 0-based
 * index appears in outlierIndexList. The buffer is flushed to the output
 * file every 100000 kept lines to bound memory usage.
 *
 * @param intputFile       the merged input file to filter
 * @param outlierIndexList 0-based line indexes to drop (left untouched)
 * @param outputFile       destination file (appended to)
 */
private static void selectData(String intputFile, List<Integer> outlierIndexList, String outputFile) {
    // Work on a copy so the caller's list stays intact (it is reused by the
    // sibling select* calls).
    List<Integer> outlierIndexes = new ArrayList<>();
    outlierIndexes.addAll(outlierIndexList);
    FileInputStream fis = null;
    Scanner scanner = null;
    try {
        fis = new FileInputStream(intputFile);
        scanner = new Scanner(fis);
        int index = 0;
        StringBuilder builder = new StringBuilder();
        int counter = 0;
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            // FIX: a single remove() call instead of contains()+remove()
            // (two linear scans), and Integer.valueOf instead of the
            // deprecated Integer constructor.
            if (!outlierIndexes.remove(Integer.valueOf(index))) {
                builder.append(line).append("\n");
                if (++counter % 100000 == 0) {
                    FileHelper.outputToFile(outputFile, builder, true);
                    builder.setLength(0);
                }
            }
            index++;
        }
        FileHelper.outputToFile(outputFile, builder, true);
        builder.setLength(0);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        try {
            if (scanner != null) {
                scanner.close();
                scanner = null;
            }
            if (fis != null) {
                fis.close();
                fis = null;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Copies multi-line entities from {@code inputFile} to {@code outputFile}, skipping
 * the entities whose zero-based index occurs in {@code outlierIndexes}. An entity
 * starts at a line equal to {@code startingSignal} and runs until the next signal
 * line (or end of file); the signal line itself is part of the entity.
 *
 * @param inputFile      path of the file to filter.
 * @param outlierIndexes zero-based entity indexes to drop (not modified).
 * @param startingSignal exact line content that marks the start of an entity.
 * @param outputFile     path the surviving entities are appended to.
 */
private static void selectData(String inputFile, List<Integer> outlierIndexes, String startingSignal, String outputFile) {
    // Copy so the caller's list is not mutated (consistent with the other overloads;
    // the original drained the caller's list as a side effect).
    List<Integer> remaining = new ArrayList<>(outlierIndexes);
    try (FileInputStream fis = new FileInputStream(inputFile);
            Scanner scanner = new Scanner(fis)) {
        int index = -1; // becomes 0 when the first entity's signal line is seen
        StringBuilder builder = new StringBuilder();
        int counter = 0;
        String singleEntity = "";
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (line.equals(startingSignal)) {
                if (!"".equals(singleEntity)) {
                    if (remaining.contains(index)) {
                        remaining.remove(Integer.valueOf(index));
                    } else {
                        builder.append(singleEntity).append("\n");
                        if (++counter % 100000 == 0) { // flush periodically
                            FileHelper.outputToFile(outputFile, builder, true);
                            builder.setLength(0);
                        }
                    }
                    singleEntity = "";
                }
                index++;
            }
            singleEntity += line + "\n";
        }
        // BUG FIX: the final entity has no trailing signal line, so the original
        // loop silently dropped it. Flush it here, honoring the outlier filter.
        if (!"".equals(singleEntity) && !remaining.contains(index)) {
            builder.append(singleEntity).append("\n");
        }
        FileHelper.outputToFile(outputFile, builder, true);
        builder.setLength(0);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Filters {@code inputFile} like {@link #selectData(String, List, String)} and, in
 * addition, tracks the widest token vector among the kept lines (tokens are
 * blank-separated).
 *
 * @param inputFile        path of the token file, one token vector per line.
 * @param outlierIndexList zero-based indexes of the lines to drop (not modified).
 * @param outputFile       path the surviving lines are appended to.
 * @return the maximum number of tokens on any kept line (0 if nothing was kept).
 */
private static int selectDataOfSourceCodeTokens(String inputFile, List<Integer> outlierIndexList, String outputFile) {
    List<Integer> outlierIndexes = new ArrayList<>(outlierIndexList);
    int size = 0; // widest token vector seen so far
    try (FileInputStream fis = new FileInputStream(inputFile);
            Scanner scanner = new Scanner(fis)) {
        int index = 0;
        StringBuilder builder = new StringBuilder();
        int counter = 0;
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (outlierIndexes.contains(index)) {
                outlierIndexes.remove(Integer.valueOf(index)); // remove by value
            } else {
                builder.append(line).append("\n");
                if (++counter % 100000 == 0) { // flush periodically to bound memory
                    FileHelper.outputToFile(outputFile, builder, true);
                    builder.setLength(0);
                }
                String[] tokens = line.split(" ");
                if (tokens.length > size) {
                    size = tokens.length;
                }
            }
            index++;
        }
        FileHelper.outputToFile(outputFile, builder, true);
        builder.setLength(0);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return size;
}
/**
 * Prepares the vectorized edit-script input consumed by the feature-learning CNN:
 * each token of every selected edit script is replaced by its embedded vector, and
 * short scripts are right-padded with zero vectors up to the recorded maximum size.
 */
public static void prepareDataForFeatureLearning() {
    // Build the "0, 0, ..., 0" padding vector with a StringBuilder instead of
    // repeated String concatenation (which is quadratic).
    StringBuilder zero = new StringBuilder("0");
    for (int i = 1; i < Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1; i++) {
        zero.append(", 0");
    }
    String zeroVector = zero.toString();
    int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim());
    Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS);
    dataPrepare(Configuration.SELECTED_EDITSCRIPTES_FILE, maxSize,
            Configuration.VECTORIED_EDIT_SCRIPTS, embeddedTokens, zeroVector);
}
/**
 * Reads a token-embedding file: each line is "token v1 v2 ... vn". The vector part
 * is stored with ", " separators so it can be written straight into CSV output.
 *
 * @param embeddedTokensFile path of the embedding file.
 * @return map from token to its comma-separated embedded vector.
 */
private static Map<String, String> readEmbeddedTokens(String embeddedTokensFile) {
    Map<String, String> embeddedTokens = new HashMap<>();
    // BUG FIX: the original finally block called scanner.close() unconditionally,
    // throwing NullPointerException whenever the FileInputStream constructor failed.
    // try-with-resources closes both resources safely in reverse order.
    try (FileInputStream fis = new FileInputStream(new File(embeddedTokensFile));
            Scanner scanner = new Scanner(fis)) {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            int firstBlankIndex = line.indexOf(" ");
            String token = line.substring(0, firstBlankIndex);
            // replace(): the search string is a literal, no regex semantics needed.
            String value = line.substring(firstBlankIndex + 1).replace(" ", ", ");
            embeddedTokens.put(token, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return embeddedTokens;
}
/**
 * Vectorizes every line of {@code inputFile}: each token is replaced by its embedded
 * vector and the line is zero-padded to {@code maxSize} tokens; results are appended
 * to {@code outputFile} in batches.
 *
 * @param inputFile      token file, one blank-separated token vector per line.
 * @param maxSize        target token count per line after padding.
 * @param outputFile     destination CSV path (appended to).
 * @param embeddedTokens token-to-vector map from {@link #readEmbeddedTokens(String)}.
 * @param zeroVector     padding vector ("0, 0, ..., 0").
 */
private static void dataPrepare(String inputFile, int maxSize, String outputFile, Map<String, String> embeddedTokens, String zeroVector) {
    StringBuilder builder = new StringBuilder();
    int counter = 0;
    // BUG FIX: the original finally block dereferenced scanner/fis without null
    // checks, throwing NullPointerException when the file could not be opened.
    try (FileInputStream fis = new FileInputStream(new File(inputFile));
            Scanner scanner = new Scanner(fis)) {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            builder.append(convertToVector(embeddedTokens, line, maxSize, zeroVector));
            if (++counter % 10000 == 0) { // flush periodically to bound memory
                FileHelper.outputToFile(outputFile, builder, true);
                builder.setLength(0);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileHelper.outputToFile(outputFile, builder, true);
    builder.setLength(0);
}
/**
 * Converts one line of blank-separated tokens into a comma-separated row of embedded
 * vectors, zero-padded on the right up to {@code maxSize} entries, terminated by a
 * newline.
 *
 * @param embeddedTokens token-to-vector map.
 * @param line           blank-separated tokens.
 * @param maxSize        target entry count of the row.
 * @param zeroVector     padding vector used for missing trailing entries.
 * @return the CSV row, newline-terminated.
 */
private static StringBuilder convertToVector(Map<String, String> embeddedTokens, String line, int maxSize, String zeroVector) {
    String[] tokens = line.split(" ");
    int count = tokens.length;
    StringBuilder row = new StringBuilder();
    if (count == maxSize) {
        // Exactly full: every entry is a token vector; last one ends the row.
        for (int i = 0; i < count; i++) {
            row.append(embeddedTokens.get(tokens[i]));
            row.append(i < count - 1 ? ", " : "\n");
        }
    } else {
        // Shorter line: token vectors first, then zero-vector padding.
        for (String token : tokens) {
            row.append(embeddedTokens.get(token)).append(", ");
        }
        for (int i = count; i < maxSize - 1; i++) {
            row.append(zeroVector).append(", ");
        }
        row.append(zeroVector).append("\n");
    }
    return row;
}
/**
 * Converts the CSV of learned edit-script features into the ARFF file that the
 * clustering tool consumes.
 */
public static void prepareDataForClustering() {
    final String featureFile = Configuration.EXTRACTED_FEATURES + "vectorizedEditScripts.csv";
    DataPreparer.prepareData(featureFile, Configuration.CLUSTER_INPUT);
}
/**
 * Reads the clustering output: one integer cluster label per line, in the same
 * order as the clustered edit scripts.
 *
 * @return the per-script cluster labels (possibly truncated if reading fails).
 */
public static List<Integer> readClusterResults() {
    List<Integer> clusterResults = new ArrayList<>();
    String results = FileHelper.readFile(Configuration.CLUSTER_OUTPUT);
    // try-with-resources replaces the manual finally, which could NPE if the
    // reader had never been assigned.
    try (BufferedReader reader = new BufferedReader(new StringReader(results))) {
        String line;
        while ((line = reader.readLine()) != null) {
            clusterResults.add(Integer.parseInt(line));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return clusterResults;
}
/**
 * Groups edit-script identifiers by cluster label.
 *
 * @param clusterResults cluster label of each edit script, in script order.
 * @return map from cluster label to the 1-based indexes of its member scripts.
 */
public static Map<Integer, List<Integer>> readClusterResult(List<Integer> clusterResults) {
    Map<Integer, List<Integer>> clusters = new HashMap<>();
    for (int i = 0, size = clusterResults.size(); i < size; i++) {
        // computeIfAbsent replaces the manual containsKey/put dance; members are
        // recorded 1-based to match the external numbering.
        clusters.computeIfAbsent(clusterResults.get(i), k -> new ArrayList<>()).add(i + 1);
    }
    return clusters;
}
/**
 * Data for un-supervised learning: collects the token vectors used for embedding —
 * the selected buggy tokens first (overwriting the output), then every ".list"
 * test-data file (appended).
 */
public static void prepareTokensForEvaluation1() {
    String outputFile = Configuration.EMBEDDING_DATA_TOKENS1;
    // Start fresh with the training tokens, then append each testing-token file.
    FileHelper.outputToFile(outputFile, FileHelper.readFile(Configuration.SELECTED_BUGGY_TOKEN_FILE), false);
    for (File testFile : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
        FileHelper.outputToFile(outputFile, FileHelper.readFile(testFile), true);
    }
}
/**
 * Vectorizes the token data used for the unsupervised evaluation: embeds every
 * token and zero-pads each line to the recorded maximum source-code vector size.
 */
public static void prepareDataForFeatureLearningOfEvaluation1() {
    // Zero-padding vector matching the source-code token embedding size,
    // built with a StringBuilder instead of quadratic String concatenation.
    StringBuilder zero = new StringBuilder("0");
    for (int i = 1; i < Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; i++) {
        zero.append(", 0");
    }
    String zeroVector = zero.toString();
    // trim() added for consistency with the edit-script path: tolerates a trailing newline.
    int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
    Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_ALL_TOKENS1);
    // NOTE(review): the original iterated the ".list" test-data files in an empty
    // loop (dead code, removed); testing data is still vectorized together with
    // the training data below — separate it once per-file outputs are needed.
    String allTokensOfSourceCode = Configuration.EMBEDDING_DATA_TOKENS1; // TODO testing data should be separated.
    dataPrepare(allTokensOfSourceCode, maxSize, Configuration.VECTORIED_ALL_SOURCE_CODE1, embeddedTokens, zeroVector);
}
/**
 * Data for supervised learning: gathers token vectors for embedding — the clustered
 * tokens of every cluster present in {@code commonClustersMappingLabel}, followed by
 * all ".list" test-data files.
 *
 * @param commonClustersMappingLabel map whose keys are the cluster numbers to keep.
 */
public static void prepareTokensForEvaluation2(Map<Integer, Integer> commonClustersMappingLabel) {
    String outputFile = Configuration.EMBEDDING_DATA_TOKENS2;
    for (File clusterFile : FileHelper.getAllFilesInCurrentDiectory(Configuration.CLUSTERED_TOKENSS_FILE, ".list")) {
        String fileName = clusterFile.getName();
        // The cluster number is encoded in the file name: ..._<num>.list
        int clusterNum = Integer.parseInt(
                fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")));
        if (commonClustersMappingLabel.containsKey(clusterNum)) {
            FileHelper.outputToFile(outputFile, FileHelper.readFile(clusterFile), true);
        }
    }
    for (File testFile : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
        FileHelper.outputToFile(outputFile, FileHelper.readFile(testFile), true);
    }
}
/**
 * Vectorizes the data for the supervised evaluation: training data is one labelled
 * CSV per retained cluster (label = cluster number), testing data is unlabelled
 * (label 0), one output file per input ".list" file.
 *
 * @param commonClustersMappingLabel map whose keys are the cluster numbers to keep.
 */
public static void prepareDataForFeatureLearningOfEvaluation2(Map<Integer, Integer> commonClustersMappingLabel) {
    // Zero-padding vector; StringBuilder avoids quadratic String concatenation.
    StringBuilder zero = new StringBuilder("0");
    for (int i = 1; i < Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2; i++) {
        zero.append(", 0");
    }
    String zeroVector = zero.toString();
    Map<String, String> embeddedTokens = readEmbeddedTokens(Configuration.EMBEDDED_ALL_TOKENS2);
    // trim() added for consistency with the edit-script path: tolerates a trailing newline.
    int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
    // Training data: one labelled batch per retained cluster.
    for (File clusterFile : FileHelper.getAllFilesInCurrentDiectory(Configuration.CLUSTERED_TOKENSS_FILE, ".list")) {
        String fileName = clusterFile.getName();
        int clusterNum = Integer.parseInt(
                fileName.substring(fileName.lastIndexOf("_") + 1, fileName.lastIndexOf(".list")));
        if (commonClustersMappingLabel.containsKey(clusterNum)) {
            dataPrepare(clusterFile.getPath(), maxSize, Configuration.TRAINING_DATA, embeddedTokens, zeroVector, clusterNum);
        }
    }
    // Testing data: unlabelled (cluster 0).
    for (File testFile : FileHelper.getAllFilesInCurrentDiectory(Configuration.TEST_DATA_FILE, ".list")) {
        // BUG FIX: String.replace returns a new string; the original discarded the
        // result, so output files kept the ".list" suffix instead of ".csv".
        String fileName = testFile.getName().replace(".list", ".csv");
        dataPrepare(testFile.getPath(), maxSize, Configuration.TESTING_DATA + fileName, embeddedTokens, zeroVector, 0);
    }
}
/**
 * Vectorizes every line of {@code inputFile} and appends the label column
 * {@code clusterNum} to each row; results are appended to {@code outputFile} in
 * batches.
 *
 * @param inputFile      token file, one blank-separated token vector per line.
 * @param maxSize        target token count per line after padding.
 * @param outputFile     destination CSV path (appended to).
 * @param embeddedTokens token-to-vector map from {@link #readEmbeddedTokens(String)}.
 * @param zeroVector     padding vector ("0, 0, ..., 0").
 * @param clusterNum     label written as the last column of every row.
 */
private static void dataPrepare(String inputFile, int maxSize, String outputFile, Map<String, String> embeddedTokens,
        String zeroVector, int clusterNum) {
    StringBuilder builder = new StringBuilder();
    int counter = 0;
    // BUG FIX: the original finally block dereferenced scanner/fis without null
    // checks, throwing NullPointerException when the file could not be opened.
    try (FileInputStream fis = new FileInputStream(inputFile);
            Scanner scanner = new Scanner(fis)) {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            builder.append(convertToVector(embeddedTokens, line, maxSize, zeroVector, clusterNum));
            if (++counter % 10000 == 0) { // flush periodically to bound memory
                FileHelper.outputToFile(outputFile, builder, true);
                builder.setLength(0);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileHelper.outputToFile(outputFile, builder, true);
    builder.setLength(0);
}
/**
 * Converts one line of blank-separated tokens into a comma-separated row: one
 * embedded vector per token, zero-vector padding up to {@code maxSize} entries, and
 * the cluster label as the final column.
 *
 * @param embeddedTokens token-to-vector map.
 * @param line           blank-separated tokens.
 * @param maxSize        target entry count before the label column.
 * @param zeroVector     padding vector for missing trailing entries.
 * @param clusterNum     label appended as the last column.
 * @return the CSV row, newline-terminated.
 */
private static StringBuilder convertToVector(Map<String, String> embeddedTokens, String line, int maxSize, String zeroVector, int clusterNum) {
    String[] tokens = line.split(" ");
    StringBuilder row = new StringBuilder();
    // Token vectors first; the padding loop is a no-op when the line is already
    // maxSize tokens wide, so both cases share one code path.
    for (String token : tokens) {
        row.append(embeddedTokens.get(token)).append(", ");
    }
    for (int i = tokens.length; i < maxSize; i++) {
        row.append(zeroVector).append(", ");
    }
    row.append(clusterNum).append("\n");
    return row;
}
/**
 * Reads the cluster-number-to-label mapping file. Each data line has the form
 * "&lt;label&gt; : &lt;clusterNumber&gt;".
 *
 * @return map from cluster number to its label.
 */
public static Map<Integer, Integer> readCommonCLusters() {
    Map<Integer, Integer> commonClustersMappingLabel = new HashMap<>();
    String commonClusters = FileHelper.readFile(Configuration.CLUSTERNUMBER_LABEL_MAP);
    // try-with-resources replaces the manual finally, which could NPE if the
    // reader had never been assigned.
    try (BufferedReader reader = new BufferedReader(new StringReader(commonClusters))) {
        reader.readLine(); // first line intentionally skipped (presumably a header) — TODO confirm
        String line;
        while ((line = reader.readLine()) != null) {
            String[] strArray = line.split(" : ");
            int key = Integer.parseInt(strArray[1]);   // cluster number
            int value = Integer.parseInt(strArray[0]); // label
            commonClustersMappingLabel.put(key, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return commonClustersMappingLabel;
}
}
@@ -0,0 +1,66 @@
package edu.lu.uni.serval.FixPatternMining.DataPrepare;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import edu.lu.uni.serval.utils.FileHelper;
import edu.lu.uni.serval.utils.ListSorter;
/**
 * Selects a cut-off ("max") size from a distribution of edit-script sizes, either
 * the third quartile or the Tukey upper whisker.
 */
public class MaxSizeSelector {

    /** Strategy for choosing the cut-off size from a size distribution. */
    public enum MaxSizeType {
        UpperWhisker, ThirdQuartile
    }

    /**
     * Reads one integer size per line from {@code sizeFilePath}.
     *
     * @param sizeFilePath path of the sizes file.
     * @return the sizes, in file order.
     * @throws IOException if the file cannot be read.
     */
    public static List<Integer> readSizes(String sizeFilePath) throws IOException {
        List<Integer> sizes = new ArrayList<>();
        // java.nio Files replaces the FileHelper + StringReader round-trip of the
        // original (assumes the file is UTF-8/ASCII digits — TODO confirm charset).
        for (String line : Files.readAllLines(Paths.get(sizeFilePath))) {
            sizes.add(Integer.parseInt(line.trim()));
        }
        return sizes;
    }

    /**
     * Selects the cut-off size of {@code sizesDistribution} according to
     * {@code maxSizeType}. The input list is not modified.
     *
     * @param maxSizeType       quartile-based selection strategy.
     * @param sizesDistribution the (unsorted) size distribution.
     * @return the selected cut-off size.
     */
    public static int selectMaxSize(MaxSizeType maxSizeType, List<Integer> sizesDistribution) {
        switch (maxSizeType) {
        case UpperWhisker:
            return upperWhisker(sizesDistribution);
        case ThirdQuartile:
            return thirdQuartile(sizesDistribution);
        default:
            return 0; // unreachable: both enum constants are handled above
        }
    }

    /** Tukey upper whisker: Q3 + 1.5 * (Q3 - Q1), computed on a sorted copy. */
    private static int upperWhisker(List<Integer> sizesDistribution) {
        List<Integer> sorted = sortedCopy(sizesDistribution);
        int firstQuartile = sorted.get(sorted.size() * 25 / 100);
        int thirdQuartile = sorted.get(sorted.size() * 75 / 100);
        return thirdQuartile + (int) (1.5 * (thirdQuartile - firstQuartile));
    }

    /** Third quartile: the value at the 75% index of a sorted copy. */
    private static int thirdQuartile(List<Integer> sizesDistribution) {
        List<Integer> sorted = sortedCopy(sizesDistribution);
        return sorted.get(sorted.size() * 75 / 100);
    }

    /** Ascending sorted copy; Collections.sort replaces the project ListSorter. */
    private static List<Integer> sortedCopy(List<Integer> sizesDistribution) {
        List<Integer> sorted = new ArrayList<>(sizesDistribution);
        Collections.sort(sorted);
        return sorted;
    }
}
@@ -0,0 +1,121 @@
package edu.lu.uni.serval.FixPatternMining;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import edu.lu.uni.serval.FixPatternMining.DataPrepare.DataPreparation;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.deeplearner.CNNFeatureExtractor2;
import edu.lu.uni.serval.deeplearner.CNNSupervisedLearning;
import edu.lu.uni.serval.utils.FileHelper;
/**
 * Drives the deep-learning steps: learns feature vectors from vectorized edit
 * scripts (unsupervised CNN) and from vectorized source code (unsupervised and
 * supervised CNNs).
 */
public class FeatureLearner {

    // Hyper-parameters shared by every CNN run in this class (previously duplicated
    // magic numbers in each method).
    private static final int BATCH_SIZE = 1000;          // mini-batch size
    private static final int FEATURE_VECTOR_SIZE = 200;  // size of each learned feature vector
    private static final int NUMBER_OF_EPOCHS = 20;      // training epochs
    private static final int SEED = 123;                 // RNG seed, for reproducibility
    private static final int NUM_OUT_OF_LAYER_1 = 20;    // outputs of the first conv layer
    private static final int NUM_OUT_OF_LAYER_2 = 50;    // outputs of the second conv layer

    /**
     * Learn features of edit scripts for fix patterns mining (unsupervised).
     */
    public void learnFeatures() {
        String editScriptsVectorFile = Configuration.VECTORIED_EDIT_SCRIPTS; // input
        int sizeOfVector = Integer.parseInt(
                FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1;
        try {
            CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(new File(editScriptsVectorFile),
                    sizeOfVector, sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.EXTRACTED_FEATURES);
            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            // FileNotFoundException is an IOException; one multi-catch replaces the
            // original three identical catch blocks.
            e.printStackTrace();
        }
    }

    /**
     * Learn features of buggy source code (unsupervised).
     */
    public void learnFeaturesOfSourceCode() {
        // trim() added for consistency with learnFeatures(): tolerates a trailing newline.
        int sizeOfVector = Integer.parseInt(
                FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2;
        try {
            CNNFeatureExtractor2 learner = new CNNFeatureExtractor2(
                    new File(Configuration.VECTORIED_ALL_SOURCE_CODE1),
                    sizeOfVector, sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.EXTRACTED_FEATURES_EVALUATION);
            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Supervised learning: trains on the labelled training data and evaluates on
     * {@code testingData}.
     *
     * @param testingData CSV file of vectorized testing data.
     */
    public void learnFeaturesOfSourceCode2(File testingData) {
        int sizeOfVector = Integer.parseInt(
                FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE).trim());
        int sizeOfTokenVec = Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2;
        try {
            // Number of output classes = number of retained common clusters.
            int clusterNum = DataPreparation.readCommonCLusters().size();
            File trainingData = new File(Configuration.TRAINING_DATA);
            CNNSupervisedLearning learner = new CNNSupervisedLearning(trainingData, sizeOfVector,
                    sizeOfTokenVec, BATCH_SIZE, FEATURE_VECTOR_SIZE, clusterNum, testingData);
            learner.setNumberOfEpochs(NUMBER_OF_EPOCHS);
            learner.setSeed(SEED);
            learner.setNumOfOutOfLayer1(NUM_OUT_OF_LAYER_1);
            learner.setNumOfOutOfLayer2(NUM_OUT_OF_LAYER_2);
            learner.setOutputPath(Configuration.FEATURES_OF_TRAINING_DATA);
            learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA);
            learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA);
            learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA);
            learner.setModelFile(Configuration.SUPERVISED_LEARNING_MODEL);
            learner.extracteFeaturesWithCNN();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Supervised learning by loading a previously trained model instead of retraining.
     *
     * @param testingData CSV file of vectorized testing data.
     */
    public void learnFeaturesOfSourceCode3(File testingData) {
        try {
            CNNSupervisedLearning learner = new CNNSupervisedLearning(
                    BATCH_SIZE, testingData, Configuration.SUPERVISED_LEARNING_MODEL);
            learner.setFeatresOfTestingData(Configuration.FEATURES_OF_TESTING_DATA);
            learner.setPossibilitiesOfPrediction(Configuration.POSSIBILITIES_OF_TESTING_DATA);
            learner.setPredictedResultsOfTestingData(Configuration.PREDICTED_RESULTS_OF_TESTING_DATA);
            learner.extracteFeaturesWithCNNByLoadingModel();
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
@@ -0,0 +1,65 @@
package edu.lu.uni.serval.FixPatternMining;
import java.io.File;
import java.io.IOException;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.deeplearner.Word2VecEncoder;
/**
 * Encode tokens of edit scripts with Word2Vec.
 *
 * @author kui.liu
 *
 */
public class TokenEmbedder {

    /** Word2Vec context window shared by all embedding runs. */
    private static final int WINDOW_SIZE = 2;
    /** Minimum word frequency of 1: every token is kept, however rare. */
    private static final int MIN_WORD_FREQUENCY = 1;

    /**
     * Embed tokens for fix patterns mining.
     */
    public void embedTokensOfEditScripts() {
        embed(new File(Configuration.SELECTED_EDITSCRIPTES_FILE),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN1,
                Configuration.EMBEDDED_EDIT_SCRIPT_TOKENS);
    }

    /** Embed source-code tokens used for the supervised evaluation. */
    public void embedTokensOfSourceCodeForSupervisedTesting() {
        embed(new File(Configuration.EMBEDDING_DATA_TOKENS2),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2,
                Configuration.EMBEDDED_ALL_TOKENS2);
    }

    /** Embed source-code tokens used for the unsupervised evaluation. */
    public void embedTokensOfSourceCodeForUnsupervisedTesting() {
        embed(new File(Configuration.EMBEDDING_DATA_TOKENS1),
                Configuration.VECTOR_SIZE_OF_EMBEDED_TOKEN2,
                Configuration.EMBEDDED_ALL_TOKENS1);
    }

    /**
     * Runs Word2Vec over {@code inputFile} and writes the embedded tokens to
     * {@code outputFileName}. The three public methods above differed only in
     * their input/output paths and layer size, so the shared body lives here.
     */
    private void embed(File inputFile, int layerSize, String outputFileName) {
        Word2VecEncoder encoder = new Word2VecEncoder();
        encoder.setWindowSize(WINDOW_SIZE);
        try {
            encoder.embedTokens(inputFile, MIN_WORD_FREQUENCY, layerSize, outputFileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
@@ -24,13 +24,13 @@ public class Configuration {
public static final String BUGGY_CODY_TOKENS_FILE = GUM_TREE_OUTPUT + "tokens.list";
public static final String EDITSCRIPT_SIZES_FILE = GUM_TREE_OUTPUT + "editScriptSizes.list";
public static int MAX_EDIT_SCRIPT_VECTOR_SIZE = 0; // The max size of edit script vectors.
public static int MAX_SOURCE_CODE_TOKEN_VECTOR_SIZE = 0; // The max size of all buggy source code token vectors.
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN1 = 100; // tokens of edit scripts.
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN2 = 200; // tokens of source code
// the input path of fix patterns mining.
private static final String MINING_INPUT = ROOT_PATH + "MiningInput/";
public static final String MAX_TOKEN_VECTORS_SIZE_OF_EDIT_SCRIPTS = MINING_INPUT + "/MaxTokenVectorSizeOfEditScripts.list"; // The max size of edit scripts: upper limitation of max size.
public static final String MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE = MINING_INPUT + "/MaxTokenVectorSizeOfBuggySourceCode.list"; // The max size of all buggy source code token vectors.
// the input path of token embedding.
public static final String EMBEDDING_INPUT = MINING_INPUT + "Embedding/";
public static final String SELECTED_PATCHES_SOURE_CODE_FILE = EMBEDDING_INPUT + "patchSourceCode.list";// Selected patches.
@@ -42,7 +42,7 @@ public class Configuration {
public static final String EMBEDDED_EDIT_SCRIPT_TOKENS = FEATURE_LEARNING_INPUT + "embeddedEditScriptTokens.list"; // All embedded tokens of selected edit scripts.
public static final String VECTORIED_EDIT_SCRIPTS = FEATURE_LEARNING_INPUT + "vectorizedEditScripts.csv"; // Embedded and vectorized edit script vectors.
// the input path of clustering.
public static final String EXTRACTED_FEATURES = MINING_INPUT + "ExtractedFeatures/"; // Extracted features of edit scripts.
public static final String EXTRACTED_FEATURES = MINING_INPUT + "ExtractedFeatures/"; // Extracted features of all edit scripts.
public static final String CLUSTER_INPUT = MINING_INPUT + "ClusteringInput/input.arff";
// the output path of fix patterns mining.
@@ -53,20 +53,28 @@ public class Configuration {
// evaluation data
public static final String TEST_INPUT = ROOT_PATH + "TestProjects/";
public static final String TEST_LOCALIZATION_FILE = ROOT_PATH + "TestData/Localization.list"; // Positions of all test statements.
public static final String TEST_DATA_FILE = ROOT_PATH + "TestData/TestStatements.list"; // Token vectors of all test statements.
public static final String TEST_POSITION_FILE = ROOT_PATH + "TestData/Positions/"; // Positions of all test statements.
public static final String TEST_DATA_FILE = ROOT_PATH + "TestData/TestStatements/"; // Token vectors of all test statements.
public static final String NUMBER_OF_TRAINING_DATA = ROOT_PATH + "TestData/NumberOfTrainingData.list";;
// data of unsupervised learning
public static final String EMBEDDING_DATA_TOKENS1 = ROOT_PATH + "TestData/AllTokenVectorsForEvaluation.list";
public static final String EMBEDDED_ALL_TOKENS1 = ROOT_PATH + "TestData/AllEmbeddedTokens.list";
public static final String VECTORIED_ALL_SOURCE_CODE1 = ROOT_PATH + "TestData/AllVectorizedSourceCode.list";
public static final String EXTRACTED_FEATURES_TESTING = ROOT_PATH + "TestDataExtractedFeatures/";
public static final String VECTORIED_ALL_SOURCE_CODE1 = ROOT_PATH + "TestData/AllVectorizedSourceCode/";
public static final String EXTRACTED_FEATURES_EVALUATION = ROOT_PATH + "TestDataExtractedFeatures/"; // extracted features of all source code (training data and testing data)
// Data of supervised learning
public static final String CLUSTERNUMBER_LABEL_MAP = ROOT_PATH + "TestData/clusterMappingLabel.list";
public static final String EMBEDDING_DATA_TOKENS2 = ROOT_PATH + "TestData/AllTokenVectorsForSupervisedEvaluation.list";
public static final String EMBEDDED_ALL_TOKENS2 = ROOT_PATH + "TestData/AllEmbeddedTokensForSuperVisedEvaluation.list";
public static final String TRAINING_DATA = ROOT_PATH + "TestData/TrainingData.csv"; // Training data of supervised learning
public static final String TESTING_DATA = ROOT_PATH + "TestData/TestingData.csv"; // testing data of supervised learning
public static final String TESTING_DATA = ROOT_PATH + "TestData/SupervisedLearning/"; // testing data of supervised learning
public static final String FEATURES_OF_TRAINING_DATA = ROOT_PATH + "TestingOutput/TraingFeatures/";
public static final String FEATURES_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/TestingFeatures/";
public static final String POSSIBILITIES_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/Posibilities/";
public static final String PREDICTED_RESULTS_OF_TESTING_DATA = ROOT_PATH + "TestingOutput/Prediction/";
public static final String SUPERVISED_LEARNING_MODEL = ROOT_PATH + "TestingOutput/SupervisedLearningModel.zip";
}
@@ -28,23 +28,17 @@ import edu.lu.uni.serval.utils.FileHelper;
*/
public class ProjectScanner {
public static void main(String[] args) {
String inputPath = Configuration.TEST_INPUT; //test java projects
File inputFileDirector = new File(inputPath);
File[] projects = inputFileDirector.listFiles(); // project folders
String outputLocalizeFile = Configuration.TEST_LOCALIZATION_FILE;
String outputTokensFile = Configuration.TEST_DATA_FILE;
private int maxSize = Integer.parseInt(FileHelper.readFile(Configuration.MAX_TOKEN_VECTORS_SIZE_OF_SOURCE_CODE));
private int numberOfFiles = 0;
private List<SimpleTree> allSimpleTrees = new ArrayList<>();
public void scanJavaProject(File[] projects, String outputLocalizeFile, String outputTokensFile, int limitation) {
for (File project : projects) {
ProjectScanner scanner = new ProjectScanner();
scanner.scanJavaProject(project, outputLocalizeFile, outputTokensFile);
scanJavaProject(project, outputLocalizeFile, outputTokensFile, limitation);
}
}
List<SimpleTree> allSimpleTrees = new ArrayList<>();
public void scanJavaProject(File javaProject, String outputLocalizeFile, String outputTokensFile) {
public void scanJavaProject(File javaProject, String outputLocalizeFile, String outputTokensFile, int limitation) {
List<File> files = new ArrayList<>();
files.addAll(FileHelper.getAllFiles(javaProject.getPath(), ".java"));
@@ -60,20 +54,25 @@ public class ProjectScanner {
CUCreator cuCreator = new CUCreator();
CompilationUnit cUnit = cuCreator.createCompilationUnit(file);
getTokenVectorOfAllStatements(tree, cUnit, tokensBuilder, localizationsBuilder, javaProject.getPath(), file.getPath());
++ counter;
if (++ counter % 1000 == 0) {
FileHelper.outputToFile(outputLocalizeFile, localizationsBuilder, true);
FileHelper.outputToFile(outputTokensFile, tokensBuilder, true);
if ( counter % limitation == 0) {
numberOfFiles ++;
FileHelper.outputToFile(outputLocalizeFile + "Positions" + numberOfFiles + ".list", localizationsBuilder, true);
FileHelper.outputToFile(outputTokensFile + "Tokens" + numberOfFiles + ".list", tokensBuilder, true);
localizationsBuilder.setLength(0);
tokensBuilder.setLength(0);
}
}
FileHelper.outputToFile(outputLocalizeFile, localizationsBuilder, true);
FileHelper.outputToFile(outputTokensFile, tokensBuilder, true);
if (localizationsBuilder.length() > 0) {
numberOfFiles ++;
FileHelper.outputToFile(outputLocalizeFile + "Positions" + numberOfFiles + ".list", localizationsBuilder, true);
FileHelper.outputToFile(outputTokensFile + "Tokens" + numberOfFiles + ".list", tokensBuilder, true);
localizationsBuilder.setLength(0);
tokensBuilder.setLength(0);
}
}
private void getTokenVectorOfAllStatements(ITree tree, CompilationUnit unit, StringBuilder tokensBuilder, StringBuilder localizationsBuilder, String projectName, String filePath) {
String astNodeType = ASTNodeMap.map.get(tree.getType()); //ignore: SwitchCase, SuperConstructorInvocation, ConstructorInvocation
@@ -98,7 +97,8 @@ public class ProjectScanner {
// project name: file name: line number
String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim();
String[] tokensArray = tokens.split(" ");
if (tokensArray.length <= Configuration.MAX_SOURCE_CODE_TOKEN_VECTOR_SIZE) {
if (tokensArray.length <= maxSize) {
int position = tree.getPos();
int lineNum = unit.getLineNumber(position);
tokensBuilder.append(tokens).append("\n");
@@ -183,4 +183,5 @@ public class ProjectScanner {
simpleTree.setParent(parent);
return simpleTree;
}
}