initial commit

This commit is contained in:
Anil Koyuncu
2018-02-21 16:31:58 +01:00
parent c6fa20645b
commit a1e9627566
8 changed files with 167 additions and 131 deletions
-1
View File
@@ -42,7 +42,6 @@
<orderEntry type="library" name="Maven: org.eclipse.equinox:app:1.3.200-v20130910-1609" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.birt.runtime:org.eclipse.core.resources:3.10.0.v20150423-0755" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.tycho:org.eclipse.jdt.core:3.12.2.v20161117-1814" level="project" />
<orderEntry type="library" name="Maven: edu.lu.uni.serval:SourceCodeParser:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="module" module-name="GitTraveller" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.7" level="project" />
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.11.2" level="project" />
@@ -36,7 +36,8 @@ public abstract class Parser implements ParserInterface {
protected List<HierarchicalActionSet> parseChangedSourceCodeWithGumTree(File prevFile, File revFile) {
List<HierarchicalActionSet> actionSets = new ArrayList<>();
// GumTree results
List<Action> gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile);
List<Action> gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile);
// List<Action> gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile);
if (gumTreeResults != null && gumTreeResults.size() > 0) {
// Regroup GumTree results.
List<HierarchicalActionSet> allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults);
@@ -49,17 +49,17 @@ public class FixedViolationHunkParser extends FixedViolationParser {
// type = "#NoStatementChange:";
// }
} else {
List<DiffEntryHunk> diffentryHunks = new DiffEntryReader().readHunks2(diffentryFile);
List<DiffEntryHunk> diffentryHunks = new DiffEntryReader().readHunks3(diffentryFile);
//Filter out the modify actions, which are not in the DiffEntry hunks.
HunkActionFilter hunkFilter = new HunkActionFilter();
List<DiffEntryHunk> selectedPatchHunks = hunkFilter.matchActionsByDiffEntryForC(diffentryHunks, actionSets);
List<DiffEntryHunk> selectedPatchHunks = hunkFilter.filterActionsByModifiedRange2(diffentryHunks, actionSets, revFile, prevFile);
for (DiffEntryHunk patchHunk : selectedPatchHunks) {
List<HierarchicalActionSet> hunkActionSets = patchHunk.getActionSets();
List<HierarchicalActionSet> hunkActionSets = patchHunk.getActionSets();
// multiple UPD, and some UPD contain other UPD.
removeOverlapperdUPD(hunkActionSets);
// Range of buggy source code
int bugStartLine = 0;
int bugEndLine = 0;
@@ -69,7 +69,6 @@ public class FixedViolationHunkParser extends FixedViolationParser {
int bugEndPosition = 0;
int fixEndPosition = 0;
for (HierarchicalActionSet hunkActionSet : hunkActionSets) {
//TODO FIX ME
int actionBugStart = hunkActionSet.getBugStartLineNum();
int actionBugEnd = hunkActionSet.getBugEndLineNum();
int actionFixStart = hunkActionSet.getFixStartLineNum();
@@ -93,13 +92,18 @@ public class FixedViolationHunkParser extends FixedViolationParser {
fixEndPosition = hunkActionSet.getFixEndPosition();
}
}
if (fixStartLine == 0 && bugStartLine == 0) {
this.unfixedViolations += "#WRONG: " + revFile.getName() + ":" + patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize() + "\n";
this.nullMappingGumTreeResult ++;
continue;
}
if (fixStartLine == 0 && bugStartLine != 0) {// pure delete actions.
// get the exact buggy code by violation's position. TODO later
}
// if (children.size() == 0) continue;
boolean isPureInsert = false;
if (bugStartLine == 0 && patchHunk.getBugLineStartNum() > 0) {
bugStartLine = patchHunk.getBugLineStartNum();
@@ -107,13 +111,15 @@ public class FixedViolationHunkParser extends FixedViolationParser {
isPureInsert = true;
// continue;
}
if ((bugEndLine - bugStartLine > Configuration.HUNK_SIZE && !isPureInsert) || fixEndLine - fixStartLine > Configuration.HUNK_SIZE) {
// continue; //TODO filter out the
// if ((bugEndLine - bugStartLine > Configuration.HUNK_SIZE ) || fixEndLine - fixStartLine > Configuration.HUNK_SIZE) {
// continue; //TODO hunk size
// }
if(patchHunk.getBuggyHunkSize() > Configuration.HUNK_SIZE || patchHunk.getFixedHunkSize() > Configuration.HUNK_SIZE){
continue;
}
/**
* Select edit scripts for deep learning.
* Select edit scripts for deep learning.
* Edit scripts will be used to mine common fix patterns.
*/
// 1. First level: AST node type.
@@ -122,8 +128,8 @@ public class FixedViolationHunkParser extends FixedViolationParser {
System.err.println("===+++===: " + revFile.getName() + ":" +patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize());
}
// 2. source code: raw tokens
// 3. abstract identifiers:
// 4. semi-source code:
// 3. abstract identifiers:
// 4. semi-source code:
String[] editScriptTokens = astEditScripts.split(" ");
int size = editScriptTokens.length;
if (size == 1) {
@@ -131,17 +137,21 @@ public class FixedViolationHunkParser extends FixedViolationParser {
this.unfixedViolations += "#NullMatchedGumTreeResult1:" + revFile.getName() + ":" + patchHunk.getBugLineStartNum() + ", " + patchHunk.getBuggyHunkSize() + "\n";
continue;
}
String patchPosition = "\n" + revFile.getName() + "\n@@ -" + bugStartLine + ", " + bugEndLine + " +" + fixStartLine + ", " + fixEndLine + "@@\n";
String info = Configuration.PATCH_SIGNAL + "\n" + patchPosition + patchHunk.getHunk() + "\nAST Diff###:\n" + getAstEditScripts(hunkActionSets, bugEndPosition, fixEndPosition) + "\n";
//TODO uncomment the line below for more detailed gumtree input.
// String info = Configuration.PATCH_SIGNAL + "\n" + patchPosition + patchHunk.getHunk() + "\nAST Diff###:\n" + getAstEditScripts(hunkActionSets) + "\n";
// if (noUpdate(editScriptTokens)) {
// }
this.patchesSourceCode += info;
this.sizes += size + "\n";
this.astEditScripts += astEditScripts + "\n";
// SimpleTree simpleTree = getBuggyCodeTree(patchHunk, bugEndPosition, prevFile, bugStartLine, bugEndLine);
// String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim();
// this.tokensOfSourceCode += tokens + "\n";
SimpleTree simpleTree = getBuggyCodeTree(patchHunk, bugEndPosition, prevFile, bugStartLine, bugEndLine);
String tokens = Tokenizer.getTokensDeepFirst(simpleTree).trim();
this.tokensOfSourceCode += tokens + "\n";
}
}
}
@@ -12,19 +12,18 @@ import com.github.gumtreediff.actions.model.Action;
import edu.lu.uni.serval.FixPatternParser.Parser;
import edu.lu.uni.serval.gumtree.GumTreeComparer;
import edu.lu.uni.serval.gumtree.regroup.HierarchicalActionSet;
import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouperForC;
import edu.lu.uni.serval.gumtree.regroup.HierarchicalRegrouper;
import edu.lu.uni.serval.utils.FileHelper;
import edu.lu.uni.serval.utils.ListSorter;
/**
* Parse fix patterns with GumTree.
*
*
* @author kui.liu
*
*/
public class FixedViolationParser extends Parser {
/*
* ResultType:
* 0: normal GumTree results.
@@ -34,16 +33,16 @@ public class FixedViolationParser extends Parser {
* 4: useless violations
*/
public int resultType = 0;
protected String violationTypes = "";
/**
 * Empty implementation: the body contains no statements, so invoking this
 * method on this class performs no work and leaves all fields untouched.
 *
 * @param prevFile      not read by this implementation
 * @param revFile       not read by this implementation
 * @param diffentryFile not read by this implementation
 */
@Override
public void parseFixPatterns(File prevFile, File revFile, File diffentryFile) {
	// NOTE(review): presumably subclasses provide the real parsing logic — confirm.
}
/**
* Regroup GumTree results without removing the modifications of variable names.
*
*
* @param prevFile
* @param revFile
* @return
@@ -51,7 +50,7 @@ public class FixedViolationParser extends Parser {
protected List<HierarchicalActionSet> parseChangedSourceCodeWithGumTree2(File prevFile, File revFile) {
List<HierarchicalActionSet> actionSets = new ArrayList<>();
// GumTree results
List<Action> gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTreeForCCode(prevFile, revFile);
List<Action> gumTreeResults = new GumTreeComparer().compareTwoFilesWithGumTree(prevFile, revFile);
if (gumTreeResults == null) {
this.resultType = 1;
return null;
@@ -60,7 +59,7 @@ public class FixedViolationParser extends Parser {
return actionSets;
} else {
// Regroup GumTree results.
List<HierarchicalActionSet> allActionSets = new HierarchicalRegrouperForC().regroupGumTreeResults(gumTreeResults);
List<HierarchicalActionSet> allActionSets = new HierarchicalRegrouper().regroupGumTreeResults(gumTreeResults);
// for (HierarchicalActionSet actionSet : allActionSets) {
// String astNodeType = actionSet.getAstNodeType();
// if (astNodeType.endsWith("Statement") || "FieldDeclaration".equals(astNodeType)) {
@@ -6,111 +6,132 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import edu.lu.uni.serval.FixPatternParser.RunnableParser;
import edu.lu.uni.serval.MultipleThreadsParser.MessageFile;
import edu.lu.uni.serval.config.Configuration;
import edu.lu.uni.serval.utils.FileHelper;
import org.eclipse.jgit.revwalk.RevCommit;
public class TestHunkParser {
public static void main(String[] args) {
// input data
String pjName = "linux";
final List<MessageFile> msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT +pjName +"/");
System.out.println(msgFiles.size());
File folder = new File("/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput");
File[] listOfFiles = folder.listFiles();
Stream<File> stream = Arrays.stream(listOfFiles);
List<File> folders = stream
.filter(x -> !x.getName().startsWith("."))
.collect(Collectors.toList());
// output path
final String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE;
final String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE;
final String buggyTokensFilePath = Configuration.BUGGY_CODE_TOKENS_FILE;
final String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE;
final String alarmTypesFilePath = Configuration.ALARM_TYPES_FILE;
List<File> targetList = new ArrayList<File>();
for (File f:folders){
for(File f1 :f.listFiles()){
if (!f1.getName().startsWith(".")){
targetList.add(f1);
}
}
}
// FileHelper.deleteDirectory(editScriptsFilePath);
// FileHelper.deleteDirectory(patchesSourceCodeFilePath);
// FileHelper.deleteDirectory(buggyTokensFilePath);
// FileHelper.deleteDirectory(editScriptSizesFilePath);
// FileHelper.deleteDirectory(alarmTypesFilePath);
StringBuilder astEditScripts = new StringBuilder();
StringBuilder tokens = new StringBuilder();
StringBuilder sizes = new StringBuilder();
StringBuilder patches = new StringBuilder();
StringBuilder alarmTypes = new StringBuilder();
int a = 0;
for (File target : targetList) {
final List<MessageFile> msgFiles = getMessageFiles(target.toString() + "/"); //"/Users/anilkoyuncu/bugStudy/code/python/GumTreeInput/Apache/CAMEL/"
System.out.println(msgFiles.size());
if(msgFiles.size() == 0)
continue;
String pjName = target.getName();
// output path
String GUM_TREE_OUTPUT = Configuration.ROOT_PATH + pjName + "/";
final String editScriptsFilePath = GUM_TREE_OUTPUT + "editScripts.list";
final String patchesSourceCodeFilePath =GUM_TREE_OUTPUT + "patchSourceCode.list";
final String buggyTokensFilePath = GUM_TREE_OUTPUT + "tokens.list";
final String editScriptSizesFilePath = GUM_TREE_OUTPUT + "editScriptSizes.csv";
final String alarmTypesFilePath = GUM_TREE_OUTPUT + "alarmTypes.list";
FileHelper.deleteDirectory(editScriptsFilePath);
FileHelper.deleteDirectory(patchesSourceCodeFilePath);
FileHelper.deleteDirectory(buggyTokensFilePath);
FileHelper.deleteDirectory(editScriptSizesFilePath);
FileHelper.deleteDirectory(alarmTypesFilePath);
StringBuilder astEditScripts = new StringBuilder();
StringBuilder tokens = new StringBuilder();
StringBuilder sizes = new StringBuilder();
StringBuilder patches = new StringBuilder();
StringBuilder alarmTypes = new StringBuilder();
int a = 0;
// int counter = 0;
for (MessageFile msgFile : msgFiles) {
FixedViolationHunkParser parser = new FixedViolationHunkParser();
final ExecutorService executor = Executors.newSingleThreadExecutor();
// schedule the work
final Future<?> future = executor.submit(new RunnableParser(msgFile.getPrevFile(),
msgFile.getRevFile(), msgFile.getDiffEntryFile(), parser));
try {
// where we wait for task to complete
future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS);
String editScripts = parser.getAstEditScripts();
if (!editScripts.equals("")) {
astEditScripts.append(editScripts);
tokens.append(parser.getTokensOfSourceCode());
sizes.append(parser.getSizes());
patches.append(parser.getPatchesSourceCode());
alarmTypes.append(parser.getAlarmTypes());
for (MessageFile msgFile : msgFiles) {
FixedViolationHunkParser parser = new FixedViolationHunkParser();
final ExecutorService executor = Executors.newSingleThreadExecutor();
// schedule the work
final Future<?> future = executor.submit(new RunnableParser(msgFile.getPrevFile(),
msgFile.getRevFile(), msgFile.getDiffEntryFile(), parser));
try {
// where we wait for task to complete
future.get(Configuration.SECONDS_TO_WAIT, TimeUnit.SECONDS);
String editScripts = parser.getAstEditScripts();
if (!editScripts.equals("")) {
astEditScripts.append(editScripts);
tokens.append(parser.getTokensOfSourceCode());
sizes.append(parser.getSizes());
patches.append(parser.getPatchesSourceCode());
alarmTypes.append(parser.getAlarmTypes());
a++;
if (a % 10 == 0) {
FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true);
FileHelper.outputToFile(buggyTokensFilePath, tokens, true);
FileHelper.outputToFile(editScriptSizesFilePath, sizes, true);
FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true);
FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true);
astEditScripts.setLength(0);
tokens.setLength(0);
sizes.setLength(0);
patches.setLength(0);
alarmTypes.setLength(0);
System.out.println("Finish of parsing " + a + " files......");
}
}
} catch (TimeoutException e) {
err.println("task timed out");
future.cancel(true /* mayInterruptIfRunning */);
} catch (InterruptedException e) {
err.println("task interrupted");
} catch (ExecutionException e) {
err.println("task aborted");
} finally {
executor.shutdownNow();
}
}
FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true);
FileHelper.outputToFile(buggyTokensFilePath, tokens, true);
FileHelper.outputToFile(editScriptSizesFilePath, sizes, true);
FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true);
FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true);
astEditScripts.setLength(0);
tokens.setLength(0);
sizes.setLength(0);
patches.setLength(0);
alarmTypes.setLength(0);
System.out.println(a);
a ++;
if (a % 100 == 0) {
FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true);
FileHelper.outputToFile(buggyTokensFilePath, tokens, true);
FileHelper.outputToFile(editScriptSizesFilePath, sizes, true);
FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true);
FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true);
astEditScripts.setLength(0);
tokens.setLength(0);
sizes.setLength(0);
patches.setLength(0);
alarmTypes.setLength(0);
System.out.println("Finish of parsing " + a + " files......");
}
}
} catch (TimeoutException e) {
err.println("task timed out");
future.cancel(true /* mayInterruptIfRunning */ );
} catch (InterruptedException e) {
err.println("task interrupted");
} catch (ExecutionException e) {
err.println("task aborted");
} finally {
executor.shutdownNow();
}
}
FileHelper.outputToFile(editScriptsFilePath, astEditScripts, true);
FileHelper.outputToFile(buggyTokensFilePath, tokens, true);
FileHelper.outputToFile(editScriptSizesFilePath, sizes, true);
FileHelper.outputToFile(patchesSourceCodeFilePath, patches, true);
FileHelper.outputToFile(alarmTypesFilePath, alarmTypes, true);
astEditScripts.setLength(0);
tokens.setLength(0);
sizes.setLength(0);
patches.setLength(0);
alarmTypes.setLength(0);
System.out.println(a);
// classifyByAlarmTypes();
}
}
@@ -119,21 +140,25 @@ public class TestHunkParser {
File revFilesPath = new File(inputPath + "revFiles/");
File[] revFiles = revFilesPath.listFiles(); // project folders
List<MessageFile> msgFiles = new ArrayList<>();
for (File revFile : revFiles) {
if (revFiles.length >= 0) {
for (File revFile : revFiles) {
// if (revFile.getName().endsWith(".java")) {
String fileName = revFile.getName();
File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file
fileName = fileName.replace(".java", ".txt");
File diffentryFile = new File(gumTreeInput + "diffentries/" + fileName); // DiffEntry file
File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file
MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile);
msgFile.setPositionFile(positionFile);
msgFiles.add(msgFile);
String fileName = revFile.getName();
File prevFile = new File(gumTreeInput + "prevFiles/prev_" + fileName);// previous file
fileName = fileName.replace(".java", ".txt");
File diffentryFile = new File(gumTreeInput + "DiffEntries/" + fileName); // DiffEntry file
File positionFile = new File(gumTreeInput + "positions/" + fileName); // position file
MessageFile msgFile = new MessageFile(revFile, prevFile, diffentryFile);
msgFile.setPositionFile(positionFile);
msgFiles.add(msgFile);
// }
}
return msgFiles;
}
return msgFiles;
}
else{
return null;
}
}
public static void classifyByAlarmTypes() {
@@ -52,8 +52,8 @@ public class AkkaParser2 {
// String GIT_REPOSITORY_PATH = "/Users/anilkoyuncu/bugLocalizationStudy/irblsensitivity/data/" + pjPath + "/" + pjName + "/gitrepo/.git";
log.info(pjName);
// input data
log.info("Get the input data..." + Configuration.GUM_TREE_INPUT +pjName +"/");
final List<MessageFile> msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT +pjName +"/");
log.info("Get the input data..." + Configuration.GUM_TREE_INPUT );
final List<MessageFile> msgFiles = getMessageFiles(Configuration.GUM_TREE_INPUT);
log.info("MessageFiles: " + msgFiles.size());
// output path
@@ -6,6 +6,7 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import edu.lu.uni.serval.utils.FileHelper;
@@ -101,6 +102,7 @@ public class DiffEntryReader {
while ((line = reader.readLine()) != null) {
if (RegExp.filterSignal(line.trim())) {
// line = Pattern.compile("^@@\\s\\-\\d+,*\\d*\\s\\+\\d+,*\\d*\\s@@").split(line)[1];
sourceCode = true;
if (hunk.length() > 0) {
if (startLine > 0) {
@@ -9,7 +9,7 @@ public class RegExp {
public static boolean filterSignal(String string) {
boolean flag = false;
Matcher res = pattern.matcher(string);
if (res.matches()) {
flag = true;