Update token vectorization of buggy code.

This commit is contained in:
Kui LIU
2017-08-01 11:29:39 +02:00
parent cb9098067d
commit 0431cd4fbb
8 changed files with 51 additions and 23 deletions
+1 -1
View File
@@ -10,6 +10,6 @@ OUTPUT/
*.list
*.csv
*.pdf
Dataset/
# Package Files #
*.jar
@@ -165,10 +165,22 @@ public class HunkParser {
}
if (children.isEmpty()) {
tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " ";
if ("StringLiteral".equals(astNodeType)) {
tokens += astNodeType + " stringLiteral ";
} else if ("CharacterLiteral".equals(astNodeType)) {
tokens += astNodeType + " charLiteral ";
} else if ("ArrayInitializer".equals(astNodeType)) {
tokens += astNodeType + " arrayInitializer ";
} else {
tokens += astNodeType + " " + simpleTree.getLabel() + " ";
}
} else {
for (SimpleTree child : children) {
tokens += getTokensDeepFirst(child);
if ("ArrayInitializer".equals(astNodeType)) {
tokens += astNodeType + " arrayInitializer ";
} else {
for (SimpleTree child : children) {
tokens += getTokensDeepFirst(child);
}
}
}
return tokens;
@@ -150,7 +150,7 @@ public class SingleStatementParser {
String patchSourceCode = getPatchSourceCode(hunk, startLine, endLine, startLine2, endLine2);
if ("".equals(patchSourceCode)) continue;
this.patchesSourceCode += Configuration.PATCH_TOKEN +"\n" + revFile.getName() + "\n" + patchSourceCode + "\n";
this.patchesSourceCode += Configuration.PATCH_TOKEN + "\n" + patchSourceCode + "\n";
this.sizes += size + "\n";
this.astEditScripts += astEditScripts + "\n";
// 2. source code: raw tokens
@@ -276,10 +276,22 @@ public class SingleStatementParser {
}
if (children.isEmpty()) {
tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " ";
if ("StringLiteral".equals(astNodeType)) {
tokens += astNodeType + " stringLiteral ";
} else if ("CharacterLiteral".equals(astNodeType)) {
tokens += astNodeType + " charLiteral ";
} else if ("ArrayInitializer".equals(astNodeType)) {
tokens += astNodeType + " arrayInitializer ";
} else {
tokens += astNodeType + " " + simpleTree.getLabel() + " ";
}
} else {
for (SimpleTree child : children) {
tokens += getTokensDeepFirst(child);
if ("ArrayInitializer".equals(astNodeType)) {
tokens += astNodeType + " arrayInitializer ";
} else {
for (SimpleTree child : children) {
tokens += getTokensDeepFirst(child);
}
}
}
return tokens;
@@ -25,11 +25,11 @@ public class AkkaParser {
// output path
final String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE_PATH;
final String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH;
final String buggyTreesFilePath = Configuration.BUGGYTREE_FILE_PATH;
final String buggyTokensFilePath = Configuration.BUGGY_CODE_TOKEN_FILE_PATH;
final String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH;
FileHelper.deleteDirectory(editScriptsFilePath);
FileHelper.deleteDirectory(patchesSourceCodeFilePath);
FileHelper.deleteDirectory(buggyTreesFilePath);
FileHelper.deleteDirectory(buggyTokensFilePath);
FileHelper.deleteDirectory(editScriptSizesFilePath);
ActorSystem system = null;
@@ -39,7 +39,7 @@ public class AkkaParser {
try {
log.info("Akka begins...");
system = ActorSystem.create("Mining-FixPattern-System");
parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath), "mine-fix-pattern-actor");
parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath), "mine-fix-pattern-actor");
parsingActor.tell(msg, ActorRef.noSender());
} catch (Exception e) {
system.shutdown();
@@ -20,13 +20,13 @@ public class ParseFixPatternActor extends UntypedActor {
private final int numberOfWorkers;
private int counter = 0;
public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTreesFilePath, String editScriptSizesFilePath) {
public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) {
mineRouter = this.getContext().actorOf(new RoundRobinPool(numberOfWorkers)
.props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath)), "mine-fix-pattern-router");
.props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath)), "mine-fix-pattern-router");
this.numberOfWorkers = numberOfWorkers;
}
public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, final String buggyTreesFilePath, final String editScriptSizesFilePath) {
public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, final String buggyTokensFilePath, final String editScriptSizesFilePath) {
return Props.create(new Creator<ParseFixPatternActor>() {
@@ -34,7 +34,7 @@ public class ParseFixPatternActor extends UntypedActor {
@Override
public ParseFixPatternActor create() throws Exception {
return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath);
return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath);
}
});
@@ -18,23 +18,23 @@ public class ParseFixPatternWorker extends UntypedActor {
private String editScriptsFilePath;
private String patchesSourceCodeFilePath;
private String editScriptSizesFilePath;
private String buggyTreesFilePath;
private String buggyTokensFilePath;
public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTreesFilePath, String editScriptSizesFilePath) {
public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) {
this.editScriptsFilePath = editScriptsFilePath;
this.patchesSourceCodeFilePath = patchesSourceCodeFilePath;
this.editScriptSizesFilePath = editScriptSizesFilePath;
this.buggyTreesFilePath = buggyTreesFilePath;
this.buggyTokensFilePath = buggyTokensFilePath;
}
public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTreesFilePath, final String editScriptSizesFilePath) {
public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTokensFilePath, final String editScriptSizesFilePath) {
return Props.create(new Creator<ParseFixPatternWorker>() {
private static final long serialVersionUID = -7615153844097275009L;
@Override
public ParseFixPatternWorker create() throws Exception {
return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, buggyTreesFilePath, editScriptSizesFilePath);
return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, buggyTokensFilePath, editScriptSizesFilePath);
}
});
@@ -72,7 +72,7 @@ public class ParseFixPatternWorker extends UntypedActor {
FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true);
FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true);
// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true);
FileHelper.outputToFile(buggyTreesFilePath + "tokens_" + id + ".list", tokens, true);
FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true);
editScripts.setLength(0);
patchesSourceCode.setLength(0);
sizes.setLength(0);
@@ -85,7 +85,7 @@ public class ParseFixPatternWorker extends UntypedActor {
FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true);
FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true);
// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true);
FileHelper.outputToFile(buggyTreesFilePath + "tokens_" + id + ".list", tokens, true);
FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true);
log.info("Worker #" + id + " finished the work...");
this.getSender().tell("STOP", getSelf());
@@ -8,6 +8,7 @@ public class Configuration {
public static final String PATCH_TOKEN = "PATCH###";
public static int MAXZ_SIZE = 0;
public static int TOKEN_VECTOR_SIZE = 0;
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN = 100;
// input path of GumTree. (i.e., Fix patterns parser)
@@ -18,11 +19,13 @@ public class Configuration {
public static final String EDITSCRIPTS_FILE_PATH = GUM_TREE_OUTPUT + "editScripts/";
public static final String PATCH_SOURCECODE_FILE_PATH = GUM_TREE_OUTPUT + "sourceCode/";
public static final String BUGGYTREE_FILE_PATH = GUM_TREE_OUTPUT + "buggyTrees/";
public static final String BUGGY_CODE_TOKEN_FILE_PATH = GUM_TREE_OUTPUT + "tokens/";
public static final String EDITSCRIPT_SIZES_FILE_PATH = GUM_TREE_OUTPUT + "editScriptSizes/";
public static final String EDITSCRIPTS_FILE = GUM_TREE_OUTPUT + "editScripts.list";
public static final String PATCH_SOURCECODE_FILE = GUM_TREE_OUTPUT + "patchSourceCode.list";
public static final String BUGGYTREES_FILE = GUM_TREE_OUTPUT + "buggyTrees.list";
public static final String BUGGY_CODY_TOKENS_FILE = GUM_TREE_OUTPUT + "tokens.list";
public static final String EDITSCRIPT_SIZES_FILE = GUM_TREE_OUTPUT + "editScriptSizes.list";
// the input path of fix patterns mining.
@@ -31,6 +34,7 @@ public class Configuration {
public static final String EMBEDDING_INPUT = MINING_INPUT + "Embedding/";
public static final String SELECTED_PATCHES_SOURE_CODE_FILE = EMBEDDING_INPUT + "patchSourceCode.list";
public static final String SELECTED_BUGGY_TREE_FILE = EMBEDDING_INPUT + "buggyTrees.list";
public static final String SELECTED_BUGGY_TOKEN_FILE = EMBEDDING_INPUT + "tokens.list";
public static final String SELECTED_EDITSCRIPTES_FILE = EMBEDDING_INPUT + "editScripts.list";
// the input path of feature learning.
public static final String FEATURE_LEARNING_INPUT = MINING_INPUT + "FeatureLearning/";
@@ -103,7 +103,7 @@ public class SimplifyTree {
return simpleTree;
}
private SimpleTree canonicalizeSourceCodeTree(ITree tree, SimpleTree parent) {
public SimpleTree canonicalizeSourceCodeTree(ITree tree, SimpleTree parent) {
SimpleTree simpleTree = new SimpleTree();
String label = tree.getLabel();