Update token vectorization of buggy code.
This commit is contained in:
+1
-1
@@ -10,6 +10,6 @@ OUTPUT/
|
||||
*.list
|
||||
*.csv
|
||||
*.pdf
|
||||
|
||||
Dataset/
|
||||
# Package Files #
|
||||
*.jar
|
||||
|
||||
@@ -165,10 +165,22 @@ public class HunkParser {
|
||||
}
|
||||
|
||||
if (children.isEmpty()) {
|
||||
tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " ";
|
||||
if ("StringLiteral".equals(astNodeType)) {
|
||||
tokens += astNodeType + " stringLiteral ";
|
||||
} else if ("CharacterLiteral".equals(astNodeType)) {
|
||||
tokens += astNodeType + " charLiteral ";
|
||||
} else if ("ArrayInitializer".equals(astNodeType)) {
|
||||
tokens += astNodeType + " arrayInitializer ";
|
||||
} else {
|
||||
tokens += astNodeType + " " + simpleTree.getLabel() + " ";
|
||||
}
|
||||
} else {
|
||||
for (SimpleTree child : children) {
|
||||
tokens += getTokensDeepFirst(child);
|
||||
if ("ArrayInitializer".equals(astNodeType)) {
|
||||
tokens += astNodeType + " arrayInitializer ";
|
||||
} else {
|
||||
for (SimpleTree child : children) {
|
||||
tokens += getTokensDeepFirst(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
|
||||
@@ -150,7 +150,7 @@ public class SingleStatementParser {
|
||||
String patchSourceCode = getPatchSourceCode(hunk, startLine, endLine, startLine2, endLine2);
|
||||
if ("".equals(patchSourceCode)) continue;
|
||||
|
||||
this.patchesSourceCode += Configuration.PATCH_TOKEN +"\n" + revFile.getName() + "\n" + patchSourceCode + "\n";
|
||||
this.patchesSourceCode += Configuration.PATCH_TOKEN + "\n" + patchSourceCode + "\n";
|
||||
this.sizes += size + "\n";
|
||||
this.astEditScripts += astEditScripts + "\n";
|
||||
// 2. source code: raw tokens
|
||||
@@ -276,10 +276,22 @@ public class SingleStatementParser {
|
||||
}
|
||||
|
||||
if (children.isEmpty()) {
|
||||
tokens += simpleTree.getNodeType() + " " + simpleTree.getLabel() + " ";
|
||||
if ("StringLiteral".equals(astNodeType)) {
|
||||
tokens += astNodeType + " stringLiteral ";
|
||||
} else if ("CharacterLiteral".equals(astNodeType)) {
|
||||
tokens += astNodeType + " charLiteral ";
|
||||
} else if ("ArrayInitializer".equals(astNodeType)) {
|
||||
tokens += astNodeType + " arrayInitializer ";
|
||||
} else {
|
||||
tokens += astNodeType + " " + simpleTree.getLabel() + " ";
|
||||
}
|
||||
} else {
|
||||
for (SimpleTree child : children) {
|
||||
tokens += getTokensDeepFirst(child);
|
||||
if ("ArrayInitializer".equals(astNodeType)) {
|
||||
tokens += astNodeType + " arrayInitializer ";
|
||||
} else {
|
||||
for (SimpleTree child : children) {
|
||||
tokens += getTokensDeepFirst(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
|
||||
@@ -25,11 +25,11 @@ public class AkkaParser {
|
||||
// output path
|
||||
final String editScriptsFilePath = Configuration.EDITSCRIPTS_FILE_PATH;
|
||||
final String patchesSourceCodeFilePath = Configuration.PATCH_SOURCECODE_FILE_PATH;
|
||||
final String buggyTreesFilePath = Configuration.BUGGYTREE_FILE_PATH;
|
||||
final String buggyTokensFilePath = Configuration.BUGGY_CODE_TOKEN_FILE_PATH;
|
||||
final String editScriptSizesFilePath = Configuration.EDITSCRIPT_SIZES_FILE_PATH;
|
||||
FileHelper.deleteDirectory(editScriptsFilePath);
|
||||
FileHelper.deleteDirectory(patchesSourceCodeFilePath);
|
||||
FileHelper.deleteDirectory(buggyTreesFilePath);
|
||||
FileHelper.deleteDirectory(buggyTokensFilePath);
|
||||
FileHelper.deleteDirectory(editScriptSizesFilePath);
|
||||
|
||||
ActorSystem system = null;
|
||||
@@ -39,7 +39,7 @@ public class AkkaParser {
|
||||
try {
|
||||
log.info("Akka begins...");
|
||||
system = ActorSystem.create("Mining-FixPattern-System");
|
||||
parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath), "mine-fix-pattern-actor");
|
||||
parsingActor = system.actorOf(ParseFixPatternActor.props(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath), "mine-fix-pattern-actor");
|
||||
parsingActor.tell(msg, ActorRef.noSender());
|
||||
} catch (Exception e) {
|
||||
system.shutdown();
|
||||
|
||||
@@ -20,13 +20,13 @@ public class ParseFixPatternActor extends UntypedActor {
|
||||
private final int numberOfWorkers;
|
||||
private int counter = 0;
|
||||
|
||||
public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTreesFilePath, String editScriptSizesFilePath) {
|
||||
public ParseFixPatternActor(int numberOfWorkers, String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) {
|
||||
mineRouter = this.getContext().actorOf(new RoundRobinPool(numberOfWorkers)
|
||||
.props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath)), "mine-fix-pattern-router");
|
||||
.props(ParseFixPatternWorker.props(editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath)), "mine-fix-pattern-router");
|
||||
this.numberOfWorkers = numberOfWorkers;
|
||||
}
|
||||
|
||||
public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, final String buggyTreesFilePath, final String editScriptSizesFilePath) {
|
||||
public static Props props(final int numberOfWorkers, final String editScriptsFilePath, final String patchesSourceCodeFilePath, final String buggyTokensFilePath, final String editScriptSizesFilePath) {
|
||||
|
||||
return Props.create(new Creator<ParseFixPatternActor>() {
|
||||
|
||||
@@ -34,7 +34,7 @@ public class ParseFixPatternActor extends UntypedActor {
|
||||
|
||||
@Override
|
||||
public ParseFixPatternActor create() throws Exception {
|
||||
return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTreesFilePath, editScriptSizesFilePath);
|
||||
return new ParseFixPatternActor(numberOfWorkers, editScriptsFilePath, patchesSourceCodeFilePath, buggyTokensFilePath, editScriptSizesFilePath);
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
@@ -18,23 +18,23 @@ public class ParseFixPatternWorker extends UntypedActor {
|
||||
private String editScriptsFilePath;
|
||||
private String patchesSourceCodeFilePath;
|
||||
private String editScriptSizesFilePath;
|
||||
private String buggyTreesFilePath;
|
||||
private String buggyTokensFilePath;
|
||||
|
||||
public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTreesFilePath, String editScriptSizesFilePath) {
|
||||
public ParseFixPatternWorker(String editScriptsFilePath, String patchesSourceCodeFilePath, String buggyTokensFilePath, String editScriptSizesFilePath) {
|
||||
this.editScriptsFilePath = editScriptsFilePath;
|
||||
this.patchesSourceCodeFilePath = patchesSourceCodeFilePath;
|
||||
this.editScriptSizesFilePath = editScriptSizesFilePath;
|
||||
this.buggyTreesFilePath = buggyTreesFilePath;
|
||||
this.buggyTokensFilePath = buggyTokensFilePath;
|
||||
}
|
||||
|
||||
public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTreesFilePath, final String editScriptSizesFilePath) {
|
||||
public static Props props(final String editScriptsFile, final String patchesSourceCodeFile, final String buggyTokensFilePath, final String editScriptSizesFilePath) {
|
||||
return Props.create(new Creator<ParseFixPatternWorker>() {
|
||||
|
||||
private static final long serialVersionUID = -7615153844097275009L;
|
||||
|
||||
@Override
|
||||
public ParseFixPatternWorker create() throws Exception {
|
||||
return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, buggyTreesFilePath, editScriptSizesFilePath);
|
||||
return new ParseFixPatternWorker(editScriptsFile, patchesSourceCodeFile, buggyTokensFilePath, editScriptSizesFilePath);
|
||||
}
|
||||
|
||||
});
|
||||
@@ -72,7 +72,7 @@ public class ParseFixPatternWorker extends UntypedActor {
|
||||
FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true);
|
||||
FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true);
|
||||
// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true);
|
||||
FileHelper.outputToFile(buggyTreesFilePath + "tokens_" + id + ".list", tokens, true);
|
||||
FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true);
|
||||
editScripts.setLength(0);
|
||||
patchesSourceCode.setLength(0);
|
||||
sizes.setLength(0);
|
||||
@@ -85,7 +85,7 @@ public class ParseFixPatternWorker extends UntypedActor {
|
||||
FileHelper.outputToFile(patchesSourceCodeFilePath + "patches_" + id + ".list", patchesSourceCode, true);
|
||||
FileHelper.outputToFile(editScriptSizesFilePath + "sizes_" + id + ".list", sizes, true);
|
||||
// FileHelper.outputToFile(buggyTreesFilePath + "buggyTrees_" + id + ".list", buggyTrees, true);
|
||||
FileHelper.outputToFile(buggyTreesFilePath + "tokens_" + id + ".list", tokens, true);
|
||||
FileHelper.outputToFile(buggyTokensFilePath + "tokens_" + id + ".list", tokens, true);
|
||||
|
||||
log.info("Worker #" + id + " finished the work...");
|
||||
this.getSender().tell("STOP", getSelf());
|
||||
|
||||
@@ -8,6 +8,7 @@ public class Configuration {
|
||||
public static final String PATCH_TOKEN = "PATCH###";
|
||||
|
||||
public static int MAXZ_SIZE = 0;
|
||||
public static int TOKEN_VECTOR_SIZE = 0;
|
||||
public static final int VECTOR_SIZE_OF_EMBEDED_TOKEN = 100;
|
||||
|
||||
// input path of GumTree. (i.e., Fix patterns parser)
|
||||
@@ -18,11 +19,13 @@ public class Configuration {
|
||||
public static final String EDITSCRIPTS_FILE_PATH = GUM_TREE_OUTPUT + "editScripts/";
|
||||
public static final String PATCH_SOURCECODE_FILE_PATH = GUM_TREE_OUTPUT + "sourceCode/";
|
||||
public static final String BUGGYTREE_FILE_PATH = GUM_TREE_OUTPUT + "buggyTrees/";
|
||||
public static final String BUGGY_CODE_TOKEN_FILE_PATH = GUM_TREE_OUTPUT + "tokens/";
|
||||
public static final String EDITSCRIPT_SIZES_FILE_PATH = GUM_TREE_OUTPUT + "editScriptSizes/";
|
||||
|
||||
public static final String EDITSCRIPTS_FILE = GUM_TREE_OUTPUT + "editScripts.list";
|
||||
public static final String PATCH_SOURCECODE_FILE = GUM_TREE_OUTPUT + "patchSourceCode.list";
|
||||
public static final String BUGGYTREES_FILE = GUM_TREE_OUTPUT + "buggyTrees.list";
|
||||
public static final String BUGGY_CODY_TOKENS_FILE = GUM_TREE_OUTPUT + "tokens.list";
|
||||
public static final String EDITSCRIPT_SIZES_FILE = GUM_TREE_OUTPUT + "editScriptSizes.list";
|
||||
|
||||
// the input path of fix patterns mining.
|
||||
@@ -31,6 +34,7 @@ public class Configuration {
|
||||
public static final String EMBEDDING_INPUT = MINING_INPUT + "Embedding/";
|
||||
public static final String SELECTED_PATCHES_SOURE_CODE_FILE = EMBEDDING_INPUT + "patchSourceCode.list";
|
||||
public static final String SELECTED_BUGGY_TREE_FILE = EMBEDDING_INPUT + "buggyTrees.list";
|
||||
public static final String SELECTED_BUGGY_TOKEN_FILE = EMBEDDING_INPUT + "tokens.list";
|
||||
public static final String SELECTED_EDITSCRIPTES_FILE = EMBEDDING_INPUT + "editScripts.list";
|
||||
// the input path of feature learning.
|
||||
public static final String FEATURE_LEARNING_INPUT = MINING_INPUT + "FeatureLearning/";
|
||||
|
||||
@@ -103,7 +103,7 @@ public class SimplifyTree {
|
||||
return simpleTree;
|
||||
}
|
||||
|
||||
private SimpleTree canonicalizeSourceCodeTree(ITree tree, SimpleTree parent) {
|
||||
public SimpleTree canonicalizeSourceCodeTree(ITree tree, SimpleTree parent) {
|
||||
SimpleTree simpleTree = new SimpleTree();
|
||||
|
||||
String label = tree.getLabel();
|
||||
|
||||
Reference in New Issue
Block a user