New change for big data: write pairs to chunked memory-mapped .txt files and add a COMP launcher step

This commit is contained in:
fixminer
2018-09-10 18:08:50 +02:00
parent bf19c4c02f
commit a8885c5eea
6 changed files with 52 additions and 44 deletions
@@ -10,6 +10,8 @@ import redis.clients.jedis.ScanParams;
import redis.clients.jedis.ScanResult; import redis.clients.jedis.ScanResult;
import java.io.*; import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List; import java.util.List;
import static edu.lu.uni.serval.FixPatternParser.cluster.TreeLoaderClusterL1.poolConfig; import static edu.lu.uni.serval.FixPatternParser.cluster.TreeLoaderClusterL1.poolConfig;
@@ -44,7 +46,7 @@ public class CalculatePairs {
ScanParams sc = new ScanParams(); ScanParams sc = new ScanParams();
//150000000 //150000000
sc.count(150000000); sc.count(1500000000);
sc.match("*"); sc.match("*");
scan = outer.scan("0", sc); scan = outer.scan("0", sc);
@@ -59,26 +61,9 @@ public class CalculatePairs {
byte [] buf = new byte[0]; byte [] buf = new byte[0];
String line = null; String line = null;
try { try {
FileOutputStream fos = new FileOutputStream(outputPath + "/" +pjName+".csv"); // FileOutputStream fos = new FileOutputStream(outputPath + "/" +pjName+".csv");
DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(fos)); // DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(fos));
//
for (int i = 0; i < result.size(); i++) {
for (int j = i + 1; j < result.size(); j++) {
line = String.valueOf(i) +"," + String.valueOf(j) + "," + result.get(i) + "," + result.get(j)+"\n";
outStream.write(line.getBytes());
}
}
outStream.close();
// int fileCounter = 0;
// FileChannel rwChannel = new RandomAccessFile(outputPath + "/" +pjName +String.valueOf(fileCounter)+".txt", "rw").getChannel();
// int maxSize = 500*500000;
// ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
// //
// //
// for (int i = 0; i < result.size(); i++) { // for (int i = 0; i < result.size(); i++) {
@@ -86,23 +71,40 @@ public class CalculatePairs {
// //
// //
// //
// line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + result.get(i) + "\t" + result.get(j)+"\n"; // line = String.valueOf(i) +"," + String.valueOf(j) + "," + result.get(i) + "," + result.get(j)+"\n";
// buf = line.getBytes(); // outStream.write(line.getBytes());
// if(wrBuf.remaining() > 500) {
// wrBuf.put(buf);
// }else{
// log.info("Next pair dump");
// fileCounter++;
// rwChannel = new RandomAccessFile(outputPath+"/" +pjName+String.valueOf(fileCounter)+".txt", "rw").getChannel();
// wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
// }
//
//
//
// //
// } // }
// } // }
// rwChannel.close(); // outStream.close();
int fileCounter = 0;
FileChannel rwChannel = new RandomAccessFile(outputPath + "/" +pjName +String.valueOf(fileCounter)+".txt", "rw").getChannel();
int maxSize = 500*500000;
ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
for (int i = 0; i < result.size(); i++) {
for (int j = i + 1; j < result.size(); j++) {
line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + result.get(i) + "\t" + result.get(j)+"\n";
buf = line.getBytes();
if(wrBuf.remaining() > 500) {
wrBuf.put(buf);
}else{
log.info("Next pair dump");
fileCounter++;
rwChannel = new RandomAccessFile(outputPath+"/" +pjName+String.valueOf(fileCounter)+".txt", "rw").getChannel();
wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
}
}
}
rwChannel.close();
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
e.printStackTrace(); e.printStackTrace();
} catch (IOException e) { } catch (IOException e) {
@@ -32,7 +32,7 @@ public class ImportPairs2DB {
File[] subFolders = folder.listFiles(); File[] subFolders = folder.listFiles();
Stream<File> stream = Arrays.stream(subFolders); Stream<File> stream = Arrays.stream(subFolders);
List<File> pjs = stream List<File> pjs = stream
.filter(x -> x.getName().endsWith(".csv")) .filter(x -> x.getName().endsWith(".txt"))
.collect(Collectors.toList()); .collect(Collectors.toList());
Integer portInt = Integer.valueOf(portInner); Integer portInt = Integer.valueOf(portInner);
@@ -72,7 +72,7 @@ public class StoreFile {
File[] files = pj.listFiles(); File[] files = pj.listFiles();
Stream<File> fileStream = Arrays.stream(files); Stream<File> fileStream = Arrays.stream(files);
List<File> fs; List<File> fs;
if (operation.equals("ALL")){ if (operation.equals("ALLOP")){
fs= fileStream fs= fileStream
.filter(x -> x.getName().startsWith("UPD") || .filter(x -> x.getName().startsWith("UPD") ||
x.getName().startsWith("INS") || x.getName().startsWith("INS") ||
@@ -565,6 +565,8 @@ public class MultiThreadTreeLoaderCluster3 {
CharSequence[] oldSequences = oldTokens.toArray(new CharSequence[oldTokens.size()]); CharSequence[] oldSequences = oldTokens.toArray(new CharSequence[oldTokens.size()]);
CharSequence[] newSequences = newTokens.toArray(new CharSequence[newTokens.size()]); CharSequence[] newSequences = newTokens.toArray(new CharSequence[newTokens.size()]);
JaroWinklerDistance jwd = new JaroWinklerDistance(); JaroWinklerDistance jwd = new JaroWinklerDistance();
LevenshteinDistance ld = new LevenshteinDistance();
Double overallSimi = Double.valueOf(1); Double overallSimi = Double.valueOf(1);
if(oldSequences.length > 0 && (oldSequences.length == newSequences.length)){ if(oldSequences.length > 0 && (oldSequences.length == newSequences.length)){
for (int idx = 0; idx < newSequences.length; idx++) { for (int idx = 0; idx < newSequences.length; idx++) {
@@ -55,6 +55,7 @@ public class TestHunkParser {
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/INS"); FileHelper.createDirectory(GUM_TREE_OUTPUT + "/INS");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/DEL"); FileHelper.createDirectory(GUM_TREE_OUTPUT + "/DEL");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/MOV"); FileHelper.createDirectory(GUM_TREE_OUTPUT + "/MOV");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/ALL");
// FileHelper.deleteDirectory(editScriptsFilePath); // FileHelper.deleteDirectory(editScriptsFilePath);
// FileHelper.deleteDirectory(patchesSourceCodeFilePath); // FileHelper.deleteDirectory(patchesSourceCodeFilePath);
// FileHelper.deleteDirectory(buggyTokensFilePath); // FileHelper.deleteDirectory(buggyTokensFilePath);
+10 -7
View File
@@ -77,6 +77,13 @@ public class Launcher {
case "CACHE": case "CACHE":
StoreFile.main(gumOutput, portInner, serverWait, dbDir, actionType+dumpsName,actionType); StoreFile.main(gumOutput, portInner, serverWait, dbDir, actionType+dumpsName,actionType);
break; break;
case "COMP":
CalculatePairs.main(serverWait, dbDir, actionType+dumpsName, portInner, pairsPath+actionType, pjName+actionType);
ImportPairs2DB.main(pairsPath+actionType, portInner, serverWait, dbDir,datasetPath);
AkkaTreeLoader.main(portInner, serverWait, dbDir, pjName +actionType+".csv.rdb" , port, actionType+dumpsName);
case "LEVEL1": case "LEVEL1":
level1(portInner, serverWait, port, pythonPath, datasetPath, pjName, actionType, threshold, dbDir, pairsPath, dumpsName, gumInput); level1(portInner, serverWait, port, pythonPath, datasetPath, pjName, actionType, threshold, dbDir, pairsPath, dumpsName, gumInput);
break; break;
@@ -112,13 +119,9 @@ public class Launcher {
} }
private static void level1(String portInner, String serverWait, String port, String pythonPath, String datasetPath, String pjName, String actionType, String threshold, String dbDir, String pairsPath, String dumpsName, String gumInput) throws Exception { private static void level1(String portInner, String serverWait, String port, String pythonPath, String datasetPath, String pjName, String actionType, String threshold, String dbDir, String pairsPath, String dumpsName, String gumInput) throws Exception {
// CalculatePairs.main(serverWait, dbDir, actionType+dumpsName, portInner, pairsPath+actionType, pjName+actionType);
//
// ImportPairs2DB.main(pairsPath+actionType, portInner, serverWait, dbDir,datasetPath); TreeLoaderClusterL1.main(portInner, serverWait, port, dbDir, "level1-"+pjName+ actionType+".rdb", dbDir ,pjName + actionType);
//
// AkkaTreeLoader.main(portInner, serverWait, dbDir, pjName +actionType+".csv.rdb" , port, actionType+dumpsName);
//
// TreeLoaderClusterL1.main(portInner, serverWait, port, dbDir, "level1-"+pjName+ actionType+".rdb", dbDir ,pjName + actionType);
CallShell cs1 =new CallShell(); CallShell cs1 =new CallShell();
String db1 = "bash "+dbDir + "/" + "startServer.sh" +" %s %s %s"; String db1 = "bash "+dbDir + "/" + "startServer.sh" +" %s %s %s";