New change for big data: dump candidate pairs to chunked memory-mapped .txt files instead of a single .csv

This commit is contained in:
fixminer
2018-09-10 18:08:50 +02:00
parent bf19c4c02f
commit a8885c5eea
6 changed files with 52 additions and 44 deletions
@@ -10,6 +10,8 @@ import redis.clients.jedis.ScanParams;
import redis.clients.jedis.ScanResult;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;
import static edu.lu.uni.serval.FixPatternParser.cluster.TreeLoaderClusterL1.poolConfig;
@@ -44,7 +46,7 @@ public class CalculatePairs {
ScanParams sc = new ScanParams();
//150000000
sc.count(150000000);
sc.count(1500000000);
sc.match("*");
scan = outer.scan("0", sc);
@@ -59,26 +61,9 @@ public class CalculatePairs {
byte [] buf = new byte[0];
String line = null;
try {
FileOutputStream fos = new FileOutputStream(outputPath + "/" +pjName+".csv");
DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(fos));
for (int i = 0; i < result.size(); i++) {
for (int j = i + 1; j < result.size(); j++) {
line = String.valueOf(i) +"," + String.valueOf(j) + "," + result.get(i) + "," + result.get(j)+"\n";
outStream.write(line.getBytes());
}
}
outStream.close();
// int fileCounter = 0;
// FileChannel rwChannel = new RandomAccessFile(outputPath + "/" +pjName +String.valueOf(fileCounter)+".txt", "rw").getChannel();
// int maxSize = 500*500000;
// ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
// FileOutputStream fos = new FileOutputStream(outputPath + "/" +pjName+".csv");
// DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(fos));
//
//
//
// for (int i = 0; i < result.size(); i++) {
@@ -86,23 +71,40 @@ public class CalculatePairs {
//
//
//
// line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + result.get(i) + "\t" + result.get(j)+"\n";
// buf = line.getBytes();
// if(wrBuf.remaining() > 500) {
// wrBuf.put(buf);
// }else{
// log.info("Next pair dump");
// fileCounter++;
// rwChannel = new RandomAccessFile(outputPath+"/" +pjName+String.valueOf(fileCounter)+".txt", "rw").getChannel();
// wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
// }
//
//
//
// line = String.valueOf(i) +"," + String.valueOf(j) + "," + result.get(i) + "," + result.get(j)+"\n";
// outStream.write(line.getBytes());
//
// }
// }
// rwChannel.close();
// outStream.close();
int fileCounter = 0;
FileChannel rwChannel = new RandomAccessFile(outputPath + "/" +pjName +String.valueOf(fileCounter)+".txt", "rw").getChannel();
int maxSize = 500*500000;
ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
for (int i = 0; i < result.size(); i++) {
for (int j = i + 1; j < result.size(); j++) {
line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + result.get(i) + "\t" + result.get(j)+"\n";
buf = line.getBytes();
if(wrBuf.remaining() > 500) {
wrBuf.put(buf);
}else{
log.info("Next pair dump");
fileCounter++;
rwChannel = new RandomAccessFile(outputPath+"/" +pjName+String.valueOf(fileCounter)+".txt", "rw").getChannel();
wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, maxSize);
}
}
}
rwChannel.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
@@ -32,7 +32,7 @@ public class ImportPairs2DB {
File[] subFolders = folder.listFiles();
Stream<File> stream = Arrays.stream(subFolders);
List<File> pjs = stream
.filter(x -> x.getName().endsWith(".csv"))
.filter(x -> x.getName().endsWith(".txt"))
.collect(Collectors.toList());
Integer portInt = Integer.valueOf(portInner);
@@ -72,7 +72,7 @@ public class StoreFile {
File[] files = pj.listFiles();
Stream<File> fileStream = Arrays.stream(files);
List<File> fs;
if (operation.equals("ALL")){
if (operation.equals("ALLOP")){
fs= fileStream
.filter(x -> x.getName().startsWith("UPD") ||
x.getName().startsWith("INS") ||
@@ -565,6 +565,8 @@ public class MultiThreadTreeLoaderCluster3 {
CharSequence[] oldSequences = oldTokens.toArray(new CharSequence[oldTokens.size()]);
CharSequence[] newSequences = newTokens.toArray(new CharSequence[newTokens.size()]);
JaroWinklerDistance jwd = new JaroWinklerDistance();
LevenshteinDistance ld = new LevenshteinDistance();
Double overallSimi = Double.valueOf(1);
if(oldSequences.length > 0 && (oldSequences.length == newSequences.length)){
for (int idx = 0; idx < newSequences.length; idx++) {
@@ -55,6 +55,7 @@ public class TestHunkParser {
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/INS");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/DEL");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/MOV");
FileHelper.createDirectory(GUM_TREE_OUTPUT + "/ALL");
// FileHelper.deleteDirectory(editScriptsFilePath);
// FileHelper.deleteDirectory(patchesSourceCodeFilePath);
// FileHelper.deleteDirectory(buggyTokensFilePath);
+10 -7
View File
@@ -77,6 +77,13 @@ public class Launcher {
case "CACHE":
StoreFile.main(gumOutput, portInner, serverWait, dbDir, actionType+dumpsName,actionType);
break;
case "COMP":
CalculatePairs.main(serverWait, dbDir, actionType+dumpsName, portInner, pairsPath+actionType, pjName+actionType);
ImportPairs2DB.main(pairsPath+actionType, portInner, serverWait, dbDir,datasetPath);
AkkaTreeLoader.main(portInner, serverWait, dbDir, pjName +actionType+".csv.rdb" , port, actionType+dumpsName);
case "LEVEL1":
level1(portInner, serverWait, port, pythonPath, datasetPath, pjName, actionType, threshold, dbDir, pairsPath, dumpsName, gumInput);
break;
@@ -112,13 +119,9 @@ public class Launcher {
}
private static void level1(String portInner, String serverWait, String port, String pythonPath, String datasetPath, String pjName, String actionType, String threshold, String dbDir, String pairsPath, String dumpsName, String gumInput) throws Exception {
// CalculatePairs.main(serverWait, dbDir, actionType+dumpsName, portInner, pairsPath+actionType, pjName+actionType);
//
// ImportPairs2DB.main(pairsPath+actionType, portInner, serverWait, dbDir,datasetPath);
//
// AkkaTreeLoader.main(portInner, serverWait, dbDir, pjName +actionType+".csv.rdb" , port, actionType+dumpsName);
//
// TreeLoaderClusterL1.main(portInner, serverWait, port, dbDir, "level1-"+pjName+ actionType+".rdb", dbDir ,pjName + actionType);
TreeLoaderClusterL1.main(portInner, serverWait, port, dbDir, "level1-"+pjName+ actionType+".rdb", dbDir ,pjName + actionType);
CallShell cs1 =new CallShell();
String db1 = "bash "+dbDir + "/" + "startServer.sh" +" %s %s %s";