memory friendly
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
package edu.lu.uni.serval.FixPatternParser.violations;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Minimal helper for writing one delimiter-separated record at a time to a {@link Writer}.
 *
 * <p>Embedded double quotes are doubled (see RFC 4180), but fields are only wrapped in a
 * quote character when the caller supplies one explicitly; no automatic quoting is applied
 * to fields that contain the separator or a line break.
 *
 * <p>Created by anilkoyuncu on 19/03/2018.
 */
public class CSVUtils {

    private static final char DEFAULT_SEPARATOR = ',';

    /** Sentinel meaning "not specified" for both the separator and the quote parameter. */
    private static final char UNSET = ' ';

    /**
     * Writes {@code values} as one comma-separated line without surrounding quotes.
     *
     * @param w      destination writer
     * @param values fields of the record, in order; {@code null} elements become empty fields
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values) throws IOException {
        writeLine(w, values, DEFAULT_SEPARATOR, UNSET);
    }

    /**
     * Writes {@code values} as one line using the given separator and no surrounding quotes.
     *
     * @param w          destination writer
     * @param values     fields of the record, in order; {@code null} elements become empty fields
     * @param separators field separator; a space selects the default comma
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values, char separators) throws IOException {
        writeLine(w, values, separators, UNSET);
    }

    /**
     * Escapes a single field by doubling every double quote it contains.
     * A {@code null} field is rendered as the empty string instead of raising a
     * {@link NullPointerException}.
     *
     * @see <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>
     */
    private static String followCVSformat(String value) {
        if (value == null) {
            return "";
        }
        // replace() is a no-op when there is nothing to escape, so no contains() pre-check is needed.
        return value.replace("\"", "\"\"");
    }

    /**
     * Writes {@code values} as one line terminated by {@code '\n'}.
     *
     * @param w           destination writer
     * @param values      fields of the record, in order; {@code null} elements become empty fields
     * @param separators  field separator; a space selects the default comma
     * @param customQuote character placed before and after every field; a space means "no quoting"
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values, char separators, char customQuote) throws IOException {
        final char separator = (separators == UNSET) ? DEFAULT_SEPARATOR : separators;
        final boolean quoted = customQuote != UNSET;

        // Build the whole record first so the writer receives it in a single append.
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String value : values) {
            if (!first) {
                sb.append(separator);
            }
            if (quoted) {
                sb.append(customQuote).append(followCVSformat(value)).append(customQuote);
            } else {
                sb.append(followCVSformat(value));
            }
            first = false;
        }
        sb.append("\n");
        w.append(sb.toString());
    }

}
|
||||
@@ -0,0 +1,103 @@
|
||||
package edu.lu.uni.serval.FixPatternParser.violations;
|
||||
|
||||
/**
|
||||
* Created by anilkoyuncu on 19/03/2018.
|
||||
*/
|
||||
import java.io.RandomAccessFile;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.nio.channels.FileChannel;
|
||||
|
||||
import sun.nio.ch.FileChannelImpl;
|
||||
import sun.misc.Unsafe;
|
||||
|
||||
@SuppressWarnings("restriction")
|
||||
public class MMapper {
|
||||
|
||||
private static final Unsafe unsafe;
|
||||
private static final Method mmap;
|
||||
private static final Method unmmap;
|
||||
private static final int BYTE_ARRAY_OFFSET;
|
||||
|
||||
private long addr, size;
|
||||
private final String loc;
|
||||
|
||||
static {
|
||||
try {
|
||||
Field singleoneInstanceField = Unsafe.class.getDeclaredField("theUnsafe");
|
||||
singleoneInstanceField.setAccessible(true);
|
||||
unsafe = (Unsafe) singleoneInstanceField.get(null);
|
||||
|
||||
mmap = getMethod(FileChannelImpl.class, "map0", int.class, long.class, long.class);
|
||||
unmmap = getMethod(FileChannelImpl.class, "unmap0", long.class, long.class);
|
||||
|
||||
BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
|
||||
} catch (Exception e){
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
//Bundle reflection calls to get access to the given method
|
||||
private static Method getMethod(Class<?> cls, String name, Class<?>... params) throws Exception {
|
||||
Method m = cls.getDeclaredMethod(name, params);
|
||||
m.setAccessible(true);
|
||||
return m;
|
||||
}
|
||||
|
||||
//Round to next 4096 bytes
|
||||
private static long roundTo4096(long i) {
|
||||
return (i + 0xfffL) & ~0xfffL;
|
||||
}
|
||||
|
||||
//Given that the location and size have been set, map that location
|
||||
//for the given length and set this.addr to the returned offset
|
||||
private void mapAndSetOffset() throws Exception{
|
||||
final RandomAccessFile backingFile = new RandomAccessFile(this.loc, "rw");
|
||||
backingFile.setLength(this.size);
|
||||
|
||||
final FileChannel ch = backingFile.getChannel();
|
||||
this.addr = (long) mmap.invoke(ch, 1, 0L, this.size);
|
||||
|
||||
ch.close();
|
||||
backingFile.close();
|
||||
}
|
||||
|
||||
public MMapper(final String loc, long len) throws Exception {
|
||||
this.loc = loc;
|
||||
this.size = roundTo4096(len);
|
||||
mapAndSetOffset();
|
||||
}
|
||||
|
||||
//Callers should synchronize to avoid calls in the middle of this, but
|
||||
//it is undesirable to synchronize w/ all access methods.
|
||||
public void remap(long nLen) throws Exception{
|
||||
unmmap.invoke(null, addr, this.size);
|
||||
this.size = roundTo4096(nLen);
|
||||
mapAndSetOffset();
|
||||
}
|
||||
|
||||
public int getInt(long pos){
|
||||
return unsafe.getInt(pos + addr);
|
||||
}
|
||||
|
||||
public long getLong(long pos){
|
||||
return unsafe.getLong(pos + addr);
|
||||
}
|
||||
|
||||
public void putInt(long pos, int val){
|
||||
unsafe.putInt(pos + addr, val);
|
||||
}
|
||||
|
||||
public void putLong(long pos, long val){
|
||||
unsafe.putLong(pos + addr, val);
|
||||
}
|
||||
|
||||
//May want to have offset & length within data as well, for both of these
|
||||
public void getBytes(long pos, byte[] data){
|
||||
unsafe.copyMemory(null, pos + addr, data, BYTE_ARRAY_OFFSET, data.length);
|
||||
}
|
||||
|
||||
public void setBytes(long pos, byte[] data){
|
||||
unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, null, pos + addr, data.length);
|
||||
}
|
||||
}
|
||||
+101
-93
@@ -15,12 +15,17 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
/**
|
||||
* Created by anilkoyuncu on 19/03/2018.
|
||||
@@ -33,46 +38,23 @@ public class MultiThreadTreeLoader {
|
||||
|
||||
String inputPath;
|
||||
String outputPath;
|
||||
if(args.length > 0){
|
||||
if (args.length > 0) {
|
||||
inputPath = args[0];
|
||||
outputPath = args[1];
|
||||
}else{
|
||||
} else {
|
||||
inputPath = "/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2/";
|
||||
outputPath = "/Users/anilkoyuncu/bugStudy/dataset/";
|
||||
}
|
||||
|
||||
|
||||
|
||||
calculatePairs(inputPath,outputPath);
|
||||
processMessages(outputPath );
|
||||
calculatePairs(inputPath, outputPath);
|
||||
processMessages(inputPath,outputPath);
|
||||
|
||||
|
||||
|
||||
// List<Message> loaded = null;
|
||||
// try {
|
||||
// FileInputStream fi = new FileInputStream(new File(outputPath + "messageFile"));
|
||||
// ObjectInputStream oi = new ObjectInputStream(fi);
|
||||
// loaded = (List<Message>) oi.readObject();
|
||||
// oi.close();
|
||||
// fi.close();
|
||||
//
|
||||
//
|
||||
// } catch (FileNotFoundException e) {
|
||||
// System.out.println("File not found");
|
||||
// } catch (IOException e) {
|
||||
// System.out.println("Error initializing stream");
|
||||
// } catch (ClassNotFoundException e) {
|
||||
// // TODO Auto-generated catch block
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
//
|
||||
//
|
||||
// log.info(String.valueOf(msgFiles.size()));
|
||||
// log.info(String.valueOf(loaded.size()));
|
||||
// msgFiles.parallelStream()
|
||||
// .forEach(m -> coreLoop(m,outputPath));
|
||||
}
|
||||
|
||||
public static void calculatePairs(String inputPath,String outputPath){
|
||||
public static void calculatePairs(String inputPath, String outputPath) {
|
||||
File folder = new File(inputPath);
|
||||
File[] listOfFiles = folder.listFiles();
|
||||
Stream<File> stream = Arrays.stream(listOfFiles);
|
||||
@@ -92,11 +74,11 @@ public class MultiThreadTreeLoader {
|
||||
}
|
||||
System.out.println("a");
|
||||
// compareAll(fileToCompare);
|
||||
readMessageFiles(fileToCompare,outputPath);
|
||||
readMessageFiles(fileToCompare, outputPath);
|
||||
}
|
||||
|
||||
public static void processMessages(String outputPath){
|
||||
File folder = new File(outputPath+ "dumps/");
|
||||
public static void processMessages(String inputPath, String outputPath) {
|
||||
File folder = new File(outputPath + "pairs/");
|
||||
File[] listOfFiles = folder.listFiles();
|
||||
Stream<File> stream = Arrays.stream(listOfFiles);
|
||||
List<File> pjs = stream
|
||||
@@ -104,7 +86,7 @@ public class MultiThreadTreeLoader {
|
||||
.collect(Collectors.toList());
|
||||
FileHelper.createDirectory(outputPath + "comparison/");
|
||||
pjs.parallelStream()
|
||||
.forEach(m -> coreLoop(m,outputPath));
|
||||
.forEach(m -> coreLoop(m, outputPath,inputPath));
|
||||
}
|
||||
|
||||
public static ITree getSimpliedTree(String fn) {
|
||||
@@ -137,62 +119,68 @@ public class MultiThreadTreeLoader {
|
||||
|
||||
}
|
||||
|
||||
private static void coreLoop(File mes,String outputPath){
|
||||
private static void coreLoop(File mes, String outputPath,String inputPath) {
|
||||
try {
|
||||
FileInputStream fi = new FileInputStream(mes);
|
||||
ObjectInputStream oi = new ObjectInputStream(fi);
|
||||
Message loaded = (Message) oi.readObject();
|
||||
oi.close();
|
||||
fi.close();
|
||||
Pair<Integer, String> first = loaded.first;
|
||||
Pair<Integer, String> second = loaded.second;
|
||||
int i = first.getKey();
|
||||
int j = second.getKey();
|
||||
String firstValue = first.getValue();
|
||||
String secondValue = second.getValue();
|
||||
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath +"comparison/" + "output_"+String.valueOf(i)+"_"+String.valueOf(j)+".txt", true));
|
||||
ITree oldTree = getSimpliedTree(firstValue);
|
||||
log.info("Starting in coreLoop");
|
||||
|
||||
ITree newTree = getSimpliedTree(secondValue);
|
||||
BufferedReader br = null;
|
||||
String sCurrentLine = null;
|
||||
BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath + "comparison/" + "output_" + mes.getName()));
|
||||
|
||||
Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree);
|
||||
m.match();
|
||||
br = new BufferedReader(
|
||||
new FileReader(mes));
|
||||
while ((sCurrentLine = br.readLine()) != null) {
|
||||
String currentLine = sCurrentLine;
|
||||
String[] split = currentLine.split("\t");
|
||||
String i = split[0];
|
||||
String j = split[1];
|
||||
String firstValue = split[2];
|
||||
String secondValue = split[3];
|
||||
|
||||
ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings());
|
||||
ag.generate();
|
||||
List<Action> actions = ag.getActions();
|
||||
writer.write(String.valueOf(i));
|
||||
writer.write("\t");
|
||||
writer.write(String.valueOf(j));
|
||||
writer.write("\t");
|
||||
firstValue = inputPath + firstValue;
|
||||
secondValue = inputPath + secondValue;
|
||||
|
||||
writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.valueOf(actions.size()));
|
||||
writer.write("\t");
|
||||
writer.write(firstValue);
|
||||
writer.write("\t");
|
||||
writer.write(secondValue);
|
||||
writer.write("\n");
|
||||
ITree oldTree = getSimpliedTree(firstValue);
|
||||
|
||||
ITree newTree = getSimpliedTree(secondValue);
|
||||
|
||||
Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree);
|
||||
m.match();
|
||||
|
||||
ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings());
|
||||
ag.generate();
|
||||
List<Action> actions = ag.getActions();
|
||||
writer.write(String.valueOf(i));
|
||||
writer.write("\t");
|
||||
writer.write(String.valueOf(j));
|
||||
writer.write("\t");
|
||||
|
||||
writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree)));
|
||||
writer.write("\t");
|
||||
writer.write(String.valueOf(actions.size()));
|
||||
writer.write("\t");
|
||||
writer.write(firstValue);
|
||||
writer.write("\t");
|
||||
writer.write(secondValue);
|
||||
writer.write("\n");
|
||||
|
||||
|
||||
}
|
||||
writer.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
System.out.println("File not found");
|
||||
} catch (IOException e) {
|
||||
System.out.println("Error initializing stream");
|
||||
|
||||
} catch (ClassNotFoundException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void readMessageFiles(List<File> folders,String outputPath) {
|
||||
private static void readMessageFiles(List<File> folders, String outputPath) {
|
||||
|
||||
List<String> treesFileNames = new ArrayList<>();
|
||||
|
||||
@@ -201,37 +189,57 @@ public class MultiThreadTreeLoader {
|
||||
|
||||
treesFileNames.add(target.toString());
|
||||
}
|
||||
FileHelper.createDirectory(outputPath + "dumps/");
|
||||
// FileHelper.createDirectory(outputPath + "dumps/");
|
||||
log.info("Calculating pairs");
|
||||
// treesFileNames = treesFileNames.subList(0,10);
|
||||
// treesFileNames = treesFileNames.subList(0,100);
|
||||
byte [] buf = new byte[0];
|
||||
String line = null;
|
||||
try {
|
||||
|
||||
FileChannel rwChannel = new RandomAccessFile(outputPath +"textfile.txt", "rw").getChannel();
|
||||
ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE);
|
||||
int fileCounter = 0;
|
||||
|
||||
|
||||
for (int i = 0; i < treesFileNames.size(); i++) {
|
||||
for (int j = i + 1; j < treesFileNames.size(); j++) {
|
||||
|
||||
|
||||
|
||||
line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + treesFileNames.get(i).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","") + "\t" + treesFileNames.get(j).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","")+"\n";
|
||||
buf = line.getBytes();
|
||||
if(wrBuf.remaining() > 500) {
|
||||
wrBuf.put(buf);
|
||||
}else{
|
||||
fileCounter++;
|
||||
rwChannel = new RandomAccessFile(outputPath +"textfile"+String.valueOf(fileCounter)+".txt", "rw").getChannel();
|
||||
wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
for (int i = 0; i < treesFileNames.size(); i++) {
|
||||
for (int j = i + 1; j < treesFileNames.size(); j++) {
|
||||
Message msgFile = new Message(i, treesFileNames.get(i), j, treesFileNames.get(j));
|
||||
// msgFiles.add(msgFile);
|
||||
|
||||
FileOutputStream f = null;
|
||||
try {
|
||||
|
||||
|
||||
f = new FileOutputStream(new File(outputPath + "dumps/" + "messageFile_"+String.valueOf(i)+"_"+String.valueOf(j)));
|
||||
ObjectOutputStream o = new ObjectOutputStream(f);
|
||||
o.writeObject(msgFile);
|
||||
|
||||
o.close();
|
||||
f.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
rwChannel.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}catch (java.nio.BufferOverflowException e) {
|
||||
log.error(line);
|
||||
log.error(String.valueOf(buf.length));
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
log.info("Done pairs");
|
||||
// return msgFiles;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// return msgFiles;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user