memory friendly

This commit is contained in:
Anil Koyuncu
2018-03-19 16:07:19 +01:00
parent 9df0b6c745
commit 8d21175d05
3 changed files with 266 additions and 93 deletions
@@ -0,0 +1,62 @@
package edu.lu.uni.serval.FixPatternParser.violations;
import java.io.IOException;
import java.io.Writer;
import java.util.List;
/**
 * Small helper for writing one delimiter-separated record at a time to a {@link Writer}.
 *
 * <p>Escaping is limited to doubling embedded double-quote characters (see
 * https://tools.ietf.org/html/rfc4180). Fields are only wrapped in a quote character
 * when the caller supplies one explicitly; values that contain the separator or a line
 * break are NOT quoted automatically.
 *
 * <p>Created by anilkoyuncu on 19/03/2018.
 */
public class CSVUtils {

    private static final char DEFAULT_SEPARATOR = ',';

    /** A blank char is used by callers (and the overloads below) to mean "not specified". */
    private static final char UNSPECIFIED = ' ';

    /**
     * Writes {@code values} as one line using the default separator and no quoting.
     *
     * @param w      destination writer
     * @param values fields of the record; a {@code null} element is written as an empty field
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values) throws IOException {
        writeLine(w, values, DEFAULT_SEPARATOR, UNSPECIFIED);
    }

    /**
     * Writes {@code values} as one line using the given separator and no quoting.
     *
     * @param w          destination writer
     * @param values     fields of the record; a {@code null} element is written as an empty field
     * @param separators field separator; a blank ({@code ' '}) selects the default comma
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values, char separators) throws IOException {
        writeLine(w, values, separators, UNSPECIFIED);
    }

    /**
     * Escapes a single field: every double quote is doubled.
     * A {@code null} field is treated as the empty string instead of raising a
     * {@link NullPointerException}.
     */
    //https://tools.ietf.org/html/rfc4180
    private static String followCVSformat(String value) {
        if (value == null) {
            return "";
        }
        // replace() is a no-op when there is nothing to escape, so no contains() pre-check is needed.
        return value.replace("\"", "\"\"");
    }

    /**
     * Writes {@code values} as one line terminated by {@code "\n"}.
     *
     * @param w           destination writer
     * @param values      fields of the record; a {@code null} element is written as an empty field
     * @param separators  field separator; a blank ({@code ' '}) selects the default comma
     * @param customQuote character placed before and after every field; a blank ({@code ' '})
     *                    disables quoting
     * @throws IOException if the writer fails
     */
    public static void writeLine(Writer w, List<String> values, char separators, char customQuote) throws IOException {
        // A blank separator means "use the default".
        final char separator = (separators == UNSPECIFIED) ? DEFAULT_SEPARATOR : separators;
        final boolean quoteFields = customQuote != UNSPECIFIED;

        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String value : values) {
            if (!first) {
                sb.append(separator);
            }
            first = false;

            String escaped = followCVSformat(value);
            if (quoteFields) {
                sb.append(customQuote).append(escaped).append(customQuote);
            } else {
                sb.append(escaped);
            }
        }
        sb.append("\n");
        // Writer.append accepts any CharSequence; no intermediate String copy is required.
        w.append(sb);
    }
}
@@ -0,0 +1,103 @@
package edu.lu.uni.serval.FixPatternParser.violations;
/**
* Created by anilkoyuncu on 19/03/2018.
*/
import java.io.RandomAccessFile;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.channels.FileChannel;
import sun.nio.ch.FileChannelImpl;
import sun.misc.Unsafe;
/**
 * Maps a file into memory through the JDK-internal {@code FileChannelImpl.map0} /
 * {@code unmap0} methods and reads/writes the mapped region with {@link Unsafe}.
 *
 * <p>None of the accessors perform bounds checks: {@code pos} is added to the raw base
 * address and handed straight to {@code Unsafe}. Callers are responsible for staying
 * inside {@code [0, size)}.
 *
 * <p>NOTE(review): this relies on non-public JDK internals reached via reflection;
 * confirm the target JDK still exposes {@code map0}/{@code unmap0} with these signatures.
 */
@SuppressWarnings("restriction")
public class MMapper {
    private static final Unsafe unsafe;
    private static final Method mmap;
    private static final Method unmmap;
    private static final int BYTE_ARRAY_OFFSET;

    // Base address of the current mapping and its (page-rounded) length in bytes.
    private long addr, size;
    // Path of the backing file.
    private final String loc;

    static {
        try {
            // Unsafe has no public accessor; read its private singleton field reflectively.
            Field theUnsafeField = Unsafe.class.getDeclaredField("theUnsafe");
            theUnsafeField.setAccessible(true);
            unsafe = (Unsafe) theUnsafeField.get(null);
            mmap = getMethod(FileChannelImpl.class, "map0", int.class, long.class, long.class);
            unmmap = getMethod(FileChannelImpl.class, "unmap0", long.class, long.class);
            BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    //Bundle reflection calls to get access to the given method
    private static Method getMethod(Class<?> cls, String name, Class<?>... params) throws Exception {
        Method m = cls.getDeclaredMethod(name, params);
        m.setAccessible(true);
        return m;
    }

    //Round up to the next multiple of 4096 bytes
    private static long roundTo4096(long i) {
        return (i + 0xfffL) & ~0xfffL;
    }

    /**
     * Given that the location and size have been set, resizes the backing file to
     * {@code size}, maps it from offset 0 for that length and stores the returned
     * address in {@code this.addr}.
     *
     * <p>The file and channel are closed on every path (previously they leaked when
     * {@code setLength} or the reflective map call threw). The address keeps being used
     * after the channel is closed, exactly as before.
     */
    private void mapAndSetOffset() throws Exception {
        // Resources close in reverse order: channel first, then the file.
        try (RandomAccessFile backingFile = new RandomAccessFile(this.loc, "rw");
             FileChannel ch = backingFile.getChannel()) {
            backingFile.setLength(this.size);
            // First argument is map0's mode; 1 is presumably read-write — TODO confirm against FileChannelImpl.
            this.addr = (long) mmap.invoke(ch, 1, 0L, this.size);
        }
    }

    /**
     * Creates (or opens) the file at {@code loc} and maps it.
     *
     * @param loc path of the backing file, opened in "rw" mode
     * @param len requested length in bytes; rounded up to a multiple of 4096
     * @throws Exception if the file cannot be opened, resized or mapped
     */
    public MMapper(final String loc, long len) throws Exception {
        this.loc = loc;
        this.size = roundTo4096(len);
        mapAndSetOffset();
    }

    /**
     * Unmaps the current region and maps the file again with a new length.
     *
     * <p>Callers should synchronize to avoid calls in the middle of this, but
     * it is undesirable to synchronize w/ all access methods.
     *
     * <p>NOTE(review): if the new mapping fails, {@code addr} still holds the address
     * that was just unmapped; the instance should not be used after such a failure.
     *
     * @param nLen new requested length in bytes; rounded up to a multiple of 4096
     * @throws Exception if unmapping or remapping fails
     */
    public void remap(long nLen) throws Exception {
        unmmap.invoke(null, addr, this.size);
        this.size = roundTo4096(nLen);
        mapAndSetOffset();
    }

    /** Reads an int at byte offset {@code pos} from the start of the mapping (unchecked). */
    public int getInt(long pos) {
        return unsafe.getInt(pos + addr);
    }

    /** Reads a long at byte offset {@code pos} from the start of the mapping (unchecked). */
    public long getLong(long pos) {
        return unsafe.getLong(pos + addr);
    }

    /** Writes an int at byte offset {@code pos} from the start of the mapping (unchecked). */
    public void putInt(long pos, int val) {
        unsafe.putInt(pos + addr, val);
    }

    /** Writes a long at byte offset {@code pos} from the start of the mapping (unchecked). */
    public void putLong(long pos, long val) {
        unsafe.putLong(pos + addr, val);
    }

    //May want to have offset & length within data as well, for both of these

    /** Copies {@code data.length} bytes from the mapping, starting at {@code pos}, into {@code data}. */
    public void getBytes(long pos, byte[] data) {
        unsafe.copyMemory(null, pos + addr, data, BYTE_ARRAY_OFFSET, data.length);
    }

    /** Copies all of {@code data} into the mapping, starting at byte offset {@code pos}. */
    public void setBytes(long pos, byte[] data) {
        unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, null, pos + addr, data.length);
    }
}
@@ -15,12 +15,17 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Created by anilkoyuncu on 19/03/2018.
@@ -33,46 +38,23 @@ public class MultiThreadTreeLoader {
String inputPath;
String outputPath;
if(args.length > 0){
if (args.length > 0) {
inputPath = args[0];
outputPath = args[1];
}else{
} else {
inputPath = "/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2/";
outputPath = "/Users/anilkoyuncu/bugStudy/dataset/";
}
calculatePairs(inputPath,outputPath);
processMessages(outputPath );
calculatePairs(inputPath, outputPath);
processMessages(inputPath,outputPath);
// List<Message> loaded = null;
// try {
// FileInputStream fi = new FileInputStream(new File(outputPath + "messageFile"));
// ObjectInputStream oi = new ObjectInputStream(fi);
// loaded = (List<Message>) oi.readObject();
// oi.close();
// fi.close();
//
//
// } catch (FileNotFoundException e) {
// System.out.println("File not found");
// } catch (IOException e) {
// System.out.println("Error initializing stream");
// } catch (ClassNotFoundException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
//
//
// log.info(String.valueOf(msgFiles.size()));
// log.info(String.valueOf(loaded.size()));
// msgFiles.parallelStream()
// .forEach(m -> coreLoop(m,outputPath));
}
public static void calculatePairs(String inputPath,String outputPath){
public static void calculatePairs(String inputPath, String outputPath) {
File folder = new File(inputPath);
File[] listOfFiles = folder.listFiles();
Stream<File> stream = Arrays.stream(listOfFiles);
@@ -92,11 +74,11 @@ public class MultiThreadTreeLoader {
}
System.out.println("a");
// compareAll(fileToCompare);
readMessageFiles(fileToCompare,outputPath);
readMessageFiles(fileToCompare, outputPath);
}
public static void processMessages(String outputPath){
File folder = new File(outputPath+ "dumps/");
public static void processMessages(String inputPath, String outputPath) {
File folder = new File(outputPath + "pairs/");
File[] listOfFiles = folder.listFiles();
Stream<File> stream = Arrays.stream(listOfFiles);
List<File> pjs = stream
@@ -104,7 +86,7 @@ public class MultiThreadTreeLoader {
.collect(Collectors.toList());
FileHelper.createDirectory(outputPath + "comparison/");
pjs.parallelStream()
.forEach(m -> coreLoop(m,outputPath));
.forEach(m -> coreLoop(m, outputPath,inputPath));
}
public static ITree getSimpliedTree(String fn) {
@@ -137,62 +119,68 @@ public class MultiThreadTreeLoader {
}
private static void coreLoop(File mes,String outputPath){
private static void coreLoop(File mes, String outputPath,String inputPath) {
try {
FileInputStream fi = new FileInputStream(mes);
ObjectInputStream oi = new ObjectInputStream(fi);
Message loaded = (Message) oi.readObject();
oi.close();
fi.close();
Pair<Integer, String> first = loaded.first;
Pair<Integer, String> second = loaded.second;
int i = first.getKey();
int j = second.getKey();
String firstValue = first.getValue();
String secondValue = second.getValue();
BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath +"comparison/" + "output_"+String.valueOf(i)+"_"+String.valueOf(j)+".txt", true));
ITree oldTree = getSimpliedTree(firstValue);
log.info("Starting in coreLoop");
ITree newTree = getSimpliedTree(secondValue);
BufferedReader br = null;
String sCurrentLine = null;
BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath + "comparison/" + "output_" + mes.getName()));
Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree);
m.match();
br = new BufferedReader(
new FileReader(mes));
while ((sCurrentLine = br.readLine()) != null) {
String currentLine = sCurrentLine;
String[] split = currentLine.split("\t");
String i = split[0];
String j = split[1];
String firstValue = split[2];
String secondValue = split[3];
ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings());
ag.generate();
List<Action> actions = ag.getActions();
writer.write(String.valueOf(i));
writer.write("\t");
writer.write(String.valueOf(j));
writer.write("\t");
firstValue = inputPath + firstValue;
secondValue = inputPath + secondValue;
writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.valueOf(actions.size()));
writer.write("\t");
writer.write(firstValue);
writer.write("\t");
writer.write(secondValue);
writer.write("\n");
ITree oldTree = getSimpliedTree(firstValue);
ITree newTree = getSimpliedTree(secondValue);
Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree);
m.match();
ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings());
ag.generate();
List<Action> actions = ag.getActions();
writer.write(String.valueOf(i));
writer.write("\t");
writer.write(String.valueOf(j));
writer.write("\t");
writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree)));
writer.write("\t");
writer.write(String.valueOf(actions.size()));
writer.write("\t");
writer.write(firstValue);
writer.write("\t");
writer.write(secondValue);
writer.write("\n");
}
writer.close();
} catch (FileNotFoundException e) {
System.out.println("File not found");
} catch (IOException e) {
System.out.println("Error initializing stream");
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private static void readMessageFiles(List<File> folders,String outputPath) {
private static void readMessageFiles(List<File> folders, String outputPath) {
List<String> treesFileNames = new ArrayList<>();
@@ -201,37 +189,57 @@ public class MultiThreadTreeLoader {
treesFileNames.add(target.toString());
}
FileHelper.createDirectory(outputPath + "dumps/");
// FileHelper.createDirectory(outputPath + "dumps/");
log.info("Calculating pairs");
// treesFileNames = treesFileNames.subList(0,10);
// treesFileNames = treesFileNames.subList(0,100);
byte [] buf = new byte[0];
String line = null;
try {
FileChannel rwChannel = new RandomAccessFile(outputPath +"textfile.txt", "rw").getChannel();
ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE);
int fileCounter = 0;
for (int i = 0; i < treesFileNames.size(); i++) {
for (int j = i + 1; j < treesFileNames.size(); j++) {
line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + treesFileNames.get(i).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","") + "\t" + treesFileNames.get(j).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","")+"\n";
buf = line.getBytes();
if(wrBuf.remaining() > 500) {
wrBuf.put(buf);
}else{
fileCounter++;
rwChannel = new RandomAccessFile(outputPath +"textfile"+String.valueOf(fileCounter)+".txt", "rw").getChannel();
wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE);
}
for (int i = 0; i < treesFileNames.size(); i++) {
for (int j = i + 1; j < treesFileNames.size(); j++) {
Message msgFile = new Message(i, treesFileNames.get(i), j, treesFileNames.get(j));
// msgFiles.add(msgFile);
FileOutputStream f = null;
try {
f = new FileOutputStream(new File(outputPath + "dumps/" + "messageFile_"+String.valueOf(i)+"_"+String.valueOf(j)));
ObjectOutputStream o = new ObjectOutputStream(f);
o.writeObject(msgFile);
o.close();
f.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
rwChannel.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}catch (java.nio.BufferOverflowException e) {
log.error(line);
log.error(String.valueOf(buf.length));
e.printStackTrace();
}
log.info("Done pairs");
// return msgFiles;
}
// return msgFiles;
}