From 8d21175d054152b02c245951226470837a3640e9 Mon Sep 17 00:00:00 2001
From: Anil Koyuncu
Date: Mon, 19 Mar 2018 16:07:19 +0100
Subject: [PATCH] memory friendly

---
 .../FixPatternParser/violations/CSVUtils.java |  74 +++++++
 .../FixPatternParser/violations/MMapper.java  | 103 ++++++++++
 .../violations/MultiThreadTreeLoader.java     | 194 +++++++++---------
 3 files changed, 278 insertions(+), 93 deletions(-)
 create mode 100644 src/main/java/edu/lu/uni/serval/FixPatternParser/violations/CSVUtils.java
 create mode 100644 src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MMapper.java

diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/CSVUtils.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/CSVUtils.java
new file mode 100644
index 0000000..1c5981b
--- /dev/null
+++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/CSVUtils.java
@@ -0,0 +1,74 @@
+package edu.lu.uni.serval.FixPatternParser.violations;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+
+/**
+ * Writes lists of values as CSV records, quoting fields as described in RFC 4180.
+ *
+ * Created by anilkoyuncu on 19/03/2018.
+ */
+public class CSVUtils {
+
+    private static final char DEFAULT_SEPARATOR = ',';
+
+    /** Value of separators/customQuote that callers pass to mean "not specified". */
+    private static final char UNSET = ' ';
+
+    /** Quote character RFC 4180 prescribes for fields that cannot be written verbatim. */
+    private static final char RFC_QUOTE = '"';
+
+    /** Writes values as one comma-separated record, quoting only the fields that need it. */
+    public static void writeLine(Writer w, List<String> values) throws IOException {
+        writeLine(w, values, DEFAULT_SEPARATOR, UNSET);
+    }
+
+    /** Same as {@link #writeLine(Writer, List)} with a caller-chosen separator. */
+    public static void writeLine(Writer w, List<String> values, char separators) throws IOException {
+        writeLine(w, values, separators, UNSET);
+    }
+
+    //https://tools.ietf.org/html/rfc4180
+    //A field holding the separator, a double quote or a line break is enclosed in double
+    //quotes with its double quotes doubled; any other field is written unchanged.
+    private static String followCVSformat(String value, char separator) {
+        boolean needsQuoting = value.indexOf(separator) >= 0 || value.indexOf(RFC_QUOTE) >= 0
+                || value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0;
+        return needsQuoting ? enclose(value, RFC_QUOTE) : value;
+    }
+
+    //Wraps value in quote, doubling every occurrence of quote inside it
+    private static String enclose(String value, char quote) {
+        String q = String.valueOf(quote);
+        return q + value.replace(q, q + q) + q;
+    }
+
+    /**
+     * Appends one record, terminated by "\n", to w.
+     *
+     * @param w destination; it is neither flushed nor closed here
+     * @param values fields of the record; a null element is written as an empty field
+     * @param separators field separator; ' ' selects the default ','
+     * @param customQuote character every field is wrapped in; ' ' means "quote only the
+     *        fields that need it", using double quotes
+     * @throws IOException if appending to w fails
+     */
+    public static void writeLine(Writer w, List<String> values, char separators, char customQuote) throws IOException {
+        char separator = (separators == UNSET) ? DEFAULT_SEPARATOR : separators;
+
+        StringBuilder sb = new StringBuilder();
+        boolean first = true;
+        for (String value : values) {
+            if (!first) {
+                sb.append(separator);
+            }
+            String field = (value == null) ? "" : value;
+            sb.append(customQuote == UNSET ? followCVSformat(field, separator) : enclose(field, customQuote));
+            first = false;
+        }
+        sb.append("\n");
+        w.append(sb.toString());
+    }
+
+}
diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MMapper.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MMapper.java
new file mode 100644
index 0000000..61b755c
--- /dev/null
+++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MMapper.java
@@ -0,0 +1,103 @@
+package edu.lu.uni.serval.FixPatternParser.violations;
+
+/**
+ * Created by anilkoyuncu on 19/03/2018.
+ */
+import java.io.RandomAccessFile;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.nio.channels.FileChannel;
+
+import sun.nio.ch.FileChannelImpl;
+import sun.misc.Unsafe;
+
+/**
+ * Memory-maps a file through the JDK-internal FileChannelImpl.map0/unmap0 hooks and
+ * accesses the mapping with sun.misc.Unsafe. Positions are byte offsets from the start
+ * of the mapping. Instances are not synchronized (see remap).
+ */
+@SuppressWarnings("restriction")
+public class MMapper {
+
+    private static final Unsafe unsafe;
+    private static final Method mmap;
+    private static final Method unmmap;
+    private static final int BYTE_ARRAY_OFFSET;
+
+    private long addr, size;
+    private final String loc;
+
+    static {
+        try {
+            Field singleoneInstanceField = Unsafe.class.getDeclaredField("theUnsafe");
+            singleoneInstanceField.setAccessible(true);
+            unsafe = (Unsafe) singleoneInstanceField.get(null);
+            mmap = getMethod(FileChannelImpl.class, "map0", int.class, long.class, long.class);
+            unmmap = getMethod(FileChannelImpl.class, "unmap0", long.class, long.class);
+            BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    //Bundle reflection calls to get access to the given method
+    private static Method getMethod(Class<?> cls, String name, Class<?>... params) throws Exception {
+        Method m = cls.getDeclaredMethod(name, params);
+        m.setAccessible(true);
+        return m;
+    }
+
+    //Round to next 4096 bytes
+    private static long roundTo4096(long i) {
+        return (i + 0xfffL) & ~0xfffL;
+    }
+
+    //Bounds-checked pos -> absolute address; Unsafe itself checks nothing and can crash the JVM
+    private long at(long pos, long len) {
+        if (addr == 0 || pos < 0 || len > size - pos) {
+            throw new IndexOutOfBoundsException("pos=" + pos + ", len=" + len + ", size=" + size);
+        }
+        return pos + addr;
+    }
+
+    //Given that the location and size have been set, map that location for the given
+    //length and set this.addr; the file handles are released even when this throws
+    private void mapAndSetOffset() throws Exception {
+        try (RandomAccessFile backingFile = new RandomAccessFile(this.loc, "rw");
+             FileChannel ch = backingFile.getChannel()) {
+            backingFile.setLength(this.size);
+            this.addr = (long) mmap.invoke(ch, 1, 0L, this.size);
+        }
+    }
+
+    public MMapper(final String loc, long len) throws Exception {
+        this.loc = loc;
+        this.size = roundTo4096(len);
+        mapAndSetOffset();
+    }
+
+    //Callers should synchronize to avoid calls in the middle of this, but
+    //it is undesirable to synchronize w/ all access methods.
+    public void remap(long nLen) throws Exception {
+        if (addr != 0) { // 0 means an earlier remap failed after unmapping
+            unmmap.invoke(null, addr, this.size);
+            this.addr = 0; // accessors must fail cleanly if re-mapping throws below
+        }
+        this.size = roundTo4096(nLen);
+        mapAndSetOffset();
+    }
+
+    public int getInt(long pos) { return unsafe.getInt(at(pos, 4)); }
+    public long getLong(long pos) { return unsafe.getLong(at(pos, 8)); }
+    public void putInt(long pos, int val) { unsafe.putInt(at(pos, 4), val); }
+    public void putLong(long pos, long val) { unsafe.putLong(at(pos, 8), val); }
+
+    //May want to have offset & length within data as well, for both of these
+    public void getBytes(long pos, byte[] data) {
+        unsafe.copyMemory(null, at(pos, data.length), data, BYTE_ARRAY_OFFSET, data.length);
+    }
+
+    public void setBytes(long pos, byte[] data) {
+        unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, null, at(pos, data.length), data.length);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoader.java b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoader.java
index d8f4eef..061d92e
100644 --- a/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoader.java +++ b/src/main/java/edu/lu/uni/serval/FixPatternParser/violations/MultiThreadTreeLoader.java @@ -15,12 +15,17 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.zip.Deflater; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; /** * Created by anilkoyuncu on 19/03/2018. @@ -33,46 +38,23 @@ public class MultiThreadTreeLoader { String inputPath; String outputPath; - if(args.length > 0){ + if (args.length > 0) { inputPath = args[0]; outputPath = args[1]; - }else{ + } else { inputPath = "/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2/"; outputPath = "/Users/anilkoyuncu/bugStudy/dataset/"; } - - calculatePairs(inputPath,outputPath); - processMessages(outputPath ); + calculatePairs(inputPath, outputPath); + processMessages(inputPath,outputPath); + -// List loaded = null; -// try { -// FileInputStream fi = new FileInputStream(new File(outputPath + "messageFile")); -// ObjectInputStream oi = new ObjectInputStream(fi); -// loaded = (List) oi.readObject(); -// oi.close(); -// fi.close(); -// -// -// } catch (FileNotFoundException e) { -// System.out.println("File not found"); -// } catch (IOException e) { -// System.out.println("Error initializing stream"); -// } catch (ClassNotFoundException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } -// -// -// log.info(String.valueOf(msgFiles.size())); -// log.info(String.valueOf(loaded.size())); -// msgFiles.parallelStream() -// .forEach(m -> coreLoop(m,outputPath)); } - public static void calculatePairs(String inputPath,String outputPath){ + public static void calculatePairs(String inputPath, 
String outputPath) { File folder = new File(inputPath); File[] listOfFiles = folder.listFiles(); Stream stream = Arrays.stream(listOfFiles); @@ -92,11 +74,11 @@ public class MultiThreadTreeLoader { } System.out.println("a"); // compareAll(fileToCompare); - readMessageFiles(fileToCompare,outputPath); + readMessageFiles(fileToCompare, outputPath); } - public static void processMessages(String outputPath){ - File folder = new File(outputPath+ "dumps/"); + public static void processMessages(String inputPath, String outputPath) { + File folder = new File(outputPath + "pairs/"); File[] listOfFiles = folder.listFiles(); Stream stream = Arrays.stream(listOfFiles); List pjs = stream @@ -104,7 +86,7 @@ public class MultiThreadTreeLoader { .collect(Collectors.toList()); FileHelper.createDirectory(outputPath + "comparison/"); pjs.parallelStream() - .forEach(m -> coreLoop(m,outputPath)); + .forEach(m -> coreLoop(m, outputPath,inputPath)); } public static ITree getSimpliedTree(String fn) { @@ -137,62 +119,68 @@ public class MultiThreadTreeLoader { } - private static void coreLoop(File mes,String outputPath){ + private static void coreLoop(File mes, String outputPath,String inputPath) { try { - FileInputStream fi = new FileInputStream(mes); - ObjectInputStream oi = new ObjectInputStream(fi); - Message loaded = (Message) oi.readObject(); - oi.close(); - fi.close(); - Pair first = loaded.first; - Pair second = loaded.second; - int i = first.getKey(); - int j = second.getKey(); - String firstValue = first.getValue(); - String secondValue = second.getValue(); - BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath +"comparison/" + "output_"+String.valueOf(i)+"_"+String.valueOf(j)+".txt", true)); - ITree oldTree = getSimpliedTree(firstValue); + log.info("Starting in coreLoop"); - ITree newTree = getSimpliedTree(secondValue); + BufferedReader br = null; + String sCurrentLine = null; + BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath + "comparison/" + 
"output_" + mes.getName())); - Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree); - m.match(); + br = new BufferedReader( + new FileReader(mes)); + while ((sCurrentLine = br.readLine()) != null) { + String currentLine = sCurrentLine; + String[] split = currentLine.split("\t"); + String i = split[0]; + String j = split[1]; + String firstValue = split[2]; + String secondValue = split[3]; - ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings()); - ag.generate(); - List actions = ag.getActions(); - writer.write(String.valueOf(i)); - writer.write("\t"); - writer.write(String.valueOf(j)); - writer.write("\t"); + firstValue = inputPath + firstValue; + secondValue = inputPath + secondValue; - writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree))); - writer.write("\t"); - writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree))); - writer.write("\t"); - writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree))); - writer.write("\t"); - writer.write(String.valueOf(actions.size())); - writer.write("\t"); - writer.write(firstValue); - writer.write("\t"); - writer.write(secondValue); - writer.write("\n"); + ITree oldTree = getSimpliedTree(firstValue); + ITree newTree = getSimpliedTree(secondValue); + + Matcher m = Matchers.getInstance().getMatcher(oldTree, newTree); + m.match(); + + ActionGenerator ag = new ActionGenerator(oldTree, newTree, m.getMappings()); + ag.generate(); + List actions = ag.getActions(); + writer.write(String.valueOf(i)); + writer.write("\t"); + writer.write(String.valueOf(j)); + writer.write("\t"); + + writer.write(String.format("%1.2f", m.chawatheSimilarity(oldTree, newTree))); + writer.write("\t"); + writer.write(String.format("%1.2f", m.diceSimilarity(oldTree, newTree))); + writer.write("\t"); + writer.write(String.format("%1.2f", m.jaccardSimilarity(oldTree, newTree))); + writer.write("\t"); + writer.write(String.valueOf(actions.size())); + writer.write("\t"); + 
writer.write(firstValue); + writer.write("\t"); + writer.write(secondValue); + writer.write("\n"); + + + } writer.close(); } catch (FileNotFoundException e) { System.out.println("File not found"); } catch (IOException e) { System.out.println("Error initializing stream"); - } catch (ClassNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); } } - private static void readMessageFiles(List folders,String outputPath) { + private static void readMessageFiles(List folders, String outputPath) { List treesFileNames = new ArrayList<>(); @@ -201,37 +189,57 @@ public class MultiThreadTreeLoader { treesFileNames.add(target.toString()); } - FileHelper.createDirectory(outputPath + "dumps/"); +// FileHelper.createDirectory(outputPath + "dumps/"); log.info("Calculating pairs"); -// treesFileNames = treesFileNames.subList(0,10); +// treesFileNames = treesFileNames.subList(0,100); + byte [] buf = new byte[0]; + String line = null; + try { + + FileChannel rwChannel = new RandomAccessFile(outputPath +"textfile.txt", "rw").getChannel(); + ByteBuffer wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE); + int fileCounter = 0; + + + for (int i = 0; i < treesFileNames.size(); i++) { + for (int j = i + 1; j < treesFileNames.size(); j++) { + + + + line = String.valueOf(i) +"\t" + String.valueOf(j) + "\t" + treesFileNames.get(i).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","") + "\t" + treesFileNames.get(j).replace("/Users/anilkoyuncu/bugStudy/dataset/GumTreeOutput2","")+"\n"; + buf = line.getBytes(); + if(wrBuf.remaining() > 500) { + wrBuf.put(buf); + }else{ + fileCounter++; + rwChannel = new RandomAccessFile(outputPath +"textfile"+String.valueOf(fileCounter)+".txt", "rw").getChannel(); + wrBuf = rwChannel.map(FileChannel.MapMode.READ_WRITE, 0, Integer.MAX_VALUE); + } - for (int i = 0; i < treesFileNames.size(); i++) { - for (int j = i + 1; j < treesFileNames.size(); j++) { - Message msgFile = new Message(i, 
treesFileNames.get(i), j, treesFileNames.get(j)); -// msgFiles.add(msgFile); - - FileOutputStream f = null; - try { - f = new FileOutputStream(new File(outputPath + "dumps/" + "messageFile_"+String.valueOf(i)+"_"+String.valueOf(j))); - ObjectOutputStream o = new ObjectOutputStream(f); - o.writeObject(msgFile); - o.close(); - f.close(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } } - + rwChannel.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + }catch (java.nio.BufferOverflowException e) { + log.error(line); + log.error(String.valueOf(buf.length)); + e.printStackTrace(); } + log.info("Done pairs"); -// return msgFiles; } +// return msgFiles; } + + + +