diff --git a/python/dataset4j.py b/python/dataset4j.py index d8912ab..8eb17ad 100644 --- a/python/dataset4j.py +++ b/python/dataset4j.py @@ -53,7 +53,7 @@ def create_dataset(cfg: dict, project_list: str = PROJECT_LIST): :return: """ pj_list: list[str] = project_list.split(',') - + print(f'processed datasets {pj_list}') # Ensure directories exist DATASET_PATH.mkdir(exist_ok=True, parents=True) if not os.path.exists(COMMIT_DFS): @@ -82,12 +82,12 @@ def create_dataset(cfg: dict, project_list: str = PROJECT_LIST): # commits['cocci'] = commits.log.apply(lambda x: True if re.search('cocci|coccinelle', x) else False) # coccis = commits[commits.cocci].commit.values.tolist() fixes = commits[commits.fixes.str.len() != 0].commit.values.tolist() - + print(f'> Obtained {len(fixes)} fixes.') # Filter end dates if configured if 'limitCommitsBeforeDays' in cfg['fixminer']: value = eval(str(cfg['fixminer']['limitCommitsBeforeDays'])) latest_commit = commits.commitDate.iloc[0] - + print(f'> Project {repo} last commit at {latest_commit}') if isinstance(value, datetime.timedelta): end_date = latest_commit - value elif isinstance(value, float) or isinstance(value, int): @@ -97,9 +97,9 @@ def create_dataset(cfg: dict, project_list: str = PROJECT_LIST): f'Only timedelta and int/float (days) are supported.') print(f'> Has {len(commits)} commits before filtering for date < {end_date}') - commits = commits[commits.commitDate < end_date] - + commits = commits[commits.commitDate <= end_date] + print(f'> Has {len(commits)} commits after filtering') commits = commits[commits.commit.isin(fixes)] - print(f'> Has {len(commits)} comments after filtering') + print(f'> Has {len(commits)} fixes after filtering') parallelRun(prepareFiles, commits[['commit', 'files']].values.tolist(), repo) diff --git a/python/main.py b/python/main.py index ff4e83d..85b910e 100644 --- a/python/main.py +++ b/python/main.py @@ -64,8 +64,7 @@ def job_compare(): def job_cluster(): from abstractPatch import cluster - dbDir = join(DATA_PATH, 'redis') - redis_start(dbDir, REDIS_PORT, PROJECT_TYPE) + job_start_redis() cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions') diff --git a/richedit/src/main/java/edu/lu/uni/serval/Launcher.java b/richedit/src/main/java/edu/lu/uni/serval/Launcher.java index b3658c3..58c2831 100755 --- a/richedit/src/main/java/edu/lu/uni/serval/Launcher.java +++ b/richedit/src/main/java/edu/lu/uni/serval/Launcher.java @@ -21,6 +21,7 @@ public class Launcher public static void main(String[] args) throws IOException { + Properties appProps = new Properties(); if (args.length != 2) @@ -66,7 +67,6 @@ public class Launcher gumInput = input; dbDir = redisPath; - try { switch (jobType) diff --git a/richedit/src/main/java/edu/lu/uni/serval/richedit/ediff/EDiffHunkParser.java b/richedit/src/main/java/edu/lu/uni/serval/richedit/ediff/EDiffHunkParser.java index 3292a5b..c03c486 100755 --- a/richedit/src/main/java/edu/lu/uni/serval/richedit/ediff/EDiffHunkParser.java +++ b/richedit/src/main/java/edu/lu/uni/serval/richedit/ediff/EDiffHunkParser.java @@ -18,7 +18,6 @@ import java.util.List; */ public class EDiffHunkParser extends EDiffParser { - private static final Logger logger = LoggerFactory.getLogger(EDiffHunkParser.class); @Override @@ -26,43 +25,22 @@ public class EDiffHunkParser extends EDiffParser { try { - String datasetName = project; - String[] split1 = diffentryFile.getParent().split(datasetName); - String root = split1[0]; + String[] split1 = diffentryFile.getParent().split(project); String pj = split1[1].split("/")[1]; - List actionSets = parseChangedSourceCodeWithGumTree2(prevFile, revFile, srcMLPath, isJava); if (actionSets != null && actionSets.size() != 0) { - - boolean processActionSet = true; - - if (actionSets.size() > Integer.valueOf(hunkLimit)) - { - processActionSet = false; - logger.debug("Skipping {} set size {}", diffentryFile.getName(), hunkLimit); - } - int hunkSet = 0; - if (processActionSet) + if (actionSets.size() <= Integer.parseInt(hunkLimit)) { - for (HierarchicalActionSet actionSet : actionSets) { - // FileOutputStream f = null; - - // try { - String astNodeType = actionSet.getAstNodeType(); - // if (astNodeType.equals(rootType)){ - // - // } actionSet.toString(); int size = actionSet.getActionSize(); - String key = astNodeType + "/" + size + "/" + pj + "_" + diffentryFile.getName() + "_" + hunkSet; ITree targetTree = EDiffHelper.getTargets(actionSet, isJava); @@ -70,51 +48,27 @@ public class EDiffHunkParser extends EDiffParser ITree shapeTree = EDiffHelper.getShapeTree(actionSet, isJava); ITree tokenTree = EDiffHelper.getTokenTree(actionSet, isJava); String tokens = EDiffHelper.getNames2(tokenTree); - // EDiffHelper.getTokenTree(actionSet, parent, children, tc); try (Jedis inner = innerPool.getResource()) { - inner.hset("dump", key, actionSet.toString()); inner.hset(key, "actionTree", actionTree.toStaticHashString()); inner.hset(key, "targetTree", targetTree.toStaticHashString()); inner.hset(key, "shapeTree", shapeTree.toStaticHashString()); inner.hset(key, "tokens", tokens); } - // File f = new File(root+"dumps/"+astNodeType+"/"+String.valueOf(size)+"/"); - // f.mkdirs(); - // f = new File(root+"dumps/"+key); - // - // FileUtils.writeByteArrayToFile(f,EDiffHelper.kryoSerialize(actionSet)); - // FileUtils.writeByteArrayToFile(f,EDiffHelper.commonsSerialize(actionSet)); - // FileUtils.writeByteArrayToFile(f,actionSet.toString().getBytes()); - // FileOutputStream fos = new FileOutputStream(f); - // ObjectOutputStream oos = new ObjectOutputStream(fos); - // oos.writeObject(EDiffHelper.kryoSerialize(actionSet)); - // oos.flush(); - // oos.close(); - // } catch (Exception e) { - // logger.error("error", e); - //// e.printStackTrace(); - // } hunkSet++; } try (Jedis inner = innerPool.getResource()) { inner.hset("diffEntry", pj + "_" + diffentryFile.getName(), "1"); } - } } } catch (Exception e) { logger.error("error", e); - // e.printStackTrace(); } - - } - - } diff --git a/richedit/src/main/java/edu/lu/uni/serval/richedit/jobs/EnhancedASTDiff.java b/richedit/src/main/java/edu/lu/uni/serval/richedit/jobs/EnhancedASTDiff.java index 3bc7552..a365a15 100755 --- a/richedit/src/main/java/edu/lu/uni/serval/richedit/jobs/EnhancedASTDiff.java +++ b/richedit/src/main/java/edu/lu/uni/serval/richedit/jobs/EnhancedASTDiff.java @@ -30,18 +30,16 @@ public class EnhancedASTDiff public static void main(String inputPath, String redisPort, String dbDir, String chunkName, String srcMLPath, String hunkLimit, String[] projectList, String patchSize, String projectType, String srcPath) throws Exception { - String parameters = String.format("\nInput path %s", inputPath); - log.info(parameters); + log.info("Input path {}", inputPath); JedisPool innerPool = new JedisPool(PoolBuilder.getPoolConfig(), "127.0.0.1", Integer.parseInt(redisPort), 20000000); boolean isJava = projectType.equals("java"); + + // Find patches File folder = new File(inputPath); File[] listOfFiles = folder.listFiles(); - if (listOfFiles == null) - { - throw new Exception("No projects found, please verify the projects in the input path"); - } + if (listOfFiles == null) throw new Exception("No projects found, please verify the projects in the input path"); Stream stream = Arrays.stream(listOfFiles); List folders; if (projectList.length == 1 && projectList[0].equals("ALL")) @@ -57,6 +55,7 @@ public class EnhancedASTDiff List> allPredicates = new ArrayList>(); for (String s : projectList) { + log.info("processing {}", s); Predicate predicate = x -> x.getName().endsWith(s); allPredicates.add(predicate); } @@ -68,6 +67,7 @@ public class EnhancedASTDiff .collect(Collectors.toList()); } + // Get message files String project = folder.getName(); List allMessageFiles = new ArrayList<>(); for (File target : folders) @@ -92,15 +92,15 @@ public class EnhancedASTDiff { log.info("{} files already process ...", diffEntry.size()); allMessageFiles = allMessageFiles.stream().filter(f -> !diffEntry.containsKey(f.getProject() + "_" + f.getDiffEntryFile().getName())).collect(Collectors.toList()); - log.info("{} files to process ...", allMessageFiles.size()); + log.info("{} files to process ...",allMessageFiles.size()); } - boolean finalIsJava = isJava; + ProgressBar.wrap(allMessageFiles.stream().parallel(), "Task").forEach(m -> - { - EDiffHunkParser parser = new EDiffHunkParser(); - parser.parseFixPatterns(m.getPrevFile(), m.getRevFile(), m.getDiffEntryFile(), project, innerPool, srcMLPath, hunkLimit, finalIsJava); - } - ); + { + EDiffHunkParser parser = new EDiffHunkParser(); + parser.parseFixPatterns(m.getPrevFile(), m.getRevFile(), m.getDiffEntryFile(), project, innerPool, srcMLPath, hunkLimit, isJava); + } + ); }