diff --git a/.gitignore b/.gitignore index 310ffdd..e93ad44 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,10 @@ gen.jdt/src/main/java/edu/lu/uni/serval/gumtree/GumTreeTester.java *.pyc app.log *.rdb + +# Gradle temporary files +.gradle +gradlew +gradlew.bat +gradle + diff --git a/python/javaDS.py b/python/javaDS.py index 10a6913..de6bb82 100644 --- a/python/javaDS.py +++ b/python/javaDS.py @@ -1,60 +1,55 @@ from common.commons import * from commitCollector import * -DATA_PATH = os.environ["DATA_PATH"] -COMMIT_DFS = os.environ["COMMIT_DFS"] -# DATASET_PATH = '/Users/anilkoyuncu/projects/datasets' -REPO_PATH = os.environ["REPO_PATH"] -DATASET_PATH = os.environ["REPO_PATH"] -DATASET = os.environ["dataset"] -ROOT = os.environ["ROOT_DIR"] -PROJECT_LIST = os.environ["PROJECT_LIST"] +from python.settings import * from otherDatasets import markBugFixingPatches +DATASET_PATH = REPO_PATH +DATASET = os.environ["dataset"] +PROJECT_LIST = os.environ["PROJECT_LIST"] + def createDS(): - pjList = PROJECT_LIST.split(',') if not os.path.exists(DATASET_PATH): os.mkdir(DATASET_PATH) if not os.path.exists(COMMIT_DFS): os.mkdir(COMMIT_DFS) - subjects = pd.read_csv(join(ROOT,'data', 'dataset.csv')) - + subjects = pd.read_csv(join(ROOT_DIR, 'data', 'dataset.csv')) if pjList == ['ALL']: - tuples = subjects[['Repo','GitRepo','Branch']].values.tolist() + tuples = subjects[['Repo', 'GitRepo', 'Branch']].values.tolist() else: # repos = subjects.query("Subject == '{0}'".format(subject)).Repo.tolist() - tuples = subjects[subjects.Repo.isin(pjList)][['Repo', 'GitRepo','Branch']].values.tolist() + tuples = subjects[subjects.Repo.isin(pjList)][['Repo', 'GitRepo', 'Branch']].values.tolist() for t in tuples: - repo,src,branch = t + repo, src, branch = t logging.info(repo) - if isfile(join(COMMIT_DFS,repo+'Fix.pickle')): - commits = load_zipped_pickle(join(COMMIT_DFS,repo+'Fix.pickle')) + if isfile(join(COMMIT_DFS, repo + 'Fix.pickle')): + commits = load_zipped_pickle(join(COMMIT_DFS, repo + 'Fix.pickle')) else: cmd = 'git config --global http.postBuffer 157286400' shellCallTemplate(cmd) cmd = 'git -C ' + DATASET_PATH + ' clone ' + src shellCallTemplate(cmd) logging.info(repo) - getCommitFromRepo(join(REPO_PATH, repo), join(COMMIT_DFS, repo),branch) - rDF = makeDF(join(COMMIT_DFS,repo + '.commits')) + getCommitFromRepo(join(REPO_PATH, repo), join(COMMIT_DFS, repo), branch) + rDF = makeDF(join(COMMIT_DFS, repo + '.commits')) save_zipped_pickle(rDF, join(COMMIT_DFS, repo + ".pickle")) # return rDF commits = rDF - commits = markBugFixingPatches(commits,repo) + commits = markBugFixingPatches(commits, repo) commits = commits[commits.files.apply(lambda x: np.any([i == 'M' for i in x.values()]))] # keep only commits that are changing c files (.c) commits = commits[commits.files.apply(lambda x: np.all([i.endswith('.java') for i in x.keys()]))] - #not a revert commit + # not a revert commit # commits = commits[~commits.log.apply(lambda x: x.startswith('Revert'))] # commits = commits[commits.files.apply(lambda x: len(x) == 1)] # commits['cocci'] = commits.log.apply(lambda x: True if re.search('cocci|coccinelle', x) else False) # coccis = commits[commits.cocci].commit.values.tolist() - fixes = commits[commits.fixes.str.len()!=0].commit.values.tolist() + fixes = commits[commits.fixes.str.len() != 0].commit.values.tolist() # links = commits[commits.links.str.len()!=0].commit.values.tolist() # bugs = set(fixes).union(links).union(coccis) @@ -63,7 +58,7 @@ def createDS(): print(len(commits)) # for s in a.commit.values.tolist(): from otherDatasets import prepareFiles - parallelRun(prepareFiles,commits[['commit','files']].values.tolist(),repo) + parallelRun(prepareFiles, commits[['commit', 'files']].values.tolist(), repo) # # if job == 'clone': # for repo,src in subjects[['Repo','GitRepo']].values.tolist(): @@ -131,4 +126,4 @@ def createDS(): # workList = commits[['commit', 'repo']].values.tolist() # from dataset import prepareFiles # - # parallelRun(prepareFiles, workList) \ No newline at end of file + # parallelRun(prepareFiles, workList) diff --git a/python/main.py b/python/main.py index f7c7651..1ac029d 100644 --- a/python/main.py +++ b/python/main.py @@ -1,70 +1,60 @@ from common.commons import * - - - - +from python.settings import * if __name__ == '__main__': - - try: args = getRun() setLogg() - setEnv(args) job = args.job # job = "cluster" - ROOT_DIR = os.environ["ROOT_DIR"] - REPO_PATH = os.environ["REPO_PATH"] - CODE_PATH = os.environ["CODE_PATH"] - DATA_PATH = os.environ["DATA_PATH"] - COMMIT_DFS = os.environ["COMMIT_DFS"] - BUG_POINT = os.environ["BUG_POINT"] - COMMIT_FOLDER = os.environ["COMMIT_FOLDER"] - FEATURE_DIR = os.environ["FEATURE_DIR"] - DATASET_DIR = os.environ["DATASET_DIR"] - PROJECT_TYPE = os.environ["PROJECT_TYPE"] - REDIS_PORT = os.environ["REDIS_PORT"] - jdk8 = os.environ["JDK8"] pd.options.mode.chained_assignment = None - # subject = 'ALL' # rootType = 'if' print(job) - if job == 'dataset4j': from javaDS import createDS + createDS() - elif job =='dataset4c': + elif job == 'dataset4c': from otherDatasets import core + core() - elif job =='richedit': + elif job == 'richedit': dbDir = join(DATA_PATH, 'redis') stopDB(dbDir, REDIS_PORT) - cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target','FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " RICHEDITSCRIPT " + cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target', + 'FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " RICHEDITSCRIPT " output = shellCallTemplate(cmd) logging.info(output) - elif job =='actionSI': + elif job == 'actionSI': from pairs import actionPairs + matches = actionPairs() from pairs import createPairs + createPairs(matches) from pairs import importAction + importAction() - elif job =='compare': - # cmd = "mvn exec:java -f '/data/fixminer_source/' -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'" - cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target','FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " COMPARE " + elif job == 'compare': + # cmd = "mvn exec:java -f '/data/fixminer_source/' + # -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' + # -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + + # "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'" + cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target', + 'FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " COMPARE " output = shellCallTemplate4jar(cmd) logging.info(output) @@ -74,13 +64,15 @@ if __name__ == '__main__': dbDir = join(DATA_PATH, 'redis') startDB(dbDir, REDIS_PORT, PROJECT_TYPE) - cluster(join(DATA_PATH,'actions'),join(DATA_PATH, 'pairs'),'actions') + cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions') - elif job =='tokenSI': + elif job == 'tokenSI': from pairs import tokenPairs + tokenPairs() from pairs import importTokens + importTokens() elif job == 'clusterTokens': @@ -88,25 +80,29 @@ if __name__ == '__main__': dbDir = join(DATA_PATH, 'redis') startDB(dbDir, REDIS_PORT, PROJECT_TYPE) - cluster( join(DATA_PATH, 'tokens'),join(DATA_PATH, 'pairsToken'),'tokens') + cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens') elif job == 'codeflaws': from otherDatasets import codeflaws + codeflaws() - elif job =='indexClusters': + elif job == 'indexClusters': from sprinferIndex import runSpinfer + runSpinfer() from sprinferIndex import test + test() from sprinferIndex import divideCoccis + divideCoccis() from sprinferIndex import removeDuplicates - removeDuplicates() + removeDuplicates() # from patchManyBugs import patchCore # patchCore() @@ -118,11 +114,13 @@ if __name__ == '__main__': # # validate() - elif job =='patternOperations': + elif job == 'patternOperations': from sprinferIndex import patternOperations + patternOperations() elif job == 'patchManyBugs': from patchManyBugs import buildAll + buildAll() # from patchManyBugs import patchCore @@ -134,53 +132,64 @@ if __name__ == '__main__': # from patchManyBugs import exportSosPatches # exportSosPatches() - elif job =='patchIntro': + elif job == 'patchIntro': from sprinferIndex import patchCoreIntro + patchCoreIntro() # from sprinferIndex import patched # patched() - elif job =='validateIntro': + elif job == 'validateIntro': # from patch_validate_introClass2 import patch_validate # patch_validate() from test_patched_file import patch_validate + patch_validate() - elif job =='checkCorrectIntro': + elif job == 'checkCorrectIntro': from test_patched_file import checkCorrect + checkCorrect() elif job == 'manybugs': from getManybugs import export + export() elif job == 'validateMany': from patch_validate import patch_validate + patch_validate() elif job == 'introclass': from getIntroClass import export + export() - elif job =='stats': + elif job == 'stats': from stats import statsNormal + statsNormal(True) elif job == 'datasetDefects4J': from defects4JDataset import core + core() - elif job =='bug': + elif job == 'bug': from bugstats import bStats + bStats() elif job == 'defects4j': from stats import defects4jStats + defects4jStats() elif job == 'patterns': from stats import exportAbstractPatterns + exportAbstractPatterns() else: - logging.error('Unknown job %s',job) + logging.error('Unknown job %s', job) except Exception as e: logging.error(e) diff --git a/python/settings.py b/python/settings.py new file mode 100644 index 0000000..b822237 --- /dev/null +++ b/python/settings.py @@ -0,0 +1,14 @@ +import os + +ROOT_DIR = os.environ["ROOT_DIR"] +REPO_PATH = os.environ["REPO_PATH"] +CODE_PATH = os.environ["CODE_PATH"] +DATA_PATH = os.environ["DATA_PATH"] +COMMIT_DFS = os.environ["COMMIT_DFS"] +BUG_POINT = os.environ["BUG_POINT"] +COMMIT_FOLDER = os.environ["COMMIT_FOLDER"] +FEATURE_DIR = os.environ["FEATURE_DIR"] +DATASET_DIR = os.environ["DATASET_DIR"] +PROJECT_TYPE = os.environ["PROJECT_TYPE"] +REDIS_PORT = os.environ["REDIS_PORT"] +jdk8 = os.environ["JDK8"]