diff --git a/python/__pycache__/abstractPatch.cpython-37.pyc b/python/__pycache__/abstractPatch.cpython-37.pyc deleted file mode 100644 index 61ebda9..0000000 Binary files a/python/__pycache__/abstractPatch.cpython-37.pyc and /dev/null differ diff --git a/python/app.log b/python/app.log deleted file mode 100644 index acfbe1e..0000000 --- a/python/app.log +++ /dev/null @@ -1,13 +0,0 @@ -2020-04-07 17:31:35,222 - 31200 - INFO - commons.py:setEnv - ROOT_DIR : /Users/anilkoyuncu/projects/release/test/fixminer_source/python -2020-04-07 17:31:35,224 - 31200 - INFO - commons.py:setEnv - REPO_PATH : /Users/anilkoyuncu/projects/test/richedit-data/datasets -2020-04-07 17:31:35,224 - 31200 - INFO - commons.py:setEnv - CODE_PATH : /Users/anilkoyuncu/projects/release/test/fixminer_source/python/code/ -2020-04-07 17:31:35,224 - 31200 - INFO - commons.py:setEnv - COMMIT_DFS : /Users/anilkoyuncu/projects/test/richedit-data/commitsDF/ -2020-04-07 17:31:35,224 - 31200 - INFO - commons.py:setEnv - BUG_POINT : /Users/anilkoyuncu/projects/test/richedit-data/bugPoints/ -2020-04-07 17:31:35,224 - 31200 - INFO - commons.py:setEnv - COMMIT_FOLDER : /Users/anilkoyuncu/projects/test/richedit-data/commits/ -2020-04-07 17:31:35,225 - 31200 - INFO - commons.py:setEnv - FEATURE_DIR : /Users/anilkoyuncu/projects/test/richedit-data/features/ -2020-04-07 17:31:35,225 - 31200 - INFO - commons.py:setEnv - CLASSIFIER_DIR : /Users/anilkoyuncu/projects/test/richedit-data/classifiers/ -2020-04-07 17:31:35,225 - 31200 - INFO - commons.py:setEnv - PREDICTION_DIR : /Users/anilkoyuncu/projects/test/richedit-data/predictions/ -2020-04-07 17:31:35,225 - 31200 - INFO - commons.py:setEnv - DATASET_DIR : /Users/anilkoyuncu/projects/test/richedit-data/datasets/ -2020-04-07 17:31:41,346 - 31200 - INFO - commons.py:shellCallTemplate - JAVA_HOME='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home' java -jar /Users/anilkoyuncu/projects/release/test/fixminer_source/target/FixPatternMiner-1.0.0-jar-with-dependencies.jar /Users/anilkoyuncu/projects/release/test/fixminer_source/src/main/resources/config.yml RICHEDITSCRIPT -2020-04-07 17:31:41,551 - 31200 - ERROR - commons.py:shellCallTemplate - Error: Could not find or load main class edu.lu.uni.serval.richedit.Launcher - diff --git a/python/common/__pycache__/commons.cpython-37.pyc b/python/common/__pycache__/commons.cpython-37.pyc deleted file mode 100644 index a5d87b2..0000000 Binary files a/python/common/__pycache__/commons.cpython-37.pyc and /dev/null differ diff --git a/python/common/commons.py b/python/common/commons.py index 2845156..3a727ef 100644 --- a/python/common/commons.py +++ b/python/common/commons.py @@ -97,6 +97,7 @@ def setEnv(args): os.environ["DATA_PATH"] = cfg['fixminer']['datapath'] os.environ["PROJECT_TYPE"] = cfg['fixminer']['projectType'] os.environ["PROJECT_LIST"] = cfg['fixminer']['projectList'] + os.environ["REDIS_PORT"] = str(cfg['fixminer']['portDumps']) # import yaml # @@ -177,7 +178,22 @@ def getRun(): - +# def shellCallTemplate(cmd,enc='utf-8'): +# process = subprocess.Popen(cmd, +# stdout=subprocess.PIPE,stderr=PIPE, shell=True,encoding=enc, +# universal_newlines=True) +# +# while True: +# output = process.stdout.readline() +# print(output.strip()) +# # Do something else +# return_code = process.poll() +# if return_code is not None: +# print('RETURN CODE', return_code) +# # Process has finished, read rest of the output +# for output in process.stdout.readlines(): +# print(output.strip()) +# break def shellCallTemplate(cmd,enc='utf-8'): try: @@ -510,9 +526,9 @@ def get_class_weights(y): return {cls: round(float(majority)/float(count), 2) for cls, count in counter.items()} -def stopDB(dbDir,portInner,dbName): - cmd = "bash " + dbDir + "/" + "stopServer.sh " + " " + portInner; - +def stopDB(dbDir,portInner): + # cmd = "bash " + dbDir + "/" + "stopServer.sh " + " " + portInner; + cmd = "redis-cli -p " + portInner + " shutdown save" o, e = shellGitCheckout(cmd) logging.info(o) diff --git a/python/data/redis/stopServer.sh b/python/data/redis/stopServer.sh index dd3bcfb..433c9ea 100644 --- a/python/data/redis/stopServer.sh +++ b/python/data/redis/stopServer.sh @@ -1,3 +1,4 @@ #!/bin/bash -source activate redisEnv + +#source activate redisEnv redis-cli -p $1 shutdown save diff --git a/python/javaDS.py b/python/javaDS.py index 89175d9..444e93b 100644 --- a/python/javaDS.py +++ b/python/javaDS.py @@ -1,64 +1,132 @@ -def createDS(subject): - # # if job == 'clone': - from commitCollector import * +from common.commons import * +from commitCollector import * +DATA_PATH = os.environ["DATA_PATH"] +COMMIT_DFS = os.environ["COMMIT_DFS"] +# DATASET_PATH = '/Users/anilkoyuncu/projects/datasets' +REPO_PATH = os.environ["REPO_PATH"] +DATASET_PATH = os.environ["REPO_PATH"] +DATASET = os.environ["dataset"] +ROOT = os.environ["ROOT_DIR"] +PROJECT_LIST = os.environ["PROJECT_LIST"] - caseClone(subject) - # # elif job == 'collect': - from commitCollector import * - - caseCollect(subject) - # # elif job == 'fix': - from filterBugFixingCommits import caseFix - - caseFix(subject) - # - # # elif job =='brDownload': - from bugReportDownloader import caseBRDownload - - caseBRDownload(subject) - # # elif job =='brParser': - from bugReportParser import step1 - - step1(subject) - - # elif job =='dataset': - - if not isfile(join(DATA_PATH, 'singleBR.pickle')): - - brs = load_zipped_pickle(join(DATA_PATH, subject + "bugReportsComplete.pickle")) - - subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) +from otherDatasets import markBugFixingPatches - def getCommit(x): - bid, project = x +def createDS(): - subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) - repo = subjects.query("Subject == '{0}'".format(project)).Repo.tolist()[0] - commits = load_zipped_pickle(join(DATA_PATH, COMMIT_DFS, repo + '.pickle')) - correspondingCommit = commits.query("fix =='{0}'".format(bid)).commit.tolist() - if len(correspondingCommit) == 1: - return [bid, correspondingCommit[0], project] - else: - return None - print('error') + pjList = PROJECT_LIST.split(',') + if not os.path.exists(DATASET_PATH): + os.mkdir(DATASET_PATH) + + subjects = pd.read_csv(join(ROOT,'data', 'dataset.csv')) - wl = brs[['bid', 'project']].values.tolist() - dataL = parallelRunMerge(getCommit, wl) - - commits = pd.DataFrame( - columns=['bid', 'commit', 'project'], - data=list(filter(None.__ne__, dataL))) - - save_zipped_pickle(commits, join(DATA_PATH, 'singleBR.pickle')) + if pjList == ['ALL']: + tuples = subjects[['Repo','GitRepo','Branch']].values.tolist() else: - commits = load_zipped_pickle(join(DATA_PATH, 'singleBR.pickle')) - subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) - logging.info('done matching commits') - commits['repo'] = commits.project.apply(lambda x: subjects.query("Subject == '{0}'".format(x)).Repo.tolist()[0]) + # repos = subjects.query("Subject == '{0}'".format(subject)).Repo.tolist() + tuples = subjects[subjects.Repo.isin(pjList)][['Repo', 'GitRepo','Branch']].values.tolist() - workList = commits[['commit', 'repo']].values.tolist() - from dataset import prepareFiles + for t in tuples: + repo,src,branch = t + logging.info(repo) + if isfile(join(COMMIT_DFS,repo+'Fix.pickle')): + commits = load_zipped_pickle(join(COMMIT_DFS,repo+'Fix.pickle')) + else: + cmd = 'git config --global http.postBuffer 157286400' + shellCallTemplate(cmd) + cmd = 'git -C ' + DATASET_PATH + ' clone ' + src + shellCallTemplate(cmd) + logging.info(repo) + getCommitFromRepo(join(REPO_PATH, repo), join(COMMIT_DFS, repo),branch) + rDF = makeDF(join(COMMIT_DFS,repo + '.commits')) + save_zipped_pickle(rDF, join(COMMIT_DFS, repo + ".pickle")) + # return rDF + commits = rDF + commits = markBugFixingPatches(commits,repo) + commits = commits[commits.files.apply(lambda x: np.any([i == 'M' for i in x.values()]))] + # keep only commits that are changing c files (.c) + commits = commits[commits.files.apply(lambda x: np.all([i.endswith('.java') for i in x.keys()]))] + #not a revert commit + # commits = commits[~commits.log.apply(lambda x: x.startswith('Revert'))] + # commits = commits[commits.files.apply(lambda x: len(x) == 1)] + # commits['cocci'] = commits.log.apply(lambda x: True if re.search('cocci|coccinelle', x) else False) + # coccis = commits[commits.cocci].commit.values.tolist() + fixes = commits[commits.fixes.str.len()!=0].commit.values.tolist() + # links = commits[commits.links.str.len()!=0].commit.values.tolist() - parallelRun(prepareFiles, workList) \ No newline at end of file + # bugs = set(fixes).union(links).union(coccis) + # bugs = set(fixes)#.union(coccis) + commits = commits[commits.commit.isin(fixes)] + print(len(commits)) + # for s in a.commit.values.tolist(): + from otherDatasets import prepareFiles + parallelRun(prepareFiles,commits[['commit','files']].values.tolist(),repo) + + # # if job == 'clone': + # for repo,src in subjects[['Repo','GitRepo']].values.tolist(): + # if(pjList != ['ALL']): + # if repo in pjList: + # print(repo) + # cmd = 'git -C ' + DATASET_PATH + ' clone ' + src + # shellCallTemplate(cmd) + # logging.info(repo) + + # caseClone(subject) + + # caseCollect(subject) + # # elif job == 'fix': + # from filterBugFixingCommits import caseFix + # + # caseFix(subject) + # # + # # # elif job =='brDownload': + # from bugReportDownloader import caseBRDownload + # + # caseBRDownload(subject) + # # # elif job =='brParser': + # from bugReportParser import step1 + # + # step1(subject) + # + # # elif job =='dataset': + # + # if not isfile(join(DATA_PATH, 'singleBR.pickle')): + # + # brs = load_zipped_pickle(join(DATA_PATH, subject + "bugReportsComplete.pickle")) + # + # subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) + # + # + # def getCommit(x): + # bid, project = x + # + # subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) + # repo = subjects.query("Subject == '{0}'".format(project)).Repo.tolist()[0] + # commits = load_zipped_pickle(join(DATA_PATH, COMMIT_DFS, repo + '.pickle')) + # correspondingCommit = commits.query("fix =='{0}'".format(bid)).commit.tolist() + # if len(correspondingCommit) == 1: + # return [bid, correspondingCommit[0], project] + # else: + # return None + # print('error') + # + # + # wl = brs[['bid', 'project']].values.tolist() + # dataL = parallelRunMerge(getCommit, wl) + # + # commits = pd.DataFrame( + # columns=['bid', 'commit', 'project'], + # data=list(filter(None.__ne__, dataL))) + # + # save_zipped_pickle(commits, join(DATA_PATH, 'singleBR.pickle')) + # else: + # commits = load_zipped_pickle(join(DATA_PATH, 'singleBR.pickle')) + # subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv')) + # logging.info('done matching commits') + # commits['repo'] = commits.project.apply(lambda x: subjects.query("Subject == '{0}'".format(x)).Repo.tolist()[0]) + # + # workList = commits[['commit', 'repo']].values.tolist() + # from dataset import prepareFiles + # + # parallelRun(prepareFiles, workList) \ No newline at end of file diff --git a/python/main.py b/python/main.py index 881e57a..20d55e7 100644 --- a/python/main.py +++ b/python/main.py @@ -15,6 +15,7 @@ if __name__ == '__main__': setEnv(args) job = args.job + job = "cluster" ROOT_DIR = os.environ["ROOT_DIR"] REPO_PATH = os.environ["REPO_PATH"] CODE_PATH = os.environ["CODE_PATH"] @@ -25,11 +26,12 @@ if __name__ == '__main__': FEATURE_DIR = os.environ["FEATURE_DIR"] DATASET_DIR = os.environ["DATASET_DIR"] PROJECT_TYPE = os.environ["PROJECT_TYPE"] + REDIS_PORT = os.environ["REDIS_PORT"] jdk8 = os.environ["JDK8"] pd.options.mode.chained_assignment = None - subject = 'ALL' + # subject = 'ALL' rootType = 'if' print(job) @@ -37,7 +39,7 @@ if __name__ == '__main__': if job == 'dataset4j': from javaDS import createDS - createDS(subject) + createDS() # elif job == 'linuxDS': # from linuxDataset import collectBugFixPatches # collectBugFixPatches() @@ -45,6 +47,8 @@ if __name__ == '__main__': from otherDatasets import core core() elif job =='richEditScript': + dbDir = join(DATA_PATH, 'redis') + stopDB(dbDir, REDIS_PORT) cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target','FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " RICHEDITSCRIPT " output = shellCallTemplate(cmd) logging.info(output) @@ -64,7 +68,7 @@ if __name__ == '__main__': from pairs import importShape importShape() - elif job =='compareShapes': + elif job =='compare': # cmd = "mvn exec:java -f '/data/fixminer_source/' -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'" cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(Path(ROOT_DIR).parent, 'target','FixPatternMiner-1.0.0-jar-with-dependencies.jar') + " " + args.prop + " COMPARE " output = shellCallTemplate(cmd) @@ -81,27 +85,27 @@ if __name__ == '__main__': startDB(dbDir, "6399", PROJECT_TYPE) cluster(join(DATA_PATH,'shapes'),join(DATA_PATH, 'pairs'),'shapes',rootType) - elif job =='actionSI': - from pairs import actionPairs - actionPairs(rootType) - - # elif job =='importActionPairs': - from pairs import importAction - importAction(rootType) - - elif job =='compareActions': - # cmd = "JAVA_HOME='"+jdk8+"' java -Xmx8096m -Djava.util.concurrent.ForkJoinPool.common.parallelism=64 -jar "+ join(DATA_PATH,'CompareTrees.jar') + " action " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl1-gumInputALL.rdb" - - cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L2' - output = shellCallTemplate(cmd) - logging.info(output) - - elif job == 'clusterActions': - from abstractPatch import cluster - - dbDir = join(DATA_PATH, 'redis') - startDB(dbDir, "6399", PROJECT_TYPE) - cluster( join(DATA_PATH, 'actions'),join(DATA_PATH, 'pairsAction'),'actions',rootType) + # elif job =='actionSI': + # from pairs import actionPairs + # actionPairs(rootType) + # + # # elif job =='importActionPairs': + # from pairs import importAction + # importAction(rootType) + # + # elif job =='compareActions': + # # cmd = "JAVA_HOME='"+jdk8+"' java -Xmx8096m -Djava.util.concurrent.ForkJoinPool.common.parallelism=64 -jar "+ join(DATA_PATH,'CompareTrees.jar') + " action " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl1-gumInputALL.rdb" + # + # cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L2' + # output = shellCallTemplate(cmd) + # logging.info(output) + # + # elif job == 'clusterActions': + # from abstractPatch import cluster + # + # dbDir = join(DATA_PATH, 'redis') + # startDB(dbDir, "6399", PROJECT_TYPE) + # cluster( join(DATA_PATH, 'actions'),join(DATA_PATH, 'pairsAction'),'actions',rootType) elif job == 'tokenSI': from pairs import tokenPairs diff --git a/python/otherDatasets.py b/python/otherDatasets.py index b9998db..d5b7589 100644 --- a/python/otherDatasets.py +++ b/python/otherDatasets.py @@ -120,8 +120,9 @@ def prepareFiles(t,dsName): nonTest = [] for k,v in files.items(): if v == 'M': - if k.endswith('.c') or k.endswith(u'.h'): - nonTest.append(k) + nonTest.append(k) + # if k.endswith('.c') or k.endswith(u'.h'): + # nonTest.append(k) # nonTest = [f for f in files.keys() if f.endswith('.c') or f.endswith(u'.h')] cmd = 'git -C ' + join(DATASET_PATH,dsName) + ' rev-parse --short=6 ' + shaOld @@ -276,6 +277,8 @@ def core(): if(pjList != ['ALL']): if repo in pjList: print(repo) + cmd = 'git config --global http.postBuffer 157286400' + shellCallTemplate(cmd) cmd = 'git -C ' + DATASET_PATH + ' clone ' + src shellCallTemplate(cmd) logging.info(repo)