diff --git a/python/common/commons.py b/python/common/commons.py
index 71e36dd..9cf3c23 100644
--- a/python/common/commons.py
+++ b/python/common/commons.py
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 import logging
+import shlex
 import sys
 import gzip
 import traceback
@@ -24,6 +27,7 @@ import concurrent.futures
 
 import time
 import math
+import yaml
 from collections import Counter
 import datetime
 
@@ -71,21 +75,9 @@ def setLogg():
 
 
 def setEnv(args):
-    # env = args.env
-
-    # logging.info('Environment: %s',env)
-
     os.environ["ROOT_DIR"] = args.root
     sys.path.append(args.root)
 
-    import yaml
-    # if os.uname().nodename != '':
-    #     with open(join(os.environ["ROOT_DIR"], os.uname().nodename + ".config.yml"), 'r') as ymlfile:
-    #         cfg = yaml.load(ymlfile)
-    # else:
-    #     with open(join(os.environ["ROOT_DIR"], "config.yml"), 'r') as ymlfile:
-    #         cfg = yaml.load(ymlfile)
-
     with open(args.prop, 'r') as ymlfile:
         cfg = yaml.safe_load(ymlfile)
     # for section in cfg:
@@ -93,70 +85,43 @@ def setEnv(args):
     # print(cfg['mysql'])
     # print(cfg['other'])
 
-    # os.environ["JDK7"] = cfg['java']['7home']
-    os.environ["JDK8"] = cfg['java']['8home']
-    os.environ["spinfer"] = cfg['spinfer']['home']
-    os.environ["coccinelle"] = cfg['coccinelle']['home']
-    os.environ["dataset"] = cfg['dataset']['inputPath']
-    os.environ["REPO_PATH"] = cfg['dataset']['repo']
-    os.environ["DATA_PATH"] = cfg['fixminer']['datapath']
-    os.environ["PROJECT_TYPE"] = cfg['fixminer']['projectType']
-    os.environ["PROJECT_LIST"] = cfg['fixminer']['projectList']
-    os.environ["REDIS_PORT"] = str(cfg['fixminer']['portDumps'])
+    env: dict[str, str] = {
+        "ROOT_DIR": args.root,
+        "JDK8": cfg['java']['8home'],
+        "spinfer": cfg['spinfer']['home'],
+        "coccinelle": cfg['coccinelle']['home'],
+        "dataset": cfg['dataset']['inputPath'],
+        "REPO_PATH": cfg['dataset']['repo'],
+        "DATA_PATH": cfg['fixminer']['datapath'],
+        "PROJECT_TYPE": cfg['fixminer']['projectType'],
+        "PROJECT_LIST": cfg['fixminer']['projectList'],
+        "REDIS_PORT": str(cfg['fixminer']['portDumps'])
+    }
 
-    # import yaml
-    #
-    # with open(join(os.environ["ROOT_DIR"],"config.yml"), 'r') as ymlfile:
-    #     cfg = yaml.load(ymlfile)
-    #
-    # # for section in cfg:
-    # #     print(section)
-    # # print(cfg['mysql'])
-    # # print(cfg['other'])
-    #
-    # os.environ["JDK7"] = cfg['java']['7home']
-    # os.environ["JDK8"] = cfg['java']['8home']
-    # os.environ["D4JHOME"] = cfg['defects4j']['home']
+    env["CODE_PATH"] = join(env["ROOT_DIR"], 'code/')
+    env["COMMIT_DFS"] = join(env["DATA_PATH"], 'commitsDF/')
+    env["SIMI_DIR"] = join(env["DATA_PATH"], 'simi/')
+    env["DTM_PATH"] = join(env["DATA_PATH"], 'dtm/')
+    env["SIMI_SINGLE"] = join(env["DATA_PATH"], 'simiSingle/')
+    env["FEATURE_DIR"] = join(env["DATA_PATH"], 'features/')
 
-    os.environ["CODE_PATH"] = join(os.environ["ROOT_DIR"], 'code/')
-    # os.environ["DATA_PATH"] = join(os.environ["ROOT_DIR"],'data/')
-    # os.environ["REPO_PATH"] = join(os.environ["DATA_PATH"], 'gitrepo/')
-    os.environ["COMMIT_DFS"] = join(os.environ["DATA_PATH"], 'commitsDF/')
-    os.environ["SIMI_DIR"] = join(os.environ["DATA_PATH"], 'simi/')
-    os.environ["DTM_PATH"] = join(os.environ["DATA_PATH"], 'dtm/')
-    os.environ["SIMI_SINGLE"] = join(os.environ["DATA_PATH"], 'simiSingle/')
-    os.environ["FEATURE_DIR"] = join(os.environ["DATA_PATH"], 'features/')
+    env["BUG_POINT"] = join(env["DATA_PATH"], 'bugPoints/')
+    env["DEFECTS4J"] = join(env["DATA_PATH"], 'defects4jdata/')
 
-    os.environ["BUG_POINT"] = join(os.environ["DATA_PATH"], 'bugPoints/')
-    os.environ["DEFECTS4J"] = join(os.environ["DATA_PATH"], 'defects4jdata/')
+    env["BUG_REPORT"] = join(env["DATA_PATH"], 'bugReports/')
+    env["BUG_REPORT_FEATURES"] = join(env["DATA_PATH"], 'bugReportFeatures/')
 
-    os.environ["BUG_REPORT"] = join(os.environ["DATA_PATH"], 'bugReports/')
-    os.environ["BUG_REPORT_FEATURES"] = join(os.environ["DATA_PATH"], 'bugReportFeatures/')
-    # os.environ["PARSED_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesSingle/')
-    # os.environ["PARSED_M_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesMulti/')
+    env["PARSED"] = join(env["DATA_PATH"], 'parsedPj/')
+    env["PARSED_DIR"] = join(env["DATA_PATH"], 'parsedFilesSingle/')
+    env["COMMIT_FOLDER"] = join(env["DATA_PATH"], 'commits/')
+    env["CLASSIFIER_DIR"] = join(env["DATA_PATH"], 'classifiers/')
+    env["PREDICTION_DIR"] = join(env["DATA_PATH"], 'predictions/')
+    env["DATASET_DIR"] = join(env["DATA_PATH"], 'datasets/')
+    env["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
 
-    os.environ["PARSED"] = join(os.environ["DATA_PATH"], 'parsedPj/')
-    os.environ["PARSED_DIR"] = join(os.environ["DATA_PATH"], 'parsedFilesSingle/')
-    os.environ["COMMIT_FOLDER"] = join(os.environ["DATA_PATH"], 'commits/')
-    os.environ["CLASSIFIER_DIR"] = join(os.environ["DATA_PATH"], 'classifiers/')
-    os.environ["PREDICTION_DIR"] = join(os.environ["DATA_PATH"], 'predictions/')
-    os.environ["DATASET_DIR"] = join(os.environ["DATA_PATH"], 'datasets/')
-    os.environ["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
+    os.environ.update(env)
 
-    logging.info('ROOT_DIR : %s', os.environ["ROOT_DIR"])
-    logging.info('REPO_PATH : %s', os.environ["REPO_PATH"])
-    logging.info('CODE_PATH : %s', os.environ["CODE_PATH"])
-    logging.info('COMMIT_DFS : %s', os.environ["COMMIT_DFS"])
-    # logging.info('SIMI_DIR : %s', os.environ["SIMI_DIR"])
-    logging.info('BUG_POINT : %s', os.environ["BUG_POINT"])
-    # logging.info('PARSED_DIR : %s', os.environ["PARSED_DIR"])
-    logging.info('COMMIT_FOLDER : %s', os.environ["COMMIT_FOLDER"])
-    # logging.info('DTM_PATH : %s', os.environ["DTM_PATH"])
-    # logging.info('SIMI_SINGLE : %s', os.environ["SIMI_SINGLE"])
-    logging.info('FEATURE_DIR : %s', os.environ["FEATURE_DIR"])
-    logging.info('CLASSIFIER_DIR : %s', os.environ["CLASSIFIER_DIR"])
-    logging.info('PREDICTION_DIR : %s', os.environ["PREDICTION_DIR"])
-    logging.info('DATASET_DIR : %s', os.environ["DATASET_DIR"])
+    return cfg, env
 
 
 def shellCallTemplate4jar(cmd, enc='utf-8'):
@@ -178,21 +143,37 @@ def shellCallTemplate4jar(cmd, enc='utf-8'):
 
 
 def shellCallTemplate(cmd, enc='utf-8'):
+    """Run ``cmd`` through the shell and return its captured stdout.
+
+    Any stderr output other than the tolerated 'unknown revision or path
+    not in the working tree' message is treated as a failure: stdout and
+    stderr are printed and the interpreter exits with the command's status
+    (1 if the command itself exited 0).
+    """
+    out, err = "", ""
     try:
         logging.info(cmd)
         with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
-            output, errors = p.communicate()
+            out, err = p.communicate()
             # print(output)
-            if errors:
-                m = re.search('unknown revision or path not in the working tree', errors)
+            if err:
+                m = re.search('unknown revision or path not in the working tree', err)
                 if not m:
-                    raise CalledProcessError(errors, '-1')
+                    # CalledProcessError takes (returncode, cmd); a command can
+                    # write to stderr yet exit 0, so fall back to status 1.
+                    raise CalledProcessError(p.returncode or 1, cmd, output=out, stderr=err)
     except CalledProcessError as e:
-        print(f'Error while executing {cmd}\n> {errors}')
+        print(f'Error while executing {cmd}')
+        if out:
+            print(f'STDOUT:\n> {out}')
+        if err:
+            print(f'STDERR:\n> {err}')
         traceback.print_exc()
+        sys.exit(e.returncode)
+
     except Exception as e:
         traceback.print_exc()
-    return output
+
+    return out
 
 
 def getChildMem(pid, children):
diff --git a/python/main.py b/python/main.py
index 3215104..699ae55 100644
--- a/python/main.py
+++ b/python/main.py
@@ -1,3 +1,5 @@
+import inspect
+
 from common.commons import *
 import argparse
 
@@ -9,6 +11,7 @@ def parse_args():
 
     args = parser.parse_args()
 
+    # Automatically set root to be the path of the current file
     args.root = str(Path(__file__).parent.absolute())
 
     if args.root is None or args.job is None or args.prop is None:
@@ -17,172 +20,210 @@ def parse_args():
     return args
 
 
+def job_dataset4j():
+    from dataset4j import create_dataset
+
+    create_dataset(cfg)
+
+
+def job_dataset4c():
+    from otherDatasets import core
+
+    core()
+
+
+def job_richedit():
+    dbDir = join(DATA_PATH, 'redis')
+    stopDB(dbDir, REDIS_PORT)
+    cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} RICHEDITSCRIPT "
+    output = shellCallTemplate(cmd)
+    logging.info(output)
+
+
+def job_actionSI():
+    from pairs import actionPairs, createPairs, importAction
+
+    matches = actionPairs()
+    createPairs(matches)
+    importAction()
+
+
+def job_compare():
+    # cmd = "mvn exec:java -f '/data/fixminer_source/'
+    # -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
+    # -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
+    # "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
+    cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} COMPARE "
+    output = shellCallTemplate4jar(cmd)
+    logging.info(output)
+
+
+def job_cluster():
+    from abstractPatch import cluster
+
+    dbDir = join(DATA_PATH, 'redis')
+    startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
+    cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions')
+
+
+def job_tokenSI():
+    from pairs import tokenPairs, importTokens
+
+    tokenPairs()
+    importTokens()
+
+
+def job_clusterTokens():
+    from abstractPatch import cluster
+
+    dbDir = join(DATA_PATH, 'redis')
+    startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
+    cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')
+
+
+def job_codeflaws():
+    from otherDatasets import codeflaws
+
+    codeflaws()
+
+
+def job_indexClusters():
+    from sprinferIndex import runSpinfer, test, divideCoccis, removeDuplicates
+
+    runSpinfer()
+    test()
+    divideCoccis()
+    removeDuplicates()
+
+    # from patchManyBugs import patchCore
+    # patchCore()
+    # # from patchManyBugs import patched
+    # # patched()
+    # from patchManyBugs import exportSosPatches
+    # exportSosPatches()
+    # from validate_manybugs import validate
+    #
+    # validate()
+
+
+def job_patternOperations():
+    from sprinferIndex import patternOperations
+
+    patternOperations()
+
+
+def job_patchManyBugs():
+    from patchManyBugs import buildAll
+
+    buildAll()
+
+    # from patchManyBugs import patchCore
+    # patchCore()
+    # # from patch_validate import patch_validate_mine
+    # # patch_validate_mine()
+    # from patchManyBugs import patched
+    # patched()
+    # from patchManyBugs import exportSosPatches
+    # exportSosPatches()
+
+
+def job_patchIntro():
+    from sprinferIndex import patchCoreIntro
+
+    patchCoreIntro()
+    # from sprinferIndex import patched
+    # patched()
+
+
+def job_validateIntro():
+    # from patch_validate_introClass2 import patch_validate
+    # patch_validate()
+    from test_patched_file import patch_validate
+
+    patch_validate()
+
+
+def job_checkCorrectIntro():
+    from test_patched_file import checkCorrect
+
+    checkCorrect()
+
+
+def job_manybugs():
+    from getManybugs import export
+
+    export()
+
+
+def job_validateMany():
+    from patch_validate import patch_validate
+
+    patch_validate()
+
+
+def job_introclass():
+    from getIntroClass import export
+
+    export()
+
+
+def job_stats():
+    from stats import statsNormal
+
+    statsNormal(True)
+
+
+def job_datasetDefects4J():
+    from defects4JDataset import core
+
+    core()
+
+
+def job_bug():
+    from bugstats import bStats
+
+    bStats()
+
+
+def job_defects4j():
+    from stats import defects4jStats
+
+    defects4jStats()
+
+
+def job_patterns():
+    from stats import exportAbstractPatterns
+
+    exportAbstractPatterns()
+
+
+def job_pipeline():
+    fs = [job_dataset4j, job_richedit, job_actionSI, job_compare, job_cluster, job_tokenSI,
+          job_compare, job_stats, job_patterns]
+
+    for i, f in enumerate(fs):
+        print(f'Running {i + 1}: {f.__name__}...')
+        f()
+
+
+JOBS = {name[4:]: f for name, f in inspect.getmembers(sys.modules[__name__])
+        if inspect.isfunction(f) and name.startswith('job_')}
+
+
 if __name__ == '__main__':
     args = parse_args()
     setLogg()
-    setEnv(args)
+    cfg, _ = setEnv(args)
 
-    job = args.job
+    # Parse job
+    job: str = args.job.strip()
+    if job not in JOBS:
+        print(f'Job "{job}" is not supported. Available jobs: {", ".join(JOBS.keys())}')
+        sys.exit(-1)
 
-    from python.settings import *
+    from settings import *
 
     pd.options.mode.chained_assignment = None
 
     print(f'Executing {job}...')
-
-    if job == 'dataset4j':
-        from dataset4j import create_dataset
-
-        create_dataset()
-
-    elif job == 'dataset4c':
-        from otherDatasets import core
-
-        core()
-
-    elif job == 'richedit':
-        dbDir = join(DATA_PATH, 'redis')
-        stopDB(dbDir, REDIS_PORT)
-        cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} RICHEDITSCRIPT "
-        output = shellCallTemplate(cmd)
-        logging.info(output)
-
-    elif job == 'actionSI':
-        from pairs import actionPairs, createPairs, importAction
-
-        matches = actionPairs()
-        createPairs(matches)
-        importAction()
-
-    elif job == 'compare':
-        # cmd = "mvn exec:java -f '/data/fixminer_source/'
-        # -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
-        # -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
-        # "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
-        cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} COMPARE "
-        output = shellCallTemplate4jar(cmd)
-        logging.info(output)
-
-    elif job == 'cluster':
-        from abstractPatch import cluster
-
-        dbDir = join(DATA_PATH, 'redis')
-        startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
-        cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions')
-
-    elif job == 'tokenSI':
-        from pairs import tokenPairs, importTokens
-
-        tokenPairs()
-        importTokens()
-
-    elif job == 'clusterTokens':
-        from abstractPatch import cluster
-
-        dbDir = join(DATA_PATH, 'redis')
-        startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
-        cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')
-
-    elif job == 'codeflaws':
-        from otherDatasets import codeflaws
-
-        codeflaws()
-
-    elif job == 'indexClusters':
-        from sprinferIndex import runSpinfer, test, divideCoccis, removeDuplicates
-
-        runSpinfer()
-        test()
-        divideCoccis()
-        removeDuplicates()
-
-        # from patchManyBugs import patchCore
-        # patchCore()
-        # # from patchManyBugs import patched
-        # # patched()
-        # from patchManyBugs import exportSosPatches
-        # exportSosPatches()
-        # from validate_manybugs import validate
-        #
-        # validate()
-
-    elif job == 'patternOperations':
-        from sprinferIndex import patternOperations
-
-        patternOperations()
-
-    elif job == 'patchManyBugs':
-        from patchManyBugs import buildAll
-
-        buildAll()
-
-        # from patchManyBugs import patchCore
-        # patchCore()
-        # # from patch_validate import patch_validate_mine
-        # # patch_validate_mine()
-        # from patchManyBugs import patched
-        # patched()
-        # from patchManyBugs import exportSosPatches
-        # exportSosPatches()
-
-    elif job == 'patchIntro':
-        from sprinferIndex import patchCoreIntro
-
-        patchCoreIntro()
-        # from sprinferIndex import patched
-        # patched()
-
-    elif job == 'validateIntro':
-        # from patch_validate_introClass2 import patch_validate
-        # patch_validate()
-        from test_patched_file import patch_validate
-
-        patch_validate()
-
-    elif job == 'checkCorrectIntro':
-        from test_patched_file import checkCorrect
-
-        checkCorrect()
-
-    elif job == 'manybugs':
-        from getManybugs import export
-
-        export()
-
-    elif job == 'validateMany':
-        from patch_validate import patch_validate
-
-        patch_validate()
-
-    elif job == 'introclass':
-        from getIntroClass import export
-
-        export()
-
-    elif job == 'stats':
-        from stats import statsNormal
-
-        statsNormal(True)
-
-    elif job == 'datasetDefects4J':
-        from defects4JDataset import core
-
-        core()
-
-    elif job == 'bug':
-        from bugstats import bStats
-
-        bStats()
-
-    elif job == 'defects4j':
-        from stats import defects4jStats
-
-        defects4jStats()
-
-    elif job == 'patterns':
-        from stats import exportAbstractPatterns
-
-        exportAbstractPatterns()
-
-    else:
-        logging.error('Unknown job %s', job)
+    JOBS[job]()