[O] Dynamic job parsing
This commit is contained in:
+49
-76
@@ -1,4 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shlex
|
||||
import sys
|
||||
import gzip
|
||||
import traceback
|
||||
@@ -24,6 +27,7 @@ import concurrent.futures
|
||||
|
||||
import time
|
||||
import math
|
||||
import yaml
|
||||
from collections import Counter
|
||||
|
||||
import datetime
|
||||
@@ -71,21 +75,9 @@ def setLogg():
|
||||
|
||||
|
||||
def setEnv(args):
|
||||
# env = args.env
|
||||
|
||||
# logging.info('Environment: %s',env)
|
||||
|
||||
os.environ["ROOT_DIR"] = args.root
|
||||
sys.path.append(args.root)
|
||||
|
||||
import yaml
|
||||
# if os.uname().nodename != '':
|
||||
# with open(join(os.environ["ROOT_DIR"], os.uname().nodename + ".config.yml"), 'r') as ymlfile:
|
||||
# cfg = yaml.load(ymlfile)
|
||||
# else:
|
||||
# with open(join(os.environ["ROOT_DIR"], "config.yml"), 'r') as ymlfile:
|
||||
# cfg = yaml.load(ymlfile)
|
||||
|
||||
with open(args.prop, 'r') as ymlfile:
|
||||
cfg = yaml.safe_load(ymlfile)
|
||||
# for section in cfg:
|
||||
@@ -93,70 +85,43 @@ def setEnv(args):
|
||||
# print(cfg['mysql'])
|
||||
# print(cfg['other'])
|
||||
|
||||
# os.environ["JDK7"] = cfg['java']['7home']
|
||||
os.environ["JDK8"] = cfg['java']['8home']
|
||||
os.environ["spinfer"] = cfg['spinfer']['home']
|
||||
os.environ["coccinelle"] = cfg['coccinelle']['home']
|
||||
os.environ["dataset"] = cfg['dataset']['inputPath']
|
||||
os.environ["REPO_PATH"] = cfg['dataset']['repo']
|
||||
os.environ["DATA_PATH"] = cfg['fixminer']['datapath']
|
||||
os.environ["PROJECT_TYPE"] = cfg['fixminer']['projectType']
|
||||
os.environ["PROJECT_LIST"] = cfg['fixminer']['projectList']
|
||||
os.environ["REDIS_PORT"] = str(cfg['fixminer']['portDumps'])
|
||||
env: dict[str, str] = {
|
||||
"ROOT_DIR": args.root,
|
||||
"JDK8": cfg['java']['8home'],
|
||||
"spinfer": cfg['spinfer']['home'],
|
||||
"coccinelle": cfg['coccinelle']['home'],
|
||||
"dataset": cfg['dataset']['inputPath'],
|
||||
"REPO_PATH": cfg['dataset']['repo'],
|
||||
"DATA_PATH": cfg['fixminer']['datapath'],
|
||||
"PROJECT_TYPE": cfg['fixminer']['projectType'],
|
||||
"PROJECT_LIST": cfg['fixminer']['projectList'],
|
||||
"REDIS_PORT": str(cfg['fixminer']['portDumps'])
|
||||
}
|
||||
|
||||
# import yaml
|
||||
#
|
||||
# with open(join(os.environ["ROOT_DIR"],"config.yml"), 'r') as ymlfile:
|
||||
# cfg = yaml.load(ymlfile)
|
||||
#
|
||||
# # for section in cfg:
|
||||
# # print(section)
|
||||
# # print(cfg['mysql'])
|
||||
# # print(cfg['other'])
|
||||
#
|
||||
# os.environ["JDK7"] = cfg['java']['7home']
|
||||
# os.environ["JDK8"] = cfg['java']['8home']
|
||||
# os.environ["D4JHOME"] = cfg['defects4j']['home']
|
||||
env["CODE_PATH"] = join(env["ROOT_DIR"], 'code/')
|
||||
env["COMMIT_DFS"] = join(env["DATA_PATH"], 'commitsDF/')
|
||||
env["SIMI_DIR"] = join(env["DATA_PATH"], 'simi/')
|
||||
env["DTM_PATH"] = join(env["DATA_PATH"], 'dtm/')
|
||||
env["SIMI_SINGLE"] = join(env["DATA_PATH"], 'simiSingle/')
|
||||
env["FEATURE_DIR"] = join(env["DATA_PATH"], 'features/')
|
||||
|
||||
os.environ["CODE_PATH"] = join(os.environ["ROOT_DIR"], 'code/')
|
||||
# os.environ["DATA_PATH"] = join(os.environ["ROOT_DIR"],'data/')
|
||||
# os.environ["REPO_PATH"] = join(os.environ["DATA_PATH"], 'gitrepo/')
|
||||
os.environ["COMMIT_DFS"] = join(os.environ["DATA_PATH"], 'commitsDF/')
|
||||
os.environ["SIMI_DIR"] = join(os.environ["DATA_PATH"], 'simi/')
|
||||
os.environ["DTM_PATH"] = join(os.environ["DATA_PATH"], 'dtm/')
|
||||
os.environ["SIMI_SINGLE"] = join(os.environ["DATA_PATH"], 'simiSingle/')
|
||||
os.environ["FEATURE_DIR"] = join(os.environ["DATA_PATH"], 'features/')
|
||||
env["BUG_POINT"] = join(env["DATA_PATH"], 'bugPoints/')
|
||||
env["DEFECTS4J"] = join(env["DATA_PATH"], 'defects4jdata/')
|
||||
|
||||
os.environ["BUG_POINT"] = join(os.environ["DATA_PATH"], 'bugPoints/')
|
||||
os.environ["DEFECTS4J"] = join(os.environ["DATA_PATH"], 'defects4jdata/')
|
||||
env["BUG_REPORT"] = join(env["DATA_PATH"], 'bugReports/')
|
||||
env["BUG_REPORT_FEATURES"] = join(env["DATA_PATH"], 'bugReportFeatures/')
|
||||
|
||||
os.environ["BUG_REPORT"] = join(os.environ["DATA_PATH"], 'bugReports/')
|
||||
os.environ["BUG_REPORT_FEATURES"] = join(os.environ["DATA_PATH"], 'bugReportFeatures/')
|
||||
# os.environ["PARSED_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesSingle/')
|
||||
# os.environ["PARSED_M_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesMulti/')
|
||||
env["PARSED"] = join(env["DATA_PATH"], 'parsedPj/')
|
||||
env["PARSED_DIR"] = join(env["DATA_PATH"], 'parsedFilesSingle/')
|
||||
env["COMMIT_FOLDER"] = join(env["DATA_PATH"], 'commits/')
|
||||
env["CLASSIFIER_DIR"] = join(env["DATA_PATH"], 'classifiers/')
|
||||
env["PREDICTION_DIR"] = join(env["DATA_PATH"], 'predictions/')
|
||||
env["DATASET_DIR"] = join(env["DATA_PATH"], 'datasets/')
|
||||
env["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
|
||||
|
||||
os.environ["PARSED"] = join(os.environ["DATA_PATH"], 'parsedPj/')
|
||||
os.environ["PARSED_DIR"] = join(os.environ["DATA_PATH"], 'parsedFilesSingle/')
|
||||
os.environ["COMMIT_FOLDER"] = join(os.environ["DATA_PATH"], 'commits/')
|
||||
os.environ["CLASSIFIER_DIR"] = join(os.environ["DATA_PATH"], 'classifiers/')
|
||||
os.environ["PREDICTION_DIR"] = join(os.environ["DATA_PATH"], 'predictions/')
|
||||
os.environ["DATASET_DIR"] = join(os.environ["DATA_PATH"], 'datasets/')
|
||||
os.environ["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
|
||||
os.environ.update(env)
|
||||
|
||||
logging.info('ROOT_DIR : %s', os.environ["ROOT_DIR"])
|
||||
logging.info('REPO_PATH : %s', os.environ["REPO_PATH"])
|
||||
logging.info('CODE_PATH : %s', os.environ["CODE_PATH"])
|
||||
logging.info('COMMIT_DFS : %s', os.environ["COMMIT_DFS"])
|
||||
# logging.info('SIMI_DIR : %s', os.environ["SIMI_DIR"])
|
||||
logging.info('BUG_POINT : %s', os.environ["BUG_POINT"])
|
||||
# logging.info('PARSED_DIR : %s', os.environ["PARSED_DIR"])
|
||||
logging.info('COMMIT_FOLDER : %s', os.environ["COMMIT_FOLDER"])
|
||||
# logging.info('DTM_PATH : %s', os.environ["DTM_PATH"])
|
||||
# logging.info('SIMI_SINGLE : %s', os.environ["SIMI_SINGLE"])
|
||||
logging.info('FEATURE_DIR : %s', os.environ["FEATURE_DIR"])
|
||||
logging.info('CLASSIFIER_DIR : %s', os.environ["CLASSIFIER_DIR"])
|
||||
logging.info('PREDICTION_DIR : %s', os.environ["PREDICTION_DIR"])
|
||||
logging.info('DATASET_DIR : %s', os.environ["DATASET_DIR"])
|
||||
return cfg, env
|
||||
|
||||
|
||||
def shellCallTemplate4jar(cmd, enc='utf-8'):
|
||||
@@ -178,21 +143,29 @@ def shellCallTemplate4jar(cmd, enc='utf-8'):
|
||||
|
||||
|
||||
def shellCallTemplate(cmd, enc='utf-8'):
    """Run ``cmd`` through the shell and return its captured standard output.

    Parameters:
        cmd: Command line handed to ``Popen`` with ``shell=True``.
        enc: Text encoding used to decode the child's stdout and stderr.

    Returns:
        The decoded stdout of the command. An empty string is returned when
        an unexpected exception prevented the output from being collected.

    Any text on stderr is treated as a failure, except when it contains
    'unknown revision or path not in the working tree', which is tolerated.
    On failure the captured streams are printed and the interpreter exits
    with the child's return code (1 when the child itself reported 0).
    """
    out, err = "", ""
    tolerated = 'unknown revision or path not in the working tree'
    try:
        logging.info(cmd)
        # NOTE: shell=True lets the shell interpret ``cmd``; callers must not
        # build it from untrusted input.
        with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
            out, err = p.communicate()
            if err and not re.search(tolerated, err):
                # CalledProcessError expects (returncode, cmd, output, stderr).
                # Fall back to 1 so that "stderr but exit status 0" still
                # terminates with a failing status.
                raise CalledProcessError(p.returncode or 1, cmd, output=out, stderr=err)
    except CalledProcessError as e:
        print(f'Error while executing {cmd}')
        if out:
            print(f'STDOUT:\n> {out}')
        if err:
            print(f'STDERR:\n> {err}')
        traceback.print_exc()
        sys.exit(e.returncode)
    except Exception:
        # Best effort: report the problem and return whatever was captured.
        traceback.print_exc()

    return out
|
||||
|
||||
|
||||
def getChildMem(pid, children):
|
||||
|
||||
+201
-160
@@ -1,3 +1,5 @@
|
||||
import inspect
|
||||
|
||||
from common.commons import *
|
||||
import argparse
|
||||
|
||||
@@ -9,6 +11,7 @@ def parse_args():
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Automatically set root to be the path of the current file
|
||||
args.root = str(Path(__file__).parent.absolute())
|
||||
|
||||
if args.root is None or args.job is None or args.prop is None:
|
||||
@@ -17,172 +20,210 @@ def parse_args():
|
||||
return args
|
||||
|
||||
|
||||
def job_dataset4j():
    """Job 'dataset4j': call ``dataset4j.create_dataset`` with the global ``cfg``."""
    import dataset4j

    dataset4j.create_dataset(cfg)
|
||||
|
||||
|
||||
def job_dataset4c():
    """Job 'dataset4c': call ``otherDatasets.core``."""
    import otherDatasets

    otherDatasets.core()
|
||||
|
||||
|
||||
def job_richedit():
    """Job 'richedit': stop the redis DB, then run the jar in RICHEDITSCRIPT mode.

    The jar's captured output is written to the log.
    """
    stopDB(join(DATA_PATH, 'redis'), REDIS_PORT)
    cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} RICHEDITSCRIPT "
    logging.info(shellCallTemplate(cmd))
|
||||
|
||||
|
||||
def job_actionSI():
    """Job 'actionSI': compute action pairs, create them, then import the actions."""
    import pairs

    pairs.createPairs(pairs.actionPairs())
    pairs.importAction()
|
||||
|
||||
|
||||
def job_compare():
    """Job 'compare': run the jar in COMPARE mode and log its output."""
    # Previous maven-based invocation, kept for reference:
    # cmd = "mvn exec:java -f '/data/fixminer_source/'
    # -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
    # -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
    # "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
    cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} COMPARE "
    logging.info(shellCallTemplate4jar(cmd))
|
||||
|
||||
|
||||
def job_cluster():
    """Job 'cluster': start the redis DB and cluster the 'actions' data."""
    import abstractPatch

    startDB(join(DATA_PATH, 'redis'), REDIS_PORT, PROJECT_TYPE)
    actions_dir = join(DATA_PATH, 'actions')
    pairs_dir = join(DATA_PATH, 'pairs')
    abstractPatch.cluster(actions_dir, pairs_dir, 'actions')
|
||||
|
||||
|
||||
def job_tokenSI():
    """Job 'tokenSI': build the token pairs, then import the tokens."""
    import pairs

    pairs.tokenPairs()
    pairs.importTokens()
|
||||
|
||||
|
||||
def job_clusterTokens():
    """Job 'clusterTokens': start the redis DB and cluster the 'tokens' data."""
    import abstractPatch

    startDB(join(DATA_PATH, 'redis'), REDIS_PORT, PROJECT_TYPE)
    tokens_dir = join(DATA_PATH, 'tokens')
    pairs_dir = join(DATA_PATH, 'pairsToken')
    abstractPatch.cluster(tokens_dir, pairs_dir, 'tokens')
|
||||
|
||||
|
||||
def job_codeflaws():
    """Job 'codeflaws': call ``otherDatasets.codeflaws``."""
    import otherDatasets

    otherDatasets.codeflaws()
|
||||
|
||||
|
||||
def job_indexClusters():
    """Job 'indexClusters': run the four ``sprinferIndex`` steps in order.

    Steps: ``runSpinfer``, ``test``, ``divideCoccis``, ``removeDuplicates``.
    """
    import sprinferIndex

    sprinferIndex.runSpinfer()
    sprinferIndex.test()
    sprinferIndex.divideCoccis()
    sprinferIndex.removeDuplicates()

    # Disabled follow-up steps, kept for reference:
    # from patchManyBugs import patchCore
    # patchCore()
    # # from patchManyBugs import patched
    # # patched()
    # from patchManyBugs import exportSosPatches
    # exportSosPatches()
    # from validate_manybugs import validate
    #
    # validate()
|
||||
|
||||
|
||||
def job_patternOperations():
    """Job 'patternOperations': call ``sprinferIndex.patternOperations``."""
    import sprinferIndex

    sprinferIndex.patternOperations()
|
||||
|
||||
|
||||
def job_patchManyBugs():
    """Job 'patchManyBugs': call ``patchManyBugs.buildAll``."""
    import patchManyBugs

    patchManyBugs.buildAll()

    # Disabled follow-up steps, kept for reference:
    # from patchManyBugs import patchCore
    # patchCore()
    # # from patch_validate import patch_validate_mine
    # # patch_validate_mine()
    # from patchManyBugs import patched
    # patched()
    # from patchManyBugs import exportSosPatches
    # exportSosPatches()
|
||||
|
||||
|
||||
def job_patchIntro():
    """Job 'patchIntro': call ``sprinferIndex.patchCoreIntro``."""
    import sprinferIndex

    sprinferIndex.patchCoreIntro()
    # Disabled follow-up step, kept for reference:
    # from sprinferIndex import patched
    # patched()
|
||||
|
||||
|
||||
def job_validateIntro():
    """Job 'validateIntro': call ``test_patched_file.patch_validate``."""
    # Earlier implementation, kept for reference:
    # from patch_validate_introClass2 import patch_validate
    # patch_validate()
    import test_patched_file

    test_patched_file.patch_validate()
|
||||
|
||||
|
||||
def job_checkCorrectIntro():
    """Job 'checkCorrectIntro': call ``test_patched_file.checkCorrect``."""
    import test_patched_file

    test_patched_file.checkCorrect()
|
||||
|
||||
|
||||
def job_manybugs():
    """Job 'manybugs': call ``getManybugs.export``."""
    import getManybugs

    getManybugs.export()
|
||||
|
||||
|
||||
def job_validateMany():
    """Job 'validateMany': call ``patch_validate.patch_validate``."""
    import patch_validate as validation

    validation.patch_validate()
|
||||
|
||||
|
||||
def job_introclass():
    """Job 'introclass': call ``getIntroClass.export``."""
    import getIntroClass

    getIntroClass.export()
|
||||
|
||||
|
||||
def job_stats():
    """Job 'stats': call ``stats.statsNormal(True)``."""
    import stats

    stats.statsNormal(True)
|
||||
|
||||
|
||||
def job_datasetDefects4J():
    """Job 'datasetDefects4J': call ``defects4JDataset.core``."""
    import defects4JDataset

    defects4JDataset.core()
|
||||
|
||||
|
||||
def job_bug():
    """Job 'bug': call ``bugstats.bStats``."""
    import bugstats

    bugstats.bStats()
|
||||
|
||||
|
||||
def job_defects4j():
    """Job 'defects4j': call ``stats.defects4jStats``."""
    import stats

    stats.defects4jStats()
|
||||
|
||||
|
||||
def job_patterns():
    """Job 'patterns': call ``stats.exportAbstractPatterns``."""
    import stats

    stats.exportAbstractPatterns()
|
||||
|
||||
|
||||
def job_pipeline():
    """Job 'pipeline': run a fixed sequence of jobs, announcing each one.

    Each stage is announced on stdout with its 1-based position and function
    name before it is called. ``job_compare`` appears twice in the sequence.
    """
    stages = (
        job_dataset4j,
        job_richedit,
        job_actionSI,
        job_compare,
        job_cluster,
        job_tokenSI,
        job_compare,
        job_stats,
        job_patterns,
    )

    i = 0
    for f in stages:
        print(f'Running {i + 1}: {f.__name__}...')
        f()
        i += 1
|
||||
|
||||
|
||||
# Registry of runnable jobs: maps a job name (the function name with its
# 'job_' prefix removed) to the function of this module that implements it.
JOBS = {
    name[4:]: f
    for name, f in inspect.getmembers(sys.modules[__name__], inspect.isfunction)
    if name.startswith('job_')
}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
setLogg()
|
||||
|
||||
setEnv(args)
|
||||
cfg, _ = setEnv(args)
|
||||
|
||||
job = args.job
|
||||
# Parse job
|
||||
job: str = args.job.strip()
|
||||
if job not in JOBS:
|
||||
print(f'Job "{job}" is not supported. Available jobs: {", ".join(JOBS.keys())}')
|
||||
exit(-1)
|
||||
|
||||
from python.settings import *
|
||||
from settings import *
|
||||
pd.options.mode.chained_assignment = None
|
||||
|
||||
print(f'Executing {job}...')
|
||||
|
||||
if job == 'dataset4j':
|
||||
from dataset4j import create_dataset
|
||||
|
||||
create_dataset()
|
||||
|
||||
elif job == 'dataset4c':
|
||||
from otherDatasets import core
|
||||
|
||||
core()
|
||||
|
||||
elif job == 'richedit':
|
||||
dbDir = join(DATA_PATH, 'redis')
|
||||
stopDB(dbDir, REDIS_PORT)
|
||||
cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} RICHEDITSCRIPT "
|
||||
output = shellCallTemplate(cmd)
|
||||
logging.info(output)
|
||||
|
||||
elif job == 'actionSI':
|
||||
from pairs import actionPairs, createPairs, importAction
|
||||
|
||||
matches = actionPairs()
|
||||
createPairs(matches)
|
||||
importAction()
|
||||
|
||||
elif job == 'compare':
|
||||
# cmd = "mvn exec:java -f '/data/fixminer_source/'
|
||||
# -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
|
||||
# -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
|
||||
# "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
|
||||
cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} COMPARE "
|
||||
output = shellCallTemplate4jar(cmd)
|
||||
logging.info(output)
|
||||
|
||||
elif job == 'cluster':
|
||||
from abstractPatch import cluster
|
||||
|
||||
dbDir = join(DATA_PATH, 'redis')
|
||||
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
|
||||
cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions')
|
||||
|
||||
elif job == 'tokenSI':
|
||||
from pairs import tokenPairs, importTokens
|
||||
|
||||
tokenPairs()
|
||||
importTokens()
|
||||
|
||||
elif job == 'clusterTokens':
|
||||
from abstractPatch import cluster
|
||||
|
||||
dbDir = join(DATA_PATH, 'redis')
|
||||
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
|
||||
cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')
|
||||
|
||||
elif job == 'codeflaws':
|
||||
from otherDatasets import codeflaws
|
||||
|
||||
codeflaws()
|
||||
|
||||
elif job == 'indexClusters':
|
||||
from sprinferIndex import runSpinfer, test, divideCoccis, removeDuplicates
|
||||
|
||||
runSpinfer()
|
||||
test()
|
||||
divideCoccis()
|
||||
removeDuplicates()
|
||||
|
||||
# from patchManyBugs import patchCore
|
||||
# patchCore()
|
||||
# # from patchManyBugs import patched
|
||||
# # patched()
|
||||
# from patchManyBugs import exportSosPatches
|
||||
# exportSosPatches()
|
||||
# from validate_manybugs import validate
|
||||
#
|
||||
# validate()
|
||||
|
||||
elif job == 'patternOperations':
|
||||
from sprinferIndex import patternOperations
|
||||
|
||||
patternOperations()
|
||||
|
||||
elif job == 'patchManyBugs':
|
||||
from patchManyBugs import buildAll
|
||||
|
||||
buildAll()
|
||||
|
||||
# from patchManyBugs import patchCore
|
||||
# patchCore()
|
||||
# # from patch_validate import patch_validate_mine
|
||||
# # patch_validate_mine()
|
||||
# from patchManyBugs import patched
|
||||
# patched()
|
||||
# from patchManyBugs import exportSosPatches
|
||||
# exportSosPatches()
|
||||
|
||||
elif job == 'patchIntro':
|
||||
from sprinferIndex import patchCoreIntro
|
||||
|
||||
patchCoreIntro()
|
||||
# from sprinferIndex import patched
|
||||
# patched()
|
||||
|
||||
elif job == 'validateIntro':
|
||||
# from patch_validate_introClass2 import patch_validate
|
||||
# patch_validate()
|
||||
from test_patched_file import patch_validate
|
||||
|
||||
patch_validate()
|
||||
|
||||
elif job == 'checkCorrectIntro':
|
||||
from test_patched_file import checkCorrect
|
||||
|
||||
checkCorrect()
|
||||
|
||||
elif job == 'manybugs':
|
||||
from getManybugs import export
|
||||
|
||||
export()
|
||||
|
||||
elif job == 'validateMany':
|
||||
from patch_validate import patch_validate
|
||||
|
||||
patch_validate()
|
||||
|
||||
elif job == 'introclass':
|
||||
from getIntroClass import export
|
||||
|
||||
export()
|
||||
|
||||
elif job == 'stats':
|
||||
from stats import statsNormal
|
||||
|
||||
statsNormal(True)
|
||||
|
||||
elif job == 'datasetDefects4J':
|
||||
from defects4JDataset import core
|
||||
|
||||
core()
|
||||
|
||||
elif job == 'bug':
|
||||
from bugstats import bStats
|
||||
|
||||
bStats()
|
||||
|
||||
elif job == 'defects4j':
|
||||
from stats import defects4jStats
|
||||
|
||||
defects4jStats()
|
||||
|
||||
elif job == 'patterns':
|
||||
from stats import exportAbstractPatterns
|
||||
|
||||
exportAbstractPatterns()
|
||||
|
||||
else:
|
||||
logging.error('Unknown job %s', job)
|
||||
JOBS[job]()
|
||||
|
||||
Reference in New Issue
Block a user