[O] Dynamic job parsing

This commit is contained in:
Azalea (on HyDEV-Daisy)
2022-05-09 01:50:24 -04:00
parent 3a4a81d4e6
commit 039fe87d29
2 changed files with 250 additions and 236 deletions
+49 -76
View File
@@ -1,4 +1,7 @@
from __future__ import annotations
import logging
import shlex
import sys
import gzip
import traceback
@@ -24,6 +27,7 @@ import concurrent.futures
import time
import math
import yaml
from collections import Counter
import datetime
@@ -71,21 +75,9 @@ def setLogg():
def setEnv(args):
# env = args.env
# logging.info('Environment: %s',env)
os.environ["ROOT_DIR"] = args.root
sys.path.append(args.root)
import yaml
# if os.uname().nodename != '':
# with open(join(os.environ["ROOT_DIR"], os.uname().nodename + ".config.yml"), 'r') as ymlfile:
# cfg = yaml.load(ymlfile)
# else:
# with open(join(os.environ["ROOT_DIR"], "config.yml"), 'r') as ymlfile:
# cfg = yaml.load(ymlfile)
with open(args.prop, 'r') as ymlfile:
cfg = yaml.safe_load(ymlfile)
# for section in cfg:
@@ -93,70 +85,43 @@ def setEnv(args):
# print(cfg['mysql'])
# print(cfg['other'])
# os.environ["JDK7"] = cfg['java']['7home']
os.environ["JDK8"] = cfg['java']['8home']
os.environ["spinfer"] = cfg['spinfer']['home']
os.environ["coccinelle"] = cfg['coccinelle']['home']
os.environ["dataset"] = cfg['dataset']['inputPath']
os.environ["REPO_PATH"] = cfg['dataset']['repo']
os.environ["DATA_PATH"] = cfg['fixminer']['datapath']
os.environ["PROJECT_TYPE"] = cfg['fixminer']['projectType']
os.environ["PROJECT_LIST"] = cfg['fixminer']['projectList']
os.environ["REDIS_PORT"] = str(cfg['fixminer']['portDumps'])
env: dict[str, str] = {
"ROOT_DIR": args.root,
"JDK8": cfg['java']['8home'],
"spinfer": cfg['spinfer']['home'],
"coccinelle": cfg['coccinelle']['home'],
"dataset": cfg['dataset']['inputPath'],
"REPO_PATH": cfg['dataset']['repo'],
"DATA_PATH": cfg['fixminer']['datapath'],
"PROJECT_TYPE": cfg['fixminer']['projectType'],
"PROJECT_LIST": cfg['fixminer']['projectList'],
"REDIS_PORT": str(cfg['fixminer']['portDumps'])
}
# import yaml
#
# with open(join(os.environ["ROOT_DIR"],"config.yml"), 'r') as ymlfile:
# cfg = yaml.load(ymlfile)
#
# # for section in cfg:
# # print(section)
# # print(cfg['mysql'])
# # print(cfg['other'])
#
# os.environ["JDK7"] = cfg['java']['7home']
# os.environ["JDK8"] = cfg['java']['8home']
# os.environ["D4JHOME"] = cfg['defects4j']['home']
env["CODE_PATH"] = join(env["ROOT_DIR"], 'code/')
env["COMMIT_DFS"] = join(env["DATA_PATH"], 'commitsDF/')
env["SIMI_DIR"] = join(env["DATA_PATH"], 'simi/')
env["DTM_PATH"] = join(env["DATA_PATH"], 'dtm/')
env["SIMI_SINGLE"] = join(env["DATA_PATH"], 'simiSingle/')
env["FEATURE_DIR"] = join(env["DATA_PATH"], 'features/')
os.environ["CODE_PATH"] = join(os.environ["ROOT_DIR"], 'code/')
# os.environ["DATA_PATH"] = join(os.environ["ROOT_DIR"],'data/')
# os.environ["REPO_PATH"] = join(os.environ["DATA_PATH"], 'gitrepo/')
os.environ["COMMIT_DFS"] = join(os.environ["DATA_PATH"], 'commitsDF/')
os.environ["SIMI_DIR"] = join(os.environ["DATA_PATH"], 'simi/')
os.environ["DTM_PATH"] = join(os.environ["DATA_PATH"], 'dtm/')
os.environ["SIMI_SINGLE"] = join(os.environ["DATA_PATH"], 'simiSingle/')
os.environ["FEATURE_DIR"] = join(os.environ["DATA_PATH"], 'features/')
env["BUG_POINT"] = join(env["DATA_PATH"], 'bugPoints/')
env["DEFECTS4J"] = join(env["DATA_PATH"], 'defects4jdata/')
os.environ["BUG_POINT"] = join(os.environ["DATA_PATH"], 'bugPoints/')
os.environ["DEFECTS4J"] = join(os.environ["DATA_PATH"], 'defects4jdata/')
env["BUG_REPORT"] = join(env["DATA_PATH"], 'bugReports/')
env["BUG_REPORT_FEATURES"] = join(env["DATA_PATH"], 'bugReportFeatures/')
os.environ["BUG_REPORT"] = join(os.environ["DATA_PATH"], 'bugReports/')
os.environ["BUG_REPORT_FEATURES"] = join(os.environ["DATA_PATH"], 'bugReportFeatures/')
# os.environ["PARSED_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesSingle/')
# os.environ["PARSED_M_DIR"] = join(os.environ["CODE_PATH"], 'parsedFilesMulti/')
env["PARSED"] = join(env["DATA_PATH"], 'parsedPj/')
env["PARSED_DIR"] = join(env["DATA_PATH"], 'parsedFilesSingle/')
env["COMMIT_FOLDER"] = join(env["DATA_PATH"], 'commits/')
env["CLASSIFIER_DIR"] = join(env["DATA_PATH"], 'classifiers/')
env["PREDICTION_DIR"] = join(env["DATA_PATH"], 'predictions/')
env["DATASET_DIR"] = join(env["DATA_PATH"], 'datasets/')
env["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
os.environ["PARSED"] = join(os.environ["DATA_PATH"], 'parsedPj/')
os.environ["PARSED_DIR"] = join(os.environ["DATA_PATH"], 'parsedFilesSingle/')
os.environ["COMMIT_FOLDER"] = join(os.environ["DATA_PATH"], 'commits/')
os.environ["CLASSIFIER_DIR"] = join(os.environ["DATA_PATH"], 'classifiers/')
os.environ["PREDICTION_DIR"] = join(os.environ["DATA_PATH"], 'predictions/')
os.environ["DATASET_DIR"] = join(os.environ["DATA_PATH"], 'datasets/')
os.environ["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
os.environ.update(env)
logging.info('ROOT_DIR : %s', os.environ["ROOT_DIR"])
logging.info('REPO_PATH : %s', os.environ["REPO_PATH"])
logging.info('CODE_PATH : %s', os.environ["CODE_PATH"])
logging.info('COMMIT_DFS : %s', os.environ["COMMIT_DFS"])
# logging.info('SIMI_DIR : %s', os.environ["SIMI_DIR"])
logging.info('BUG_POINT : %s', os.environ["BUG_POINT"])
# logging.info('PARSED_DIR : %s', os.environ["PARSED_DIR"])
logging.info('COMMIT_FOLDER : %s', os.environ["COMMIT_FOLDER"])
# logging.info('DTM_PATH : %s', os.environ["DTM_PATH"])
# logging.info('SIMI_SINGLE : %s', os.environ["SIMI_SINGLE"])
logging.info('FEATURE_DIR : %s', os.environ["FEATURE_DIR"])
logging.info('CLASSIFIER_DIR : %s', os.environ["CLASSIFIER_DIR"])
logging.info('PREDICTION_DIR : %s', os.environ["PREDICTION_DIR"])
logging.info('DATASET_DIR : %s', os.environ["DATASET_DIR"])
return cfg, env
def shellCallTemplate4jar(cmd, enc='utf-8'):
@@ -178,21 +143,29 @@ def shellCallTemplate4jar(cmd, enc='utf-8'):
def shellCallTemplate(cmd, enc='utf-8'):
    """Run ``cmd`` through the shell and return its captured stdout.

    Any stderr output is treated as a failure, except when it contains
    "unknown revision or path not in the working tree", which is tolerated.
    On failure the command, stdout and stderr are printed and the interpreter
    exits with the child's return code (1 when the child exited with 0 but
    still wrote to stderr). Any other exception is printed and whatever
    stdout was captured so far is returned.

    :param cmd: shell command line to execute
    :param enc: encoding used to decode the child's stdout and stderr
    :return: the child's stdout as a string
    """
    out, err = "", ""
    try:
        logging.info(cmd)
        with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
            out, err = p.communicate()
            if err and not re.search('unknown revision or path not in the working tree', err):
                # CalledProcessError takes (returncode, cmd); keep the real exit
                # status so the handler below can exit with a numeric code.
                raise CalledProcessError(p.returncode or 1, cmd, output=out, stderr=err)
    except CalledProcessError as e:
        print(f'Error while executing {cmd}')
        if out:
            print(f'STDOUT:\n> {out}')
        if err:
            print(f'STDERR:\n> {err}')
        traceback.print_exc()
        sys.exit(e.returncode)
    except Exception:
        # Best effort: report the problem and fall through to return what we have
        traceback.print_exc()
    return out
def getChildMem(pid, children):
+201 -160
View File
@@ -1,3 +1,5 @@
import inspect
from common.commons import *
import argparse
@@ -9,6 +11,7 @@ def parse_args():
args = parser.parse_args()
# Automatically set root to be the path of the current file
args.root = str(Path(__file__).parent.absolute())
if args.root is None or args.job is None or args.prop is None:
@@ -17,172 +20,210 @@ def parse_args():
return args
def job_dataset4j():
    """Run the ``dataset4j`` job: call ``create_dataset`` with the module-level ``cfg``."""
    # The import runs when the job is called, not when this module is loaded
    from dataset4j import create_dataset as build_dataset
    build_dataset(cfg)
def job_dataset4c():
    """Run the ``dataset4c`` job by calling ``otherDatasets.core``."""
    # The import runs when the job is called, not when this module is loaded
    from otherDatasets import core as run_core
    run_core()
def job_richedit():
    """Run the ``richedit`` job: call ``stopDB``, then run the jar in RICHEDITSCRIPT mode.

    Reads the module-level ``DATA_PATH``, ``REDIS_PORT``, ``jdk8``, ``JAR_PATH``
    and ``args``. The command's output is logged at INFO level.
    """
    import shlex

    dbDir = join(DATA_PATH, 'redis')
    stopDB(dbDir, REDIS_PORT)
    # The command is executed by a shell, so quote every interpolated path:
    # spaces or metacharacters in them must not split or alter the command.
    cmd = (f"JAVA_HOME={shlex.quote(str(jdk8))} java -jar {shlex.quote(str(JAR_PATH))} "
           f"{shlex.quote(str(args.prop))} RICHEDITSCRIPT ")
    output = shellCallTemplate(cmd)
    logging.info(output)
def job_actionSI():
    """Run the ``actionSI`` job: compute action pairs, create the pairs, then import actions."""
    # The import runs when the job is called, not when this module is loaded
    from pairs import actionPairs, createPairs, importAction
    createPairs(actionPairs())
    importAction()
def job_compare():
    """Run the ``compare`` job: run the jar in COMPARE mode and log its output.

    Reads the module-level ``jdk8``, ``JAR_PATH`` and ``args``.
    """
    import shlex

    # cmd = "mvn exec:java -f '/data/fixminer_source/'
    # -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
    # -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
    # "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"

    # Quote every interpolated path so spaces or shell metacharacters in them
    # cannot split or alter the command (the JAVA_HOME=... prefix implies the
    # command is interpreted by a shell).
    cmd = (f"JAVA_HOME={shlex.quote(str(jdk8))} java -jar {shlex.quote(str(JAR_PATH))} "
           f"{shlex.quote(str(args.prop))} COMPARE ")
    output = shellCallTemplate4jar(cmd)
    logging.info(output)
def job_cluster():
    """Run the ``cluster`` job: call ``startDB``, then cluster the ``actions`` data.

    Reads the module-level ``DATA_PATH``, ``REDIS_PORT`` and ``PROJECT_TYPE``.
    """
    # The import runs when the job is called, not when this module is loaded
    from abstractPatch import cluster
    startDB(join(DATA_PATH, 'redis'), REDIS_PORT, PROJECT_TYPE)
    actions_dir = join(DATA_PATH, 'actions')
    pairs_dir = join(DATA_PATH, 'pairs')
    cluster(actions_dir, pairs_dir, 'actions')
def job_tokenSI():
    """Run the ``tokenSI`` job: call ``tokenPairs`` followed by ``importTokens``."""
    # The import runs when the job is called, not when this module is loaded
    from pairs import tokenPairs, importTokens
    for step in (tokenPairs, importTokens):
        step()
def job_clusterTokens():
    """Run the ``clusterTokens`` job: call ``startDB``, then cluster the ``tokens`` data.

    Reads the module-level ``DATA_PATH``, ``REDIS_PORT`` and ``PROJECT_TYPE``.
    """
    # The import runs when the job is called, not when this module is loaded
    from abstractPatch import cluster
    startDB(join(DATA_PATH, 'redis'), REDIS_PORT, PROJECT_TYPE)
    tokens_dir = join(DATA_PATH, 'tokens')
    pairs_dir = join(DATA_PATH, 'pairsToken')
    cluster(tokens_dir, pairs_dir, 'tokens')
def job_codeflaws():
    """Run the ``codeflaws`` job by calling ``otherDatasets.codeflaws``."""
    # The import runs when the job is called, not when this module is loaded
    from otherDatasets import codeflaws as run_codeflaws
    run_codeflaws()
def job_indexClusters():
    """Run the ``indexClusters`` job.

    Calls, in order, ``runSpinfer``, ``test``, ``divideCoccis`` and
    ``removeDuplicates`` from ``sprinferIndex``.
    """
    # The import runs when the job is called, not when this module is loaded
    from sprinferIndex import runSpinfer, test, divideCoccis, removeDuplicates
    for stage in (runSpinfer, test, divideCoccis, removeDuplicates):
        stage()

    # from patchManyBugs import patchCore
    # patchCore()
    # # from patchManyBugs import patched
    # # patched()
    # from patchManyBugs import exportSosPatches
    # exportSosPatches()
    # from validate_manybugs import validate
    #
    # validate()
def job_patternOperations():
    """Run the ``patternOperations`` job by calling ``sprinferIndex.patternOperations``."""
    # The import runs when the job is called, not when this module is loaded
    from sprinferIndex import patternOperations as run_pattern_operations
    run_pattern_operations()
def job_patchManyBugs():
    """Run the ``patchManyBugs`` job by calling ``patchManyBugs.buildAll``."""
    # The import runs when the job is called, not when this module is loaded
    from patchManyBugs import buildAll as build_all
    build_all()

    # from patchManyBugs import patchCore
    # patchCore()
    # # from patch_validate import patch_validate_mine
    # # patch_validate_mine()
    # from patchManyBugs import patched
    # patched()
    # from patchManyBugs import exportSosPatches
    # exportSosPatches()
def job_patchIntro():
    """Run the ``patchIntro`` job by calling ``sprinferIndex.patchCoreIntro``."""
    # The import runs when the job is called, not when this module is loaded
    from sprinferIndex import patchCoreIntro as patch_intro
    patch_intro()

    # from sprinferIndex import patched
    # patched()
def job_validateIntro():
    """Run the ``validateIntro`` job by calling ``test_patched_file.patch_validate``."""
    # from patch_validate_introClass2 import patch_validate
    # patch_validate()

    # The import runs when the job is called, not when this module is loaded
    from test_patched_file import patch_validate as validate_patches
    validate_patches()
def job_checkCorrectIntro():
    """Run the ``checkCorrectIntro`` job by calling ``test_patched_file.checkCorrect``."""
    # The import runs when the job is called, not when this module is loaded
    from test_patched_file import checkCorrect as check_correct
    check_correct()
def job_manybugs():
    """Run the ``manybugs`` job by calling ``getManybugs.export``."""
    # The import runs when the job is called, not when this module is loaded
    from getManybugs import export as export_manybugs
    export_manybugs()
def job_validateMany():
    """Run the ``validateMany`` job by calling ``patch_validate.patch_validate``."""
    # The import runs when the job is called, not when this module is loaded
    from patch_validate import patch_validate as validate_patches
    validate_patches()
def job_introclass():
    """Run the ``introclass`` job by calling ``getIntroClass.export``."""
    # The import runs when the job is called, not when this module is loaded
    from getIntroClass import export as export_introclass
    export_introclass()
def job_stats():
    """Run the ``stats`` job by calling ``stats.statsNormal(True)``."""
    # The import runs when the job is called, not when this module is loaded
    from stats import statsNormal as stats_normal
    stats_normal(True)
def job_datasetDefects4J():
    """Run the ``datasetDefects4J`` job by calling ``defects4JDataset.core``."""
    # The import runs when the job is called, not when this module is loaded
    from defects4JDataset import core as run_core
    run_core()
def job_bug():
    """Run the ``bug`` job by calling ``bugstats.bStats``."""
    # The import runs when the job is called, not when this module is loaded
    from bugstats import bStats as bug_stats
    bug_stats()
def job_defects4j():
    """Run the ``defects4j`` job by calling ``stats.defects4jStats``."""
    # The import runs when the job is called, not when this module is loaded
    from stats import defects4jStats as defects4j_stats
    defects4j_stats()
def job_patterns():
    """Run the ``patterns`` job by calling ``stats.exportAbstractPatterns``."""
    # The import runs when the job is called, not when this module is loaded
    from stats import exportAbstractPatterns as export_patterns
    export_patterns()
def job_pipeline():
    """Run a fixed sequence of jobs in order, printing a progress line before each one."""
    stages = (
        job_dataset4j,
        job_richedit,
        job_actionSI,
        job_compare,
        job_cluster,
        job_tokenSI,
        job_compare,
        job_stats,
        job_patterns,
    )
    # Progress numbering is 1-based
    for step_no, stage in enumerate(stages, start=1):
        print(f'Running {step_no}: {stage.__name__}...')
        stage()
# Registry of runnable jobs: every function in this module whose name starts
# with "job_" is exposed under that name with the prefix removed.
JOBS = {
    fn_name[len('job_'):]: fn
    for fn_name, fn in inspect.getmembers(sys.modules[__name__], inspect.isfunction)
    if fn_name.startswith('job_')
}
if __name__ == '__main__':
args = parse_args()
setLogg()
setEnv(args)
cfg, _ = setEnv(args)
job = args.job
# Parse job
job: str = args.job.strip()
if job not in JOBS:
print(f'Job "{job}" is not supported. Available jobs: {", ".join(JOBS.keys())}')
exit(-1)
from python.settings import *
from settings import *
pd.options.mode.chained_assignment = None
print(f'Executing {job}...')
if job == 'dataset4j':
from dataset4j import create_dataset
create_dataset()
elif job == 'dataset4c':
from otherDatasets import core
core()
elif job == 'richedit':
dbDir = join(DATA_PATH, 'redis')
stopDB(dbDir, REDIS_PORT)
cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} RICHEDITSCRIPT "
output = shellCallTemplate(cmd)
logging.info(output)
elif job == 'actionSI':
from pairs import actionPairs, createPairs, importAction
matches = actionPairs()
createPairs(matches)
importAction()
elif job == 'compare':
# cmd = "mvn exec:java -f '/data/fixminer_source/'
# -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees'
# -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " +
# "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
cmd = f"JAVA_HOME='{jdk8}' java -jar {JAR_PATH} {args.prop} COMPARE "
output = shellCallTemplate4jar(cmd)
logging.info(output)
elif job == 'cluster':
from abstractPatch import cluster
dbDir = join(DATA_PATH, 'redis')
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
cluster(join(DATA_PATH, 'actions'), join(DATA_PATH, 'pairs'), 'actions')
elif job == 'tokenSI':
from pairs import tokenPairs, importTokens
tokenPairs()
importTokens()
elif job == 'clusterTokens':
from abstractPatch import cluster
dbDir = join(DATA_PATH, 'redis')
startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'), 'tokens')
elif job == 'codeflaws':
from otherDatasets import codeflaws
codeflaws()
elif job == 'indexClusters':
from sprinferIndex import runSpinfer, test, divideCoccis, removeDuplicates
runSpinfer()
test()
divideCoccis()
removeDuplicates()
# from patchManyBugs import patchCore
# patchCore()
# # from patchManyBugs import patched
# # patched()
# from patchManyBugs import exportSosPatches
# exportSosPatches()
# from validate_manybugs import validate
#
# validate()
elif job == 'patternOperations':
from sprinferIndex import patternOperations
patternOperations()
elif job == 'patchManyBugs':
from patchManyBugs import buildAll
buildAll()
# from patchManyBugs import patchCore
# patchCore()
# # from patch_validate import patch_validate_mine
# # patch_validate_mine()
# from patchManyBugs import patched
# patched()
# from patchManyBugs import exportSosPatches
# exportSosPatches()
elif job == 'patchIntro':
from sprinferIndex import patchCoreIntro
patchCoreIntro()
# from sprinferIndex import patched
# patched()
elif job == 'validateIntro':
# from patch_validate_introClass2 import patch_validate
# patch_validate()
from test_patched_file import patch_validate
patch_validate()
elif job == 'checkCorrectIntro':
from test_patched_file import checkCorrect
checkCorrect()
elif job == 'manybugs':
from getManybugs import export
export()
elif job == 'validateMany':
from patch_validate import patch_validate
patch_validate()
elif job == 'introclass':
from getIntroClass import export
export()
elif job == 'stats':
from stats import statsNormal
statsNormal(True)
elif job == 'datasetDefects4J':
from defects4JDataset import core
core()
elif job == 'bug':
from bugstats import bStats
bStats()
elif job == 'defects4j':
from stats import defects4jStats
defects4jStats()
elif job == 'patterns':
from stats import exportAbstractPatterns
exportAbstractPatterns()
else:
logging.error('Unknown job %s', job)
JOBS[job]()