[O] Reformat, optimize, add comments

This commit is contained in:
Azalea (on HyDEV-Daisy)
2022-05-08 23:54:51 -04:00
parent 5eca74116e
commit bfbd5f8a2a
4 changed files with 275 additions and 233 deletions
+119 -112
View File
@@ -1,7 +1,8 @@
import logging
import sys
import gzip
from typing import Union
import numpy as np
from tqdm import tqdm
import shutil
@@ -28,16 +29,18 @@ import datetime
import subprocess
from pathlib import Path
PathLike = Union[os.PathLike, str]
# Ordered field names shared by this module's consumers.
# NOTE(review): presumably the column order of the per-class source-code
# feature table -- confirm against the callers that build that frame.
sourceCodeColumns = [
    'packageName',
    'className',
    'methodNames',
    'formalParameter',
    'methodInvocation',
    'memberReference',
    'documentation',
    'literal',
    'rawSource',
    'hunks',
    'commitLogs',
    'classNameExt',
]
def nap(seconds=1):
    """Pause the calling thread.

    Args:
        seconds: How long to sleep, in seconds. Defaults to 1, which is the
            fixed delay this helper has always used.
    """
    time.sleep(seconds)
def setLogg():
# logging.basicConfig(filename='app.log', filemode='w',level=logging.DEBUG)
root = logging.getLogger()
@@ -45,7 +48,8 @@ def setLogg():
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.WARNING)
formatter = logging.Formatter('%(asctime)s - %(process)d - %(levelname)s - %(filename)s:%(funcName)s - %(message)s')
formatter = logging.Formatter(
'%(asctime)s - %(process)d - %(levelname)s - %(filename)s:%(funcName)s - %(message)s')
ch.setFormatter(formatter)
# ch.addFilter(lambda record: record.levelno <= logging.)
root.addHandler(ch)
@@ -64,6 +68,7 @@ def setLogg():
h2.setFormatter(formatter)
root.addHandler(h2)
def setEnv(args):
# env = args.env
@@ -72,7 +77,6 @@ def setEnv(args):
os.environ["ROOT_DIR"] = args.root
sys.path.append(args.root)
import yaml
# if os.uname().nodename != '':
# with open(join(os.environ["ROOT_DIR"], os.uname().nodename + ".config.yml"), 'r') as ymlfile:
@@ -113,16 +117,15 @@ def setEnv(args):
# os.environ["JDK8"] = cfg['java']['8home']
# os.environ["D4JHOME"] = cfg['defects4j']['home']
os.environ["CODE_PATH"] = join(os.environ["ROOT_DIR"],'code/')
os.environ["CODE_PATH"] = join(os.environ["ROOT_DIR"], 'code/')
# os.environ["DATA_PATH"] = join(os.environ["ROOT_DIR"],'data/')
# os.environ["REPO_PATH"] = join(os.environ["DATA_PATH"], 'gitrepo/')
os.environ["COMMIT_DFS"]= join(os.environ["DATA_PATH"],'commitsDF/')
os.environ["SIMI_DIR"]= join(os.environ["DATA_PATH"],'simi/')
os.environ["COMMIT_DFS"] = join(os.environ["DATA_PATH"], 'commitsDF/')
os.environ["SIMI_DIR"] = join(os.environ["DATA_PATH"], 'simi/')
os.environ["DTM_PATH"] = join(os.environ["DATA_PATH"], 'dtm/')
os.environ["SIMI_SINGLE"] = join(os.environ["DATA_PATH"], 'simiSingle/')
os.environ["FEATURE_DIR"] = join(os.environ["DATA_PATH"],'features/')
os.environ["FEATURE_DIR"] = join(os.environ["DATA_PATH"], 'features/')
os.environ["BUG_POINT"] = join(os.environ["DATA_PATH"], 'bugPoints/')
os.environ["DEFECTS4J"] = join(os.environ["DATA_PATH"], 'defects4jdata/')
@@ -139,10 +142,6 @@ def setEnv(args):
os.environ["DATASET_DIR"] = join(os.environ["DATA_PATH"], 'datasets/')
os.environ["REMOTE_PATH"] = '/Volumes/Samsung_T5/data'
logging.info('ROOT_DIR : %s', os.environ["ROOT_DIR"])
logging.info('REPO_PATH : %s', os.environ["REPO_PATH"])
logging.info('CODE_PATH : %s', os.environ["CODE_PATH"])
@@ -159,15 +158,13 @@ def setEnv(args):
logging.info('DATASET_DIR : %s', os.environ["DATASET_DIR"])
def getRun():
import argparse
parser = argparse.ArgumentParser(description='')
# parser.add_argument('-subject', dest='subject', help='Environment')
parser.add_argument('-root', dest='root', help='root folder')
parser.add_argument('-job',dest='job',help='job name')
parser.add_argument('-prop',dest='prop',help='property file')
parser.add_argument('-job', dest='job', help='job name')
parser.add_argument('-prop', dest='prop', help='property file')
args = parser.parse_args()
@@ -177,10 +174,9 @@ def getRun():
return args
def shellCallTemplate4jar(cmd,enc='utf-8'):
def shellCallTemplate4jar(cmd, enc='utf-8'):
process = subprocess.Popen(cmd,
stdout=subprocess.PIPE,stderr=PIPE, shell=True,encoding=enc,
stdout=subprocess.PIPE, stderr=PIPE, shell=True, encoding=enc,
universal_newlines=True)
while True:
@@ -195,10 +191,11 @@ def shellCallTemplate4jar(cmd,enc='utf-8'):
print(output.strip())
break
def shellCallTemplate(cmd,enc='utf-8'):
def shellCallTemplate(cmd, enc='utf-8'):
try:
logging.info(cmd)
with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,encoding=enc) as p:
with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
output, errors = p.communicate()
# print(output)
if errors:
@@ -212,30 +209,32 @@ def shellCallTemplate(cmd,enc='utf-8'):
logging.error(e)
return output
def getChildMem(pid, children):
    """Collect the PIDs of every descendant process of ``pid``.

    Each process is queried with ``pgrep -P <pid>``. Every PID that pgrep
    lists is recorded and then queried in turn, so siblings as well as
    grandchildren are found (following only the first listed child would
    miss the rest of the tree).

    Args:
        pid: PID (int or str) of the root process. It is not itself added.
        children: List that receives the descendant PIDs as strings; it is
            extended in place.

    Returns:
        The same ``children`` list, for convenience.
    """
    pending = [str(pid)]
    while pending:
        parent = pending.pop()
        listing = subprocess.run(['pgrep', '-P', parent],
                                 stdout=subprocess.PIPE).stdout
        # pgrep prints one PID per line and nothing when there is no match.
        kids = listing.decode().split()
        children.extend(kids)
        pending.extend(kids)
    return children
def getAllChildMe(pid):
    """Return the combined memory figure for ``pid`` and its child processes.

    The child PIDs are whatever ``getChildMem`` collects for ``pid``; each
    one, and ``pid`` itself, is measured with ``memory_usage_ps`` and the
    results are added together.
    """
    descendants = []
    getChildMem(pid, descendants)
    children_total = sum(memory_usage_ps(child) for child in descendants)
    return children_total + memory_usage_ps(pid)
def memory_usage_ps(pid):
import subprocess
out = subprocess.Popen(['ps', 'v', '-p', str(pid)],
stdout=subprocess.PIPE).communicate()[0].split(b'\n')
stdout=subprocess.PIPE).communicate()[0].split(b'\n')
vsz_index = out[0].split().index(b'RSS')
if out[1].decode() != '':
mem = float(out[1].split()[vsz_index]) / 1024
@@ -243,73 +242,74 @@ def memory_usage_ps(pid):
mem = float(0)
return mem
def raiseTime(cmd, timeout, my_timer):
    """Cancel ``my_timer`` and signal that ``cmd`` exceeded ``timeout``.

    Args:
        cmd: The command that timed out; stored on the raised exception.
        timeout: The limit, in seconds, that was exceeded.
        my_timer: Object with a ``cancel()`` method (e.g. a
            ``threading.Timer``), or ``None`` when no timer was started.

    Raises:
        TimeoutExpired: Always.
    """
    # A missing timer must not mask the timeout with an AttributeError.
    if my_timer is not None:
        my_timer.cancel()
    raise TimeoutExpired(cmd, timeout)
def killP(pid):
    """Ask process ``pid`` to terminate by running the ``kill`` command.

    ``kill`` is invoked without a signal argument, so it sends its default
    signal. The command's stdout is captured and discarded, and its exit
    status is not checked, so an already-dead PID is not an error here.

    Args:
        pid: PID (int or str) of the process to signal.
    """
    subprocess.run(['kill', str(pid)], stdout=subprocess.PIPE)
def shellGitCheckout(cmd, timeout=600, enc='utf-8'):
    """Run ``cmd`` through the shell and return its captured output.

    Args:
        cmd: Command line, executed with ``shell=True``.
        timeout: Seconds to wait for the command to finish.
        enc: Encoding used to decode the command's stdout and stderr.

    Returns:
        An ``(output, errors)`` tuple holding the decoded stdout and stderr.
        Both are empty strings when the command timed out. A command that
        writes to stderr is not treated as a failure: the text is logged at
        DEBUG level and returned to the caller.
    """
    output = ''
    errors = ''
    # NOTE(review): shell=True runs cmd verbatim -- never pass untrusted text.
    with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
        try:
            output, errors = p.communicate(timeout=timeout)
        except TimeoutExpired as t:
            # Stop the process, then drain its pipes so it can be reaped.
            p.terminate()
            p.communicate()
            logging.warning('%s\t%s', cmd, t)
        else:
            logging.debug('%s\t%s', cmd, output)
            if errors:
                logging.debug('%s\t%s', cmd, errors)
    return output, errors
def callSpinfer(cmd,timeout =600,enc='utf-8'):
def callSpinfer(cmd, timeout=600, enc='utf-8'):
output = ''
errors = ''
# logging.debug(cmd)
my_timer = None
with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True,encoding=enc) as p:
with Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, encoding=enc) as p:
try:
start = datetime.datetime.now()
memusage = getAllChildMe(p.pid)
# isExit = False
while(memusage != 0.0):
while (memusage != 0.0):
end = datetime.datetime.now()
elapsed = end - start
if(elapsed.seconds > timeout):
raise TimeoutExpired(cmd,timeout)
if (elapsed.seconds > timeout):
raise TimeoutExpired(cmd, timeout)
memusage = getAllChildMe(p.pid)
# print(str(p.pid) + " ; " + str(memusage))
if memusage > 2000:
# isExit = True
raise TimeoutExpired(cmd,timeout)
raise TimeoutExpired(cmd, timeout)
output, errors = p.communicate(timeout=timeout)
# print(output)
logging.debug(cmd + '\t' +output)
logging.debug(cmd + '\t' + output)
# logging.info(errors)
if errors:
raise CalledProcessError(errors, '-1')
output
except CalledProcessError as e:
logging.debug(cmd +'\t'+ errors)
logging.debug(cmd + '\t' + errors)
except TimeoutExpired as t:
# my_timer.cancel()
childrenProcess = []
getChildMem(p.pid, childrenProcess)
[killP(i) for i in childrenProcess]
@@ -317,30 +317,35 @@ def callSpinfer(cmd,timeout =600,enc='utf-8'):
p.terminate()
p.communicate()
# p.kill()
logging.warning(cmd +'\t'+str(t))
return output,errors
logging.warning(cmd + '\t' + str(t))
return output, errors
def save_zipped_pickle(obj, filename, protocol=-1):
    """Serialise ``obj`` into a gzip-compressed file.

    Args:
        obj: The object to dump.
        filename: Destination path; opened for binary writing via gzip, so
            an existing file is overwritten.
        protocol: Forwarded to ``p.dump``.
            NOTE(review): ``p`` is assumed to be the pickle module, where -1
            selects the highest available protocol -- confirm the import.
    """
    with gzip.open(filename, 'wb') as f:
        p.dump(obj, f, protocol)
def load_zipped_pickle(filename):
    """Read back an object stored in a gzip-compressed file.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        Whatever ``p.load`` reconstructs from the decompressed stream.
    """
    # NOTE(review): ``p`` is assumed to be the pickle module. Unpickling can
    # execute arbitrary code, so only load files this project wrote itself.
    with gzip.open(filename, 'rb') as f:
        return p.load(f)
def file2path(file):
    """Turn a dotted name into a slash-separated path, keeping the last dot.

    Every ``.`` except the final one becomes ``/``, so the final dot keeps
    acting as the extension separator:
    ``'org.foo.Bar.java'`` -> ``'org/foo/Bar.java'``.

    Args:
        file: Dotted name, e.g. a qualified class name plus file extension.

    Returns:
        The converted string. Names with zero or one dot come back unchanged.
    """
    # Split at the last dot so only the part before it is rewritten.
    stem, last_dot, tail = file.rpartition('.')
    return stem.replace('.', '/') + last_dot + tail
def isFileInList(file, checkList):
    """Tell whether any entry of ``checkList`` occurs inside ``file``.

    Matching is by substring: an entry matches when it appears anywhere in
    ``file``, not only when the two are equal.

    Args:
        file: The string (e.g. a file path) to search in.
        checkList: Iterable of candidate substrings.

    Returns:
        True if at least one entry is contained in ``file``, else False
        (including when ``checkList`` is empty).
    """
    return any(candidate in file for candidate in checkList)
# [i for i in ansFiles if 'org/fusesource/esb/itests/basic/fabric/EsbFeatureTest.java' in i]
def get_venn_sections(sets):
"""
Given a list of sets, return a new list of sets with all the possible
@@ -366,7 +371,7 @@ def get_venn_sections(sets):
bit_flags = [2 ** n for n in range(len(sets))]
flags_zip_sets = [z for z in zip(bit_flags, sets)]
#combo_sets = []
# combo_sets = []
combo_sets = dict()
for bits in range(num_combinations - 1, 0, -1):
include_sets = [s for flag, s in flags_zip_sets if bits & flag]
@@ -374,17 +379,19 @@ def get_venn_sections(sets):
combo = set.intersection(*include_sets)
combo = set.difference(combo, *exclude_sets)
tag = ''.join([str(int((bits & flag) > 0)) for flag in bit_flags])
#combo_sets.append((tag, combo))
# combo_sets.append((tag, combo))
combo_sets[tag] = combo
return combo_sets
def pairwise(iterable):
    """Yield overlapping pairs: s -> (s0, s1), (s1, s2), (s2, s3), ...

    Args:
        iterable: Any iterable; it is consumed once.

    Returns:
        A lazy ``zip`` iterator of consecutive pairs. It is empty when the
        input has fewer than two items.
    """
    first, second = itertools.tee(iterable)
    # Advance the second copy by one so the two run one element apart.
    next(second, None)
    return zip(first, second)
def RR_XGB(x,ao,column):
def RR_XGB(x, ao, column):
if x[ao] == 1:
return (1.0 / (x[column]))
elif pd.isnull(x[ao]):
@@ -392,10 +399,11 @@ def RR_XGB(x,ao,column):
else:
return 0
def parallelRunNo(coreFun,elements,*args):
def parallelRunNo(coreFun, elements, *args):
with concurrent.futures.ProcessPoolExecutor(max_workers=int(8)) as executor:
try:
futures = {executor.submit(coreFun, l,*args): l for l in elements}
futures = {executor.submit(coreFun, l, *args): l for l in elements}
kwargs = {
'total': len(futures),
@@ -420,10 +428,10 @@ def parallelRunNo(coreFun,elements,*args):
raise
def parallelRun(coreFun,elements,*args,max_workers=os.cpu_count()):
def parallelRun(coreFun, elements, *args, max_workers=os.cpu_count()):
with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
try:
futures = {executor.submit(coreFun, l,*args): l for l in elements}
futures = {executor.submit(coreFun, l, *args): l for l in elements}
kwargs = {
'total': len(futures),
@@ -445,11 +453,11 @@ def parallelRun(coreFun,elements,*args,max_workers=os.cpu_count()):
raise
def parallelRunMerge(coreFun,elements,*args,max_workers=os.cpu_count()):
def parallelRunMerge(coreFun, elements, *args, max_workers=os.cpu_count()):
dataL = []
with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
try:
futures = {executor.submit(coreFun, l,*args): l for l in elements}
futures = {executor.submit(coreFun, l, *args): l for l in elements}
kwargs = {
'total': len(futures),
'unit': 'files',
@@ -473,12 +481,11 @@ def parallelRunMerge(coreFun,elements,*args,max_workers=os.cpu_count()):
raise
def parallelRunMergeNew(coreFun,elements,*args,max_workers=os.cpu_count()):
def parallelRunMergeNew(coreFun, elements, *args, max_workers=os.cpu_count()):
res = []
with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
try:
futures = {executor.submit(coreFun, l,*args): l for l in elements}
futures = {executor.submit(coreFun, l, *args): l for l in elements}
kwargs = {
'total': len(futures),
@@ -503,8 +510,8 @@ def parallelRunMergeNew(coreFun,elements,*args,max_workers=os.cpu_count()):
aDF = pd.concat(res)
return aDF
def get_filepaths(directory,extension):
def get_filepaths(directory, extension):
file_paths = [] # List which will store all of the full filepaths.\n,
exclude = '.git'
# Walk the tree.\n,
@@ -520,27 +527,27 @@ def get_filepaths(directory,extension):
return file_paths # Self-explanatory.\n,
def get_class_weights(y):
    """Compute per-class weights that are inversely proportional to frequency.

    The most frequent class gets weight 1.0; every other class gets
    ``majority_count / class_count``, rounded to two decimals.

    Args:
        y: Iterable of hashable class labels.

    Returns:
        Dict mapping each label to its weight. An empty ``y`` yields ``{}``.
    """
    counter = Counter(y)
    # Test the counter, not y: y may be an array whose truth value is ambiguous.
    if not counter:
        return {}
    majority = max(counter.values())
    return {cls: round(float(majority) / float(count), 2)
            for cls, count in counter.items()}
def stopDB(dbDir, portInner):
    """Shut down the redis server listening on ``portInner``, saving first.

    Issues ``redis-cli -p <port> shutdown save`` through ``shellGitCheckout``
    and logs the command's stdout at INFO level.

    Args:
        dbDir: Not used by this function; kept for signature compatibility.
        portInner: Port of the redis server, as a string.
    """
    shutdown_cmd = ' '.join(["redis-cli -p", portInner, "shutdown save"])
    stdout_text, _stderr_text = shellGitCheckout(shutdown_cmd)
    logging.info(stdout_text)
def startDB(dbDir,portInner,projectType):
dbName = "dumps-"+projectType+".rdb"
def startDB(dbDir, portInner, projectType):
dbName = "dumps-" + projectType + ".rdb"
# portInner = '6380'
cmd = "bash " + dbDir + "/" + "startServer.sh " + dbDir + " "+dbName+ " " + portInner;
cmd = "bash " + dbDir + "/" + "startServer.sh " + dbDir + " " + dbName + " " + portInner;
o, e = shellGitCheckout(cmd)
ping = "redis-cli -p "+portInner+" ping"
ping = "redis-cli -p " + portInner + " ping"
o, e = shellGitCheckout(ping)
m = re.search('PONG', o)
@@ -569,23 +576,23 @@ def unique_everseen(iterable, key=None):
seen_add(k)
yield element
def plotBox(yList,labels, fn, rotate=False,limit=True):
def plotBox(yList, labels, fn, rotate=False, limit=True):
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(111)
meanpointsprops = dict(markeredgecolor ='blue',markerfacecolor=
'blue')
meanpointsprops = dict(markeredgecolor='blue', markerfacecolor=
'blue')
flierprops = dict(markeredgecolor ='black',markerfacecolor=
'black',marker='.',markersize=2)
box = ax1.boxplot(yList, 0, flierprops=flierprops,widths=0.5, showmeans=False, vert=True,meanprops=meanpointsprops)
flierprops = dict(markeredgecolor='black', markerfacecolor=
'black', marker='.', markersize=2)
box = ax1.boxplot(yList, 0, flierprops=flierprops, widths=0.5, showmeans=False, vert=True,
meanprops=meanpointsprops)
for line in box['medians']:
x,y = line.get_xydata()[1]
x, y = line.get_xydata()[1]
line.set(linewidth=3)
line.set_color('blue')
# plt.scatter(labels, yList, color='r')
@@ -601,8 +608,8 @@ def plotBox(yList,labels, fn, rotate=False,limit=True):
ax1.get_xaxis().set_ticklabels([])
# sns.boxplot(yList, ax=ax1)
if limit:
ax1.set_ylim(top=1.1,bottom=0)
ax1.yaxis.set_ticks([0.0,1.0])
ax1.set_ylim(top=1.1, bottom=0)
ax1.yaxis.set_ticks([0.0, 1.0])
else:
ax1.set_yscale('log')
ax1.set_xlabel('Cluster Member Size')
@@ -616,33 +623,32 @@ def plotBox(yList,labels, fn, rotate=False,limit=True):
fig.set_size_inches(7, 1, forward=True)
fig.savefig(fn, dpi=100, bbox_inches='tight')
plt.show()
def plotBox2(ys,labels, fn,means, rotate=False,limit=True):
def plotBox2(ys, labels, fn, means, rotate=False, limit=True):
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
fig, axes = plt.subplots(nrows=3, ncols=1)
fig,axes = plt.subplots(nrows=3,ncols=1)
for ax1,yList,l,l2,mean in zip(axes.flat,ys,labels,['Shapes','Actions','Tokens'],means):
for ax1, yList, l, l2, mean in zip(axes.flat, ys, labels, ['Shapes', 'Actions', 'Tokens'],
means):
# plt.setp(ax1.get_xticks(),visible=False)
# ax1 = fig.add_subplot(111)
meanpointsprops = dict(markeredgecolor ='blue',markerfacecolor=
'blue')
meanpointsprops = dict(markeredgecolor='blue', markerfacecolor=
'blue')
flierprops = dict(markeredgecolor ='black',markerfacecolor=
'black',marker='.',markersize=2)
box = ax1.boxplot(yList, 0, flierprops=flierprops,widths=0.5, showmeans=False, vert=True,meanprops=meanpointsprops)
flierprops = dict(markeredgecolor='black', markerfacecolor=
'black', marker='.', markersize=2)
box = ax1.boxplot(yList, 0, flierprops=flierprops, widths=0.5, showmeans=False, vert=True,
meanprops=meanpointsprops)
ax1.axhline(linewidth=2, color='r',y=mean)
ax1.axhline(linewidth=2, color='r', y=mean)
for line in box['medians']:
x,y = line.get_xydata()[1]
x, y = line.get_xydata()[1]
line.set(linewidth=3)
line.set_color('blue')
# plt.scatter(labels, yList, color='r')
@@ -659,14 +665,14 @@ def plotBox2(ys,labels, fn,means, rotate=False,limit=True):
# ax1.get_xaxis().set_ticks([])
# sns.boxplot(yList, ax=ax1)
if limit:
if l2 !='Tokens':
ax1.set_ylim(top=1,bottom=0)
if l2 != 'Tokens':
ax1.set_ylim(top=1, bottom=0)
else:
ax1.set_ylim(top=1.1, bottom=0)
ax1.yaxis.set_ticks([0.0,mean,0.5,1.0])
ax1.yaxis.set_ticklabels([0,'',0.5,1])
ax1.yaxis.set_ticks([0.0, mean, 0.5, 1.0])
ax1.yaxis.set_ticklabels([0, '', 0.5, 1])
ax1.tick_params(direction='out', length=6, width=2, axis='y',
grid_color='r', grid_alpha=0.5)
grid_color='r', grid_alpha=0.5)
else:
# ax1.set_yscale('log')
@@ -675,7 +681,7 @@ def plotBox2(ys,labels, fn,means, rotate=False,limit=True):
ax1.set_aspect('auto')
ax1.set_ylabel(l2)
labels = ['C-'+str(i+1) for i in labels[0]]
labels = ['C-' + str(i + 1) for i in labels[0]]
ax1.set_xticklabels(labels)
ax1.set_xticklabels(labels, rotation=45, ha='right')
# plt.setp(ax1.get_xticks(), visible=True)
@@ -687,16 +693,14 @@ def plotBox2(ys,labels, fn,means, rotate=False,limit=True):
plt.subplots_adjust(wspace=0, hspace=0.05)
fig = plt.gcf()
# fig.tight_layout()
fig.set_size_inches(7, 7, forward=True)
fig.savefig(fn, dpi=100, bbox_inches='tight')
plt.show()
def plotScatter(s1,s2,vs,label,limits,type):
def plotScatter(s1, s2, vs, label, limits, type):
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
@@ -719,8 +723,8 @@ def plotScatter(s1,s2,vs,label,limits,type):
stepsize = 1
ax.xaxis.set_ticks(np.arange(0, end, stepsize))
ax.yaxis.set_ticks(np.arange(0, end, stepsize))
x = np.linspace(start, end, limits+1)
y = np.linspace(start, end, limits+1)
x = np.linspace(start, end, limits + 1)
y = np.linspace(start, end, limits + 1)
ax.fill_between(x, y, end, facecolor='b', alpha=0.3)
# plt.plot(np.linspace(0, 1, 10), np.linspace(0, 1, 10), lw=1)
ax.spines['top'].set_visible(True)
@@ -744,14 +748,17 @@ def plotScatter(s1,s2,vs,label,limits,type):
tight_bbox=True
)
import threading
class BackgroundTask(object):
""" Threading example class
The run() method will be started and it will run in the background
until the application exits.
"""
def __init__(self, model,PATH, interval=1):
def __init__(self, model, PATH, interval=1):
""" Constructor
:type interval: int
:param interval: Check interval, in seconds
@@ -761,10 +768,10 @@ class BackgroundTask(object):
self.path = PATH
thread = threading.Thread(target=self.run, args=())
thread.daemon = True # Daemonize thread
thread.start() # Start the execution
thread.daemon = True # Daemonize thread
thread.start() # Start the execution
def run(self):
""" Method that runs forever """
self.model.save_model(self.path,
num_iteration=self.model.best_iteration)
num_iteration=self.model.best_iteration)