[-] Remove commented code
This commit is contained in:
@@ -31,57 +31,10 @@ def getCommitFromRepo(f: PathLike, gitrepo: str, branch: str):
|
||||
shellCallTemplate(f"git -C {f} log --no-merges --pretty=format:'{form}' > {file}", enc='latin1')
|
||||
|
||||
# Collect commits
|
||||
commits = json.loads(f'[{Path(file).read_text()}]')
|
||||
content = Path(file).read_text().replace("\n", ",")
|
||||
commits = json.loads(f'[{content}]')
|
||||
|
||||
# Convert to DataFrame
|
||||
ds = pd.DataFrame.from_dict(commits)
|
||||
ds['commitDate'] = pd.to_datetime(ds['commitDate'])
|
||||
return ds
|
||||
|
||||
|
||||
def caseCollect(subject):
    """Collect the commit history of the selected subject(s) and pickle it.

    Reads ``subjects.csv`` from ``DATA_PATH``, calls ``getCommitFromRepo``
    for every selected ``(Repo, Branch)`` row, then turns each matching entry
    of ``COMMIT_FOLDER`` into a DataFrame with ``makeDF`` and stores it under
    ``COMMIT_DFS`` as ``<repoName>.pickle``.

    Args:
        subject: Value of the ``Subject`` column to process, or ``'ALL'`` to
            process every row of ``subjects.csv``.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(COMMIT_FOLDER, exist_ok=True)
    os.makedirs(COMMIT_DFS, exist_ok=True)

    subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
    if subject == 'ALL':
        selected = subjects
    else:
        selected = subjects.query("Subject == '{0}'".format(subject))
    tuples = selected[['Repo', 'Branch']].values.tolist()

    # Remember every processed repo so the filter below does not depend on
    # the loop variable leaking out of the loop.
    repos = []
    for repo, branch in tuples:
        logging.info(repo)
        getCommitFromRepo(join(REPO_PATH, repo), join(COMMIT_FOLDER, repo), branch)
        repos.append(repo)

    if subject == 'ALL':
        commits = listdir(COMMIT_FOLDER)
    else:
        # str.startswith accepts a tuple of prefixes: this covers subjects
        # that map to several repos and yields an empty list (instead of a
        # NameError) when the subject matched no row at all.
        prefixes = tuple(repos)
        commits = [i for i in listdir(COMMIT_FOLDER) if i.startswith(prefixes)]

    for commit in commits:
        logging.info(commit)
        rDF = makeDF(join(COMMIT_FOLDER, commit))
        # The text before the first '.' of the entry name is used as the
        # name of the resulting pickle.
        repoName = commit.split('.')[0]
        save_zipped_pickle(rDF, join(COMMIT_DFS, repoName + ".pickle"))
|
||||
|
||||
|
||||
def caseClone(subject):
    """Clone the git repositories of the selected subject(s) into ``REPO_PATH``.

    Reads ``subjects.csv`` from ``DATA_PATH`` and runs ``git clone`` for each
    selected ``GitRepo`` entry from inside ``REPO_PATH``.

    Args:
        subject: Value of the ``Subject`` column to clone, or ``'ALL'`` to
            clone every repository listed in ``subjects.csv``.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(REPO_PATH, exist_ok=True)

    subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
    if subject == 'ALL':
        gitrepos = subjects.GitRepo.tolist()
    else:
        gitrepos = subjects.query("Subject == '{0}'".format(subject)).GitRepo.tolist()

    # Remember where we started: chdir() affects the whole process, so the
    # original directory is restored even if a clone raises.
    previous_dir = os.getcwd()
    os.chdir(REPO_PATH)
    try:
        for gitrepo in gitrepos:
            # NOTE(review): the command is built by string concatenation and
            # presumably executed through a shell by shellCallTemplate;
            # GitRepo values are assumed to come from a trusted CSV - confirm.
            shellCallTemplate('git clone ' + gitrepo)
    finally:
        os.chdir(previous_dir)
|
||||
|
||||
@@ -89,71 +89,3 @@ def createDS(project_list: str = PROJECT_LIST):
|
||||
print(len(commits))
|
||||
# for s in a.commit.values.tolist():
|
||||
parallelRun(prepareFiles, commits[['commit', 'files']].values.tolist(), repo)
|
||||
|
||||
# # if job == 'clone':
|
||||
# for repo,src in subjects[['Repo','GitRepo']].values.tolist():
|
||||
# if(pjList != ['ALL']):
|
||||
# if repo in pjList:
|
||||
# print(repo)
|
||||
# cmd = 'git -C ' + DATASET_PATH + ' clone ' + src
|
||||
# shellCallTemplate(cmd)
|
||||
# logging.info(repo)
|
||||
|
||||
# caseClone(subject)
|
||||
|
||||
# caseCollect(subject)
|
||||
# # elif job == 'fix':
|
||||
# from filterBugFixingCommits import caseFix
|
||||
#
|
||||
# caseFix(subject)
|
||||
# #
|
||||
# # # elif job =='brDownload':
|
||||
# from bugReportDownloader import caseBRDownload
|
||||
#
|
||||
# caseBRDownload(subject)
|
||||
# # # elif job =='brParser':
|
||||
# from bugReportParser import step1
|
||||
#
|
||||
# step1(subject)
|
||||
#
|
||||
# # elif job =='dataset':
|
||||
#
|
||||
# if not isfile(join(DATA_PATH, 'singleBR.pickle')):
|
||||
#
|
||||
# brs = load_zipped_pickle(join(DATA_PATH, subject + "bugReportsComplete.pickle"))
|
||||
#
|
||||
# subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
|
||||
#
|
||||
#
|
||||
# def getCommit(x):
|
||||
# bid, project = x
|
||||
#
|
||||
# subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
|
||||
# repo = subjects.query("Subject == '{0}'".format(project)).Repo.tolist()[0]
|
||||
# commits = load_zipped_pickle(join(DATA_PATH, COMMIT_DFS, repo + '.pickle'))
|
||||
# correspondingCommit = commits.query("fix =='{0}'".format(bid)).commit.tolist()
|
||||
# if len(correspondingCommit) == 1:
|
||||
# return [bid, correspondingCommit[0], project]
|
||||
# else:
|
||||
# return None
|
||||
# print('error')
|
||||
#
|
||||
#
|
||||
# wl = brs[['bid', 'project']].values.tolist()
|
||||
# dataL = parallelRunMerge(getCommit, wl)
|
||||
#
|
||||
# commits = pd.DataFrame(
|
||||
# columns=['bid', 'commit', 'project'],
|
||||
# data=list(filter(None.__ne__, dataL)))
|
||||
#
|
||||
# save_zipped_pickle(commits, join(DATA_PATH, 'singleBR.pickle'))
|
||||
# else:
|
||||
# commits = load_zipped_pickle(join(DATA_PATH, 'singleBR.pickle'))
|
||||
# subjects = pd.read_csv(join(DATA_PATH, 'subjects.csv'))
|
||||
# logging.info('done matching commits')
|
||||
# commits['repo'] = commits.project.apply(lambda x: subjects.query("Subject == '{0}'".format(x)).Repo.tolist()[0])
|
||||
#
|
||||
# workList = commits[['commit', 'repo']].values.tolist()
|
||||
# from dataset import prepareFiles
|
||||
#
|
||||
# parallelRun(prepareFiles, workList)
|
||||
|
||||
Reference in New Issue
Block a user