From 6ed2a2fa73084e7598a84fbe9a3eaf638c975f34 Mon Sep 17 00:00:00 2001 From: "Azalea (on HyDEV-Daisy)" Date: Mon, 16 May 2022 05:25:46 -0400 Subject: [PATCH] [O] Remove redundant code --- python/main.py | 1 + python/pairs.py | 71 +++++++++---------------------------------------- 2 files changed, 13 insertions(+), 59 deletions(-) diff --git a/python/main.py b/python/main.py index 52f3374..ff4e83d 100644 --- a/python/main.py +++ b/python/main.py @@ -48,6 +48,7 @@ def job_richedit(): def job_actionSI(): from pairs import actionPairs, createPairs, importAction + job_start_redis() matches = actionPairs() createPairs(matches) importAction() diff --git a/python/pairs.py b/python/pairs.py index 52899e8..19d21ec 100644 --- a/python/pairs.py +++ b/python/pairs.py @@ -1,31 +1,23 @@ from common.commons import * +import redis + DATA_PATH = os.environ["DATA_PATH"] ROOT = os.environ["ROOT_DIR"] -INNER_DATA_PATH = join(ROOT,'data') +INNER_DATA_PATH = join(ROOT, 'data') PROJECT_TYPE = os.environ["PROJECT_TYPE"] REDIS_PORT = os.environ["REDIS_PORT"] def importTokens(): - # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, shapeSize, cluster, cmd, o, e, indexFile, iFile, idx, i, k, v, key - dbDir = join(INNER_DATA_PATH, 'redis') - # portInner = '6380' - # startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb") - portInner = REDIS_PORT - redis_start(dbDir, portInner, PROJECT_TYPE) - - import redis - # import pairs pairsAction = join(DATA_PATH, 'pairsToken') - redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1) + redis_db = redis.StrictRedis(host="localhost", port=int(REDIS_PORT), db=1) pairs = get_filepaths(pairsAction, '.txt') for pair in pairs: split = pair.split("/") shapeName = split[-3] shapeSize = split[-2] cluster = split[-1].replace('.txt', '') - cmd = "bash " + join(INNER_DATA_PATH, - 'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + shapeSize + "-" + cluster; # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]); + cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + shapeSize + "-" + cluster; # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]); o, e = shellGitCheckout(cmd) print(o) @@ -35,24 +27,13 @@ def importTokens(): for i in idx: k, v = i.split(',') key = shapeName + "-" + shapeSize + "-" + cluster + "-" + k - # redis_db.set(key, v.strip()) redis_db.hset('filenames', key, v.strip()) redis_db.set("level", "l2") - def importAction(): - # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key - dbDir = join(INNER_DATA_PATH, 'redis') - # portInner = '6380' - # startDB(dbDir, portInner, "clusterl0-gumInputALL.rdb") - - portInner = REDIS_PORT - redis_start(dbDir, portInner, PROJECT_TYPE) - - import redis pairsShapes = join(DATA_PATH, 'pairs') - redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1) + redis_db = redis.StrictRedis(host="localhost", port=int(REDIS_PORT), db=1) pairs = get_filepaths(pairsShapes, '.txt') for pair in pairs: split = pair.split("/") @@ -72,15 +53,15 @@ def importAction(): redis_db.hset('filenames',key,v.strip()) redis_db.set("level","l1") + def tokenPairs(): - # global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b shapes = listdir(join(DATA_PATH, 'actions')) shapes = [f for f in shapes if isdir(join(DATA_PATH, 'actions', f))] if os.path.exists(join(DATA_PATH, 'pairsToken')): import shutil shutil.rmtree(join(DATA_PATH, 'pairsToken')) - # shapes = [rootType] + for shape in shapes: sizes = listdir(join(DATA_PATH, 'actions', shape)) sizes = [f for f in sizes if isdir(join(DATA_PATH, 'actions', shape, f))] @@ -134,53 +115,25 @@ def tokenPairs(): a, b = row out.write(a + ',' + b + '\n') + def actionPairs(): - # global dbDir, portInner, redis_db, keys, matches, roots, sizes, sf, files, indexCompared, out, idx, val, pairs, row, a, b - # if not (isfile(join(DATA_PATH, 'studyDataset.pickle'))): - dbDir = join(INNER_DATA_PATH, 'redis') - - portInner = REDIS_PORT - redis_start(dbDir, portInner, PROJECT_TYPE) - - import redis - redis_db = redis.StrictRedis(host="localhost", port=portInner, db=0) - keys = redis_db.hkeys("dump")#hkeys "dump" - # keys = redis_db.scan(0, match='*', count='1000000') + redis_db = redis.StrictRedis(host="localhost", port=int(REDIS_PORT), db=0) + keys = redis_db.hkeys("dump") matches = pd.DataFrame(keys, columns=['pairs_key']) - # matches = load_zipped_pickle(join(DATA_PATH,'singleHunks')) matches['pairs_key'] = matches['pairs_key'].apply(lambda x: x.decode()) matches['root'] = matches['pairs_key'].apply(lambda x: x.split('/')[0]) matches['size'] = matches['pairs_key'].apply(lambda x: x.split('/')[1]) matches['file'] = matches['pairs_key'].apply(lambda x: x.split('/')[2]) - - # matches[matches.file.apply( - # lambda i: (i.startswith('commons-math.git') or i.startswith('commons-lang.git') or i.startswith( - # 'closure-compiler.git') or i.startswith('joda-time.git') or i.startswith('mockito.git')))] matches['fileName'] = matches['pairs_key'].apply(lambda x: '_'.join(x.split('/')[2].split('_')[:-1])) - # else: - # matches = load_zipped_pickle(join(DATA_PATH, 'studyDataset.pickle')) - # matches = matches[matches.repo.apply(lambda i: not ( - # i.startswith('commons-math') or i.startswith('commons-lang') or i.startswith( - # 'closure-compiler.git') or i.startswith('joda-time.git') or i.startswith('mockito.git')))] - - # matches = matches[matches['size'] != '1'] matches['hunk'] = matches['pairs_key'].apply(lambda x: x.split('/')[2].split('_')[-1]) - # test = matches[['fileName', 'hunk']] - # df = test.groupby(by=['fileName'], as_index=False).agg(lambda x: x.tolist()) - # sDF = df[df.hunk.apply(lambda x: True if x == ['0'] else False)] - # # sDF = df[df.hunk.apply(lambda x: True if len(x)<10005 else False)] - # singleHunkedFiles = sDF.fileName.unique().tolist() - # # singleHunkedFiles = [i.replace('.txt', '') for i in singleHunkedFiles] - # matches = matches[matches.fileName.isin(singleHunkedFiles)] - return matches -def createPairs(matches): +def createPairs(matches): if len(matches) == 0: return True roots = matches.root.unique().tolist()