Re-enabled token iteration

2020-06-09 14:51:19 +02:00
parent aa2a17bc4a
commit a26275cf4a
12 changed files with 256 additions and 464 deletions
@@ -6,6 +6,8 @@ from common.commons import *
 DATA_PATH = os.environ["DATA_PATH"]
 DATASET = os.environ["dataset"]
 jdk8 = os.environ["JDK8"]
+REDIS_PORT = os.environ["REDIS_PORT"]
+
 # def localPairCore(aTuple):
 #     redis_db = redis.StrictRedis(host="localhost", port=6380, db=1)
 #     idx, key = aTuple
@@ -54,12 +56,15 @@ def loadPairMulti(root,clusterPath,level):

    # root = 'BreakStatement'
    logging.info(root)
-    port = 6399
+    port = REDIS_PORT
    # if isfile(clusterPath +"/"+root+".pickle"):
    #     return load_zipped_pickle(clusterPath +"/"+root+".pickle")
    # else:
        # redis_db = redis.StrictRedis(host="localhost", port=port, db=1)  #L1
-    redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
+    if level == 'tokens':
+        redis_db = redis.StrictRedis(host="localhost", port=port, db=3)
+    else:
+        redis_db = redis.StrictRedis(host="localhost", port=port, db=2)
    keys = redis_db.scan(0, match=root+'-*', count='100000000')
    # keys = redis_db.hkeys("dump")

@@ -81,11 +86,11 @@ def loadPairMulti(root,clusterPath,level):
    matches['path2']=matches['pairs'].apply(lambda x:x[1])
    # matches['sizes']=matches['pairs_key'].apply(lambda x:x.split('_')[0].split('-')[1])
    matches['sizes']=matches['pairs_key'].apply(lambda x:x.split(root)[1].split('/')[0].split('-')[1])
-    if level == 'actions':
-        matches['actions']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[2])
    if level == 'tokens':
-        matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
-        matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[3])
+        matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[2])
+    # if level == 'tokens':
+    #     matches['actions'] = matches['pairs_key'].apply(lambda x: x.split('/')[0].split('-')[2])
+    #     matches['tokens']=matches['pairs_key'].apply(lambda x:x.split('/')[0].split('-')[3])


    # save_zipped_pickle(matches,clusterPath +"/"+root+".pickle")
@@ -119,19 +124,19 @@ def cluster(clusterPath,pairsPath, level):
                for s in sizes:
                    match = matches[matches['sizes'] == s]

-                    if level == 'actions':
-                        actions = match['actions'].unique().tolist()
+                    if level == 'tokens':
+                        actions = match['tokens'].unique().tolist()
                        for action in actions:
-                            match = match[match['actions'] == action]
+                            match = match[match['tokens'] == action]
                            clusterCore(clusterPath,  level, match, pairsPath, root, s,action)
-                    elif level == 'tokens':
-                        actions = match['actions'].unique().tolist()
-                        for action in actions:
-                            match = match[match['actions'] == action]
-                            tokens = match['tokens'].unique().tolist()
-                            for token in tokens:
-                                match = match[match['tokens']==token]
-                                clusterCore(clusterPath, level, match, pairsPath, root, s, action,token)
+                    # elif level == 'tokens':
+                    #     actions = match['actions'].unique().tolist()
+                    #     for action in actions:
+                    #         match = match[match['actions'] == action]
+                    #         tokens = match['tokens'].unique().tolist()
+                    #         for token in tokens:
+                    #             match = match[match['tokens']==token]
+                    #             clusterCore(clusterPath, level, match, pairsPath, root, s, action,token)
                    else:
                        clusterCore(clusterPath,  level, match, pairsPath, root, s,'')

@@ -158,12 +163,12 @@ def clusterCore(clusterPath, level, match, pairsPath, root, s,action ,token=''):
        cluster.append(subgraph.nodes())
    cluster
    pathMapping = dict()
-    if level == 'actions':
+    if level == 'tokens':
        indexFile = join(pairsPath, root, s,action+'.index')
-    elif level == 'shapes':
+    elif level == 'actions':
        indexFile = join(pairsPath, root, s + '.index')
-    else:
-        indexFile =join(pairsPath, root, s,action,token+'.index')
+    # else:
+    #     indexFile =join(pairsPath, root, s,action,token+'.index')
    df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
    pathMapping = df.to_dict()

@@ -40,9 +40,7 @@ if __name__ == '__main__':
        if job == 'dataset4j':
            from javaDS import createDS
            createDS()
-        # elif job == 'linuxDS':
-        #     from linuxDataset import collectBugFixPatches
-        #     collectBugFixPatches()
+
        elif job =='dataset4c':
            from otherDatasets import core
            core()
@@ -53,20 +51,16 @@ if __name__ == '__main__':
            output = shellCallTemplate(cmd)
            logging.info(output)

-        # elif job =='loadRES':
-        #     cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " LOAD " + rootType
-        #     output = shellCallTemplate(cmd)
-        #     logging.info(output)

-        elif job =='shapeSI':
-            from pairs import shapePairs
-            matches = shapePairs()
+        elif job =='actionSI':
+            from pairs import actionPairs
+            matches = actionPairs()

            from pairs import createPairs
            createPairs(matches)

-            from pairs import importShape
-            importShape()
+            from pairs import importAction
+            importAction()

        elif job =='compare':
             # cmd = "mvn exec:java -f '/data/fixminer_source/' -Dexec.mainClass='edu.lu.uni.serval.richedit.akka.compare.CompareTrees' -Dexec.args='"+ " shape " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl0-gumInputALL.rdb /data/richedit-core/python/data/richEditScript'"
@@ -74,65 +68,28 @@ if __name__ == '__main__':
            output = shellCallTemplate4jar(cmd)
            logging.info(output)

-        # elif job == 'clusterAdditional':
-        #     from addNewData import cluster
-        #     cluster()

        elif job == 'cluster':
            from abstractPatch import cluster

            dbDir = join(DATA_PATH, 'redis')
-            startDB(dbDir, "6399", PROJECT_TYPE)
-            cluster(join(DATA_PATH,'shapes'),join(DATA_PATH, 'pairs'),'shapes')
+            startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
+            cluster(join(DATA_PATH,'actions'),join(DATA_PATH, 'pairs'),'actions')

-        # elif job =='actionSI':
-        #     from pairs import actionPairs
-        #     actionPairs(rootType)
-        #
-        # # elif job =='importActionPairs':
-        #     from pairs import importAction
-        #     importAction(rootType)
-        #
-        # elif job =='compareActions':
-        #     # cmd = "JAVA_HOME='"+jdk8+"' java -Xmx8096m -Djava.util.concurrent.ForkJoinPool.common.parallelism=64 -jar "+  join(DATA_PATH,'CompareTrees.jar') + " action " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl1-gumInputALL.rdb"
-        #
-        #     cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L2'
-        #     output = shellCallTemplate(cmd)
-        #     logging.info(output)
-        #
-        # elif job == 'clusterActions':
-        #     from abstractPatch import cluster
-        #
-        #     dbDir = join(DATA_PATH, 'redis')
-        #     startDB(dbDir, "6399", PROJECT_TYPE)
-        #     cluster( join(DATA_PATH, 'actions'),join(DATA_PATH, 'pairsAction'),'actions',rootType)
-
-        elif job == 'tokenSI':
+        elif job =='tokenSI':
            from pairs import tokenPairs
            tokenPairs()
-            from pairs import importToken
-            importToken()

-        elif job == 'compareTokens':
-            # cmd = "JAVA_HOME='"+jdk8+"' java -jar "+  join(DATA_PATH,'CompareTrees.jar') + " token " + join(DATA_PATH,"redis") +" ALLdumps-gumInput.rdb " + "clusterl2-gumInputALL.rdb"
-            cmd = "JAVA_HOME='" + jdk8 + "' java -jar " + join(DATA_PATH, 'FixPatternMiner-1.0.1.jar') + " " + join(DATA_PATH, 'app.properties') + " COMPARE " + 'L3'
-            output = shellCallTemplate(cmd)
-            logging.info(output)
+            from pairs import importTokens
+            importTokens()

        elif job == 'clusterTokens':
            from abstractPatch import cluster

            dbDir = join(DATA_PATH, 'redis')
-            startDB(dbDir, "6399", PROJECT_TYPE)
-            startDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")
-            cluster(join(DATA_PATH, 'tokens'), join(DATA_PATH, 'pairsToken'),'tokens')
-            stopDB(dbDir, "6380", "clusterl2-gumInputALL.rdb")
+            startDB(dbDir, REDIS_PORT, PROJECT_TYPE)
+            cluster( join(DATA_PATH, 'tokens'),join(DATA_PATH, 'pairsToken'),'tokens')

-        # elif job == 'additional':
-        #     from addNewData import core
-        #     core()
-        #     # from addNewData import checkWrongMembers
-        #     # checkWrongMembers()

        elif job == 'codeflaws':
            from otherDatasets import codeflaws
@@ -218,19 +175,10 @@ if __name__ == '__main__':
        elif job == 'defects4j':
            from stats import defects4jStats
            defects4jStats()
-        elif job == 'exportPatterns':
+        elif job == 'patterns':
            from stats import exportAbstractPatterns
            exportAbstractPatterns()
-        elif job =='export':
-            patternPath = join(DATA_PATH,'actions','ExpressionStatement','3','0','0')
-            patterns = listdir(patternPath)
-            for pattern in patterns:
-                repo = pattern.split('_')[0]
-                file = pattern.replace(repo+'_','')
-                print(file)
-                filename = file.rsplit('_',1)[0]
-                print(join(DATA_PATH,'gumInput',repo,'DiffEntries',filename))
-                break
+

        else:
            logging.error('Unknown job %s',job)
@@ -3,50 +3,20 @@ DATA_PATH = os.environ["DATA_PATH"]
 ROOT = os.environ["ROOT_DIR"]
 INNER_DATA_PATH = join(ROOT,'data')
 PROJECT_TYPE = os.environ["PROJECT_TYPE"]
-def importToken():
-    # global dbDir, portInner, redis_db, pairs, cluster, action, e, idx, v, key
-    dbDir = join(INNER_DATA_PATH, 'redis')
-    portInner = '6380'
-    dbDir = join(INNER_DATA_PATH, 'redis')
-    startDB(dbDir, portInner, "clusterl2-gumInputALL.rdb")
-    import redis
-    pairsToken = join(DATA_PATH, 'pairsToken')
-    redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
-    pairs = get_filepaths(pairsToken, '.txt')
-    for pair in pairs:
-        split = pair.split("/")
+REDIS_PORT = os.environ["REDIS_PORT"]

-        shapeName = split[-4]
-        shapeSize = split[-3]
-        cluster = split[-2]
-        action = split[-1].replace('.txt', '')

-        # cmd ="bash " + join(DATA_PATH,'redisSingleImport.sh') + " " +  pair + " 6380 " +  shapeName + "-"+sizeCluster+"-"+actionCluster ;#+, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
-        cmd = "bash " + join(INNER_DATA_PATH,
-                             'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + shapeSize + "-" + cluster + "-" + action;  # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
-
-        o, e = shellGitCheckout(cmd)
-        o
-        indexFile = pair.replace('.txt', '.index')
-        with open(indexFile, 'r') as iFile:
-            idx = iFile.readlines()
-        for i in idx:
-            k, v = i.split(',')
-            key = shapeName + "-" + shapeSize + "-" + cluster + "-" + action + "-" + k
-            # redis_db.set(key, v.strip())
-            redis_db.hset('filenames', key, v.strip())
-
-def importAction(rootType):
+def importTokens():
    # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, shapeSize, cluster, cmd, o, e, indexFile, iFile, idx, i, k, v, key
    dbDir = join(INNER_DATA_PATH, 'redis')
    # portInner = '6380'
    # startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
-    portInner = '6399'
+    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
    # import pairs
-    pairsAction = join(DATA_PATH, 'pairsAction',rootType)
+    pairsAction = join(DATA_PATH, 'pairsToken')
    redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
    pairs = get_filepaths(pairsAction, '.txt')
    for pair in pairs:
@@ -55,7 +25,7 @@ def importAction(rootType):
        shapeSize = split[-2]
        cluster = split[-1].replace('.txt', '')
        cmd = "bash " + join(INNER_DATA_PATH,
-                             'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + shapeSize + "-" + cluster;  # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);
+                             'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + shapeSize + "-" + cluster;  # +, portInner,f.getName()+"-"+pair.getName().split("\\.")[0]);

        o, e = shellGitCheckout(cmd)
        print(o)
@@ -67,49 +37,17 @@ def importAction(rootType):
            key = shapeName + "-" + shapeSize + "-" + cluster + "-" + k
            # redis_db.set(key, v.strip())
            redis_db.hset('filenames', key, v.strip())
+    redis_db.set("level", "l2")


-# def importShape():
-#     # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
-#     dbDir = join(DATA_PATH, 'redis')
-#     portInner = '6380'
-#     startDB(dbDir, portInner, "clusterl1-gumInputALL.rdb")
-#     import redis
-#     pairsShapes = join(DATA_PATH, 'pairs')
-#     redis_db = redis.StrictRedis(host="localhost", port=portInner, db=1)
-#     pairs = get_filepaths(pairsShapes, '.index')
-#     l = []
-#     for pair in pairs:
-#         # split = pair.split("/")
-#         # shapeName = split[-2]
-#         # sizeCluster = split[-1].replace('.txt', '')
-#         # cmd = "bash " + join(DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6380 " + shapeName + "-" + sizeCluster;
-#         #
-#         # o, e = shellGitCheckout(cmd)
-#         # print(o)
-#         # indexFile = pair.replace('.txt', '.index')
-#         with open(pair, 'r') as iFile:
-#             idx = iFile.readlines()
-#             idx = [i.split(',')[1] for i in idx]
-#             l.append(idx)
-#     l = list(itertools.chain.from_iterable(l))
-#     l = [i for i in l if not (i.startswith('commons-math') or i.startswith('commons-lang') or i.startswith(
-#         'closure-compiler') or i.startswith('joda-time') or i.startswith('mockito') or i.startswith('jfreechart'))]
-#     l
-#
-#         # for i in idx:
-#         #     k, v = i.split(',')
-#         #     key = shapeName + "-" + sizeCluster + "-" + k
-#         #     redis_db.set(key, v.strip())

-
-def importShape():
+def importAction():
    # global dbDir, portInner, redis_db, pairs, pair, split, shapeName, cmd, o, e, indexFile, iFile, idx, i, k, v, key
    dbDir = join(INNER_DATA_PATH, 'redis')
    # portInner = '6380'
    # startDB(dbDir, portInner, "clusterl0-gumInputALL.rdb")

-    portInner = '6399'
+    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
@@ -120,7 +58,7 @@ def importShape():
        split = pair.split("/")
        shapeName = split[-2]
        sizeCluster = split[-1].replace('.txt', '')
-        cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " 6399 " + shapeName + "-" + sizeCluster;
+        cmd = "bash " + join(INNER_DATA_PATH, 'redisSingleImport.sh') + " " + pair + " "+REDIS_PORT+" " + shapeName + "-" + sizeCluster;

        o, e = shellGitCheckout(cmd)
        print(o)
@@ -132,43 +70,45 @@ def importShape():
            key = shapeName + "-" + sizeCluster + "-" + k
            #redis_db.set(key, v.strip())
            redis_db.hset('filenames',key,v.strip())
+    redis_db.set("level","l1")

 def tokenPairs():
-    global shapes, shape, sizes, clusters, cluster, actions, action, idx, val, pairs
+    # global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
    shapes = listdir(join(DATA_PATH, 'actions'))
    shapes = [f for f in shapes if isdir(join(DATA_PATH, 'actions', f))]
+
+    if os.path.exists(join(DATA_PATH, 'pairsToken')):
+        import shutil
+        shutil.rmtree(join(DATA_PATH, 'pairsToken'))
+    # shapes = [rootType]
    for shape in shapes:
        sizes = listdir(join(DATA_PATH, 'actions', shape))
        sizes = [f for f in sizes if isdir(join(DATA_PATH, 'actions', shape, f))]
        for sf in sizes:
            if sf.startswith('.'):
                continue
-            if sf == '1':
-                continue
            clusters = listdir(join(DATA_PATH, 'actions', shape, sf))
            for cluster in clusters:
                if cluster.startswith('.'):
                    continue
-                actions = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
-                for action in actions:
-
-                    files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster, action))
+                files = listdir(join(DATA_PATH, 'actions', shape, sf, cluster))
+                if len(files) > 1:
                    indexCompared = []
-                    if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf, cluster)):
-                        os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf, cluster))
+                    if not os.path.exists(join(DATA_PATH, 'pairsToken', shape, sf)):
+                        os.makedirs(join(DATA_PATH, 'pairsToken', shape, sf))

-                    infexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.index')
-                    if isfile(infexFile):
-                        test = pd.read_csv(infexFile, header=None, index_col=0)
+                    indexFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.index')
+                    if isfile(indexFile):
+                        test = pd.read_csv(indexFile, header=None, index_col=0)
                        test.rename(columns={1: 'filename'}, inplace=True)

                        newFiles = [i for i in files if i not in test.filename.values.tolist()]
                        for newFile in newFiles:
                            test = test.append(pd.DataFrame(columns=['filename'], data=[newFile]), ignore_index=True)
                        indexCompared = test.index.values.tolist()
-                        test.to_csv(infexFile, header=None)
+                        test.to_csv(indexFile, header=None)
                    else:
-                        with open(infexFile, 'w') as out:
+                        with open(indexFile, 'w') as out:
                            # csv_out = csv.writer(out)

                            for idx, val in enumerate(files):
@@ -176,7 +116,8 @@ def tokenPairs():
                                indexCompared.append(str(idx))

                    pairs = list(itertools.combinations(indexCompared, 2))
-                    pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster, action + '.txt')
+
+                    pairsFile = join(DATA_PATH, 'pairsToken', shape, sf, cluster + '.txt')
                    if isfile(pairsFile):
                        test = pd.read_csv(pairsFile, header=None)
                        test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
@@ -193,69 +134,12 @@ def tokenPairs():
                                a, b = row
                                out.write(a + ',' + b + '\n')

-def actionPairs(rootType):
-    # global shapes, shape, sizes, sf, clusters, cluster, files, indexCompared, out, idx, val, pairs, row, a, b
-    # shapes = listdir(join(DATA_PATH, 'shapes'))
-    # shapes = [f for f in shapes if isdir(join(DATA_PATH, 'shapes', f))]
-    shapes = [rootType]
-    for shape in shapes:
-        sizes = listdir(join(DATA_PATH, 'shapes', shape))
-        sizes = [f for f in sizes if isdir(join(DATA_PATH, 'shapes', shape, f))]
-        for sf in sizes:
-            if sf.startswith('.'):
-                continue
-            clusters = listdir(join(DATA_PATH, 'shapes', shape, sf))
-            for cluster in clusters:
-                if cluster.startswith('.'):
-                    continue
-                files = listdir(join(DATA_PATH, 'shapes', shape, sf, cluster))
-                indexCompared = []
-                if not os.path.exists(join(DATA_PATH, 'pairsAction', shape, sf)):
-                    os.makedirs(join(DATA_PATH, 'pairsAction', shape, sf))
-
-                indexFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.index')
-                if isfile(indexFile):
-                    test = pd.read_csv(indexFile, header=None, index_col=0)
-                    test.rename(columns={1: 'filename'}, inplace=True)
-
-                    newFiles = [i for i in files if i not in test.filename.values.tolist()]
-                    for newFile in newFiles:
-                        test = test.append(pd.DataFrame(columns=['filename'], data=[newFile]), ignore_index=True)
-                    indexCompared = test.index.values.tolist()
-                    test.to_csv(indexFile, header=None)
-                else:
-                    with open(indexFile, 'w') as out:
-                        # csv_out = csv.writer(out)
-
-                        for idx, val in enumerate(files):
-                            out.write(str(idx) + ',' + val + '\n')
-                            indexCompared.append(str(idx))
-
-                pairs = list(itertools.combinations(indexCompared, 2))
-
-                pairsFile = join(DATA_PATH, 'pairsAction', shape, sf, cluster + '.txt')
-                if isfile(pairsFile):
-                    test = pd.read_csv(pairsFile, header=None)
-                    test['pairs'] = test.apply(lambda x: tuple([x[0], x[1]]), axis=1)
-                    newPairs = [i for i in pairs if i not in test['pairs'].values.tolist()]
-                    with open(pairsFile, 'w') as out:
-                        # csv_out = csv.writer(out)
-                        for row in newPairs:
-                            a, b = row
-                            out.write(a + ',' + b + '\n')
-                else:
-                    with open(pairsFile, 'w') as out:
-                        # csv_out = csv.writer(out)
-                        for row in pairs:
-                            a, b = row
-                            out.write(a + ',' + b + '\n')
-
-def shapePairs():
+def actionPairs():
    # global dbDir, portInner, redis_db, keys, matches, roots, sizes, sf, files, indexCompared, out, idx, val, pairs, row, a, b
    # if not (isfile(join(DATA_PATH, 'studyDataset.pickle'))):
    dbDir = join(INNER_DATA_PATH, 'redis')

-    portInner = '6399'
+    portInner = REDIS_PORT
    startDB(dbDir, portInner, PROJECT_TYPE)

    import redis
@@ -1,12 +1,13 @@
 from common.commons import *
 DATA_PATH = os.environ["DATA_PATH"]
 PROJECT_TYPE = os.environ["PROJECT_TYPE"]
+REDIS_PORT = os.environ["REDIS_PORT"]

 def statsNormal(isFixminer=True):
    # tokens = join(DATA_PATH, 'tokens')
    # actions = join(DATA_PATH, 'actions')
    import redis
-    redis_db = redis.StrictRedis(host="localhost", port=6399, db=0)
+    redis_db = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)
    # keys = redis_db.scan(0, match='*', count='1000000')
    keys = redis_db.hkeys("dump")  # hkeys "dump"
    matches = pd.DataFrame(keys, columns=['pairs_key'])
@@ -95,7 +96,7 @@ def statsNormal(isFixminer=True):


    # for type in ['tokens', 'actions', 'shapes']:
-    for type in ['shapes']:
+    for type in ['actions']:
        statsS,clusterDF = stats(type,isFixminer)
        if isFixminer:
            clusterDF = clusterDF[clusterDF.members.str.len() > 1]
@@ -136,25 +137,7 @@ def statsNormal(isFixminer=True):
        matches
        if isFixminer:
            matches.to_csv(join(DATA_PATH, "stats" + type + ".csv"), index=False)
-            if type == 'actions':
-                clusterDF['ms'] = clusterDF.members.str.len()
-                clusterDF.sort_values(by='ms', ascending=False, inplace=True)
-                top50 = clusterDF.head(50)
-                top50['member'] = top50.members.apply(lambda x: x[0])
-                top50['cid'] = top50.cid.apply(lambda x: x[0])
-                top50['path'] = top50.apply(lambda x:x['cid'].replace('-','/')+'/'+x['member'],axis=1)
-                def readFile(x):
-                    with open(join(DATA_PATH,'actions',x), 'r', encoding='utf-8') as writeFile:
-                        lines = writeFile.read()
-                        return lines
-                        # if lines.startswith('UPD'):
-                        #     return lines
-                        # else:
-                        #     return ''
-                    # return lines

-                top50['pattern'] = top50.path.apply(lambda x:readFile(x))
-                top50[['cid','pattern']].to_csv('actionsPattern2verify.csv',index=False,header=None)



@@ -253,7 +236,7 @@ def stats(type,isFixminer=True):
                    continue
                cs = listdir(join(shapesPath, shape, size, cluster))

-                if shapesPath.endswith('shapes'):
+                if shapesPath.endswith('actions'):
                    cs = listdir(join(shapesPath, shape, size, cluster))
                    statsCore(cs)
                else:
@@ -262,14 +245,14 @@ def stats(type,isFixminer=True):
                        if action.startswith('.'):
                            continue
                        tokens = listdir(join(shapesPath, shape, size, cluster, action))
-                        if shapesPath.endswith('actions'):
+                        if shapesPath.endswith('tokens'):
                            statsCore(tokens)
-                        else:
-                            for token in tokens:
-                                if token.startswith('.'):
-                                    continue
-                                cs = listdir(join(shapesPath, shape, size, cluster, action, token))
-                                statsCore(cs)
+                        # else:
+                        #     for token in tokens:
+                        #         if token.startswith('.'):
+                        #             continue
+                        #         cs = listdir(join(shapesPath, shape, size, cluster, action, token))
+                        #         statsCore(cs)
    return statsS,clustersDF


@@ -282,7 +265,7 @@ def defects4jStats(isFixminer=False):
        mapping.rename(columns={0: 'repo', 1: "commit", 2: 'defects4jID'}, inplace=True)
        dbDir = join(DATA_PATH, 'redis')

-        portInner = '6399'
+        portInner = REDIS_PORT
        startDB(dbDir, portInner, PROJECT_TYPE )

        import redis
@@ -474,8 +457,8 @@ cAst = ["unit","comment","literal","operator","modifier","name","type","conditio


 def exportAbstractPatterns():
-    clusterStats,df = stats('shapes')
-    port = 6399
+    clusterStats,df = stats('actions')
+    port = REDIS_PORT
    import redis
    redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
    isJava = False
@@ -1,5 +1,5 @@
 from common.commons import *
-
+REDIS_PORT = os.environ["REDIS_PORT"]
 DATA_PATH = os.environ["DATA_PATH"]
 ast = ["AnonymousClassDeclaration", "ArrayAccess", "ArrayCreation", "ArrayInitializer", "ArrayType", "AssertStatement",
       "Assignment", "Block", "BooleanLiteral", "BreakStatement", "CastExpression", "CatchClause", "CharacterLiteral",
@@ -31,7 +31,7 @@ redis_db = redis.StrictRedis(host="localhost", port=port, db=0)
 redis_db1 = redis.StrictRedis(host="localhost", port=port, db=1)
 redis_db2 = redis.StrictRedis(host="localhost", port=port, db=2)

-redis_out  = redis.StrictRedis(host="localhost", port=6399, db=0)
+redis_out  = redis.StrictRedis(host="localhost", port=REDIS_PORT, db=0)

 def getTokens(prefix, i):
    dist2load = redis_db1.get(prefix + "-" + i);