Merge Python scripts
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
from common.commons import *
|
||||
# Root directory for all pipeline data (shapes, pairs, indexes).
# Read at import time; raises KeyError if the variable is not set.
DATA_PATH = os.environ["DATA_PATH"]
|
||||
|
||||
def core():
    """Build the shape-cluster representative map, then create and import shape pairs.

    Scans DATA_PATH/shapes/<root>/<size>/<cluster>/ and records the first
    member of every cluster as its representative.  The candidate pair list
    from pairs.shapePairs() is then filtered down to files that are either
    cluster representatives or do not belong to one of the hard-coded large
    projects (those are only sampled through their representatives), and the
    result is fed to the pair-creation / import pipeline.
    """
    clusterPath = join(DATA_PATH, 'shapes')

    # Skip hidden entries and pickled caches that live next to the root dirs.
    roots = [i for i in listdir(clusterPath)
             if not (i.startswith('.') or i.endswith('.pickle'))]

    # pattern maps 'root/size/cluster' -> first (representative) member file.
    pattern = {}
    for root in roots:
        for size in listdir(join(clusterPath, root)):
            # actions level intentionally skipped here (see cluster()).
            for clusterName in listdir(join(clusterPath, root, size)):
                members = listdir(join(clusterPath, root, size, clusterName))
                pattern[root + '/' + size + '/' + clusterName] = members[0]

    from pairs import shapePairs
    matches = shapePairs()

    # Precompute once: a set gives O(1) membership per row, and
    # str.startswith accepts a tuple, replacing the long or-chain.
    representatives = set(pattern.values())
    bigProjectPrefixes = ('linux_', 'FFmpeg_', 'curl_', 'nginx_',
                          'openssl_', 'redis_', 'tmux_', 'vlc_')
    matches = matches[matches.file.apply(
        lambda x: x in representatives or not x.startswith(bigProjectPrefixes))]

    from pairs import createPairs
    createPairs(matches)

    from pairs import importShape
    importShape()
|
||||
|
||||
def checkWrongMembers():
    """Print shape clusters whose member dump files differ in on-disk size.

    All members of a cluster are expected to be identical dumps, so more
    than one distinct file size within a cluster flags a wrong member; the
    cluster directory and one member per distinct size are printed for
    manual inspection.  Purely diagnostic — nothing is modified.
    """
    clusterPath = join(DATA_PATH, 'shapes')

    # Skip hidden entries and pickled caches that live next to the root dirs.
    roots = [i for i in listdir(clusterPath)
             if not (i.startswith('.') or i.endswith('.pickle'))]

    for root in roots:
        for size in listdir(join(clusterPath, root)):
            for clusterName in listdir(join(clusterPath, root, size)):
                members = listdir(join(clusterPath, root, size, clusterName))
                # Map file size -> one member of that size; duplicates of the
                # same size collapse, so >1 key means sizes disagree.
                sizeDict = {
                    os.path.getsize(join(clusterPath, root, size, clusterName, m)): m
                    for m in members
                }
                if len(sizeDict) > 1:
                    print(join(clusterPath, root, size, clusterName))
                    print(sizeDict.values())
|
||||
|
||||
def cluster():
    """Re-cluster shape pairs while preserving existing cluster assignments.

    Rebuilds the representative-member map from DATA_PATH/shapes, then for
    every project root loads its shape-pair matches via
    abstractPatch.loadPairMulti and clusters each size group with
    clusterCore(), which reuses existing cluster ids where possible.
    """
    clusterPath = join(DATA_PATH, 'shapes')

    # Skip hidden entries and pickled caches that live next to the root dirs.
    roots = [i for i in listdir(clusterPath)
             if not (i.startswith('.') or i.endswith('.pickle'))]

    # pattern maps 'root/size/cluster' -> first (representative) member file.
    # NOTE: loop variable renamed from 'cluster' — the original shadowed this
    # function's own name.
    pattern = {}
    for root in roots:
        for size in listdir(join(clusterPath, root)):
            for clusterName in listdir(join(clusterPath, root, size)):
                members = listdir(join(clusterPath, root, size, clusterName))
                pattern[root + '/' + size + '/' + clusterName] = members[0]

    pairsPath = join(DATA_PATH, 'pairs')
    from abstractPatch import loadPairMulti
    for root in roots:
        matches = loadPairMulti(root, '', 'shapes')
        for s in matches['sizes'].unique().tolist():
            match = matches[matches['sizes'] == s]
            clusterCore(pattern, clusterPath, 'shapes', match, pairsPath, root, s, '')
|
||||
|
||||
def clusterCore(pattern, clusterPath, level, match, pairsPath, root, s, action, token=''):
    """Group paired files into clusters via graph connectivity and dump them.

    Builds an undirected graph from the pair tuples in ``match``; each
    connected component is one cluster.  Node ids are mapped back to file
    paths through the on-disk ``.index`` file for this (root, size[, action,
    token]).  A component containing a known representative from ``pattern``
    keeps that representative's existing cluster id; otherwise the next free
    numeric id (continuing after clusters already on disk) is allocated.
    Finally every member is dumped in parallel via dumpFilesCore.

    pattern     : dict mapping 'root/size/cluster' -> representative member
    clusterPath : base directory of the shapes tree
    level       : 'actions', 'shapes', or token level — selects the index file
    match       : DataFrame with a 'tuples' column of node-id pairs
    """
    col_combi = match.tuples.values.tolist()

    import networkx
    g = networkx.Graph(col_combi)

    # One node-id list per connected component.
    # FIX: networkx.connected_component_subgraphs() was removed in
    # networkx 2.4; connected_components() yields the same node sets.
    cluster = []
    for component in networkx.connected_components(g):
        logging.info('Cluster size %d', len(component))
        cluster.append(list(component))

    # Locate the node-id -> file-path index for this level.
    if level == 'actions':
        indexFile = join(pairsPath, root, s, action + '.index')
    elif level == 'shapes':
        indexFile = join(pairsPath, root, s + '.index')
    else:
        indexFile = join(pairsPath, root, s, action, token + '.index')
    df = pd.read_csv(indexFile, header=None, usecols=[0, 1], index_col=[0])
    pathMapping = df.to_dict()

    workList = []
    exportCLusters = {}

    # Continue numbering after clusters already exported on disk.
    if not os.path.exists(join(clusterPath, root, s)):
        print()
        existingClusters = 0
    else:
        existingClusters = len(listdir(join(clusterPath, root, s)))

    for clus in cluster:
        members = [pathMapping[1][int(i)] for i in clus]
        # Known representatives for this root/size keep cluster ids stable
        # across runs.
        potentialClusters = [(key, value) for key, value in pattern.items()
                             if key.startswith(root + '/' + s)]
        foundExisting = False
        for pc, pcMember in potentialClusters:
            if pcMember in members:
                foundExisting = True
                # Reuse the numeric id at the end of 'root/size/cluster'.
                exportCLusters[pc.split('/')[-1]] = members
        if not foundExisting:
            exportCLusters[existingClusters] = members
            existingClusters = existingClusters + 1

    for k, v in exportCLusters.items():
        for f in v:
            workList.append((f, root, level, clusterPath, s, action, token, k))

    from abstractPatch import dumpFilesCore
    parallelRun(dumpFilesCore, workList)
|
||||
Reference in New Issue
Block a user