|
-
- import numpy as np
- from scipy.sparse.linalg import svds
- from sklearn import cluster
- from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
- from sklearn.preprocessing import normalize
- from munkres import Munkres
-
-
-
def best_map(L1, L2):
    """Relabel the clustering ``L2`` to agree with ``L1`` as much as possible.

    A one-to-one assignment between the distinct labels of ``L2`` and the
    distinct labels of ``L1`` is chosen so that the number of samples on which
    the two labelings coincide is maximal (Hungarian / linear sum assignment).

    Parameters
    ----------
    L1 : array_like
        Ground-truth labels (numeric).
    L2 : array_like
        Cluster labels produced by the algorithm, same number of entries as
        ``L1``.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``L2`` holding the relabeled clustering.
        When ``L2`` contains more distinct labels than ``L1``, the clusters
        left without a ground-truth partner receive fresh labels
        ``max(L1) + 1, max(L1) + 2, ...`` so that they never coincide with a
        ground-truth class and stay distinct from each other.

    Raises
    ------
    ValueError
        If ``L1`` and ``L2`` do not contain the same number of labels.

    Notes
    -----
    When several assignments are equally good, which one is returned depends
    on the solver; the number of agreeing samples is the same for all of them.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import block.
    from scipy.optimize import linear_sum_assignment

    L1 = np.asarray(L1)
    L2 = np.asarray(L2)
    if L1.size != L2.size:
        raise ValueError("L1 and L2 must contain the same number of labels")
    if L2.size == 0:
        return np.zeros(L2.shape)

    Label1, pos1 = np.unique(L1, return_inverse=True)
    Label2, pos2 = np.unique(L2, return_inverse=True)
    # ravel(): the shape of the inverse index differs between NumPy versions.
    pos1 = pos1.ravel()
    pos2 = pos2.ravel()

    # overlap[j, i] = number of samples in cluster Label2[j] and class Label1[i]
    overlap = np.zeros((Label2.size, Label1.size))
    np.add.at(overlap, (pos2, pos1), 1)

    # Maximising the overlap == minimising its negation. The matrix may be
    # rectangular; only min(#clusters, #classes) pairs are returned.
    rows, cols = linear_sum_assignment(-overlap)

    mapped = np.zeros(Label2.size)
    mapped[rows] = Label1[cols]

    # Clusters without a partner (more clusters than classes) previously
    # caused an out-of-range index into Label1; give them unused labels.
    unmatched = np.setdiff1d(np.arange(Label2.size), rows)
    if unmatched.size:
        mapped[unmatched] = Label1.max() + 1 + np.arange(unmatched.size)

    return mapped[pos2].reshape(L2.shape)
-
def thrC(C, ro):
    """Keep, per column, the largest-magnitude entries of ``C``.

    For every column the entries are visited in order of decreasing absolute
    value and kept until their cumulative absolute sum strictly exceeds
    ``ro`` times the column's total absolute sum; all other entries are set
    to zero.

    Parameters
    ----------
    C : numpy.ndarray
        Coefficient matrix; the output is allocated as ``(N, N)`` with
        ``N = C.shape[1]``, so ``C`` is expected to be square.
    ro : float
        Fraction of each column's L1 mass to retain. When ``ro`` is not
        strictly below 1 no thresholding is done.

    Returns
    -------
    numpy.ndarray
        The thresholded float matrix, or ``C`` itself (not a copy) when
        ``ro`` is not below 1.
    """
    if not (ro < 1):
        return C

    N = C.shape[1]
    Cp = np.zeros((N, N))
    magnitude = np.abs(C)
    # Row indices of each column ordered by decreasing magnitude.
    Ind = np.argsort(-magnitude, axis=0)
    S = np.take_along_axis(magnitude, Ind, axis=0)

    for i in range(N):
        column = S[:, i]
        threshold = ro * float(np.sum(column))
        reached = np.flatnonzero(np.cumsum(column) > threshold)
        if reached.size == 0:
            # The threshold is never exceeded (e.g. an all-zero column): the
            # original unbounded loop ran off the end of the column here.
            # Leave the column at zero.
            continue
        keep = Ind[: reached[0] + 1, i]
        Cp[keep, i] = C[keep, i]

    return Cp
-
def build_aff(C):
    """Build a symmetric affinity matrix from a coefficient matrix.

    Each column of ``|C|`` is divided by its largest entry (plus ``1e-6`` to
    avoid dividing by zero), and the result is symmetrised by adding its
    transpose.

    Parameters
    ----------
    C : numpy.ndarray
        Square coefficient matrix. It is not modified.

    Returns
    -------
    numpy.ndarray
        ``A + A.T`` where ``A`` is the column-normalised ``|C|``.
    """
    Cabs = np.abs(C)
    # Integer (or bool) input would otherwise truncate the normalised values
    # to 0 when they are stored back; floating dtypes are kept as they are.
    if not np.issubdtype(Cabs.dtype, np.floating):
        Cabs = Cabs.astype(float)

    if Cabs.size:
        # Column-wise maximum. fmax ignores NaN entries the same way the
        # former "first index of a descending argsort" lookup did.
        col_peak = np.fmax.reduce(Cabs, axis=0)
        Cabs = Cabs / (col_peak + 1e-6)

    return Cabs + Cabs.T
-
def post_proC(C, K, d=6, alpha=8):
    """Turn a coefficient matrix into cluster labels via spectral clustering.

    Parameters
    ----------
    C : numpy.ndarray
        Square coefficient matrix.
    K : int
        Number of clusters.
    d : int, optional
        Dimension of each subspace; ``d * K + 1`` singular components are
        requested from the truncated SVD.
    alpha : int or float, optional
        Exponent applied element-wise to the non-negative part of the
        similarity matrix.

    Returns
    -------
    grp : numpy.ndarray
        Cluster label of every sample, numbered from 1.
    L : numpy.ndarray
        The symmetric affinity matrix that was passed to the clusterer.
    """
    C = 0.5 * (C + C.T)

    # svds requires the rank to stay strictly below the matrix size; clamp so
    # small inputs do not fail.
    r = min(d * K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))

    # Reverse the component order returned by svds, then scale every singular
    # vector by the square root of its singular value (same result as
    # multiplying by diag(sqrt(S))).
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    U = U * S
    U = normalize(U, norm='l2', axis=1)

    Z = U.dot(U.T)
    Z = Z * (Z > 0)  # drop negative similarities
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)

    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize')
    # fit_predict already fits the model; a separate fit() call beforehand
    # only repeated the whole computation.
    grp = spectral.fit_predict(L) + 1
    return grp, L
-
def err_rate(gt_s, s):
    """Score a clustering against ground-truth labels.

    The predicted labels ``s`` are first aligned to ``gt_s`` with
    ``best_map``; the scores are then computed on the aligned labels.

    Parameters
    ----------
    gt_s : numpy.ndarray
        Ground-truth labels.
    s : numpy.ndarray
        Predicted cluster labels.

    Returns
    -------
    tuple
        ``(missrate, nmi, ari)``: the fraction of samples whose aligned label
        differs from the ground truth, the normalized mutual information and
        the adjusted Rand index.
    """
    aligned = best_map(gt_s, s)
    truth = gt_s[:]
    predicted = aligned[:]

    mismatches = np.sum(truth != predicted)
    nmi = normalized_mutual_info_score(truth, predicted)
    ari = adjusted_rand_score(truth, predicted)

    n_samples = gt_s.shape[0]
    missrate = mismatches.astype(float) / n_samples
    return missrate, nmi, ari
-
def build_laplacian(C):
    """Return the degree-normalised symmetric affinity ``D^-1 A``.

    ``A = 0.5 * (|C| + |C|.T)`` and ``D`` is the diagonal matrix of the column
    sums of ``A``. Note that, despite the function name, the value returned
    is ``D^-1 A`` itself, not ``I - D^-1 A``.

    Parameters
    ----------
    C : numpy.ndarray
        Square coefficient matrix.

    Returns
    -------
    numpy.ndarray
        Matrix whose row ``i`` is row ``i`` of ``A`` divided by its degree.
        Rows with zero degree are returned as all zeros instead of NaN.
    """
    A = 0.5 * (np.abs(C) + np.abs(C.T))
    degree = np.sum(A, axis=0)

    # Reciprocal degree, leaving 0 where the degree is 0 so isolated rows do
    # not trigger a division by zero.
    inv_degree = np.zeros(degree.shape, dtype=A.dtype)
    np.divide(1.0, degree, out=inv_degree, where=degree != 0)

    # Scaling row i by inv_degree[i] equals diag(inv_degree) @ A without
    # materialising the dense diagonal matrix or paying for a full matmul.
    return inv_degree[:, None] * A
|