-
Notifications
You must be signed in to change notification settings - Fork 1
/
custom_spectral_biclustering.py
66 lines (45 loc) · 1.99 KB
/
custom_spectral_biclustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from scipy.sparse import csgraph
import numpy as np
import scipy as sp
import math
from scipy.sparse.linalg import svds
from scipy.sparse import dia_matrix
import custom_iterative_k_means as ck
def rearrange(labels, A, k):
#matrix of clusters
clusters = [[] for i in range(k)]
#feed result with A rows to build clusters
for i in range(len(A)):
clusters[labels[i]].append(A[i])
#merge clusters to create a matrix
A_rearranged = [item for sublist in clusters for item in sublist]
return A_rearranged
#inspired by sklearn library https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/cluster/bicluster.py
def custom_spectral_biclustering(A, k, k_means_iterations):
n_rows = len(A)
n_cols = len(A[0])
row_diag = np.asarray(1.0 / np.sqrt(A.sum(axis=1))).squeeze()
col_diag = np.asarray(1.0 / np.sqrt(A.sum(axis=0))).squeeze()
#check if there is no nan value because of the division of the previoius operation
row_diag = np.where(np.isnan(row_diag), 0, row_diag)
col_diag = np.where(np.isnan(col_diag), 0, col_diag)
# #Compute diagonal degree matrix
D1 = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows))
D2 = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols))
A_norm = D1 * A * D2
num_of_pcs = 1 + int(np.ceil(np.log2(k)))
#Compute SVD of A_norm
u, s, v = svds(A_norm, k=num_of_pcs)
z = np.vstack((row_diag[:, np.newaxis] * u[:, 1:], col_diag[:, np.newaxis] * v.T[:, 1:]))
#apply k means
labels = ck.custom_iterative_kmeans(z, k, k_means_iterations)
row_labels_ = labels[:n_rows]
column_labels_ = labels[n_rows:]
#compute biclusters to calculate consensus score
biclusters_rows = np.vstack(row_labels_ == c for c in range(k))
biclusters_cols = np.vstack(column_labels_ == c for c in range(k))
#rearrange rows and cols or data matrix according to the clusters
A_rearranged = rearrange(row_labels_, A, k)
A_rearranged = np.transpose(A_rearranged)
A_rearranged = rearrange(column_labels_, A_rearranged, k)
return np.transpose(A_rearranged), biclusters_rows, biclusters_cols