-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocess.py
executable file
·143 lines (115 loc) · 4.72 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import torch
import random
import numpy as np
import scanpy as sc
import scipy.sparse as sp
from torch.backends import cudnn
from scipy.sparse.csc import csc_matrix
from scipy.sparse.csr import csr_matrix
from sklearn.neighbors import NearestNeighbors
def permutation(feature):
# fix_seed(FLAGS.random_seed)
ids = np.arange(feature.shape[0])
ids = np.random.permutation(ids)
feature_permutated = feature[ids]
return feature_permutated
def construct_interaction(adata, n_neighbors=3):
import ot
"""Constructing spot-to-spot interactive graph"""
position = adata.obsm['spatial']
# calculate distance matrix
distance_matrix = ot.dist(position, position, metric='euclidean')
n_spot = distance_matrix.shape[0]
adata.obsm['distance_matrix'] = distance_matrix
# find k-nearest neighbors
interaction = np.zeros([n_spot, n_spot])
for i in range(n_spot):
vec = distance_matrix[i, :]
distance = vec.argsort()
for t in range(1, n_neighbors + 1):
y = distance[t]
interaction[i, y] = 1
adata.obsm['graph_neigh'] = interaction
#transform adj to symmetrical adj
adj = interaction
adj = adj + adj.T
adj = np.where(adj>1, 1, adj)
adata.obsm['adj'] = adj
def construct_interaction_KNN(adata, n_neighbors=3):
position = adata.obsm['spatial']
n_spot = position.shape[0]
nbrs = NearestNeighbors(n_neighbors=n_neighbors+1).fit(position)
_ , indices = nbrs.kneighbors(position)
x = indices[:, 0].repeat(n_neighbors)
y = indices[:, 1:].flatten()
interaction = np.zeros([n_spot, n_spot])
interaction[x, y] = 1
adata.obsm['graph_neigh'] = interaction
#transform adj to symmetrical adj
adj = interaction
adj = adj + adj.T
adj = np.where(adj>1, 1, adj)
adata.obsm['adj'] = adj
print('Graph constructed!')
def preprocess(adata, min_cells=None, n_top_genes=3000, max_value=10, dataset=None):
print("preprocessing " + dataset)
if min_cells is not None:
sc.pp.filter_genes(adata, min_cells=min_cells)
print(f'filter mincells={min_cells}, shape {adata.shape}')
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=n_top_genes)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.scale(adata, zero_center=False, max_value=max_value)
return adata
def get_feature(adata, deconvolution=False):
if deconvolution:
adata_Vars = adata
else:
adata_Vars = adata[:, adata.var['highly_variable']]
if isinstance(adata_Vars.X, csc_matrix) or isinstance(adata_Vars.X, csr_matrix):
feat = adata_Vars.X.toarray()[:, ]
else:
feat = adata_Vars.X[:, ]
# data augmentation
feat_a = permutation(feat)
adata.obsm['feat'] = feat
adata.obsm['feat_a'] = feat_a
def normalize_adj(adj):
"""Symmetrically normalize adjacency matrix."""
adj = sp.coo_matrix(adj)
rowsum = np.array(adj.sum(1))
d_inv_sqrt = np.power(rowsum, -0.5).flatten()
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
adj = adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt)
return adj.toarray()
def preprocess_adj(adj):
"""Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation."""
adj_normalized = normalize_adj(adj)+np.eye(adj.shape[0])
return adj_normalized
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
"""Convert a scipy sparse matrix to a torch sparse tensor."""
sparse_mx = sparse_mx.tocoo().astype(np.float32)
indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
values = torch.from_numpy(sparse_mx.data)
shape = torch.Size(sparse_mx.shape)
return torch.sparse.FloatTensor(indices, values, shape)
def preprocess_adj_sparse(adj):
adj = sp.coo_matrix(adj)
adj_ = adj + sp.eye(adj.shape[0])
rowsum = np.array(adj_.sum(1))
degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())
adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()
return sparse_mx_to_torch_sparse_tensor(adj_normalized)
def fix_seed(seed):
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'