-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCreate_BayesPrism_Level_2_annotations
98 lines (88 loc) · 5.21 KB
/
Create_BayesPrism_Level_2_annotations
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
setwd("/gpfs3/well/immune-rep/users/wph959/cibersort/")
library(Seurat)
library(BayesPrism)
#input 1 - bulk RNA seq (rows as samples, columns as genes)
x <- read.delim("/gpfs3/well/immune-rep/users/wph959/pan-cancer/PDAC_raw_unique_GEX.txt", header=T)
rownames(x) <- x$gene
x$gene <- NULL
x <- t(x)
merged = readRDS("/gpfs3/well/immune-rep/users/wph959/cibersort/merged_all_cells_PDAC150K_PENGSTEELE.rds")
[email protected]$Level_2 <- NA
#[email protected]$Level_2[!([email protected]$Level_2 %in% c("B cell naive", "B cell ASC"))] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell activated pre-memory", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell GC", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell LZ GC", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell memory", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell MZ", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell GZMB", merged$refined_annotations)] <- "B cell antigen experienced"
[email protected]$Level_2[grep("B cell naive", merged$refined_annotations)] <- "B cell naive"
[email protected]$Level_2[grep("B cell plasma", merged$refined_annotations)] <- "B cell ASC"
[email protected]$Level_2[grep("DC CD207", merged$refined_annotations)] <- "DC DC2"
[email protected]$Level_2[grep("DC cDC", merged$refined_annotations)] <- "DC DC2"
[email protected]$Level_2[grep("DC CLEC9A", merged$refined_annotations)] <- "DC DC1"
[email protected]$Level_2[grep("DC DC2", merged$refined_annotations)] <- "DC DC2"
[email protected]$Level_2[grep("DC pDC", merged$refined_annotations)] <- "DC pDC"
[email protected]$Level_2[grep("DC moDC", merged$refined_annotations)] <- "DC moDC"
[email protected]$Level_2[grep("DC mreg", merged$refined_annotations)] <- "DC mregDC"
[email protected]$Level_2[grep("monocyte", merged$refined_annotations)] <- "Monocytes"
[email protected]$Level_2[grep("momac", merged$refined_annotations)] <- "Momac"
[email protected]$Level_2[grep("granulocyte", merged$refined_annotations)] <- "granulocyte"
[email protected]$Level_2[grep("Mast", merged$refined_annotations)] <- "Mast"
[email protected]$Level_2[grep("megakaryocyte", merged$refined_annotations)] <- "megakaryocyte"
[email protected]$Level_2[grep("CD4", merged$refined_annotations)] <- "CD4"
[email protected]$Level_2[grep("CD8", merged$refined_annotations)] <- "CD8"
[email protected]$Level_2[grep("gdT", merged$refined_annotations)] <- "gdT"
[email protected]$Level_2[grep("MAIT", merged$refined_annotations)] <- "MAIT"
[email protected]$Level_2[grep("NK", merged$refined_annotations)] <- "NK-like cell"
[email protected]$Level_2[grep("ILC", merged$refined_annotations)] <- "ILC"
[email protected]$Level_2[grep("Acinar", merged$cell_type_refined)] <- "Acinar"
[email protected]$Level_2[grep("Centroacinar", merged$cell_type_refined)] <- "Centroacinar"
[email protected]$Level_2[grep("Ductal", merged$cell_type_refined)] <- "Epithelial"
[email protected]$Level_2[grep("Endocrine", merged$cell_type_refined)] <- "Endocrine"
[email protected]$Level_2[grep("Fibroblast", merged$cell_type_refined)] <- "Fibroblast"
[email protected]$Level_2[grep("Endothelial", merged$cell_type_refined)] <- "Endothelial cell"
[email protected]$Level_2[grep("Stellate", merged$cell_type_refined)] <- "Stellate cell"
#[email protected]$Level_2[!([email protected]$Level_2 %in% c("DC moDC", "DC pDC"))] <- "DC"
table([email protected]$Level_2)
set.seed(120)
n <- 50000
total_cells <- ncol(merged@assays$RNA@counts)
include <- sample(1:total_cells, n)
include <- include[!is.na([email protected]$Level_2[include])]
while (length(include) < n) {
additional_cells <- sample(setdiff(1:total_cells, include), n - length(include))
include <- c(include, additional_cells[!is.na([email protected]$Level_2[additional_cells])])
}
new <- merged@assays$RNA@counts[, include]
metadata <- [email protected][include, ]
metadata$state <- paste(metadata$orig.ident, sep="_", metadata$Level_2)
cell.state.labels<- metadata$state
state_counts <- table(metadata$state)
valid_values <- names(state_counts[state_counts >= 50])
metadata <- metadata[metadata$state %in% valid_values, ]
cell.state.labels<- metadata$state
cell.type.labels <- metadata$Level_2
new <- new[,colnames(new) %in% rownames(metadata)]
sc.dat <- as.data.frame(new)
sc.dat <- t(sc.dat)
sc.dat <- as.data.frame(sc.dat)
sc.dat.filtered <- cleanup.genes (input=sc.dat,
input.type="count.matrix",
species="hs",
gene.group=c( "Rb","Mrp","other_Rb","chrM","MALAT1","chrX","chrY") ,
exp.cells=5)
myPrism <- new.prism(
reference=sc.dat.filtered,
mixture=x,
input.type="count.matrix",
cell.type.labels = cell.type.labels,
cell.state.labels = cell.state.labels,
key=NULL,
outlier.cut=0.01,
outlier.fraction=0.1,
)
bp.res <- run.prism(prism = myPrism, n.cores=10)
save(bp.res, file="/gpfs3/well/immune-rep/users/wph959/cibersort/all_cells_Level_2.rdata")
theta <- get.fraction (bp=bp.res,
which.theta="final",
state.or.type="type")