-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNormalization.R
170 lines (132 loc) · 7.5 KB
/
Normalization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
##################################################################################
###### Main Script ######
##################################################################################
################### Load all required libraries ########################
# Attempt to install and load the 'vegan' package
tryCatch({
# webr::shim_install()
# webr::install("vegan")
library("vegan")
}, error = function(e) {
cat("An error occurred:", e$message, "\n")
})
################### Read all required input files ####################
# Load the tab-delimited file containing the values to be be checked (rownames in the first column)
otu_table <- read.table(file_name, check.names = FALSE, header = TRUE, dec = ".", sep = "\t", row.names = 1, comment.char = "")
if (dim(otu_table)[1] == 0) {
stop("OTU table is empty or not properly formatted.")
} else {
print(sprintf("OTU table loaded with %d rows and %d columns.", nrow(otu_table), ncol(otu_table)))
}
# Making sure tha taxonomy column gets lower-case
col_names <- colnames(otu_table)
tax_ind <- which(sapply(tolower(col_names),
function(x) "taxonomy" %in% x, USE.NAMES = FALSE))
if (length(tax_ind) != 0) {
col_names[tax_ind] <- tolower(col_names[tax_ind])
colnames(otu_table) <- col_names
}
rm(col_names)
rm(tax_ind)
# Clean table from empty lines
otu_table <- otu_table[!apply(is.na(otu_table) | otu_table=="",1,all),]
#################### Normalize OTU Table ###################
# Save taxonomy information in vector
taxonomy <- as.vector(otu_table$taxonomy)
# Delete column with taxonomy information in dataframe
otu_table$taxonomy <- NULL
if (level == 0) {
# Calculate the minimum sum of all columns/samples
min_sum <- min(colSums(otu_table))
} else {
# The minimum size is set to a fixed reference level
min_sum <- normCutoff
}
if (method == 0) {
# Divide each value by the sum of the sample and multiply by the minimal sample sum
norm_otu_table <- t(min_sum * t(otu_table) / colSums(otu_table))
} else {
# Rarefy the OTU table to an equal sequencing depth
norm_otu_table <- Rarefy(t(otu_table),depth = min_sum)
norm_otu_table <- t(as.data.frame(norm_otu_table$otu.tab.rff))
}
# Calculate relative abundances for all OTUs over all samples
# Divide each value by the sum of the sample and multiply by 100
rel_otu_table <- t(100 * t(otu_table) / colSums(otu_table))
# Re-insert the taxonomy information in normalized counts table
norm_otu_table_tax <- cbind(norm_otu_table,taxonomy)
# Re-insert the taxonomy information in relative abundance table
rel_otu_table_tax <- cbind(rel_otu_table,taxonomy)
# Debugging: Check the contents of the tables
print(head(norm_otu_table))
print(head(rel_otu_table))
print("Summaries of processed tables:")
print(summary(norm_otu_table))
print(summary(rel_otu_table))
################################################################################
# Generate a two-sided pdf with a rarefaction curve for all samples and a curve
pdf(file = "RarefactionCurve.pdf")
# Plot the rarefaction curve for all samples
rarefactionCurve <- rarecurve(data.frame(t(otu_table)),
step = 20,
col = "black",
lty = "solid",
label = F,
xlab = "Number of Reads",
ylab = "Number of Species",
main = "Rarefaction Curves of All Samples")
# Generate empty vectors for the analysis of the rarefaction curve
slope = vector()
SampleID = vector()
angle <- c()
# Iterate through all samples
for (i in seq_along(rarefactionCurve)) {
# If the sequencing depth is greater than 100, the slope of the line that passes between the last and last-100 count is calculated
richness <- ifelse(length(rarefactionCurve[[i]]) > 6,
(rarefactionCurve[[i]][length(rarefactionCurve[[i]])] - rarefactionCurve[[i]][length(rarefactionCurve[[i]])-5])/(attr(rarefactionCurve[[i]], "Subsample")[length(rarefactionCurve[[i]])]-attr(rarefactionCurve[[i]], "Subsample")[length(rarefactionCurve[[i]])-5]) , 1000)
angle[i] <- ifelse(richness!=1000, atan(richness)*180/pi, NA)
slope <- c(slope,richness)
SampleID <- c(SampleID,as.character(names(otu_table)[i]))
}
# Generate the output table for rarefaction curve
curvedf <- cbind(SampleID, slope, angle)
ordered_vector <- order(as.numeric(curvedf[,2]), decreasing = TRUE)
curvedf <- curvedf[order(as.numeric(curvedf[,2]), decreasing = TRUE),]
# Generates a graph with all samples
# Underestimated cases are shown in red
for (i in 1:labelCutoff) {
N <- attr(rarefactionCurve[[ordered_vector[i]]], "Subsample")
lines(N, rarefactionCurve[[ordered_vector[i]]],col="red")
}
# Determine the plotting width and height
Nmax <- sapply(rarefactionCurve[ordered_vector[1:labelCutoff]], function(x) max(attr(x, "Subsample")))
Smax <- sapply(rarefactionCurve[ordered_vector[1:labelCutoff]], max)
# Creates an empty plot for rarefaction curves of underestimated cases
plot(c(1, max(Nmax)), c(1, max(Smax)), xlab = "Number of Reads",
ylab = "Number of Species", type = "n", main=paste(labelCutoff,"- most undersampled cases"))
for (i in 1:labelCutoff) {
N <- attr(rarefactionCurve[[ordered_vector[i]]], "Subsample")
lines(N, rarefactionCurve[[ordered_vector[i]]],col="red")
text(max(attr(rarefactionCurve[[ordered_vector[i]]],"Subsample")), max(rarefactionCurve[[ordered_vector[i]]]), curvedf[i,1],cex=0.6)
}
dev.off()
#################################################################################
###### Write Output Files ######
#################################################################################
# Write the normalized table in a file and copy in directories alpha-diversity and beta-diversity if existing
write.table(norm_otu_table, "OTUs_Table-norm.tab", sep = "\t",col.names = NA, quote = FALSE)
suppressWarnings (try(write.table(norm_otu_table, "../2.Alpha-Diversity/OTUs_Table-norm.tab", sep = "\t",col.names = NA, quote = FALSE), silent =TRUE))
suppressWarnings (try(write.table(norm_otu_table, "../3.Beta-Diversity/OTUs_Table-norm.tab", sep = "\t",col.names = NA, quote = FALSE), silent =TRUE))
# Write the normalized table with taxonomy in a file
write.table(norm_otu_table_tax, "OTUs_Table-norm-tax.tab", sep = "\t",col.names = NA, quote = FALSE)
# Write the normalized relative abundance table in a file and copy in directory Serial-Group-Comparisons if existing
write.table(rel_otu_table, "OTUs_Table-norm-rel.tab", sep = "\t",col.names = NA, quote = FALSE)
suppressWarnings (try(write.table(rel_otu_table, "../5.Serial-Group-Comparisons/OTUs_Table-norm-rel.tab", sep = "\t",col.names = NA, quote = FALSE), silent =TRUE))
# Write the normalized relative abundance with taxonomy table in a file and copy in directory Taxonomic-Binning if existing
write.table(rel_otu_table_tax, "OTUs_Table-norm-rel-tax.tab", sep ="\t",col.names = NA, quote = FALSE)
suppressWarnings (try(write.table(rel_otu_table_tax, "../4.Taxonomic-Binning/OTUs_Table-norm-rel-tax.tab", sep ="\t",col.names = NA, quote = FALSE), silent =TRUE))
# Write the rarefaction table
write.table(curvedf, "RarefactionCurve.tab", sep ="\t", quote = FALSE, row.names = FALSE)
#################################################################################
###### End of Script ######
#################################################################################