Skip to content

Commit

Permalink
Resample
Browse files Browse the repository at this point in the history
  • Loading branch information
elijahedmondson committed Sep 5, 2016
1 parent 890ad82 commit a1f6c7b
Show file tree
Hide file tree
Showing 4 changed files with 308 additions and 42 deletions.
Binary file added .DS_Store
Binary file not shown.
129 changes: 87 additions & 42 deletions 8. Resample model averaging.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ pheno = data.frame(row.names = Total$row.names, rownames = Total$row.names,
ectoderm = as.numeric(Total$Ectoderm),
endoderm = as.numeric(Total$Endoderm),
mesoderm = as.numeric(Total$Mesoderm),
PSC = as.numeric(Total$Pulmonary.Sarcomatoid.Carcinoma))
PSC = as.numeric(Total$Pulmonary.Sarcomatoid.Carcinoma),
Cat2 = as.numeric(Total$Cataract.2.0.Score.Date),
days2 = as.numeric(Total$Cataract.2.0.Score.Days))
addcovar = matrix(pheno$sex, ncol = 1, dimnames = list(row.names(pheno), "sex"))
HZE <- subset(pheno, group == "HZE")
Gamma <- subset(pheno, group == "Gamma")
Expand All @@ -63,24 +65,37 @@ bootstrap <- HS.assoc.bootstrap(perms = 200, chr = 4, pheno = Gamma, pheno.col =
peakMB = 82878119, window = 8000000)


quant = quantile(thy1$average, c(0.025,0.975))
print("95% Confidence Interval for QTL:")
print(paste(quant))

Cat2.AI.18 <- HS.cox.RMA.chrom(perms = 200, chr = 18, pheno = All.irr, pheno.col = "Cat2", days.col = "days2",
probs, K, addcovar, markers, snp.file, outdir = "~/Desktop/",
tx = "All Irradiated", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")


MammaACA.HZE <- HS.RMA.chrom(perms = 200, chr = 11, pheno = HZE, pheno.col = "MammACA", probs, K, addcovar,
markers, snp.file, outdir = "~/Desktop/",
tx = "HZE", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")
MammaACA.Gamma <- HS.RMA.chrom(perms = 200, chr = 11, pheno = Gamma, pheno.col = "MammACA", probs, K, addcovar,
markers, snp.file, outdir = "~/Desktop/",
tx = "Gamma", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")
MammaACA.Un <- HS.RMA.chrom(perms = 200, chr = 11, pheno = Un, pheno.col = "MammACA", probs, K, addcovar,
markers, snp.file, outdir = "~/Desktop/",
tx = "Unirradiated", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")
MammaACA.AI <- HS.RMA.chrom(perms = 200, chr = 11, pheno = All.irr, pheno.col = "MammACA", probs, K, addcovar,
markers, snp.file, outdir = "~/Desktop/",
tx = "Unirradiated", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")

### PLOTTING BOOTSTRAP RESULTS (HISTOGRAM) ###
### PLOTTING BOOTSTRAP RESULTS (HISTOGRAM) ###
### PLOTTING BOOTSTRAP RESULTS (HISTOGRAM) ###
### PLOTTING BOOTSTRAP RESULTS (HISTOGRAM) ###
### PLOTTING BOOTSTRAP RESULTS (HISTOGRAM) ###
Thy.boot <- read.csv("~/Desktop/QTL Data/Bootstrap.overall-Table 1.csv")
Thy.boot2 = Thy.boot[which(Thy.boot$AML.LOD > 7),]
boot <- read.csv("~/Desktop/Whole Chr Resample/Raw Data-Cataract Chr 1.csv")
#Thy.boot2 = Thy.boot[which(Thy.boot$AML.LOD > 7),]

gamma = Thy.boot[which(Thy.boot$TX == "gamma"),]
hze = Thy.boot[which(Thy.boot$TX == "HZE"),]
un = Thy.boot[which(Thy.boot$TX == "Unirradiated"),]
allirr = Thy.boot[which(Thy.boot$TX == "All.irradiated"),]
gamma = boot[which(boot$X == "gamma"),]
hze = boot[which(boot$X == "HZE"),]
un = boot[which(boot$X == "unirradiated"),]
#allirr = boot[which(Thy.boot$TX == "All.irradiated"),]

gamma = Thy.boot[which(Thy.boot$TX == "gamma" & Thy.boot$AML.LOD > 5),]
hze = Thy.boot[which(Thy.boot$TX == "HZE" & Thy.boot$AML.LOD > 5),]
Expand All @@ -91,12 +106,12 @@ allirr = Thy.boot[which(Thy.boot$TX == "All.irradiated" & Thy.boot$AML.LOD > 5),

#HISTOGRAM
layout(matrix(3:1, 3, 1))
hist(un$AML.Locus, breaks=150, col="blue", main = "Unirradiated", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(un$AML.Locus), col="black")
hist(gamma$AML.Locus, breaks=150, col="green", main = "Gamma", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(gamma$AML.Locus), col="black")
hist(hze$AML.Locus, breaks=150, col="red", main = "HZE", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(hze$AML.Locus), col="black", lwd = 1)
hist(un$average, breaks=150, col="blue", main = "Unirradiated", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(un$average), col="black")
hist(gamma$average, breaks=150, col="green", main = "Gamma", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(gamma$average), col="black")
hist(hze$average, breaks=150, col="red", main = "HZE", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
lines(density(hze$average), col="black", lwd = 1)

layout(matrix(4:1, 4, 1))
hist(un$AML.Locus, breaks=150, col="blue", main = "Unirradiated", xlab="Chromosome 2", prob = T, xlim = c(0, 182113224))
Expand All @@ -110,33 +125,43 @@ lines(density(All.irradiated$AML.Locus), col="black", lwd = 1)

#HISTOGRAM WITHOUT DENSITY
layout(matrix(3:1, 3, 1))
hist(un$AML.Locus, breaks=150, col="blue", main = "Unirradiated", xlab="Chromosome 2",xlim = c(110000000, 130000000))
hist(gamma$AML.Locus, breaks=150, col="green", main = "Gamma", xlab="Chromosome 2",xlim = c(110000000, 130000000))
hist(hze$AML.Locus, breaks=150, col="red", main = "HZE", xlab="Chromosome 2",xlim = c(110000000, 130000000))
hist(un$average, breaks=150, col="blue", main = "Unirradiated", xlab="Chromosome 2")
#,xlim = c(110000000, 130000000))
hist(gamma$average, breaks=150, col="green", main = "Gamma", xlab="Chromosome 2")
#,xlim = c(110000000, 130000000))
hist(hze$average, breaks=150, col="red", main = "HZE", xlab="Chromosome 2")
#,xlim = c(110000000, 130000000))




#KERNAL DENSITY
layout(matrix(3:1, 3, 1))
d = density(hze$AML.Locus)
plot(d, col="red", main = "HZE", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(gamma$AML.Locus)
plot(d, col="green", main = "Gamma", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(un$AML.Locus)
plot(d, col="blue", main = "Unirradiated", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(hze$average)
plot(d, col="red", main = "HZE", xlab="Chromosome 2")
#, xlim = c(0, 182113224))
d = density(gamma$average)
plot(d, col="green", main = "Gamma", xlab="Chromosome 2")
#, xlim = c(0, 182113224))
d = density(un$average)
plot(d, col="blue", main = "Unirradiated", xlab="Chromosome 2")
#, xlim = c(0, 182113224))

layout(matrix(4:1, 4, 1))
d = density(un$AML.Locus)
plot(d, col="blue", main = "Unirradiated", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(gamma$AML.Locus)
plot(d, col="green", main = "Gamma", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(hze$AML.Locus)
plot(d, col="red", main = "HZE", xlab="Chromosome 2", xlim = c(0, 182113224))
d = density(allirr$AML.Locus)
d = density(un$average)
plot(d, col="blue", main = "Unirradiated", xlab="Chromosome 2")
#, xlim = c(0, 182113224))
d = density(gamma$average)
plot(d, col="green", main = "Gamma", xlab="Chromosome 2")
#, xlim = c(0, 182113224))
d = density(hze$average)
plot(d, col="red", main = "HZE", xlab="Chromosome 2")
#, xlim = c(0, 182113224))

d = density(allirr$average)
plot(d, col="blue", main = "Unirradiated", xlab="Chromosome 2", xlim = c(0, 182113224))

hist(hze$AML.Locus, prob = T)
hist(hze$average, prob = T)
lines(density(x))


Expand All @@ -145,20 +170,40 @@ title(main = "Nonparametric Bootstrap Resampling with Replacement: Distribution



AML.boot <- factor(Thy.boot$TX, levels = c("All.irradiated", "gamma", "HZE", "Unirradiated"),
boot <- read.csv("~/Desktop/Whole Chr Resample/Raw Data-Cataract Chr 17.csv")
gamma = boot[which(boot$X == "gamma"),]
hze = boot[which(boot$X == "HZE"),]
un = boot[which(boot$X == "unirradiated"),]


merge.boot <- factor(boot$X, levels = c("gamma", "HZE", "unirradiated"),
labels = c("Gamma", "HZE", "Unirradiated"))
par(mfrow=c(1,1))
sm.density.compare(boot$average, boot$X, xlab = "Chromosome 17", lwd = 2.5)
title(main = "Resample Model Averaging: Cataract Chromosome 17")
colfill = c(2:(2+length(levels(merge.boot))))
legend("topleft", levels(merge.boot), fill = colfill)







merge.boot <- factor(boot$X, levels = c("All.irradiated", "gamma", "HZE", "Unirradiated"),
labels = c("All Irradiated", "Gamma", "HZE", "Unirradiated"))
par(mfrow=c(1,1))
sm.density.compare(Thy.boot$AML.Locus, Thy.boot$TX, xlab = "Chromosome 2", lwd = 2.5)
sm.density.compare(boot$average, boot$X, xlab = "Chromosome ", lwd = 2.5)
title(main = "AML Adenoma: Resample Model Averaging")
colfill = c(2:(2+length(levels(AML.boot))))
legend("topright", levels(AML.boot), fill = colfill)
colfill = c(2:(2+length(levels(merge.boot))))
legend("topleft", levels(merge.boot), fill = colfill)


AML.boot <- factor(Thy.boot2$TX, levels = c("All.irradiated", "gamma", "HZE", "Unirradiated"),
AML.boot <- factor(Thy.boot2$X, levels = c("All.irradiated", "gamma", "HZE", "Unirradiated"),
labels = c("All Irradiated", "Gamma", "HZE", "Unirradiated"))
par(mfrow=c(1,1))
sm.density.compare(Thy.boot2$AML.Locus, Thy.boot2$TX, xlab = "Chromosome 2", lwd = 2.5)
title(main = "AML Adenoma: Resample Model Averaging")
colfill = c(2:(2+length(levels(AML.boot))))
legend("topright", levels(AML.boot), fill = colfill)
sm.density.compare(boot$average, boot$X, xlab = "Chromosome 2", lwd = 2.5)
title(main = "Resample Model Averaging")
colfill = c(2:(2+length(levels(boot))))
legend("topright", levels(boot), fill = colfill)
#sm.binomial.bootstrap()
109 changes: 109 additions & 0 deletions HS.RMA.chrom.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
HS.RMA.chrom = function(perms, chr, pheno, pheno.col, probs, K, addcovar,
markers, snp.file, outdir = "~/Desktop/",
tx = "", sanger.dir = "~/Desktop/R/QTL/WD/HS.sanger.files/")
{
begin <- Sys.time()
file.prefix = paste0("Bootstrap.", tx, ".", pheno.col, "(Chr", chr, ")")
plot.title = paste("Bootstrap ", tx, " ", pheno.col, " ", "(Chr ", chr, "):", sep = "")
print(paste(plot.title, Sys.time()))

load(file = paste0(sanger.dir, "probs", chr, ".Rdata"))

samples = intersect(rownames(pheno), rownames(probs))
samples = intersect(samples, rownames(addcovar))
samples = intersect(samples, rownames(K[[1]]))
stopifnot(length(samples) > 0)

pheno = pheno[samples,,drop = FALSE]
addcovar = addcovar[samples,,drop = FALSE]
probs = probs[samples,,,drop = FALSE]

#samples2 = markers$SNP_ID[which(markers$Chr == chr & markers$Mb_NCBI38 > (peakMB - 4000000) & markers$Mb_NCBI38 < (peakMB + 4000000))]
samples2 = markers$SNP_ID[which(markers$Chr == chr)]
probs = probs[,,samples2, drop = FALSE]

K = K[[chr]][samples,samples,drop = FALSE]

setwd(outdir)

##

permutations = matrix(1, nrow = perms, ncol = 5, dimnames = list(1:perms, c("LOD", "min", "max", "average", "#Markers")))
sanger.dir = sanger.dir
rm(samples, samples2)
for(p in 1:perms) {
LODtime = Sys.time()
print(p)

#repeat {
phenoperm = data.frame(pheno[sample(nrow(pheno), replace = TRUE), ],
check.names = FALSE, check.rows = FALSE)



samples = sub("\\.[0-9]$", "", rownames(phenoperm))
probsperm = probs[samples,,]
rownames(probsperm) = make.unique(rownames(probsperm))


Kperm = K
Kperm = K[samples,samples,drop = FALSE]
rownames(Kperm) = make.unique(rownames(Kperm))


### Move the model into the loop LOGISTIC REGRESSION MODEL ###

Kperm = Kperm[samples, samples]
chrs = chr
data = vector("list", length(chrs))
names(data) = chrs

rng = which(markers[,2] == chrs)
data = list(probsperm = probsperm, Kperm = Kperm,
markers = markers[rng,])

result = vector("list", length(data))
names(result) = names(data)
rm(probsperm, Kperm)


### WORK HORSE ###
### RETURN ANY POS FOR ENRIRE CHROMOSOME ###


result = GRSDbinom.permsfast(data, pheno = phenoperm, pheno.col, addcovar, tx, sanger.dir)

top = max(-log10(result$pv))
MAX.LOD = result$POS[which(-log10(result$pv) == top)]
MegaBase = (min(MAX.LOD) + max(MAX.LOD))/2000000

print(paste(MegaBase, "Mb: LOD", round(top, digits = 2)))

#if (MAX.LOD > (peakMB - (window/2)) & MAX.LOD < (peakMB + (window/2))) break

#}



print(paste0("Accepted locus: ", MegaBase, " Mb"))



# Save the locus.
permutations[p,] = c(top, min(MAX.LOD), max(MAX.LOD), ((min(MAX.LOD) + max(MAX.LOD))/2), length(MAX.LOD))
print(paste(round(difftime(Sys.time(), LODtime, units = 'mins'), digits = 2),
"minutes..."))

}

print(paste(round(difftime(Sys.time(), begin, units = 'hours'), digits = 2),
"hours elapsed during analysis"))

quant = quantile(permutations[,4], c(0.025,0.975))
print(paste("95% Confidence Interval for QTL:", min(quant), "-", max(quant)))
print(paste("Interval =", (round((((max(quant) - min(quant))/1000000)), digits = 2)), "Mb"))

return(permutations)


}#HS.RMA.chrom()
Loading

0 comments on commit a1f6c7b

Please sign in to comment.