-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathqtl2_coef_assoc_engine.R
172 lines (138 loc) · 5.99 KB
/
qtl2_coef_assoc_engine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
################################################################################
# Given the QTL mapping input data and a table of peaks from one of the QTL
# harvest functions, calculate the founder allele effects and association
# mapping at each peak.
#
# Daniel Gatti
# July 20, 2017
################################################################################
options(stringsAsFactors = F)
library(qtl2)
library(qtl2convert)
library(qtl2db)
library(RSQLite)
library(dplyr)
library(dbplyr)
library(AnnotationHub)
library(rtracklayer)
source.dir = "/hpcdata/gac/projects/Attie_DO_Metabolomics/scripts/"
source(paste0(source.dir, "assoc_mapping.R"))
ensembl.file = "/hpcdata/gac/projects/Attie_DO_Metabolomics/Mus_musculus.GRCm38.90.gtf.rds"
###############################
# Get command line arguments. #
###############################
# Arguments:
# data.file: full path to the qtl2 input data file. *.Rdata file.
# sum.file: full path to the QTL summary file.
# qtl.file: full path to the QTL LOD file.
# output.dir: full path to the output directory for the data files.
# fig.dir: full path to the output directory for the data files.
# chunk.size: number of QTL summary rows to map in this run.
# chunk.number: Index of th current chunk of QTL summary rows to run.
args = commandArgs(trailingOnly = TRUE)
data.file = args[1]
qtl.file = args[2]
sum.file = args[3]
output.dir = args[4]
fig.dir = args[5]
chunk_size = as.numeric(args[6])
chunk_number = as.numeric(args[7])
#data.file = "/hpcdata/gac/derived/Attie_DO_Metabolomics/qtl2_input/attie_liver_lipids_qtl2_input.Rdata"
#qtl.file = "/hpcdata/gac/projects/Attie_DO_Metabolomics/QTL/Liver/lipids_norm_jax/liver_lipids_jax_norm_all_qtl.rds"
#sum.file = "/hpcdata/gac/projects/Attie_DO_Metabolomics/QTL/Liver/lipids_norm_jax/liver_lipids_jax_norm_qtl_summary_thresh_6.csv"
#output.dir = "/hpcdata/gac/projects/Attie_DO_Metabolomics/QTL/Liver/lipids_norm_jax/"
#fig.dir = "/hpcdata/gac/projects/Attie_DO_Metabolomics/figures/QTL/liver_lipids_norm_jax/"
#chunk_size = 10
#chunk_number = 1
print(paste("data.file =", data.file))
print(paste("qtl.file =", qtl.file))
print(paste("sum.file =", sum.file))
print(paste("output.dir =", output.dir))
print(paste("fig.dir =", fig.dir))
print(paste("chunk_size =", chunk_size))
print(paste("chunk_number =", chunk_number))
################################
# Load in the qtl2 input data. #
################################
load(data.file)
# Load in the QTL LOD file.
qtl = readRDS(qtl.file)
# Load in the QTL summary table.
qtl.summary = read.csv(sum.file)
stopifnot(c("pheno", "pheno.descr", "genoprobs", "K", "map") %in% ls())
#############################
# Load in new 69K grid markers. #
#############################
markers = readRDS("/hpcdata/gac/derived/CGD_DO_Genoprobs/marker_grid_0.02cM_plus.rds")
map = map_df_to_list(markers, pos_column = "pos")
######################
# Set up covariates. #
######################
covar.names = strsplit(pheno.descr$covar_list, ":")
covar.names = unique(unlist(covar.names))
# There may be NA's in the covar.names.
covar.names = covar.names[!is.na(covar.names)]
stopifnot(covar.names %in% colnames(pheno))
f = as.formula(paste("~", paste(covar.names, collapse = "+")))
addcovar = model.matrix(f, data = pheno)[,-1]
##########################
# Get the Ensembl genes. #
##########################
ensembl = NULL
if(file.exists(ensembl.file)) {
ensembl = readRDS(ensembl.file)
} else {
hub = AnnotationHub()
hub = AnnotationHub::query(hub, c("gtf", "mus musculus", "ensembl"))
ensembl = hub[[names(hub)[hub$title == "Mus_musculus.GRCm38.90.gtf"]]]
saveRDS(ensembl, file = ensembl.file)
} # else
#######################################################
# Split out phenotypes and convert to numeric matrix. #
#######################################################
covar = pheno[,!pheno.descr$is_pheno]
pheno = as.matrix(pheno[,pheno.descr$is_pheno])
#########################################
# Calculate the phenotype range to run. #
#########################################
qtl.rng = ((chunk_number - 1) * chunk_size + 1):(chunk_number * chunk_size)
if(qtl.rng[length(qtl.rng)] > nrow(qtl.summary)) {
qtl.rng = qtl.rng[1]:nrow(qtl.summary)
}
###########################################################################
# Calculate the founder allele effects and association mapping and create #
# plots for each. #
###########################################################################
# For each analyte, select the peak with the highest LOD, plot the coefs
# on that chromosome and perform association mapping in a +/- 2 MB window
# around the peak.
for(i in qtl.rng) {
# Get the QTL info for this peak.
qtl.info = qtl.summary[i,]
analyte = qtl.info$analyte
chr = qtl.info$chr
pos = qtl.info$pos
print(paste(analyte, ": CHR", chr))
# Founder allele effects.
coef = scan1blup(genoprobs = genoprobs[,chr], pheno = pheno[,analyte,drop = F],
kinship = K[[chr]], addcovar = addcovar, cores = 10)
saveRDS(coef, file = paste0(output.dir, analyte, "_coef_chr", chr, ".rds"))
png(paste0(fig.dir, analyte, "_coef_chr", chr, ".png"), width = 1000, height = 1000, res = 128)
plot_coefCC(x = coef, map = map, scan1_output = qtl[,analyte, drop = FALSE],
main = analyte)
dev.off()
# Association mapping.
start = max(1, pos - 2)
end = pos + 2
assoc = assoc_map(chr = chr, start = start, end = end, probs = genoprobs[,chr],
pheno = pheno[, analyte, drop = FALSE], K = K[[chr]], addcovar = addcovar,
intcovar = NULL, map = map,
db.file = "/data/gac/resource/CCsnps/ccfoundersnps.sqlite", ncores = 4)
save(assoc, file = paste0(output.dir, analyte, "_assoc_chr", chr, ".Rdata"))
png(paste0(fig.dir, analyte, "_assoc_chr", chr, ".png"), width = 2000, height = 1600,
res = 128)
assoc_plot(assoc = assoc[[1]], snpinfo = assoc[[2]], map = map, chr = chr,
start = start, end = end)
dev.off()
} # for(i)