library(devtools)
install_github("jtlovell/qtlTools")## Downloading GitHub repo jtlovell/qtlTools@master
## from URL https://api.github.com/repos/jtlovell/qtlTools/zipball/master
## Installing qtlTools
## '/Library/Frameworks/R.framework/Resources/bin/R' --no-site-file \
## --no-environ --no-save --no-restore --quiet CMD INSTALL \
## '/private/var/folders/b6/w7rk82fx273ct14rskd_csk40000gp/T/RtmpvKv4c8/devtools2a573c7b3374/jtlovell-qtlTools-2d1ab9b' \
## --library='/Library/Frameworks/R.framework/Versions/3.3/Resources/library' \
## --install-tests
##
## Reloading installed qtlTools
library(qtlTools)
library(qtl)
library(ggplot2)
library(lsmeans)## Loading required package: estimability
##
## Attaching package: 'lsmeans'
## The following object is masked from 'package:devtools':
##
## test
library(qtlpvl)## Loading required package: Rcpp
For this tutorial, we will use the dataset multitrait from R/qtl.
data(multitrait)
cross<-multitrait
cross<-calc.genoprob(cross)
phes<-phenames(cross)In R/qtl there is a pipeline to analyze multiple traits in a single model. Here, we will discuss analyzing them independently, then concatenating the results
In the simplest case, we can run a bunch of scanones
s1<-scanone(cross, pheno.col = phes, method = "hk")
cols<-rainbow(length(phes))
plot(s1, type="n", ylim = c(0,max(as.matrix(s1[,-c(1:2)]))),
ylab = "LOD Score", main = "all phenotypes plotted together")
for(i in 1:length(phes)) plot(s1, add=T, lodcolumn = i, col = cols[i])This simplest way to look at these data is to pull out only the significant QTL peaks
perms<-scanone(cross, pheno.col = phes, method = "hk", n.perm = 100, verbose=F)
print(pullSigQTL(cross, pheno.col=phes,
s1.output = s1, perm.output = perms,
returnQTLModel=FALSE, alpha = 0.05,
controlAcrossCol = TRUE))## pheno chr lod1 pos
## 6 X3.Methylsulfinylpropyl 1 3.624535 105.304
## 7 X7.Methylsulfinylheptyl 1 4.592547 112.183
## 11 X2.Propenyl 1 3.200351 95.126
## 16 X7.Methylthioheptyl 1 4.078659 112.183
## 19 Quercetin.deoxyhexosyl.dihexoside 1 40.230510 88.617
## 20 Kaempferol.dideoxyhexosyl.dihexoside 1 28.398452 88.617
## 21 Quercetin.deoxyhexosyl.hexoside 1 50.260233 88.617
## 22 Isohamnetin.deoxyhesoxyl.dihexoside 1 37.347156 88.617
## 23 Isohamnetin.deoxyhesoxyl.hexoside 1 30.070622 88.617
## 24 Kaempferol.dideoxyhexosyl.hexoside 1 43.080414 88.617
## 55 X7.Methylsulfinylheptyl 3 4.709009 55.937
## 62 X6.Methylsulfinylhexyl 3 4.744972 55.937
## 64 X7.Methylthioheptyl 3 5.923296 55.937
## 73 X3.Hydroxypropyl 4 10.339584 9.027
## 75 X4.Methylsulfinylbutyl 4 4.210679 7.328
## 76 X3.Butenyl 4 5.654336 9.027
## 81 X5.Methylsulfinylpentyl 4 7.174370 9.027
## 82 X5.Methylthiopentyl 4 3.301104 9.027
## 89 X5.Benzoyloxypentyl 4 4.749325 13.229
## 90 X6.Benzoyloxyhexyl 4 7.467485 9.027
## 97 X3.Hydroxypropyl 5 12.970811 35.356
## 98 X4.Hydroxybutyl 5 15.389754 35.356
## 99 X4.Methylsulfinylbutyl 5 16.073220 35.356
## 100 X3.Butenyl 5 8.996856 35.356
## 102 X3.Methylsulfinylpropyl 5 8.026297 39.922
## 103 X7.Methylsulfinylheptyl 5 13.407763 35.356
## 104 X4.Methylthiobutyl 5 28.379592 35.356
## 105 X5.Methylsulfinylpentyl 5 9.931866 35.356
## 106 X5.Methylthiopentyl 5 2.298515 35.356
## 107 X2.Propenyl 5 20.533741 35.356
## 108 X3.Benzoyloxypropyl 5 12.049345 35.356
## 109 X6.Methylthiohexyl 5 7.305312 35.356
## 110 X6.Methylsulfinylhexyl 5 22.547515 35.356
## 111 X4.Benzoyloxybutyl 5 9.996076 35.356
## 112 X7.Methylthioheptyl 5 13.131570 35.356
## 113 X5.Benzoyloxypentyl 5 10.843851 35.356
## 114 X6.Benzoyloxyhexyl 5 6.321402 35.356
However, if we want to look at model statistics etc. it is best to create multiple qtl models for each phenotype
mods<-pullSigQTL(cross, pheno.col=phes,
s1.output = s1, perm.output = perms,
returnQTLModel=TRUE, alpha = 0.05,
controlAcrossCol = TRUE)This returns a list of fitted QTL models, which we could use to calculate confidence intervals etc.
print(mods[1:5])## $X3.Hydroxypropyl
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 4@9.0 4 9.027 2
## Q2 5@35.4 5 35.356 2
##
## $X4.Hydroxybutyl
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 5@35.4 5 35.356 2
##
## $X4.Methylsulfinylbutyl
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 4@7.3 4 7.328 2
## Q2 5@35.4 5 35.356 2
##
## $X3.Butenyl
## QTL object containing genotype probabilities.
##
## name chr pos n.gen
## Q1 4@9.0 4 9.027 2
## Q2 5@35.4 5 35.356 2
##
## $X3.Methylthiopropyl
## [1] "NULL QTL Model"
We can also plot confidence intervals directly from scanone data, however, it is important to note that multiple peaks on a single chromosome can violate assumptions of such intervals.
cis<-calcCis(cross,s1.output=s1, perm.output=perms)
head(cis)## pheno qtlname chr pos maxLod
## 1 X3.Methylsulfinylpropyl NA 1 105.304 3.624535
## 2 X7.Methylsulfinylheptyl NA 1 112.183 4.592547
## 3 X2.Propenyl NA 1 95.126 3.200351
## 4 X7.Methylthioheptyl NA 1 112.183 4.078659
## 5 Quercetin.deoxyhexosyl.dihexoside NA 1 88.617 40.230510
## 6 Kaempferol.dideoxyhexosyl.dihexoside NA 1 88.617 28.398452
## lowmarker highmarker lowposition highposition
## 1 CH.215L CC.318C 95.126 112.183
## 2 BF.116C FD.90L-Col 102.114 118.105
## 3 DF.260L-Col HH.360L-Col 82.554 126.083
## 4 BF.116C FD.90L-Col 102.114 118.105
## 5 EC.88C CH.215L 84.414 95.126
## 6 EC.88C CH.215L 84.414 95.126
We might be interested in where these QTL are - we can plot these on the genetic map
segmentsOnMap(cross, calcCisResults = cis, legendCex = .35)The approach above is nice because it is super fast, but it does not necessarily produce the best models (e.g. there can only be one QTL/chromosome). One could use stepwiseqtl to generate the models instead, however, scantwo permutations are needed for this appraoch, which is slow for many traits.
For the purposes of this tutorial, we will use the models generated through scanones to make statistical inference. The function qtlStats takes a qtl model and returns fitted statistics. For a single phenotype, we’d run it as follows:
mod<-mods[["X3.Methylsulfinylpropyl"]]
form<-paste("y ~",paste(mod$altname, collapse = " + "))
mod<-refineqtl(cross, qtl = mod, formula = form, verbose=F, method="hk")
qtlStats(cross,
pheno.col = "X3.Methylsulfinylpropyl",
form = form,
mod = mod)## terms altnames qtlnames phenotype formula df
## 1 1@73.2 Q1 1@73.2 X3.Methylsulfinylpropyl y ~ Q1 + Q2 1
## 2 5@35.4 Q2 5@35.4 X3.Methylsulfinylpropyl y ~ Q1 + Q2 1
## Type.III.SS LOD X.var F.value Pvalue.Chi2. Pvalue.F.
## 1 55390.13 0.1300122 0.3112865 0.5884745 4.390636e-01 4.441765e-01
## 2 3195852.83 6.7958000 17.9603421 33.9533011 2.215566e-08 3.163839e-08
## modelLod modelPercVar est SE t pheno chr pos
## 1 6.81262 18.00936 18.83107 24.54772 0.7671209 NA 1 73.166
## 2 6.81262 18.00936 -144.70775 24.83424 -5.8269461 NA 5 35.356
## maxLod lowmarker highmarker lowposition highposition
## 1 1.073048 PVV4 HH.360L-Col 0.000 126.083
## 2 13.698045 DF.184L-Col GH.121L-Col 29.579 39.922
Since mods is a list, we can loop through it using lapply First toss out an NULL QTL models
mods<-mods[sapply(mods,length)!=1] # a NULL QTL model has length = 1, toss theseNext do the loop, where x represents each phenotype.
stats<-lapply(names(mods), function(x){
mod<-mods[[x]]
form<-paste("y ~",paste(mod$altname, collapse = " + "))
mod<-refineqtl(cross, qtl = mod, formula = form,
verbose=F, method="hk", pheno.col = x)
qtlStats(cross,
pheno.col = x,
form = form,
mod = mod)
})
stats<-do.call(rbind, stats)
print(head(stats))## terms altnames qtlnames phenotype formula df
## 1 4@7.3 Q1 4@7.3 X3.Hydroxypropyl y ~ Q1 + Q2 1
## 2 5@35.4 Q2 5@35.4 X3.Hydroxypropyl y ~ Q1 + Q2 1
## 3 5@35.4 Q1 5@35.4 X4.Hydroxybutyl y ~ Q1 1
## 4 4@9.0 Q1 4@9.0 X4.Methylsulfinylbutyl y ~ Q1 + Q2 1
## 5 5@35.4 Q2 5@35.4 X4.Methylsulfinylbutyl y ~ Q1 + Q2 1
## 6 4@0.0 Q1 4@0.0 X3.Butenyl y ~ Q1 + Q2 1
## Type.III.SS LOD X.var F.value Pvalue.Chi2. Pvalue.F.
## 1 1122065752.9 14.816762 24.02963 83.71827 1.110223e-16 3.330669e-16
## 2 1546146184.2 19.087155 33.11154 115.35926 0.000000e+00 0.000000e+00
## 3 571087.8 15.389754 36.14528 NA 0.000000e+00 0.000000e+00
## 4 310708817.6 7.277717 11.96404 36.62612 7.071197e-09 1.034805e-08
## 5 1017169703.3 19.658997 39.16675 119.90318 0.000000e+00 0.000000e+00
## 6 961915216.0 7.206661 14.57554 36.22966 8.366950e-09 1.220014e-08
## modelLod modelPercVar est SE t pheno chr pos
## 1 27.78757 55.51039 -2688.59616 293.843073 -9.149769 NA 4 7.328
## 2 27.78757 55.51039 -3174.21911 295.536186 -10.740543 NA 5 35.356
## 3 15.38975 36.14528 60.97761 6.489016 9.397052 NA 5 35.356
## 4 23.35094 49.36876 -1411.58194 233.244097 -6.051951 NA 4 9.027
## 5 23.35094 49.36876 2575.80850 235.232986 10.950031 NA 5 35.356
## 6 16.20352 37.64200 2484.69093 412.800516 6.019108 NA 4 0.000
## maxLod lowmarker highmarker lowposition highposition
## 1 14.816762 ANL2 C6L9 0.000 13.229
## 2 19.087155 DF.184L-Col GH.121L-Col 29.579 39.922
## 3 15.389754 DF.184L-Col GH.121L-Col 29.579 39.922
## 4 7.277717 ANL2 C6L9 0.000 13.229
## 5 19.658997 DF.184L-Col GH.121L-Col 29.579 39.922
## 6 7.206661 ANL2 C6L9 0.000 13.229
The most basic way to present these data is to show the position of QTL intervals on a genetic map. qtlTools::segmentsOnMap provides one method to do this.
segmentsOnMap(cross=cross, phe=stats$phenotype,
chr=stats$chr,
l = stats$lowposition, lwd=2,
h =stats$highposition,legendCex=.35, leg.inset=.01)