Lovell — Jul 22, 2014, 12:17 PM
#############################
### Title: Re-analysis of Drydown QTLs
### Author: JTLovell
### Date Created: 13-July 2014
### Modifications (Date):
#1. (Part II) 14:20-July 2014- Added gene expression data analysis
#2. (Part III) 21:22-July 2014- Added covariate qtl analysis data output
#############################
###Libraries:###
#############################
library(qtl)
library(snow)
library(rlecuyer)
library(plyr)
library(ggplot2)
library(reshape)
Attaching package: 'reshape'
The following objects are masked from 'package:plyr':
rename, round_any
The following object is masked from 'package:qtl':
condense
library(reshape2)
Attaching package: 'reshape2'
The following objects are masked from 'package:reshape':
colsplit, melt, recast
library(qvalue)
Attaching package: 'qvalue'
The following object is masked from 'package:ggplot2':
qplot
options(warn=-1)
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] qvalue_1.38.0 reshape2_1.4 reshape_0.8.5 ggplot2_1.0.0
[5] plyr_1.8.1 rlecuyer_0.3-3 snow_0.3-13 qtl_1.32-10
[9] knitr_1.6
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 digest_0.6.4 evaluate_0.5.5 formatR_0.10
[5] grid_3.1.0 gtable_0.1.2 MASS_7.3-33 munsell_0.4.2
[9] parallel_3.1.0 proto_0.3-10 Rcpp_0.11.2 scales_0.2.4
[13] stringr_0.6.2 tcltk_3.1.0 tools_3.1.0
#############################
###Part I: QTL mapping of main effects (no epistasis incorporated yet)
#############################
#######################
#1.1 Set up the environment
rm(list=ls())
setwd("~/Desktop/Drydown2014")
cross<-read.cross("csvs",dir="",
genotypes=c("A","H"),
genfile="McKay_revised_map_1_27_12.csv",
phefile= "TKrils_full_phe.csv",
na.strings=c("NA","-", "#VALUE!","#NUM!"))
--Read the following data:
341 individuals
168 markers
38 phenotypes
--Cross type: bc
#######################
##1.2: Set class as recombinant inbred line (DH is not an option in R/qtl)
class(cross)[1] <-"riself"
summary(cross)
RI strains via selfing
No. individuals: 341
No. phenotypes: 38
Percent phenotyped: 78.7
No. chromosomes: 5
Autosomes: 1 2 3 4 5
Total markers: 168
No. markers: 41 33 26 32 36
Percent genotyped: 91.1
Genotypes (%): AA:50.8 BB:49.2
#######################
##1.3 cull matrix to include only high quality markers
#look at the genotypes... check for errors
par(mfrow=c(1,5),mar = c(2,0,2,0))
cross.errlod<-calc.errorlod(cross)
for (i in 1:5) plotGeno(cross.errlod,chr=i, include.xo=F, cutoff=4, main=paste("ch",i))
# qtl analysis of regular map indicates that there is an error on ch4, between 45 and 46
print(toperr <- top.errorlod(cross.errlod, cutoff=4))
chr id marker errorlod
1 5 182 5_4223381 4.833
2 5 194 5_4223381 4.669
3 5 154 MSAT5.14 4.387
4 5 133 MSAT5.14 4.387
5 4 108 MSAT4.35 4.211
6 2 55 2_4354761 4.165
cross.clean <- cross
for(i in 1:nrow(toperr)) {
chr <- toperr$chr[i]; id <- toperr$id[i]; mar <- toperr$marker[i]
cross.clean$geno[[chr]]$data[cross$pheno$id==id, mar] <- NA
}
for (i in 1:5) plotGeno(calc.errorlod(cross.clean),chr=i, include.xo=T,cutoff=4, main=paste("ch",i))
#if this looks good, replace the old cross
#still problems with chr 4 44-48#
##### Needs to be addressed ####
cross<-cross.clean
#######################
##1.4 make pseudomarker matrix
grid<-calc.genoprob(cross, step=1, error.prob=.0001, map.function="kosambi",
stepwidth="max", off.end=0)
mainphes<-names(cross$phe)[-c(1,seq(from=4,to=28,by=3),33,36)] #all environment-specific QTLs
#######################
#1.5 make cross/pseudomarker matrix with quantile normalized phenotypes
quantnorm<-function(x) {
n=sum(!is.na(x),na.rm=T)
x=rank(x)/(n+1)
x=qnorm(x)
x[is.infinite(x)]=NA
x
}
cross.qn<-transformPheno(cross,pheno.col=mainphes,transf=quantnorm)
grid.qn<-calc.genoprob(cross.qn, step=1, error.prob=.0001, map.function="kosambi",
stepwidth="max", off.end=0)
#######################
#1.6 compare permutation results between groups
permtest.raw<-scanone(grid, method="hk",pheno.col=mainphes,n.perm=1000, verbose=F)
permtest.qn<-scanone(grid.qn, method="hk",pheno.col=mainphes,n.perm=1000, verbose=F)
permsout.raw<-data.frame(phe=character(),perm.raw.1=numeric(),perm.raw.05=numeric())
for (i in 1:length(mainphes)){
phe<-mainphes[i]
index.05<-(1:length(mainphes)*2)[i]-1
index.1<-(1:length(mainphes)*2)[i]
perm.raw.1<-summary(permtest.raw)[index.1]
perm.raw.05<-summary(permtest.raw)[index.05]
permsout.raw<-rbind(permsout.raw,data.frame(phe,perm.raw.1,perm.raw.05))
}
permsout.qn<-data.frame(phe=character(),perm.qn.1=numeric(),perm.qn.05=numeric())
for (i in 1:length(mainphes)){
phe<-mainphes[i]
index.05<-(1:length(mainphes)*2)[i]-1
index.1<-(1:length(mainphes)*2)[i]
perm.qn.1<-summary(permtest.qn)[index.1]
perm.qn.05<-summary(permtest.qn)[index.05]
permsout.qn<-rbind(permsout.qn,data.frame(phe,perm.qn.1,perm.qn.05))
}
outmaxlod<-data.frame(phe=character(),maxlod_raw=numeric(),maxlod_qn=numeric())
for (i in mainphes){
phe<-i
maxlod.raw<-max(scanone(grid, pheno.col=i, method="hk"))[1,3]
maxlod.qn<-max(scanone(grid.qn, pheno.col=i, method="hk"))[1,3]
outmaxlod<-rbind(outmaxlod,data.frame(phe,maxlod.raw,maxlod.qn))
}
anyqtls<-merge(permsout.raw,permsout.qn,by="phe")
anyqtls<-merge(anyqtls,outmaxlod,by="phe")
print(anyqtls) # the distribution of max scanone peaks and permutations
phe perm.raw.1 perm.raw.05 perm.qn.1 perm.qn.05 maxlod.raw
1 ABAlyoph.dry 2.220 2.521 2.352 2.691 3.5237
2 ABAlyoph.wet 2.284 2.499 2.234 2.668 1.9033
3 C13.dry 2.265 2.607 2.295 2.600 2.9466
4 C13.wet 2.346 2.678 2.309 2.660 5.5038
5 FT.June 2.271 2.641 2.313 2.684 53.0746
6 g0 2.313 2.640 2.324 2.608 4.4961
7 GR.dry 2.333 2.618 2.335 2.633 3.7482
8 LeafArea.dry 2.288 2.642 2.230 2.497 2.4588
9 LeafArea.wet 2.373 2.656 2.325 2.742 2.7781
10 proline.dry 2.299 2.604 2.281 2.681 16.0433
11 proline.wet 2.125 2.386 2.284 2.567 1.5173
12 Rel.GR.wet 2.356 2.628 2.295 2.566 3.4307
13 RMR.dry 2.331 2.619 2.381 2.817 2.6379
14 RMR.wet 2.380 2.725 2.399 2.808 1.5504
15 RtMass.dry 2.429 2.744 2.427 2.784 2.3300
16 RtMass.wet 2.366 2.657 2.383 2.796 1.3313
17 S.R.dry 2.426 2.693 2.428 2.703 1.5904
18 S.R.wet 2.210 2.507 2.488 2.845 1.3005
19 ShtDW.dry 2.387 2.689 2.304 2.672 1.7355
20 ShtDW.wet 2.275 2.656 2.357 2.698 1.5474
21 ShtFrWt.dry 2.226 2.511 2.306 2.583 2.1435
22 ShtFrWt.wet 2.394 2.796 2.362 2.695 1.8202
23 WC.dry 2.434 2.765 2.329 2.641 2.3468
24 WC.wet 2.339 2.722 2.358 2.674 0.9362
25 Wilted.dry 2.368 2.691 2.350 2.713 2.7511
26 Wilted.wet 2.050 2.248 1.958 2.187 1.2578
maxlod.qn
1 2.4009
2 1.4075
3 2.9618
4 5.6963
5 53.7338
6 4.5293
7 3.5959
8 2.4290
9 2.6025
10 16.4985
11 2.2366
12 2.8481
13 2.3096
14 1.3736
15 2.2925
16 0.9848
17 1.5294
18 1.1509
19 1.6709
20 1.4349
21 2.2253
22 1.9064
23 2.2975
24 1.0334
25 2.7244
26 1.2578
#######################
#1.7 Choose the phenotypes to do the mapping
anyqtls$qtl.qn.05<-anyqtls$maxlod.qn>=anyqtls$perm.qn.05
anyqtls$qtl.raw.05<-anyqtls$maxlod.raw>=anyqtls$perm.raw.05
anyqtls$qtl.qn.1<-anyqtls$maxlod.qn>=anyqtls$perm.qn.1
anyqtls$qtl.raw.1<-anyqtls$maxlod.raw>=anyqtls$perm.raw.1
print(anyqtls[c(1,8:11)])
phe qtl.qn.05 qtl.raw.05 qtl.qn.1 qtl.raw.1
1 ABAlyoph.dry FALSE TRUE TRUE TRUE
2 ABAlyoph.wet FALSE FALSE FALSE FALSE
3 C13.dry TRUE TRUE TRUE TRUE
4 C13.wet TRUE TRUE TRUE TRUE
5 FT.June TRUE TRUE TRUE TRUE
6 g0 TRUE TRUE TRUE TRUE
7 GR.dry TRUE TRUE TRUE TRUE
8 LeafArea.dry FALSE FALSE TRUE TRUE
9 LeafArea.wet FALSE TRUE TRUE TRUE
10 proline.dry TRUE TRUE TRUE TRUE
11 proline.wet FALSE FALSE FALSE FALSE
12 Rel.GR.wet TRUE TRUE TRUE TRUE
13 RMR.dry FALSE TRUE FALSE TRUE
14 RMR.wet FALSE FALSE FALSE FALSE
15 RtMass.dry FALSE FALSE FALSE FALSE
16 RtMass.wet FALSE FALSE FALSE FALSE
17 S.R.dry FALSE FALSE FALSE FALSE
18 S.R.wet FALSE FALSE FALSE FALSE
19 ShtDW.dry FALSE FALSE FALSE FALSE
20 ShtDW.wet FALSE FALSE FALSE FALSE
21 ShtFrWt.dry FALSE FALSE FALSE FALSE
22 ShtFrWt.wet FALSE FALSE FALSE FALSE
23 WC.dry FALSE FALSE FALSE FALSE
24 WC.wet FALSE FALSE FALSE FALSE
25 Wilted.dry TRUE TRUE TRUE TRUE
26 Wilted.wet FALSE FALSE FALSE FALSE
phes_wqtls<-as.character(anyqtls$phe[anyqtls$qtl.raw.05])
phes_wqtls #these are the traits with qtl at a=0.05 w/ raw phenotypes
[1] "ABAlyoph.dry" "C13.dry" "C13.wet" "FT.June"
[5] "g0" "GR.dry" "LeafArea.wet" "proline.dry"
[9] "Rel.GR.wet" "RMR.dry" "Wilted.dry"
#######################
#1.8 compare scanone of phenotypes between qn and raw
par(mfrow=c(3,1),mar = c(2,4,2,0))
for (i in phes_wqtls){
s1.raw<-scanone(grid, pheno.col=i)
s1.qn<-scanone(grid.qn, pheno.col=i)
plot(s1.raw, ylim=c(0,max(max(s1.qn$lod),(.5+max(s1.raw$lod)))),
main=paste(i, " red: quantile normalized"), mar=c(0,0,0,0))
plot(s1.qn, col="red", lty=2, add=T)
}
par(mfrow=c(3,1),mar = c(2,4,2,0))
min.thresh<-min(anyqtls$perm.raw.05) #most lenient QTL selection criteria
#######################
#1.9 heavy and light penalties from previous analyses
pens<-c(min.thresh,3.2305306,1.68988784)
#qtlphes<-as.character(outmaxlod[outmaxlod$maxlod>=pens[1],]$i)
qtlphes<-phes_wqtls #these are the traits w/qtls
#set up the environment to write statistics to
#######################
#1.10 multiple QTL model generation and statistical analysis
stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
lowCImarker=character(), hiCImarker=character(),
lowCIpos=character(), hiCIpos=character())
statcols<-names(stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
for (i in qtlphes){
#print(i)
stepout<-stepwiseqtl(grid,pheno.col=i, penalties=pens, max.qtl=10,method="hk",
refine.locations=T,additive.only=T, keeplodprofile=T,keeptrace=T,verbose=F)
plotLodProfile(stepout,main=i)
fit<-fitqtl(grid,pheno.col=i,qtl=stepout,formula=formula(stepout),get.ests=T,dropone=T)
nqtls<-nqtl(stepout)
if(nqtls>1){
ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
cis<-data.frame()
for (j in 1:nqtls){
ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
lowposition<-ciout[1,2]; highposition<-ciout[3,2]
cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
}
stats<-data.frame(rep(i,nqtls),stepout$chr,stepout$pos,fit$result.drop,ests_out,cis)
colnames(stats)<-statcols
}
else{
cis<-data.frame()
for (j in 1:nqtls){
ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
lowposition<-ciout[1,2]; highposition<-ciout[3,2]
cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
}
ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
stats<-data.frame(c(i,stepout$chr[1],stepout$pos[1],as.numeric(fit$result.full[1,c(1,2,4,5,3,6,7)]),as.numeric(ests_out),cis[1,]))
colnames(stats)<-statcols; rownames(stats)<-stepout$name
}
stats_out<-rbind(stats_out,stats)
}
par(mfrow=c(4,1),mar = c(2,4,2,1))
write.csv(stats_out, file="dd2014_allqtlstatistics.csv")
print(stats_out) # this is the statistical output in a dataframe
phenotype chromosome position df type3SS LOD perc.var
2@13.5 ABAlyoph.dry 2 13.469 1 2.620e+02 3.524 5.006
2@70.3 C13.dry 2 70.323 1 4.412e+00 3.463 4.265
3@47.5 C13.dry 3 47.460 1 4.554e+00 3.572 4.402
4@37.0 C13.dry 4 37.031 1 3.426e+00 2.703 3.311
5@72.2 C13.dry 5 72.223 1 4.732e+00 3.708 4.574
1@60.8 C13.wet 1 60.778 1 2.074e+00 3.009 3.029
3@15.6 C13.wet 3 15.551 1 1.726e+00 2.514 2.522
3@90.2 C13.wet 3 90.218 1 2.630e+00 3.795 3.842
4@1.0 C13.wet 4 0.951 1 5.761e+00 8.069 8.417
4@45.1 C13.wet 4 45.123 1 2.723e+00 3.927 3.979
5@11.8 C13.wet 5 11.761 1 3.396e+00 4.865 4.962
5@73.2 C13.wet 5 73.162 1 4.576e+00 6.481 6.685
1@30.6 FT.June 1 30.579 1 4.181e+02 13.534 5.905
1@31.5 FT.June 1 31.547 1 3.053e+02 10.121 4.312
1@83.6 FT.June 1 83.628 1 3.535e+02 11.599 4.993
4@0.0 FT.June 4 0.000 1 4.350e+03 83.286 61.440
4@65.9 FT.June 4 65.929 1 2.112e+02 7.149 2.984
5@15.2 FT.June 5 15.168 1 1.075e+02 3.726 1.519
5@69.4 FT.June 5 69.411 1 3.423e+02 11.258 4.835
1@0.0 g0 1 0.000 1 6.189e-03 5.476 7.789
1@99.3 g0 1 99.350 1 2.968e-03 2.686 3.735
2@31.3 g0 2 31.312 1 4.062e-03 3.648 5.112
3@25.6 GR.dry 3 25.589 1 1.436e+02 3.748 5.283
4@45.11 LeafArea.wet 4 45.123 1 6.686e+02 2.528 3.264
4@46.0 LeafArea.wet 4 46.041 1 1.146e+03 4.282 5.594
2@30.4 proline.dry 2 30.360 1 3.268e+04 2.518 2.472
2@65.6 proline.dry 2 65.575 1 2.765e+05 18.903 20.922
3@63.4 proline.dry 3 63.395 1 5.096e+04 3.888 3.855
4@1.9 proline.dry 4 1.902 1 4.393e+04 3.365 3.324
4@81.2 proline.dry 4 81.217 1 3.175e+04 2.448 2.402
3@86.6 Rel.GR.wet 3 86.605 1 3.776e-02 3.431 4.862
4@1.01 RMR.dry 4 0.951 1 7.906e-03 2.638 18.329
2@14.3 Wilted.dry 2 14.325 1 6.583e+01 2.885 3.721
4@43.4 Wilted.dry 4 43.385 1 9.915e+01 4.301 5.605
4@48.0 Wilted.dry 4 47.996 1 1.411e+02 6.045 7.979
Fstat P.chi2 P.F effect.estimate effect.SE effect.t
2@13.5 2.620e+02 5.618e-05 6.011e-05 -0.982492 0.241537 -4.068
2@70.3 1.609e+01 6.510e-05 7.505e-05 0.119176 0.029707 4.012
3@47.5 1.661e+01 4.997e-05 5.785e-05 -0.120547 0.029576 -4.076
4@37.0 1.250e+01 4.181e-04 4.678e-04 -0.104069 0.029440 -3.535
5@72.2 1.726e+01 3.596e-05 4.185e-05 0.137703 0.033145 4.154
1@60.8 1.381e+01 1.973e-04 2.381e-04 0.088624 0.023849 3.716
3@15.6 1.150e+01 6.680e-04 7.828e-04 0.076349 0.022517 3.391
3@90.2 1.751e+01 2.910e-05 3.677e-05 -0.090149 0.021543 -4.185
4@1.0 3.836e+01 1.088e-09 1.767e-09 -0.137756 0.022241 -6.194
4@45.1 1.814e+01 2.115e-05 2.694e-05 -0.094149 0.022108 -4.259
5@11.8 2.262e+01 2.211e-06 2.975e-06 -0.106421 0.022378 -4.756
5@73.2 3.047e+01 4.675e-08 6.916e-08 0.136853 0.024791 5.520
1@30.6 6.681e+01 2.887e-15 6.439e-15 3.539286 0.433010 8.174
1@31.5 4.878e+01 8.673e-12 1.574e-11 -2.935921 0.420343 -6.985
1@83.6 5.649e+01 2.702e-13 5.340e-13 1.089161 0.144917 7.516
4@0.0 6.951e+02 0.000e+00 0.000e+00 -3.669350 0.139176 -26.365
4@65.9 3.376e+01 9.594e-09 1.467e-08 -0.846001 0.145608 -5.810
5@15.2 1.718e+01 3.441e-05 4.316e-05 -0.582742 0.140580 -4.145
5@69.4 5.470e+01 6.007e-13 1.164e-12 1.098149 0.148484 7.396
1@0.0 2.599e+01 5.117e-07 6.280e-07 0.004986 0.000978 5.098
1@99.3 1.246e+01 4.361e-04 4.839e-04 0.003492 0.000989 3.531
2@31.3 1.706e+01 4.153e-05 4.771e-05 -0.004837 0.001171 -4.130
3@25.6 1.436e+02 3.258e-05 3.499e-05 -0.754263 0.179655 -4.198
4@45.11 1.174e+01 6.441e-04 6.872e-04 -4.037755 1.178345 -3.427
4@46.0 2.012e+01 8.975e-06 9.987e-06 5.242976 1.168740 4.486
2@30.4 1.159e+01 6.612e-04 7.496e-04 -13.499903 3.965976 -3.404
2@65.6 9.806e+01 0.000e+00 0.000e+00 33.785459 3.411873 9.902
3@63.4 1.807e+01 2.322e-05 2.806e-05 -14.560774 3.425481 -4.251
4@1.9 1.558e+01 8.273e-05 9.756e-05 -11.797479 2.989070 -3.947
4@81.2 1.126e+01 7.868e-04 8.891e-04 -10.008236 2.982804 -3.355
3@86.6 3.776e-02 7.043e-05 7.522e-05 -0.011273 0.002810 -4.012
4@1.01 7.906e-03 4.914e-04 6.441e-04 0.011850 0.003285 3.608
2@14.3 1.339e+01 2.675e-04 2.955e-04 0.502782 0.137376 3.660
4@43.4 2.018e+01 8.569e-06 9.908e-06 -1.281149 0.285223 -4.492
4@48.0 2.872e+01 1.319e-07 1.613e-07 1.540130 0.287392 5.359
lowCImarker hiCImarker lowCIpos hiCIpos
2@13.5 2_518639 2_9256494 7.378 40.829
2@70.3 2-15986145 2_17844202 67.111 72.187
3@47.5 MSAT3.5 3.F1P2-TGF 40.691 68.384
4@37.0 4_4074232 4-17608717 26.868 81.217
5@72.2 5-22414510 5_26121185 68.476 83.371
1@60.8 1-17355484 1.NGA128 52.749 70.616
3@15.6 3.nga162 MSAT3.6 0 38.289
3@90.2 3_21728447 3-20482976 87.311 94.929
4@1.0 4.FRI 4_2126308 0 24.714
4@45.1 MSAT4.16 4-12403706 40.399 57.095
5@11.8 5.MUK11D 5.NGA106 6.776 15.168
5@73.2 5-22414510 5_25301310 68.476 79.489
1@30.6 1-9343234 1-9343234 30.579 30.579
1@31.5 1-9496666 1-9496666 31.547 31.547
1@83.6 1.NGA128 MSAT1.1 70.616 92.834
4@0.0 4.FRI 4.FRI 0 0
4@65.9 4_12900428 MSAT4.30 59.969 74.562
5@15.2 5_2799359 5-7278798 10.909 25.597
5@69.4 5-22414510 5-25301036 68.476 75.978
1@0.0 1_444764 1_112908 0 3.027
1@99.3 1.F5I14 1.NGA692 82.707 104.043
2@31.3 2_518639 2_9256494 7.378 40.829
3@25.6 3_620734 3-7391896 6.849 31.898
4@45.11 4_5726702 4-8078388 31.979 45.123
4@46.0 4-8297594 4-10482038 46.041 49.952
2@30.4 2-2477306 2_9256494 18.081 40.829
2@65.6 2_14801461 2-15986145 64.807 67.111
3@63.4 3.GAPHX 3.F1P2-TGF 47.46 68.384
4@1.9 4.FRI 4_2126308 0 24.714
4@81.2 4_208623 4-17608717 1.902 81.217
3@86.6 3-9747158 3-21632379 51.096 96.869
4@1.01 4.FRI 4_2126308 0 24.714
2@14.3 2_518639 2_9256494 7.378 40.829
4@43.4 MSAT4.16 4-8078388 40.399 45.123
4@48.0 4-8297594 4-10482038 46.041 49.952
#######################
#1.11 Processing and plotting the QTL statistics
#Read in all relevant data
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T) #statistics from Part 1
#######################
#1.12 Plot some phenotypes, determine correlations between phenotypes
phematrix<-cross$phe[,qtlphes]
distmatrix<-dist(t(phematrix))
hc<-hclust(distmatrix)
par(mfrow=c(1,1))
plot(hc)#plot the similarity of phenotypes
#reorder the phenotypes so that they are clustered by distance
statsout$phenotype <- factor(statsout$phenotype,
levels = names(phematrix)[hc$order])
#######################
#1.13plot the distribution of QTL
#
ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
geom_point() + scale_y_continuous("t statistic for QTL point estimate")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position,y=P.F))+
geom_point() + scale_y_log10("p.value of F statistic")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(.~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position))+
geom_histogram(binwidth=20) +
facet_grid(.~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution and density of QTL point estimates")
#############################
#############################
# Part 2: Gene expression analysis
#############################
#############################
##2.1 set up the gene expression environment
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T)
cmgenpos<-read.csv("gene_locations_new_McKay_map.csv",header=T,
na.strings="NA") #physical position of all genes
geneexp<-read.csv("May_SNP_5_4_12.csv",header=T) #gene expression of all AT genes in drydown
idnames<-read.csv("dd2014_markernametoid.csv",header=T)
annotations<-read.table("Functional_annotation_list.txt",sep="\t",header=F)
colnames(annotations)<-c("gene","annotation")
#make a super dense grid to extract pseudomarker probs
densegrid<-calc.genoprob(cross, step=.1, stepwidth="fixed")
#######################
##2.2 determine the cM positions of all genes in region
# the following steps must be conducted manually for each region of interest
#here are some potential regions of interest
qtls5.2<-statsout[statsout$chromosome == 5 & statsout$position > 60,]
ggplot(qtls5.2,aes(x=position,y=as.numeric(effect.t)))+
geom_point() + scale_y_continuous("t statistic for QTL point estimate")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(phenotype~., scales="free_y", margins=F)+
ggtitle("95% bayesian CI and point estimates for QTL in region 5.2 ")+
theme_bw()
print(qtl5.2_ci<-data.frame(max(qtls5.2$lowCIpos),min(qtls5.2$hiCIpos)))
max.qtls5.2.lowCIpos. min.qtls5.2.hiCIpos.
1 68.48 75.98
#######################
##2.3 Set parameters for each manually
#here change the region of interest
#i am opperating on the premise that the region should include all cm of the shortest confidence
#interval that overlaps all point estimates; see plots and summaries above
region<-qtl5.2_ci #defined above, change to desired interval
info<-qtls5.2 #defined above, change to desired interval
# cull the gene list to this region
genes<-cmgenpos[cmgenpos$Chr == info$chromosome[1] &
cmgenpos$cM > region[1,1] & cmgenpos$cM < region[1,2],]
##2.4 This is the engine that calculates differential expression for each gene
#this outputs a list of statistics, including the t test means and p-values
genexpstats<-data.frame(gene=character(),chr=numeric(),pos.bp=numeric(),pos.cm=numeric(),
dry_estAA=numeric(),dry_estBB=numeric(),dry_pval=numeric(),
wet_estAA=numeric(),wet_estBB=numeric(),wet_pval=numeric())
gen_woexp<-data.frame(gene=character)
count<-1
#pb <- txtProgressBar(min = 0, max = length(as.character(genes$Gene)), style = 3)
for (i in as.character(genes$Gene)){
#setTxtProgressBar(pb, count) #sets a progress bar (not run)
genein<-genes[genes$Gene==i,] #subsets data by genes in region
chpos<-paste(as.numeric(genein[4]),"@",round(as.numeric(genein[6]),1),sep="")
outgeno<-as.numeric(formMarkerCovar(densegrid,chpos,method="prob"))
#get pseduomarker genotype probabilities for the nearest pseduomarker
outgeno[outgeno>.05 & outgeno <.95]<-NA
#this is important--- set genotype probabilities of >95% to the alleles, set the rest to missing
outgeno<-cbind(idnames,as.data.frame(ifelse(outgeno>=.95,"BB","AA")))
colnames(outgeno)[2:3]<-c("line","geno")
#below, calculate the statistics as long as we have gene expression data
if (i %in% colnames(geneexp)){
outexp<-geneexp[,c("cyto","treatment","line",i)]
allin<-merge(outgeno,outexp,by="line")
colnames(allin)[6]<-"exp"
wet<-allin[allin$treatment=="wet",]
dry<-allin[allin$treatment=="dry",]
tdry<-t.test(dry$exp~dry$geno)
twet<-t.test(wet$exp~wet$geno)
ttest_out<-data.frame(i,as.numeric(genein[4]),as.numeric(genein[5]),as.numeric(genein[6]),
tdry$estimate[1],tdry$estimate[2],tdry$p.value,
twet$estimate[1],twet$estimate[2],twet$p.value)
colnames(ttest_out)<-c("gene","chr","pos.bp","pos.cm","dry_estAA","dry_estBB","dry_pval"
,"wet_estAA","wet_estBB","wet_pval")
rownames(ttest_out)<-count
genexpstats<-rbind(genexpstats,ttest_out)
count<-count+1
}
else{gen_woexp<-rbind(gen_woexp,i)}
}
#######################
##2.5 Correct pvalue statistics for number and distribution of comparisons using "qvalues"
genexpstats$qval_dry<-qvalue(genexpstats$dry_pval)$qvalues
genexpstats$qval_wet<-qvalue(genexpstats$wet_pval)$qvalues
sig_exp<-genexpstats[genexpstats$qval_dry<0.05 | genexpstats$qval_wet<0.05,]
cat("total number of significant genes = ", length(sig_exp[,1]), "out of", count)
total number of significant genes = 30 out of 657
sigexp_matcheffect<-genexpstats[genexpstats$qval_dry<0.05 & genexpstats$qval_wet<0.05,]
print(genesofinterest<-as.character(sigexp_matcheffect[,1]))
[1] "AT5G55180" "AT5G57830" "AT5G58575" "AT5G61410"
#######################
# 2.6 make dataframe of gene expression to use as covariates
geneexp.culled<-geneexp[,c("treatment","line",genesofinterest)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
# are the ids in the gene expression matrix and phenotype matrix identical
# IF FALSE, STOP!!! Figure out why not
identical(as.numeric(exp.covars$id),as.numeric(grid$phe$id))
[1] TRUE
#######################
#######################
# Part 3: Visualization and analysis of the effects of gene expression
#######################
#######################
#######################
#3.1 run all scanones, add to dataframe
phenos<-as.character(unique(info$phenotype))
outall<-data.frame(chr=numeric(), pos=numeric(),lod=numeric(),
phe=character(),covar=character(),
gene=character(),expression_env=character())
for (i in phenos){
temp<-cbind(as.data.frame(scanone(grid,pheno.col=i)),i,"nocovar","nocovar","nocovar")
colnames(temp)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,temp)
}
#######################
#3.2 do for each gene expression trait as a covariate
for (i in colnames(exp.covars)[3:10]){
for (j in phenos){
s1.out<-scanone(grid,pheno.col=j,addcovar=exp.covars[,i])
s1.out<-cbind(as.data.frame(s1.out),as.character(j),as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
print(i)
}
[1] "AT5G55180.wet"
[1] "AT5G57830.wet"
[1] "AT5G58575.wet"
[1] "AT5G61410.wet"
[1] "AT5G55180.dry"
[1] "AT5G57830.dry"
[1] "AT5G58575.dry"
[1] "AT5G61410.dry"
summary(outall)
chr pos lod phe
1:3456 Min. : 0.0 Min. : 0.00 C13.dry:4851
2:2673 1st Qu.: 20.9 1st Qu.: 0.05 C13.wet:4851
3:2970 Median : 44.2 Median : 0.24 FT.June:4851
4:2592 Mean : 45.2 Mean : 0.77
5:2862 3rd Qu.: 67.9 3rd Qu.: 0.66
Max. :107.4 Max. :53.28
covar gene expression_env
nocovar :1617 nocovar :1617 nocovar:1617
AT5G55180.wet:1617 AT5G55180:3234 wet :6468
AT5G57830.wet:1617 AT5G57830:3234 dry :6468
AT5G58575.wet:1617 AT5G58575:3234
AT5G61410.wet:1617 AT5G61410:3234
AT5G55180.dry:1617
(Other) :4851
#######################
#3.3 plot the scanones w/ and w/o covariates on the specified chromosome
out.s1.ch5<-outall[outall$chr==info$chromosome[1],]
palette1<-c("darkred","green","blue","violet")
line.cols <- c("lightgrey",rep(palette1,2))
line.width<-c(2,rep(.5,8))
line.shapes<-c(rep(1,5),rep(2,4))
#for just ch 5
ggplot(out.s1.ch5,aes(x=pos,y=lod))+
geom_line(aes(col=covar,size=covar,linetype=covar))+
scale_color_manual(values=line.cols)+
scale_linetype_manual(values=line.shapes)+
scale_size_manual(values=line.width)+
facet_grid(phe~.)+
ggtitle("QTL 5.2: scanones on CH5 w/ and w/o gene expression covariates")+
theme_bw()
#for all chromosomes
ggplot(outall,aes(x=pos,y=lod))+
geom_line(aes(col=covar,size=covar,linetype=covar))+
scale_color_manual(values=line.cols)+
scale_linetype_manual(values=line.shapes)+
scale_size_manual(values=line.width)+
facet_grid(phe~chr,scales="free_y")+
ggtitle("QTL 5.2: full scanones w/ and w/o gene expression covariates")+
theme_bw()
#######################
#3.4 fit models w/ and w/o covariates
outall.fit.covar<-data.frame()
for (j in 1:length(phenos)){ #1st make, fit and process qtl w/o covariates
pos.in<-info$position[j]; chr.in<-info$chromosome[j]; id.in<-as.character(info$X[j])
model.pos.in<-statsout[statsout$phenotype==phenos[j],"position"]
model.chr.in<-statsout[statsout$phenotype==phenos[j],"chromosome"]
make.full.in<-makeqtl(grid,chr=model.chr.in,pos=model.pos.in, what="prob")
make.single.in<-makeqtl(grid,chr=chr.in,pos=pos.in, what="prob")
full.fit.nocovar<-fitqtl(grid,pheno.col=phenos[j],qtl=make.full.in)
single.fit.nocovar<-fitqtl(grid,pheno.col=phenos[j],qtl=make.single.in)
full.fit.nocovar.out<-as.data.frame(cbind(rownames(full.fit.nocovar$result.drop),full.fit.nocovar$result.drop,
model.pos.in,model.chr.in,phenos[j],"full.model","no.covar"))
names(full.fit.nocovar.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
single.fit.nocovar.out<-c(id.in,single.fit.nocovar$result.full[1,c(1,2,4,5,3,6,7)],
pos.in,chr.in,phenos[j],"single.model","no.covar")
names(single.fit.nocovar.out)<-names(full.fit.nocovar.out)
all.fit.covar.out<-data.frame();qtl.fit.covar.out<-data.frame()
for (i in colnames(exp.covars)[3:10]){ #fit qtl models w/ covariates and process output
full.fit<-fitqtl(grid,pheno.col=phenos[j],qtl=make.full.in, covar=data.frame(exp.covars[,i]))
single.fit<-fitqtl(grid,pheno.col=phenos[j],qtl=make.single.in, covar=data.frame(exp.covars[,i]))
full.fit.out<-as.data.frame(cbind(rownames(full.fit$result.drop),full.fit$result.drop,
c(model.pos.in,i),c(model.chr.in,i),phenos[j],"full.model",i))
names(full.fit.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
single.fit.out<-as.data.frame(cbind(rownames(single.fit$result.drop),single.fit$result.drop,
c(pos.in,i),c(chr.in,i),phenos[j],"single.model",i))
names(single.fit.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
all.fit.covar.out<-rbind(all.fit.covar.out,full.fit.out,single.fit.out)
}
outall.fit.covar<-rbind(outall.fit.covar,t(data.frame(single.fit.nocovar.out)),full.fit.nocovar.out,all.fit.covar.out)
}
write.csv(outall.fit.covar,file="dd2014_fitqtloutput.wcovars.csv",row.names=F)
#######################
#3.5 Process output, plot to show differences in effect
outall.fit.covar<-read.csv("dd2014_fitqtloutput.wcovars.csv",header=T)
point.fit<-outall.fit.covar[outall.fit.covar$qtl.id=="exp.covars...i." |
outall.fit.covar$qtl.id %in% as.character(info$X),]
#plot only the effects of the QTLs w/ and w/o covariates
point.fit.onlyqtleffect<-point.fit[point.fit$chromosome==5,]
names(point.fit.onlyqtleffect)[c(5,7:8)]<-c("perc.var","chi2.pval","f.pval")
head(point.fit.onlyqtleffect)
qtl.id df Type.III.SS LOD perc.var F.value chi2.pval f.pval
1 5@72.2 1 3.5858253 2.4971528 3.466057 3.585825 6.960e-04 7.298e-04
5 5@72.2 1 4.7315467 3.7075038 4.573511 17.259852 3.596e-05 4.185e-05
9 5@72.2 1 0.0042136 0.0039368 0.015739 0.017065 8.929e-01 8.963e-01
11 5@72.2 1 0.0003505 0.0003095 0.001309 0.001383 9.699e-01 9.704e-01
16 5@72.2 1 0.2217082 0.1964939 0.828151 0.855448 3.415e-01 3.573e-01
18 5@72.2 1 0.2186075 0.1834367 0.816569 0.823316 3.580e-01 3.664e-01
position chromosome phenotype model.type covar
1 72.2233333333333 5 C13.dry single.model no.covar
5 72.2233333333333 5 C13.dry full.model no.covar
9 72.2233333333333 5 C13.dry full.model AT5G55180.wet
11 72.2233333333333 5 C13.dry single.model AT5G55180.wet
16 72.2233333333333 5 C13.dry full.model AT5G57830.wet
18 72.2233333333333 5 C13.dry single.model AT5G57830.wet
point.cols <-c(rep(palette1,each=2),"black")
point.shapes<-c(rep(c(1,16),4),15)
ggplot(point.fit.onlyqtleffect,aes(x=LOD,y=perc.var,col=covar, shape=covar))+
geom_point()+
scale_color_manual(values=point.cols)+
scale_shape_manual(values=point.shapes)+
scale_y_log10()+scale_x_log10()+
theme_bw()+ ggtitle("effect size of focal QTL w/ and w/out covariates")+
facet_grid(phenotype~model.type)
#plot only the effects of the covariates
point.fit.onlycovar<-point.fit[!point.fit$chromosome==5,]
names(point.fit.onlycovar)[c(5,7:8)]<-c("perc.var","chi2.pval","f.pval")
head(point.fit.onlycovar)
qtl.id df Type.III.SS LOD perc.var F.value chi2.pval
10 exp.covars...i. 1 1.23753 1.12717 4.6226 5.0119 0.02271
12 exp.covars...i. 1 1.30387 1.12242 4.8704 5.1462 0.02299
17 exp.covars...i. 1 0.06098 0.05422 0.2278 0.2353 0.61730
19 exp.covars...i. 1 0.10035 0.08440 0.3749 0.3780 0.53300
24 exp.covars...i. 1 0.30747 0.27474 1.1485 1.1982 0.26066
26 exp.covars...i. 1 0.36688 0.31011 1.3704 1.3959 0.23207
f.pval position chromosome phenotype model.type
10 0.02748 AT5G55180.wet AT5G55180.wet C13.dry full.model
12 0.02547 AT5G55180.wet AT5G55180.wet C13.dry single.model
17 0.62874 AT5G57830.wet AT5G57830.wet C13.dry full.model
19 0.54011 AT5G57830.wet AT5G57830.wet C13.dry single.model
24 0.27642 AT5G58575.wet AT5G58575.wet C13.dry full.model
26 0.24025 AT5G58575.wet AT5G58575.wet C13.dry single.model
covar
10 AT5G55180.wet
12 AT5G55180.wet
17 AT5G57830.wet
19 AT5G57830.wet
24 AT5G58575.wet
26 AT5G58575.wet
ggplot(point.fit.onlycovar,aes(x=LOD,y=perc.var,col=covar, shape=covar))+
geom_point()+
scale_color_manual(values=point.cols)+
scale_shape_manual(values=point.shapes)+
#scale_y_log10()+scale_x_log10()+
ggtitle("effect size of covariates in models w/ a single and full set of QTLs")+
theme_bw()+ facet_grid(phenotype~model.type)