dd2014_qtlmappingstatisticsplotting_draft3.R

Lovell — Jul 22, 2014, 12:17 PM

#############################
### Title: Re-analysis of Drydown QTLs
### Author: JTLovell
### Date Created: 13-July 2014
### Modifications (Date):

#1. (Part II) 14:20-July 2014- Added gene expression data analysis
#2. (Part III) 21:22-July 2014- Added covariate qtl analysis data output


#############################
###Libraries:###
#############################
library(qtl)
library(snow)
library(rlecuyer)
library(plyr)
library(ggplot2)
library(reshape)

Attaching package: 'reshape'

The following objects are masked from 'package:plyr':

    rename, round_any

The following object is masked from 'package:qtl':

    condense
library(reshape2)

Attaching package: 'reshape2'

The following objects are masked from 'package:reshape':

    colsplit, melt, recast
library(qvalue)

Attaching package: 'qvalue'

The following object is masked from 'package:ggplot2':

    qplot
options(warn=-1)
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] qvalue_1.38.0  reshape2_1.4   reshape_0.8.5  ggplot2_1.0.0 
[5] plyr_1.8.1     rlecuyer_0.3-3 snow_0.3-13    qtl_1.32-10   
[9] knitr_1.6     

loaded via a namespace (and not attached):
 [1] colorspace_1.2-4 digest_0.6.4     evaluate_0.5.5   formatR_0.10    
 [5] grid_3.1.0       gtable_0.1.2     MASS_7.3-33      munsell_0.4.2   
 [9] parallel_3.1.0   proto_0.3-10     Rcpp_0.11.2      scales_0.2.4    
[13] stringr_0.6.2    tcltk_3.1.0      tools_3.1.0     
#############################
###Part I: QTL mapping of main effects (no epistasis incorporated yet)
#############################

#######################
#1.1 Set up the environment
rm(list=ls())
setwd("~/Desktop/Drydown2014")
cross<-read.cross("csvs",dir="",
                  genotypes=c("A","H"),
                  genfile="McKay_revised_map_1_27_12.csv",
                  phefile= "TKrils_full_phe.csv",
                  na.strings=c("NA","-", "#VALUE!","#NUM!"))
 --Read the following data:
     341  individuals
     168  markers
     38  phenotypes
 --Cross type: bc 
#######################
##1.2: Set class as recombinant inbred line (DH is not an option in R/qtl)
class(cross)[1] <-"riself"
summary(cross)
    RI strains via selfing

    No. individuals:    341 

    No. phenotypes:     38 
    Percent phenotyped: 78.7 

    No. chromosomes:    5 
        Autosomes:      1 2 3 4 5 

    Total markers:      168 
    No. markers:        41 33 26 32 36 
    Percent genotyped:  91.1 
    Genotypes (%):      AA:50.8  BB:49.2 
#######################
##1.3 cull matrix to include only high quality markers
#look at the genotypes... check for errors
par(mfrow=c(1,5),mar = c(2,0,2,0))
cross.errlod<-calc.errorlod(cross)
for (i in 1:5) plotGeno(cross.errlod,chr=i, include.xo=F, cutoff=4, main=paste("ch",i))

plot of chunk unnamed-chunk-1

# qtl analysis of regular map indicates that there is an error on ch4, between 45 and 46
print(toperr <- top.errorlod(cross.errlod, cutoff=4))
  chr  id    marker errorlod
1   5 182 5_4223381    4.833
2   5 194 5_4223381    4.669
3   5 154  MSAT5.14    4.387
4   5 133  MSAT5.14    4.387
5   4 108  MSAT4.35    4.211
6   2  55 2_4354761    4.165
cross.clean <- cross
for(i in 1:nrow(toperr)) {
  chr <- toperr$chr[i];  id <- toperr$id[i];  mar <- toperr$marker[i]
  cross.clean$geno[[chr]]$data[cross$pheno$id==id, mar] <- NA
}
for (i in 1:5) plotGeno(calc.errorlod(cross.clean),chr=i, include.xo=T,cutoff=4, main=paste("ch",i))

plot of chunk unnamed-chunk-1

#if this looks good, replace the old cross
#still problems with chr 4 44-48# 
##### Needs to be addressed ####
cross<-cross.clean


#######################
##1.4 make pseudomarker matrix
grid<-calc.genoprob(cross, step=1, error.prob=.0001, map.function="kosambi",
                    stepwidth="max", off.end=0)
mainphes<-names(cross$phe)[-c(1,seq(from=4,to=28,by=3),33,36)] #all environment-specific QTLs


#######################
#1.5 make cross/pseudomarker matrix with quantile normalized phenotypes
quantnorm<-function(x) {
  n=sum(!is.na(x),na.rm=T)
  x=rank(x)/(n+1)
  x=qnorm(x)
  x[is.infinite(x)]=NA
  x
}
cross.qn<-transformPheno(cross,pheno.col=mainphes,transf=quantnorm)
grid.qn<-calc.genoprob(cross.qn, step=1, error.prob=.0001, map.function="kosambi",
                             stepwidth="max", off.end=0)

#######################
#1.6 compare permutation results between groups
permtest.raw<-scanone(grid, method="hk",pheno.col=mainphes,n.perm=1000, verbose=F)
permtest.qn<-scanone(grid.qn, method="hk",pheno.col=mainphes,n.perm=1000, verbose=F)

permsout.raw<-data.frame(phe=character(),perm.raw.1=numeric(),perm.raw.05=numeric())
for (i in 1:length(mainphes)){
  phe<-mainphes[i]
  index.05<-(1:length(mainphes)*2)[i]-1
  index.1<-(1:length(mainphes)*2)[i]
  perm.raw.1<-summary(permtest.raw)[index.1]
  perm.raw.05<-summary(permtest.raw)[index.05]
  permsout.raw<-rbind(permsout.raw,data.frame(phe,perm.raw.1,perm.raw.05))
}
permsout.qn<-data.frame(phe=character(),perm.qn.1=numeric(),perm.qn.05=numeric())
for (i in 1:length(mainphes)){
  phe<-mainphes[i]
  index.05<-(1:length(mainphes)*2)[i]-1
  index.1<-(1:length(mainphes)*2)[i]
  perm.qn.1<-summary(permtest.qn)[index.1]
  perm.qn.05<-summary(permtest.qn)[index.05]
  permsout.qn<-rbind(permsout.qn,data.frame(phe,perm.qn.1,perm.qn.05))
}
outmaxlod<-data.frame(phe=character(),maxlod_raw=numeric(),maxlod_qn=numeric())
for (i in mainphes){
  phe<-i
  maxlod.raw<-max(scanone(grid, pheno.col=i, method="hk"))[1,3]
  maxlod.qn<-max(scanone(grid.qn, pheno.col=i, method="hk"))[1,3]
  outmaxlod<-rbind(outmaxlod,data.frame(phe,maxlod.raw,maxlod.qn))
}
anyqtls<-merge(permsout.raw,permsout.qn,by="phe")
anyqtls<-merge(anyqtls,outmaxlod,by="phe")
print(anyqtls) # the distribution of max scanone peaks and permutations
            phe perm.raw.1 perm.raw.05 perm.qn.1 perm.qn.05 maxlod.raw
1  ABAlyoph.dry      2.220       2.521     2.352      2.691     3.5237
2  ABAlyoph.wet      2.284       2.499     2.234      2.668     1.9033
3       C13.dry      2.265       2.607     2.295      2.600     2.9466
4       C13.wet      2.346       2.678     2.309      2.660     5.5038
5       FT.June      2.271       2.641     2.313      2.684    53.0746
6            g0      2.313       2.640     2.324      2.608     4.4961
7        GR.dry      2.333       2.618     2.335      2.633     3.7482
8  LeafArea.dry      2.288       2.642     2.230      2.497     2.4588
9  LeafArea.wet      2.373       2.656     2.325      2.742     2.7781
10  proline.dry      2.299       2.604     2.281      2.681    16.0433
11  proline.wet      2.125       2.386     2.284      2.567     1.5173
12   Rel.GR.wet      2.356       2.628     2.295      2.566     3.4307
13      RMR.dry      2.331       2.619     2.381      2.817     2.6379
14      RMR.wet      2.380       2.725     2.399      2.808     1.5504
15   RtMass.dry      2.429       2.744     2.427      2.784     2.3300
16   RtMass.wet      2.366       2.657     2.383      2.796     1.3313
17      S.R.dry      2.426       2.693     2.428      2.703     1.5904
18      S.R.wet      2.210       2.507     2.488      2.845     1.3005
19    ShtDW.dry      2.387       2.689     2.304      2.672     1.7355
20    ShtDW.wet      2.275       2.656     2.357      2.698     1.5474
21  ShtFrWt.dry      2.226       2.511     2.306      2.583     2.1435
22  ShtFrWt.wet      2.394       2.796     2.362      2.695     1.8202
23       WC.dry      2.434       2.765     2.329      2.641     2.3468
24       WC.wet      2.339       2.722     2.358      2.674     0.9362
25   Wilted.dry      2.368       2.691     2.350      2.713     2.7511
26   Wilted.wet      2.050       2.248     1.958      2.187     1.2578
   maxlod.qn
1     2.4009
2     1.4075
3     2.9618
4     5.6963
5    53.7338
6     4.5293
7     3.5959
8     2.4290
9     2.6025
10   16.4985
11    2.2366
12    2.8481
13    2.3096
14    1.3736
15    2.2925
16    0.9848
17    1.5294
18    1.1509
19    1.6709
20    1.4349
21    2.2253
22    1.9064
23    2.2975
24    1.0334
25    2.7244
26    1.2578
#######################
#1.7 Choose the phenotypes to do the mapping
anyqtls$qtl.qn.05<-anyqtls$maxlod.qn>=anyqtls$perm.qn.05
anyqtls$qtl.raw.05<-anyqtls$maxlod.raw>=anyqtls$perm.raw.05
anyqtls$qtl.qn.1<-anyqtls$maxlod.qn>=anyqtls$perm.qn.1
anyqtls$qtl.raw.1<-anyqtls$maxlod.raw>=anyqtls$perm.raw.1
print(anyqtls[c(1,8:11)])
            phe qtl.qn.05 qtl.raw.05 qtl.qn.1 qtl.raw.1
1  ABAlyoph.dry     FALSE       TRUE     TRUE      TRUE
2  ABAlyoph.wet     FALSE      FALSE    FALSE     FALSE
3       C13.dry      TRUE       TRUE     TRUE      TRUE
4       C13.wet      TRUE       TRUE     TRUE      TRUE
5       FT.June      TRUE       TRUE     TRUE      TRUE
6            g0      TRUE       TRUE     TRUE      TRUE
7        GR.dry      TRUE       TRUE     TRUE      TRUE
8  LeafArea.dry     FALSE      FALSE     TRUE      TRUE
9  LeafArea.wet     FALSE       TRUE     TRUE      TRUE
10  proline.dry      TRUE       TRUE     TRUE      TRUE
11  proline.wet     FALSE      FALSE    FALSE     FALSE
12   Rel.GR.wet      TRUE       TRUE     TRUE      TRUE
13      RMR.dry     FALSE       TRUE    FALSE      TRUE
14      RMR.wet     FALSE      FALSE    FALSE     FALSE
15   RtMass.dry     FALSE      FALSE    FALSE     FALSE
16   RtMass.wet     FALSE      FALSE    FALSE     FALSE
17      S.R.dry     FALSE      FALSE    FALSE     FALSE
18      S.R.wet     FALSE      FALSE    FALSE     FALSE
19    ShtDW.dry     FALSE      FALSE    FALSE     FALSE
20    ShtDW.wet     FALSE      FALSE    FALSE     FALSE
21  ShtFrWt.dry     FALSE      FALSE    FALSE     FALSE
22  ShtFrWt.wet     FALSE      FALSE    FALSE     FALSE
23       WC.dry     FALSE      FALSE    FALSE     FALSE
24       WC.wet     FALSE      FALSE    FALSE     FALSE
25   Wilted.dry      TRUE       TRUE     TRUE      TRUE
26   Wilted.wet     FALSE      FALSE    FALSE     FALSE
phes_wqtls<-as.character(anyqtls$phe[anyqtls$qtl.raw.05])
phes_wqtls #these are the traits with qtl at a=0.05 w/ raw phenotypes
 [1] "ABAlyoph.dry" "C13.dry"      "C13.wet"      "FT.June"     
 [5] "g0"           "GR.dry"       "LeafArea.wet" "proline.dry" 
 [9] "Rel.GR.wet"   "RMR.dry"      "Wilted.dry"  
#######################
#1.8 compare scanone of phenotypes between qn and raw
par(mfrow=c(3,1),mar = c(2,4,2,0))
for (i in phes_wqtls){
  s1.raw<-scanone(grid, pheno.col=i)
  s1.qn<-scanone(grid.qn, pheno.col=i)
  plot(s1.raw, ylim=c(0,max(max(s1.qn$lod),(.5+max(s1.raw$lod)))), 
       main=paste(i, "  red: quantile normalized"), mar=c(0,0,0,0))
  plot(s1.qn, col="red", lty=2, add=T)
}

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

par(mfrow=c(3,1),mar = c(2,4,2,0))

plot of chunk unnamed-chunk-1

min.thresh<-min(anyqtls$perm.raw.05) #most lenient QTL selection criteria

#######################
#1.9 heavy and light penalties from previous analyses
pens<-c(min.thresh,3.2305306,1.68988784)
#qtlphes<-as.character(outmaxlod[outmaxlod$maxlod>=pens[1],]$i)
qtlphes<-phes_wqtls #these are the traits w/qtls
#set up the environment to write statistics to

#######################
#1.10  multiple QTL model generation and statistical analysis
stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
                      df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
                      effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
                      lowCImarker=character(), hiCImarker=character(),
                      lowCIpos=character(), hiCIpos=character())
statcols<-names(stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
for (i in qtlphes){
  #print(i)
  stepout<-stepwiseqtl(grid,pheno.col=i, penalties=pens, max.qtl=10,method="hk",
                   refine.locations=T,additive.only=T, keeplodprofile=T,keeptrace=T,verbose=F)
  plotLodProfile(stepout,main=i)
  fit<-fitqtl(grid,pheno.col=i,qtl=stepout,formula=formula(stepout),get.ests=T,dropone=T)
  nqtls<-nqtl(stepout)
  if(nqtls>1){
    ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
    cis<-data.frame()
    for (j in 1:nqtls){
      ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
      lowmarker<-rownames(ciout)[1];  highmarker<-rownames(ciout)[3]
      lowposition<-ciout[1,2];  highposition<-ciout[3,2]
      cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
    }
    stats<-data.frame(rep(i,nqtls),stepout$chr,stepout$pos,fit$result.drop,ests_out,cis)
    colnames(stats)<-statcols
  }
  else{
    cis<-data.frame()
    for (j in 1:nqtls){
      ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
      lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
      lowposition<-ciout[1,2]; highposition<-ciout[3,2]
      cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
    }
    ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
    stats<-data.frame(c(i,stepout$chr[1],stepout$pos[1],as.numeric(fit$result.full[1,c(1,2,4,5,3,6,7)]),as.numeric(ests_out),cis[1,]))
    colnames(stats)<-statcols; rownames(stats)<-stepout$name
  }
  stats_out<-rbind(stats_out,stats)
}

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

par(mfrow=c(4,1),mar = c(2,4,2,1))

plot of chunk unnamed-chunk-1

write.csv(stats_out, file="dd2014_allqtlstatistics.csv")
print(stats_out) # this is the statistical output in a dataframe
           phenotype chromosome position df   type3SS    LOD perc.var
2@13.5  ABAlyoph.dry          2   13.469  1 2.620e+02  3.524    5.006
2@70.3       C13.dry          2   70.323  1 4.412e+00  3.463    4.265
3@47.5       C13.dry          3   47.460  1 4.554e+00  3.572    4.402
4@37.0       C13.dry          4   37.031  1 3.426e+00  2.703    3.311
5@72.2       C13.dry          5   72.223  1 4.732e+00  3.708    4.574
1@60.8       C13.wet          1   60.778  1 2.074e+00  3.009    3.029
3@15.6       C13.wet          3   15.551  1 1.726e+00  2.514    2.522
3@90.2       C13.wet          3   90.218  1 2.630e+00  3.795    3.842
4@1.0        C13.wet          4    0.951  1 5.761e+00  8.069    8.417
4@45.1       C13.wet          4   45.123  1 2.723e+00  3.927    3.979
5@11.8       C13.wet          5   11.761  1 3.396e+00  4.865    4.962
5@73.2       C13.wet          5   73.162  1 4.576e+00  6.481    6.685
1@30.6       FT.June          1   30.579  1 4.181e+02 13.534    5.905
1@31.5       FT.June          1   31.547  1 3.053e+02 10.121    4.312
1@83.6       FT.June          1   83.628  1 3.535e+02 11.599    4.993
4@0.0        FT.June          4    0.000  1 4.350e+03 83.286   61.440
4@65.9       FT.June          4   65.929  1 2.112e+02  7.149    2.984
5@15.2       FT.June          5   15.168  1 1.075e+02  3.726    1.519
5@69.4       FT.June          5   69.411  1 3.423e+02 11.258    4.835
1@0.0             g0          1    0.000  1 6.189e-03  5.476    7.789
1@99.3            g0          1   99.350  1 2.968e-03  2.686    3.735
2@31.3            g0          2   31.312  1 4.062e-03  3.648    5.112
3@25.6        GR.dry          3   25.589  1 1.436e+02  3.748    5.283
4@45.11 LeafArea.wet          4   45.123  1 6.686e+02  2.528    3.264
4@46.0  LeafArea.wet          4   46.041  1 1.146e+03  4.282    5.594
2@30.4   proline.dry          2   30.360  1 3.268e+04  2.518    2.472
2@65.6   proline.dry          2   65.575  1 2.765e+05 18.903   20.922
3@63.4   proline.dry          3   63.395  1 5.096e+04  3.888    3.855
4@1.9    proline.dry          4    1.902  1 4.393e+04  3.365    3.324
4@81.2   proline.dry          4   81.217  1 3.175e+04  2.448    2.402
3@86.6    Rel.GR.wet          3   86.605  1 3.776e-02  3.431    4.862
4@1.01       RMR.dry          4    0.951  1 7.906e-03  2.638   18.329
2@14.3    Wilted.dry          2   14.325  1 6.583e+01  2.885    3.721
4@43.4    Wilted.dry          4   43.385  1 9.915e+01  4.301    5.605
4@48.0    Wilted.dry          4   47.996  1 1.411e+02  6.045    7.979
            Fstat    P.chi2       P.F effect.estimate effect.SE effect.t
2@13.5  2.620e+02 5.618e-05 6.011e-05       -0.982492  0.241537   -4.068
2@70.3  1.609e+01 6.510e-05 7.505e-05        0.119176  0.029707    4.012
3@47.5  1.661e+01 4.997e-05 5.785e-05       -0.120547  0.029576   -4.076
4@37.0  1.250e+01 4.181e-04 4.678e-04       -0.104069  0.029440   -3.535
5@72.2  1.726e+01 3.596e-05 4.185e-05        0.137703  0.033145    4.154
1@60.8  1.381e+01 1.973e-04 2.381e-04        0.088624  0.023849    3.716
3@15.6  1.150e+01 6.680e-04 7.828e-04        0.076349  0.022517    3.391
3@90.2  1.751e+01 2.910e-05 3.677e-05       -0.090149  0.021543   -4.185
4@1.0   3.836e+01 1.088e-09 1.767e-09       -0.137756  0.022241   -6.194
4@45.1  1.814e+01 2.115e-05 2.694e-05       -0.094149  0.022108   -4.259
5@11.8  2.262e+01 2.211e-06 2.975e-06       -0.106421  0.022378   -4.756
5@73.2  3.047e+01 4.675e-08 6.916e-08        0.136853  0.024791    5.520
1@30.6  6.681e+01 2.887e-15 6.439e-15        3.539286  0.433010    8.174
1@31.5  4.878e+01 8.673e-12 1.574e-11       -2.935921  0.420343   -6.985
1@83.6  5.649e+01 2.702e-13 5.340e-13        1.089161  0.144917    7.516
4@0.0   6.951e+02 0.000e+00 0.000e+00       -3.669350  0.139176  -26.365
4@65.9  3.376e+01 9.594e-09 1.467e-08       -0.846001  0.145608   -5.810
5@15.2  1.718e+01 3.441e-05 4.316e-05       -0.582742  0.140580   -4.145
5@69.4  5.470e+01 6.007e-13 1.164e-12        1.098149  0.148484    7.396
1@0.0   2.599e+01 5.117e-07 6.280e-07        0.004986  0.000978    5.098
1@99.3  1.246e+01 4.361e-04 4.839e-04        0.003492  0.000989    3.531
2@31.3  1.706e+01 4.153e-05 4.771e-05       -0.004837  0.001171   -4.130
3@25.6  1.436e+02 3.258e-05 3.499e-05       -0.754263  0.179655   -4.198
4@45.11 1.174e+01 6.441e-04 6.872e-04       -4.037755  1.178345   -3.427
4@46.0  2.012e+01 8.975e-06 9.987e-06        5.242976  1.168740    4.486
2@30.4  1.159e+01 6.612e-04 7.496e-04      -13.499903  3.965976   -3.404
2@65.6  9.806e+01 0.000e+00 0.000e+00       33.785459  3.411873    9.902
3@63.4  1.807e+01 2.322e-05 2.806e-05      -14.560774  3.425481   -4.251
4@1.9   1.558e+01 8.273e-05 9.756e-05      -11.797479  2.989070   -3.947
4@81.2  1.126e+01 7.868e-04 8.891e-04      -10.008236  2.982804   -3.355
3@86.6  3.776e-02 7.043e-05 7.522e-05       -0.011273  0.002810   -4.012
4@1.01  7.906e-03 4.914e-04 6.441e-04        0.011850  0.003285    3.608
2@14.3  1.339e+01 2.675e-04 2.955e-04        0.502782  0.137376    3.660
4@43.4  2.018e+01 8.569e-06 9.908e-06       -1.281149  0.285223   -4.492
4@48.0  2.872e+01 1.319e-07 1.613e-07        1.540130  0.287392    5.359
        lowCImarker  hiCImarker lowCIpos hiCIpos
2@13.5     2_518639   2_9256494    7.378  40.829
2@70.3   2-15986145  2_17844202   67.111  72.187
3@47.5      MSAT3.5  3.F1P2-TGF   40.691  68.384
4@37.0    4_4074232  4-17608717   26.868  81.217
5@72.2   5-22414510  5_26121185   68.476  83.371
1@60.8   1-17355484    1.NGA128   52.749  70.616
3@15.6     3.nga162     MSAT3.6        0  38.289
3@90.2   3_21728447  3-20482976   87.311  94.929
4@1.0         4.FRI   4_2126308        0  24.714
4@45.1     MSAT4.16  4-12403706   40.399  57.095
5@11.8     5.MUK11D    5.NGA106    6.776  15.168
5@73.2   5-22414510  5_25301310   68.476  79.489
1@30.6    1-9343234 1-9343234     30.579  30.579
1@31.5    1-9496666 1-9496666     31.547  31.547
1@83.6     1.NGA128     MSAT1.1   70.616  92.834
4@0.0         4.FRI     4.FRI          0       0
4@65.9   4_12900428    MSAT4.30   59.969  74.562
5@15.2    5_2799359   5-7278798   10.909  25.597
5@69.4   5-22414510  5-25301036   68.476  75.978
1@0.0      1_444764    1_112908        0   3.027
1@99.3      1.F5I14    1.NGA692   82.707 104.043
2@31.3     2_518639   2_9256494    7.378  40.829
3@25.6     3_620734   3-7391896    6.849  31.898
4@45.11   4_5726702   4-8078388   31.979  45.123
4@46.0    4-8297594  4-10482038   46.041  49.952
2@30.4    2-2477306   2_9256494   18.081  40.829
2@65.6   2_14801461  2-15986145   64.807  67.111
3@63.4      3.GAPHX  3.F1P2-TGF    47.46  68.384
4@1.9         4.FRI   4_2126308        0  24.714
4@81.2     4_208623  4-17608717    1.902  81.217
3@86.6    3-9747158  3-21632379   51.096  96.869
4@1.01        4.FRI   4_2126308        0  24.714
2@14.3     2_518639   2_9256494    7.378  40.829
4@43.4     MSAT4.16   4-8078388   40.399  45.123
4@48.0    4-8297594  4-10482038   46.041  49.952
#######################
#1.11 Processing and plotting the QTL statistics
#Read in all relevant data
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T) #statistics from Part 1

#######################
#1.12 Plot some phenotypes, determine correlations between phenotypes
phematrix<-cross$phe[,qtlphes]
distmatrix<-dist(t(phematrix))
hc<-hclust(distmatrix)
par(mfrow=c(1,1))
plot(hc)#plot the similarity of phenotypes

plot of chunk unnamed-chunk-1

#reorder the phenotypes so that they are clustered by distance
statsout$phenotype <- factor(statsout$phenotype,
                       levels = names(phematrix)[hc$order])

#######################
#1.13plot the distribution of QTL
#
ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
  geom_point() +   scale_y_continuous("t statistic for QTL point estimate")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position,y=P.F))+
  geom_point() +   scale_y_log10("p.value of F statistic")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position))+
  geom_histogram(binwidth=20) + 
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution and density of QTL point estimates")

plot of chunk unnamed-chunk-1

#############################
#############################
# Part 2: Gene expression analysis
#############################
#############################
##2.1 set up the gene expression environment
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T)
cmgenpos<-read.csv("gene_locations_new_McKay_map.csv",header=T,
                   na.strings="NA") #physical position of all genes
geneexp<-read.csv("May_SNP_5_4_12.csv",header=T) #gene expression of all AT genes in drydown
idnames<-read.csv("dd2014_markernametoid.csv",header=T)
annotations<-read.table("Functional_annotation_list.txt",sep="\t",header=F)
colnames(annotations)<-c("gene","annotation")
#make a super dense grid to extract pseudomarker probs
densegrid<-calc.genoprob(cross, step=.1, stepwidth="fixed")


#######################
##2.2 determine the cM positions of all genes in region
# the following steps must be conducted manually for each region of interest
#here are some potential regions of interest
qtls5.2<-statsout[statsout$chromosome == 5 & statsout$position > 60,]
ggplot(qtls5.2,aes(x=position,y=as.numeric(effect.t)))+
  geom_point() +   scale_y_continuous("t statistic for QTL point estimate")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(phenotype~., scales="free_y", margins=F)+
  ggtitle("95% bayesian CI and point estimates for QTL in region 5.2 ")+ 
  theme_bw()

plot of chunk unnamed-chunk-1

print(qtl5.2_ci<-data.frame(max(qtls5.2$lowCIpos),min(qtls5.2$hiCIpos)))
  max.qtls5.2.lowCIpos. min.qtls5.2.hiCIpos.
1                 68.48                75.98
#######################
##2.3 Set parameters for each manually
#here change the region of interest
#i am opperating on the premise that the region should include all cm of the shortest confidence
#interval that overlaps all point estimates; see plots and summaries above 
region<-qtl5.2_ci #defined above, change to desired interval
info<-qtls5.2 #defined above, change to desired interval
#  cull the gene list to this region
genes<-cmgenpos[cmgenpos$Chr == info$chromosome[1] & 
                  cmgenpos$cM > region[1,1] &  cmgenpos$cM < region[1,2],]

##2.4 This is the engine that calculates differential expression for each gene
#this outputs a list of statistics, including the t test means and p-values
genexpstats<-data.frame(gene=character(),chr=numeric(),pos.bp=numeric(),pos.cm=numeric(),
                        dry_estAA=numeric(),dry_estBB=numeric(),dry_pval=numeric(),
                        wet_estAA=numeric(),wet_estBB=numeric(),wet_pval=numeric())
gen_woexp<-data.frame(gene=character)
count<-1
#pb <- txtProgressBar(min = 0, max = length(as.character(genes$Gene)), style = 3)
for (i in as.character(genes$Gene)){
  #setTxtProgressBar(pb, count) #sets a progress bar (not run)
  genein<-genes[genes$Gene==i,] #subsets data by genes in region
  chpos<-paste(as.numeric(genein[4]),"@",round(as.numeric(genein[6]),1),sep="")
  outgeno<-as.numeric(formMarkerCovar(densegrid,chpos,method="prob"))
  #get pseduomarker genotype probabilities for the nearest pseduomarker
  outgeno[outgeno>.05 & outgeno <.95]<-NA
  #this is important--- set genotype probabilities of >95% to the alleles, set the rest to missing
  outgeno<-cbind(idnames,as.data.frame(ifelse(outgeno>=.95,"BB","AA")))
  colnames(outgeno)[2:3]<-c("line","geno")
  #below, calculate the statistics as long as we have gene expression data
  if (i %in% colnames(geneexp)){
    outexp<-geneexp[,c("cyto","treatment","line",i)]
    allin<-merge(outgeno,outexp,by="line")
    colnames(allin)[6]<-"exp"
    wet<-allin[allin$treatment=="wet",]
    dry<-allin[allin$treatment=="dry",]
    tdry<-t.test(dry$exp~dry$geno)
    twet<-t.test(wet$exp~wet$geno)
    ttest_out<-data.frame(i,as.numeric(genein[4]),as.numeric(genein[5]),as.numeric(genein[6]),
                          tdry$estimate[1],tdry$estimate[2],tdry$p.value,
                          twet$estimate[1],twet$estimate[2],twet$p.value)
    colnames(ttest_out)<-c("gene","chr","pos.bp","pos.cm","dry_estAA","dry_estBB","dry_pval"
                           ,"wet_estAA","wet_estBB","wet_pval")
    rownames(ttest_out)<-count
    genexpstats<-rbind(genexpstats,ttest_out)
    count<-count+1
  }
  else{gen_woexp<-rbind(gen_woexp,i)}
}

#######################
##2.5 Correct pvalue statistics for number and distribution of comparisons using "qvalues"
genexpstats$qval_dry<-qvalue(genexpstats$dry_pval)$qvalues
genexpstats$qval_wet<-qvalue(genexpstats$wet_pval)$qvalues
sig_exp<-genexpstats[genexpstats$qval_dry<0.05 | genexpstats$qval_wet<0.05,]
cat("total number of significant genes = ", length(sig_exp[,1]), "out of", count)
total number of significant genes =  30 out of 657
sigexp_matcheffect<-genexpstats[genexpstats$qval_dry<0.05 & genexpstats$qval_wet<0.05,]
print(genesofinterest<-as.character(sigexp_matcheffect[,1]))
[1] "AT5G55180" "AT5G57830" "AT5G58575" "AT5G61410"
#######################
# 2.6 make dataframe of gene expression to use as covariates
geneexp.culled<-geneexp[,c("treatment","line",genesofinterest)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
# are the ids in the gene expression matrix and phenotype matrix identical
# IF FALSE, STOP!!! Figure out why not
identical(as.numeric(exp.covars$id),as.numeric(grid$phe$id))
[1] TRUE
#######################
#######################
# Part 3: Visualization and analysis of the effects of gene expression
#######################
#######################

#######################
#3.1 run all scanones, add to dataframe
phenos<-as.character(unique(info$phenotype))
outall<-data.frame(chr=numeric(), pos=numeric(),lod=numeric(),
                   phe=character(),covar=character(),
                   gene=character(),expression_env=character())
for (i in phenos){
  temp<-cbind(as.data.frame(scanone(grid,pheno.col=i)),i,"nocovar","nocovar","nocovar")
  colnames(temp)[4:7]<-c("phe","covar","gene","expression_env")
  outall<-rbind(outall,temp)
}

#######################
#3.2 do for each gene expression trait as a covariate
for (i in colnames(exp.covars)[3:10]){
  for (j in phenos){
    s1.out<-scanone(grid,pheno.col=j,addcovar=exp.covars[,i])
    s1.out<-cbind(as.data.frame(s1.out),as.character(j),as.character(i),
                  unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
    colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
    outall<-rbind(outall,s1.out)
  }
  print(i)
}
[1] "AT5G55180.wet"
[1] "AT5G57830.wet"
[1] "AT5G58575.wet"
[1] "AT5G61410.wet"
[1] "AT5G55180.dry"
[1] "AT5G57830.dry"
[1] "AT5G58575.dry"
[1] "AT5G61410.dry"
summary(outall)
 chr           pos             lod             phe      
 1:3456   Min.   :  0.0   Min.   : 0.00   C13.dry:4851  
 2:2673   1st Qu.: 20.9   1st Qu.: 0.05   C13.wet:4851  
 3:2970   Median : 44.2   Median : 0.24   FT.June:4851  
 4:2592   Mean   : 45.2   Mean   : 0.77                 
 5:2862   3rd Qu.: 67.9   3rd Qu.: 0.66                 
          Max.   :107.4   Max.   :53.28                 

           covar             gene      expression_env
 nocovar      :1617   nocovar  :1617   nocovar:1617  
 AT5G55180.wet:1617   AT5G55180:3234   wet    :6468  
 AT5G57830.wet:1617   AT5G57830:3234   dry    :6468  
 AT5G58575.wet:1617   AT5G58575:3234                 
 AT5G61410.wet:1617   AT5G61410:3234                 
 AT5G55180.dry:1617                                  
 (Other)      :4851                                  
#######################
#3.3 plot the scanones w/ and w/o covariates on the specified chromosome
out.s1.ch5<-outall[outall$chr==info$chromosome[1],]
palette1<-c("darkred","green","blue","violet")
line.cols <- c("lightgrey",rep(palette1,2))
line.width<-c(2,rep(.5,8))
line.shapes<-c(rep(1,5),rep(2,4))
#for just ch 5
ggplot(out.s1.ch5,aes(x=pos,y=lod))+
  geom_line(aes(col=covar,size=covar,linetype=covar))+
  scale_color_manual(values=line.cols)+
  scale_linetype_manual(values=line.shapes)+
  scale_size_manual(values=line.width)+
  facet_grid(phe~.)+
  ggtitle("QTL 5.2: scanones on CH5 w/ and w/o gene expression covariates")+
  theme_bw()

plot of chunk unnamed-chunk-1

#for all chromosomes
ggplot(outall,aes(x=pos,y=lod))+
  geom_line(aes(col=covar,size=covar,linetype=covar))+
  scale_color_manual(values=line.cols)+
  scale_linetype_manual(values=line.shapes)+
  scale_size_manual(values=line.width)+
  facet_grid(phe~chr,scales="free_y")+
  ggtitle("QTL 5.2: full scanones w/ and w/o gene expression covariates")+
  theme_bw()

plot of chunk unnamed-chunk-1

#######################
#3.4 fit models w/ and w/o covariates
outall.fit.covar<-data.frame()
for (j in 1:length(phenos)){  #1st make, fit and process qtl w/o covariates
  pos.in<-info$position[j];  chr.in<-info$chromosome[j];  id.in<-as.character(info$X[j])
  model.pos.in<-statsout[statsout$phenotype==phenos[j],"position"]
  model.chr.in<-statsout[statsout$phenotype==phenos[j],"chromosome"]
  make.full.in<-makeqtl(grid,chr=model.chr.in,pos=model.pos.in, what="prob")
  make.single.in<-makeqtl(grid,chr=chr.in,pos=pos.in, what="prob")
  full.fit.nocovar<-fitqtl(grid,pheno.col=phenos[j],qtl=make.full.in)
  single.fit.nocovar<-fitqtl(grid,pheno.col=phenos[j],qtl=make.single.in)
  full.fit.nocovar.out<-as.data.frame(cbind(rownames(full.fit.nocovar$result.drop),full.fit.nocovar$result.drop,
                                    model.pos.in,model.chr.in,phenos[j],"full.model","no.covar"))
  names(full.fit.nocovar.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
  single.fit.nocovar.out<-c(id.in,single.fit.nocovar$result.full[1,c(1,2,4,5,3,6,7)],
                                      pos.in,chr.in,phenos[j],"single.model","no.covar")
  names(single.fit.nocovar.out)<-names(full.fit.nocovar.out)
  all.fit.covar.out<-data.frame();qtl.fit.covar.out<-data.frame()
  for (i in colnames(exp.covars)[3:10]){    #fit qtl models w/ covariates and  process output
    full.fit<-fitqtl(grid,pheno.col=phenos[j],qtl=make.full.in,  covar=data.frame(exp.covars[,i]))
    single.fit<-fitqtl(grid,pheno.col=phenos[j],qtl=make.single.in,  covar=data.frame(exp.covars[,i]))
    full.fit.out<-as.data.frame(cbind(rownames(full.fit$result.drop),full.fit$result.drop,
        c(model.pos.in,i),c(model.chr.in,i),phenos[j],"full.model",i))
    names(full.fit.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
    single.fit.out<-as.data.frame(cbind(rownames(single.fit$result.drop),single.fit$result.drop,
        c(pos.in,i),c(chr.in,i),phenos[j],"single.model",i))
    names(single.fit.out)[c(1,9:13)]<-c("qtl.id","position","chromosome","phenotype","model.type","covar")
    all.fit.covar.out<-rbind(all.fit.covar.out,full.fit.out,single.fit.out)
  }
  outall.fit.covar<-rbind(outall.fit.covar,t(data.frame(single.fit.nocovar.out)),full.fit.nocovar.out,all.fit.covar.out)
}
write.csv(outall.fit.covar,file="dd2014_fitqtloutput.wcovars.csv",row.names=F)

#######################
#3.5 Process output, plot to show differences in effect
outall.fit.covar<-read.csv("dd2014_fitqtloutput.wcovars.csv",header=T)
point.fit<-outall.fit.covar[outall.fit.covar$qtl.id=="exp.covars...i." |
                              outall.fit.covar$qtl.id %in% as.character(info$X),]
#plot only the effects of the QTLs w/ and w/o covariates
point.fit.onlyqtleffect<-point.fit[point.fit$chromosome==5,]
names(point.fit.onlyqtleffect)[c(5,7:8)]<-c("perc.var","chi2.pval","f.pval")
head(point.fit.onlyqtleffect)
   qtl.id df Type.III.SS       LOD perc.var   F.value chi2.pval    f.pval
1  5@72.2  1   3.5858253 2.4971528 3.466057  3.585825 6.960e-04 7.298e-04
5  5@72.2  1   4.7315467 3.7075038 4.573511 17.259852 3.596e-05 4.185e-05
9  5@72.2  1   0.0042136 0.0039368 0.015739  0.017065 8.929e-01 8.963e-01
11 5@72.2  1   0.0003505 0.0003095 0.001309  0.001383 9.699e-01 9.704e-01
16 5@72.2  1   0.2217082 0.1964939 0.828151  0.855448 3.415e-01 3.573e-01
18 5@72.2  1   0.2186075 0.1834367 0.816569  0.823316 3.580e-01 3.664e-01
           position chromosome phenotype   model.type         covar
1  72.2233333333333          5   C13.dry single.model      no.covar
5  72.2233333333333          5   C13.dry   full.model      no.covar
9  72.2233333333333          5   C13.dry   full.model AT5G55180.wet
11 72.2233333333333          5   C13.dry single.model AT5G55180.wet
16 72.2233333333333          5   C13.dry   full.model AT5G57830.wet
18 72.2233333333333          5   C13.dry single.model AT5G57830.wet
point.cols <-c(rep(palette1,each=2),"black")
point.shapes<-c(rep(c(1,16),4),15)
ggplot(point.fit.onlyqtleffect,aes(x=LOD,y=perc.var,col=covar, shape=covar))+
  geom_point()+ 
  scale_color_manual(values=point.cols)+
  scale_shape_manual(values=point.shapes)+
  scale_y_log10()+scale_x_log10()+
  theme_bw()+ ggtitle("effect size of focal QTL w/ and w/out covariates")+
  facet_grid(phenotype~model.type)

plot of chunk unnamed-chunk-1

#plot only the effects of the covariates
point.fit.onlycovar<-point.fit[!point.fit$chromosome==5,]
names(point.fit.onlycovar)[c(5,7:8)]<-c("perc.var","chi2.pval","f.pval")
head(point.fit.onlycovar)
            qtl.id df Type.III.SS     LOD perc.var F.value chi2.pval
10 exp.covars...i.  1     1.23753 1.12717   4.6226  5.0119   0.02271
12 exp.covars...i.  1     1.30387 1.12242   4.8704  5.1462   0.02299
17 exp.covars...i.  1     0.06098 0.05422   0.2278  0.2353   0.61730
19 exp.covars...i.  1     0.10035 0.08440   0.3749  0.3780   0.53300
24 exp.covars...i.  1     0.30747 0.27474   1.1485  1.1982   0.26066
26 exp.covars...i.  1     0.36688 0.31011   1.3704  1.3959   0.23207
    f.pval      position    chromosome phenotype   model.type
10 0.02748 AT5G55180.wet AT5G55180.wet   C13.dry   full.model
12 0.02547 AT5G55180.wet AT5G55180.wet   C13.dry single.model
17 0.62874 AT5G57830.wet AT5G57830.wet   C13.dry   full.model
19 0.54011 AT5G57830.wet AT5G57830.wet   C13.dry single.model
24 0.27642 AT5G58575.wet AT5G58575.wet   C13.dry   full.model
26 0.24025 AT5G58575.wet AT5G58575.wet   C13.dry single.model
           covar
10 AT5G55180.wet
12 AT5G55180.wet
17 AT5G57830.wet
19 AT5G57830.wet
24 AT5G58575.wet
26 AT5G58575.wet
ggplot(point.fit.onlycovar,aes(x=LOD,y=perc.var,col=covar, shape=covar))+
  geom_point()+ 
  scale_color_manual(values=point.cols)+
  scale_shape_manual(values=point.shapes)+
  #scale_y_log10()+scale_x_log10()+
  ggtitle("effect size of covariates in models w/ a single and full set of QTLs")+
  theme_bw()+ facet_grid(phenotype~model.type)

plot of chunk unnamed-chunk-1