dd2014_finalqtlanalysis_modelselection.R

johnlovell — Jul 25, 2014, 9:13 PM

#############################
###Libraries:###
#############################
library(qtl)
library(snow)
library(rlecuyer)
library(plyr)
library(ggplot2)
library(reshape)

Attaching package: 'reshape'

The following objects are masked from 'package:plyr':

    rename, round_any

The following object is masked from 'package:qtl':

    condense
library(reshape2)

Attaching package: 'reshape2'

The following objects are masked from 'package:reshape':

    colsplit, melt, recast
library(qvalue)
Warning: couldn't connect to display ":0"

Attaching package: 'qvalue'

The following object is masked from 'package:ggplot2':

    qplot
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following object is masked from 'package:reshape':

    expand

Loading required package: Rcpp
library(nlme)

Attaching package: 'nlme'

The following object is masked from 'package:lme4':

    lmList
library(car)
library(doBy)
Loading required package: survival
Loading required package: splines
Loading required package: MASS
library(scales)
options(warn=-1)
sessionInfo()
R version 3.0.2 (2013-09-25)
Platform: x86_64-redhat-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C         LC_TIME=C           
 [4] LC_COLLATE=C         LC_MONETARY=C        LC_MESSAGES=C       
 [7] LC_PAPER=C           LC_NAME=C            LC_ADDRESS=C        
[10] LC_TELEPHONE=C       LC_MEASUREMENT=C     LC_IDENTIFICATION=C 

attached base packages:
[1] splines   stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] scales_0.2.4    doBy_4.5-10     MASS_7.3-33     survival_2.37-7
 [5] car_2.0-20      nlme_3.1-117    lme4_1.1-7      Rcpp_0.11.2    
 [9] Matrix_1.1-4    qvalue_1.36.0   reshape2_1.4    reshape_0.8.5  
[13] ggplot2_1.0.0   plyr_1.8.1      rlecuyer_0.3-3  snow_0.3-13    
[17] qtl_1.32-10     knitr_1.6      

loaded via a namespace (and not attached):
 [1] colorspace_1.2-4 digest_0.6.4     evaluate_0.5.5   formatR_0.10    
 [5] grid_3.0.2       gtable_0.1.2     lattice_0.20-29  minqa_1.2.3     
 [9] munsell_0.4.2    nloptr_1.0.0     nnet_7.3-8       parallel_3.0.2  
[13] proto_0.3-10     stringr_0.6.2    tcltk_3.0.2      tools_3.0.2     
#############################
###Part I: QTL mapping of main effects (no epistasis incorporated yet)
#############################

#######################
#1.1 Set up the environment
rm(list=ls())
cross<-read.cross("csvs",dir="",
                  genotypes=c("a","b"),
                  genfile="TKrils_map55.csv",
                  phefile= "TKrils_full_phe.csv",
                  na.strings=c("NA","-", "#VALUE!","#NUM!"))
 --Read the following data:
     341  individuals
     450  markers
     38  phenotypes
 --Cross type: bc 
cross<-convert2riself(cross)
summary(cross)
    RI strains via selfing

    No. individuals:    341 

    No. phenotypes:     38 
    Percent phenotyped: 78.7 

    No. chromosomes:    5 
        Autosomes:      1 2 3 4 5 

    Total markers:      450 
    No. markers:        103 80 82 74 111 
    Percent genotyped:  70.4 
    Genotypes (%):      AA:51.2  BB:48.8 
load("dd2014_10000s2perms.RData")
load("dd2014_wiltedperms.RData")
load("dd2014_allS1perms.RData")
allphes<-phenames(cross)[-1]
mainphes<-phenames(cross)[-1]
#######################
#1.6 compare permutation results between groups
grid<-calc.genoprob(cross, step=1,error.prob=.01, map.function="kosambi",
                    stepwidth="max", off.end=0)
#permtest.raw<-scanone(grid, method="hk",pheno.col=allphes,n.perm=1000, verbose=F, n.cluster=20)
#save(permtest.raw,file="dd2014_allS1perms.RData")
permsout.raw<-data.frame(phe=character(),perm.raw.1=numeric(),perm.raw.05=numeric())
for (i in 1:length(mainphes)){
  phe<-mainphes[i]
  index.05<-(1:length(mainphes)*2)[i]-1
  index.1<-(1:length(mainphes)*2)[i]
  perm.raw.1<-summary(permtest.raw)[index.1]
  perm.raw.05<-summary(permtest.raw)[index.05]
  permsout.raw<-rbind(permsout.raw,data.frame(phe,perm.raw.1,perm.raw.05))
}
outmaxlod<-data.frame(phe=character(),maxlod_raw=numeric(),maxlod_qn=numeric())
for (i in mainphes){
  phe<-i
  maxlod.raw<-max(scanone(grid, pheno.col=i, method="hk"))[1,3]
  outmaxlod<-rbind(outmaxlod,data.frame(phe,maxlod.raw))
}
anyqtls<-permsout.raw
print(anyqtls<-merge(anyqtls,outmaxlod,by="phe"))
             phe perm.raw.1 perm.raw.05 maxlod.raw
1   ABAlyoph.dry     2.4425      2.8014     3.6354
2   ABAlyoph.wet     2.4153      2.6074     1.9705
3       C13.diff     2.5051      2.8299     1.8686
4        C13.dry     2.5263      2.9341     3.2340
5        C13.wet     2.4620      2.7184     6.2650
6        FT.June     2.4333      2.8512    51.5156
7       FT.March     2.4289      2.8094    18.0404
8         GR.dry     2.4404      2.7266     3.2195
9         GR.wet     2.2081      2.4567     1.4758
10 LeafArea.diff     2.4056      2.7171     1.8808
11  LeafArea.dry     2.4373      2.7805     2.5667
12  LeafArea.wet     2.3535      2.6336     1.6101
13      RMR.diff     2.4948      2.7325     1.5325
14       RMR.dry     2.3127      2.7216     3.1277
15       RMR.wet     2.4111      2.7548     1.3882
16    Rel.GR.dry     2.3884      2.6814     2.5433
17    Rel.GR.wet     2.3432      2.6423     4.7130
18    Rolled.dry     2.3367      2.6863     4.0081
19   RtMass.diff     0.7062      0.7679     0.3673
20    RtMass.dry     2.5066      2.8899     2.8269
21    RtMass.wet     2.4154      2.7397     1.1478
22       S.R.dry     2.4434      2.7448     2.4688
23       S.R.wet     2.1840      2.5285     1.1077
24    ShtDW.diff     2.3373      2.6261     1.4052
25     ShtDW.dry     2.4764      2.7623     1.8674
26     ShtDW.wet     2.3261      2.6829     1.2147
27  ShtFrWt.diff     2.3794      2.6797     1.3722
28   ShtFrWt.dry     2.4129      2.8009     2.1713
29   ShtFrWt.wet     2.3810      2.6826     1.4501
30        WC.dry     2.4235      2.7024     2.0729
31        WC.wet     2.4390      2.7227     1.4207
32    Wilted.dry     2.4012      2.7284     2.8535
33    Wilted.wet     2.2914      2.6887     1.6021
34            g0     2.1962      2.4193     6.2574
35  proline.diff     2.4128      2.7232    16.6321
36   proline.dry     2.3323      2.6535    17.5127
37   proline.wet     2.4190      2.6976     1.3840
anyqtls$qtl.raw.05<-anyqtls$maxlod.raw>=anyqtls$perm.raw.05
anyqtls$qtl.raw.1<-anyqtls$maxlod.raw>=anyqtls$perm.raw.1
print(anyqtls[c("phe","qtl.raw.05")])
             phe qtl.raw.05
1   ABAlyoph.dry       TRUE
2   ABAlyoph.wet      FALSE
3       C13.diff      FALSE
4        C13.dry       TRUE
5        C13.wet       TRUE
6        FT.June       TRUE
7       FT.March       TRUE
8         GR.dry       TRUE
9         GR.wet      FALSE
10 LeafArea.diff      FALSE
11  LeafArea.dry      FALSE
12  LeafArea.wet      FALSE
13      RMR.diff      FALSE
14       RMR.dry       TRUE
15       RMR.wet      FALSE
16    Rel.GR.dry      FALSE
17    Rel.GR.wet       TRUE
18    Rolled.dry       TRUE
19   RtMass.diff      FALSE
20    RtMass.dry      FALSE
21    RtMass.wet      FALSE
22       S.R.dry      FALSE
23       S.R.wet      FALSE
24    ShtDW.diff      FALSE
25     ShtDW.dry      FALSE
26     ShtDW.wet      FALSE
27  ShtFrWt.diff      FALSE
28   ShtFrWt.dry      FALSE
29   ShtFrWt.wet      FALSE
30        WC.dry      FALSE
31        WC.wet      FALSE
32    Wilted.dry       TRUE
33    Wilted.wet      FALSE
34            g0       TRUE
35  proline.diff       TRUE
36   proline.dry       TRUE
37   proline.wet      FALSE
phes_wqtls<-as.character(anyqtls$phe[anyqtls$qtl.raw.05])
phes_wqtls #these are the traits with qtl at a=0.05 w/ raw phenotypes
 [1] "ABAlyoph.dry" "C13.dry"      "C13.wet"      "FT.June"     
 [5] "FT.March"     "GR.dry"       "RMR.dry"      "Rel.GR.wet"  
 [9] "Rolled.dry"   "Wilted.dry"   "g0"           "proline.diff"
[13] "proline.dry" 
qtlphes<-phes_wqtls #these are the traits w/qtls
#perms.raw<-scantwo(grid, method="hk",pheno.col=qtlphes,n.perm=10000, verbose=T, n.cluster=22)
#perms.wilted<-scantwo(grid, method="hk",pheno.col="Wilted.dry",n.perm=10000, verbose=T, n.cluster=20)
#save(perms.wilted,file="dd2014_wiltedperms.RData") #these were read in earlier
#save(perms.raw, file = "dd2014_10000s2perms.RData")
pens<-calc.penalties(perms.raw, alpha=0.1)
Wilted.dry<-calc.penalties(perms.wilted, alpha=0.1)
pens<-rbind(pens, Wilted.dry)


#make sure that there are qtl for each phenotype
for (i in qtlphes){
  maxlod.raw<-scanone(grid, pheno.col=i, method="hk")
  cat(i,max(maxlod.raw)[1,3],pens[i,1])
  print(max(maxlod.raw)[1,3]>pens[i,1])
}
ABAlyoph.dry 3.635 2.231[1] TRUE
C13.dry 3.234 2.271[1] TRUE
C13.wet 6.265 2.281[1] TRUE
FT.June 51.52 2.252[1] TRUE
FT.March 18.04 2.24[1] TRUE
GR.dry 3.22 2.264[1] TRUE
RMR.dry 3.128 2.317[1] TRUE
Rel.GR.wet 4.713 2.268[1] TRUE
Rolled.dry 4.008 2.285[1] TRUE
Wilted.dry 2.854 2.394[1] TRUE
g0 6.257 2.27[1] TRUE
proline.diff 16.63 2.252[1] TRUE
proline.dry 17.51 2.255[1] TRUE
qtlphes<-qtlphes[-12]
#######################
#1.10  multiple QTL model generation and statistical analysis
stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
                      df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
                      effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
                      lowCImarker=character(), hiCImarker=character(),
                      lowCIpos=character(), hiCIpos=character())
statcols<-names(stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
model.out<-list()
for (i in qtlphes){
  stepout<-stepwiseqtl(grid,pheno.col=i, penalties=pens[i,], max.qtl=8,method="hk",
                       refine.locations=T,additive.only=F, keeplodprofile=T,keeptrace=T,verbose=F)
  plotLodProfile(stepout,main=i)
  model.out[[i]]<-stepout
  fit<-fitqtl(grid,pheno.col=i,qtl=stepout,formula=formula(stepout),get.ests=T,dropone=T)
  nqtls<-nqtl(stepout)
  if(nqtls>1){
    ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
    cis<-data.frame()
    for (j in 1:nqtls){
      ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
      lowmarker<-rownames(ciout)[1];  highmarker<-rownames(ciout)[3]
      lowposition<-ciout[1,2];  highposition<-ciout[3,2]
      cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
    }
    stats<-data.frame(rep(i,nqtls),stepout$chr,stepout$pos,fit$result.drop[1:length(stepout$pos),],ests_out,cis)
    colnames(stats)<-statcols
  }
  else{
    cis<-data.frame()
    for (j in 1:nqtls){
      ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
      lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
      lowposition<-ciout[1,2]; highposition<-ciout[3,2]
      cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
    }
    ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
    stats<-data.frame(c(i,stepout$chr[1],stepout$pos[1],as.numeric(fit$result.full[1,c(1,2,4,5,3,6,7)]),as.numeric(ests_out),cis[1,]))
    colnames(stats)<-statcols; rownames(stats)<-stepout$name
  }
  stats_out<-rbind(stats_out,stats)
}

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

par(mfrow=c(4,1),mar = c(2,4,2,1))
write.csv(stats_out, file="dd2014_allqtlstatistics.csv")
print(stats_out) # this is the statistical output in a dataframe
           phenotype chromosome position df   type3SS    LOD perc.var
2@15.5  ABAlyoph.dry          2   15.501  1 2.700e+02  3.635    5.160
2@74.4       C13.dry          2   74.357  1 3.728e+00  2.805    3.604
3@58.5       C13.dry          3   58.473  1 3.897e+00  2.930    3.767
5@84.2       C13.dry          5   84.177  1 3.656e+00  2.752    3.534
3@17.9       C13.wet          3   17.916  1 2.482e+00  3.520    3.626
3@75.5       C13.wet          3   75.535  1 2.643e+00  3.743    3.862
4@4.4        C13.wet          4    4.408  1 3.936e+00  5.506    5.751
4@42.4       C13.wet          4   42.395  1 2.499e+00  3.544    3.652
5@9.8        C13.wet          5    9.845  1 3.613e+00  5.070    5.279
5@69.8       C13.wet          5   69.820  1 3.143e+00  4.429    4.591
1@83.8       FT.June          1   83.846  1 2.868e+02  8.055    4.051
4@2.8        FT.June          4    2.791  2 4.186e+03 72.788   59.129
4@62.1       FT.June          4   62.073  2 3.653e+02 10.115    5.159
5@15.3       FT.June          5   15.267  1 1.278e+02  3.699    1.805
5@71.7       FT.June          5   71.677  1 3.055e+02  8.553    4.316
4@2.81      FT.March          4    2.791  1 1.088e+04 19.318   28.479
4@57.8      FT.March          4   57.796  1 1.174e+03  2.434    3.073
3@3.9         GR.dry          3    3.905  1 1.238e+02  3.220    4.555
3@19.0       RMR.dry          3   18.982  1 8.537e-03  3.575   19.792
4@1.5        RMR.dry          4    1.473  1 6.891e-03  2.958   15.976
3@53.6    Rel.GR.wet          3   53.591  1 5.140e-02  4.713    6.618
2@73.6    Rolled.dry          2   73.572  1 9.255e+01  4.008    5.588
4@2.1     Wilted.dry          4    2.132  1 7.095e+01  2.854    4.011
1@23.1            g0          1   23.064  1 7.455e-03  7.008    9.383
1@108.2           g0          1  108.185  2 5.234e-03  5.001    6.587
3@77.6            g0          3   77.635  2 5.524e-03  5.267    6.952
5@71.0            g0          5   71.042  1 2.475e-03  2.415    3.114
2@74.41  proline.dry          2   74.357  2 3.015e+05 20.534   22.811
3@50.2   proline.dry          3   50.173  2 7.678e+04  5.826    5.809
4@2.82   proline.dry          4    2.791  1 4.486e+04  3.462    3.394
5@12.8   proline.dry          5   12.797  1 2.955e+04  2.300    2.236
           Fstat    P.chi2       P.F effect.estimate effect.SE effect.t
2@15.5  270.0436 4.283e-05 4.593e-05       -0.985732 0.2384854   -4.133
2@74.4   13.0167 3.253e-04 3.578e-04        0.116991 0.0324265    3.608
3@58.5   13.6069 2.395e-04 2.645e-04       -0.114963 0.0311659   -3.689
5@84.2   12.7652 3.707e-04 4.071e-04        0.113008 0.0316299    3.573
3@17.9   16.2608 5.672e-05 6.874e-05        0.091062 0.0225821    4.032
3@75.5   17.3188 3.299e-05 4.045e-05       -0.096167 0.0231083   -4.162
4@4.4    25.7923 4.764e-07 6.400e-07       -0.114468 0.0225393   -5.079
4@42.4   16.3774 5.342e-05 6.483e-05       -0.091741 0.0226694   -4.047
5@9.8    23.6757 1.352e-06 1.776e-06       -0.108289 0.0222553   -4.866
5@69.8   20.5913 6.295e-06 7.997e-06        0.101628 0.0223961    4.538
1@83.8   38.3911 1.125e-09 1.714e-09        0.964291 0.1556299    6.196
4@2.8   280.2083 0.000e+00 0.000e+00       -3.676013 0.1552961  -23.671
4@62.1   24.4500 7.681e-11 1.243e-10       -0.847434 0.1551672   -5.461
5@15.3   17.1114 3.666e-05 4.471e-05       -0.636587 0.1538917   -4.137
5@71.7   40.9032 3.477e-10 5.432e-10        0.990904 0.1549361    6.396
4@2.81  104.9212 0.000e+00 0.000e+00       -6.562376 0.6406627  -10.243
4@57.8   11.3212 8.145e-04 8.838e-04       -2.222262 0.6604638   -3.365
3@3.9   123.8171 1.179e-04 1.254e-04       -0.632885 0.1629644   -3.884
3@19.0   17.9976 4.957e-05 8.217e-05        0.012219 0.0028802    4.242
4@1.5    14.5279 2.235e-04 3.411e-04        0.010970 0.0028781    3.812
3@53.6    0.0514 3.181e-06 3.478e-06       -0.013048 0.0027616   -4.725
2@73.6   92.5484 1.737e-05 1.873e-05       -0.555015 0.1277309   -4.345
4@2.1    70.9520 2.889e-04 3.051e-04       -0.477899 0.1308942   -3.651
1@23.1   33.4451 1.339e-08 1.950e-08        0.005726 0.0009902    5.783
1@108.2  11.7394 9.975e-06 1.269e-05        0.003417 0.0009268    3.686
3@77.6   12.3907 5.406e-06 6.966e-06       -0.002978 0.0009428   -3.159
5@71.0   11.1012 8.540e-04 9.783e-04       -0.003072 0.0009221   -3.332
2@74.41  53.9097 0.000e+00 0.000e+00       32.753466 3.2197066   10.173
3@50.2   13.7272 1.494e-06 1.916e-06      -13.250384 3.0803158   -4.302
4@2.82   16.0399 6.526e-05 7.732e-05      -12.004233 2.9973238   -4.005
5@12.8   10.5681 1.135e-03 1.274e-03        9.906987 3.0474950    3.251
        lowCImarker  hiCImarker lowCIpos hiCIpos
2@15.5     2_518639  C2_6997430     8.38  33.796
2@74.4  C2_12037234  2-18752604   57.664   86.92
3@58.5     3.GAPABX  3-21632379   44.183  89.501
5@84.2  C5_21106454    MSAT5.18   67.883  88.431
3@17.9   C3_2497708   3_7307956   13.874  26.475
3@75.5  C3_18396211  3_21728447   64.783  85.984
4@4.4      4_208623    C4_70965    1.473   6.311
4@42.4   C4_5538105 C4_14773912   31.209  70.164
5@9.8     C5_896146 C5_11137211     7.41  37.821
5@69.8  C5_21106454  5-26120843   67.883  90.755
1@83.8  C1_24316033 C1_25080434    80.81  86.852
4@2.8         4.FRI     MSAT4.8    2.791   4.408
4@62.1     Msat4.19  4_12900428   60.332  62.073
5@15.3   C5_3913044  C5_6688659   13.198  21.081
5@71.7     MSAT5.13 C5_22635947   71.677  73.754
4@2.81     4_208623     MSAT4.8    1.473   4.408
4@57.8    4_2126308    MSAT4.30   28.971   86.59
3@3.9      3_166828     MSAT3.6        0  35.833
3@19.0   C3_3623963   3_7307956   15.611  26.475
4@1.5       4.F6N15   C4_885880        0  13.509
3@53.6  C3_10332905 C3_16836255   46.945  60.102
2@73.6   2-14003272  2_17844202   70.808  77.858
4@2.1       4.F6N15 C4_17272166        0  77.755
1@23.1   C1_6315998  C1_7667369   15.548  23.849
1@108.2     MSAT1.5  1_30116495  106.481 111.091
3@77.6  C3_20197905 C3_20876948   75.891   79.34
5@71.0  C5_19731984    MSAT5.18   63.974  88.431
2@74.41  2-15986145 C2_16916847   73.572  75.142
3@50.2     3.GAPABX C3_12021844   44.183  55.666
4@2.82      4.F6N15  C4_1416972        0  19.217
5@12.8    5-1213621 C5_26396744    2.763  84.825
#######################
#1.11 Processing and plotting the QTL statistics
#Read in all relevant data
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T) #statistics from Part 1

#######################
#1.12 Plot some phenotypes, determine correlations between phenotypes
phematrix<-cross$phe[,qtlphes]
distmatrix<-dist(t(phematrix))
hc<-hclust(distmatrix)
par(mfrow=c(1,1))
plot(hc)#plot the similarity of phenotypes

plot of chunk unnamed-chunk-1

#reorder the phenotypes so that they are clustered by distance
statsout$phenotype <- factor(statsout$phenotype,
                             levels = names(phematrix)[hc$order])

#######################
#1.13plot the distribution of QTL
#
reverselog_trans <- function(base = exp(1)) {
  trans <- function(y) -log(y, base)
  inv <- function(y) base^(-y)
  trans_new(paste0("reverselog-", format(base)), trans, inv, 
            log_breaks(base = base), 
            domain = c(1e-100, Inf))
}
ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
  geom_point() +   scale_y_continuous("t statistic for QTL point estimate")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position,y=P.F))+
  geom_point() + scale_y_continuous(trans=reverselog_trans(10),"p.value of F statistic")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position))+
  geom_histogram(binwidth=20) + 
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution and density of QTL point estimates")

plot of chunk unnamed-chunk-1