johnlovell — Jul 25, 2014, 9:13 PM
#############################
###Libraries:###
#############################
library(qtl)
library(snow)
library(rlecuyer)
library(plyr)
library(ggplot2)
library(reshape)
Attaching package: 'reshape'
The following objects are masked from 'package:plyr':
rename, round_any
The following object is masked from 'package:qtl':
condense
library(reshape2)
Attaching package: 'reshape2'
The following objects are masked from 'package:reshape':
colsplit, melt, recast
library(qvalue)
Warning: couldn't connect to display ":0"
Attaching package: 'qvalue'
The following object is masked from 'package:ggplot2':
qplot
library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following object is masked from 'package:reshape':
expand
Loading required package: Rcpp
library(nlme)
Attaching package: 'nlme'
The following object is masked from 'package:lme4':
lmList
library(car)
library(doBy)
Loading required package: survival
Loading required package: splines
Loading required package: MASS
library(scales)
options(warn=-1)
sessionInfo()
R version 3.0.2 (2013-09-25)
Platform: x86_64-redhat-linux-gnu (64-bit)
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=C
[4] LC_COLLATE=C LC_MONETARY=C LC_MESSAGES=C
[7] LC_PAPER=C LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C LC_IDENTIFICATION=C
attached base packages:
[1] splines stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] scales_0.2.4 doBy_4.5-10 MASS_7.3-33 survival_2.37-7
[5] car_2.0-20 nlme_3.1-117 lme4_1.1-7 Rcpp_0.11.2
[9] Matrix_1.1-4 qvalue_1.36.0 reshape2_1.4 reshape_0.8.5
[13] ggplot2_1.0.0 plyr_1.8.1 rlecuyer_0.3-3 snow_0.3-13
[17] qtl_1.32-10 knitr_1.6
loaded via a namespace (and not attached):
[1] colorspace_1.2-4 digest_0.6.4 evaluate_0.5.5 formatR_0.10
[5] grid_3.0.2 gtable_0.1.2 lattice_0.20-29 minqa_1.2.3
[9] munsell_0.4.2 nloptr_1.0.0 nnet_7.3-8 parallel_3.0.2
[13] proto_0.3-10 stringr_0.6.2 tcltk_3.0.2 tools_3.0.2
#############################
###Part I: QTL mapping of main effects (no epistasis incorporated yet)
#############################
#######################
#1.1 Set up the environment
rm(list=ls())
cross<-read.cross("csvs",dir="",
genotypes=c("a","b"),
genfile="TKrils_map55.csv",
phefile= "TKrils_full_phe.csv",
na.strings=c("NA","-", "#VALUE!","#NUM!"))
--Read the following data:
341 individuals
450 markers
38 phenotypes
--Cross type: bc
cross<-convert2riself(cross)
summary(cross)
RI strains via selfing
No. individuals: 341
No. phenotypes: 38
Percent phenotyped: 78.7
No. chromosomes: 5
Autosomes: 1 2 3 4 5
Total markers: 450
No. markers: 103 80 82 74 111
Percent genotyped: 70.4
Genotypes (%): AA:51.2 BB:48.8
load("dd2014_10000s2perms.RData")
load("dd2014_wiltedperms.RData")
load("dd2014_allS1perms.RData")
allphes<-phenames(cross)[-1]
mainphes<-phenames(cross)[-1]
#######################
#1.6 compare permutation results between groups
grid<-calc.genoprob(cross, step=1,error.prob=.01, map.function="kosambi",
stepwidth="max", off.end=0)
#permtest.raw<-scanone(grid, method="hk",pheno.col=allphes,n.perm=1000, verbose=F, n.cluster=20)
#save(permtest.raw,file="dd2014_allS1perms.RData")
permsout.raw<-data.frame(phe=character(),perm.raw.1=numeric(),perm.raw.05=numeric())
for (i in 1:length(mainphes)){
phe<-mainphes[i]
index.05<-(1:length(mainphes)*2)[i]-1
index.1<-(1:length(mainphes)*2)[i]
perm.raw.1<-summary(permtest.raw)[index.1]
perm.raw.05<-summary(permtest.raw)[index.05]
permsout.raw<-rbind(permsout.raw,data.frame(phe,perm.raw.1,perm.raw.05))
}
outmaxlod<-data.frame(phe=character(),maxlod_raw=numeric(),maxlod_qn=numeric())
for (i in mainphes){
phe<-i
maxlod.raw<-max(scanone(grid, pheno.col=i, method="hk"))[1,3]
outmaxlod<-rbind(outmaxlod,data.frame(phe,maxlod.raw))
}
anyqtls<-permsout.raw
print(anyqtls<-merge(anyqtls,outmaxlod,by="phe"))
phe perm.raw.1 perm.raw.05 maxlod.raw
1 ABAlyoph.dry 2.4425 2.8014 3.6354
2 ABAlyoph.wet 2.4153 2.6074 1.9705
3 C13.diff 2.5051 2.8299 1.8686
4 C13.dry 2.5263 2.9341 3.2340
5 C13.wet 2.4620 2.7184 6.2650
6 FT.June 2.4333 2.8512 51.5156
7 FT.March 2.4289 2.8094 18.0404
8 GR.dry 2.4404 2.7266 3.2195
9 GR.wet 2.2081 2.4567 1.4758
10 LeafArea.diff 2.4056 2.7171 1.8808
11 LeafArea.dry 2.4373 2.7805 2.5667
12 LeafArea.wet 2.3535 2.6336 1.6101
13 RMR.diff 2.4948 2.7325 1.5325
14 RMR.dry 2.3127 2.7216 3.1277
15 RMR.wet 2.4111 2.7548 1.3882
16 Rel.GR.dry 2.3884 2.6814 2.5433
17 Rel.GR.wet 2.3432 2.6423 4.7130
18 Rolled.dry 2.3367 2.6863 4.0081
19 RtMass.diff 0.7062 0.7679 0.3673
20 RtMass.dry 2.5066 2.8899 2.8269
21 RtMass.wet 2.4154 2.7397 1.1478
22 S.R.dry 2.4434 2.7448 2.4688
23 S.R.wet 2.1840 2.5285 1.1077
24 ShtDW.diff 2.3373 2.6261 1.4052
25 ShtDW.dry 2.4764 2.7623 1.8674
26 ShtDW.wet 2.3261 2.6829 1.2147
27 ShtFrWt.diff 2.3794 2.6797 1.3722
28 ShtFrWt.dry 2.4129 2.8009 2.1713
29 ShtFrWt.wet 2.3810 2.6826 1.4501
30 WC.dry 2.4235 2.7024 2.0729
31 WC.wet 2.4390 2.7227 1.4207
32 Wilted.dry 2.4012 2.7284 2.8535
33 Wilted.wet 2.2914 2.6887 1.6021
34 g0 2.1962 2.4193 6.2574
35 proline.diff 2.4128 2.7232 16.6321
36 proline.dry 2.3323 2.6535 17.5127
37 proline.wet 2.4190 2.6976 1.3840
anyqtls$qtl.raw.05<-anyqtls$maxlod.raw>=anyqtls$perm.raw.05
anyqtls$qtl.raw.1<-anyqtls$maxlod.raw>=anyqtls$perm.raw.1
print(anyqtls[c("phe","qtl.raw.05")])
phe qtl.raw.05
1 ABAlyoph.dry TRUE
2 ABAlyoph.wet FALSE
3 C13.diff FALSE
4 C13.dry TRUE
5 C13.wet TRUE
6 FT.June TRUE
7 FT.March TRUE
8 GR.dry TRUE
9 GR.wet FALSE
10 LeafArea.diff FALSE
11 LeafArea.dry FALSE
12 LeafArea.wet FALSE
13 RMR.diff FALSE
14 RMR.dry TRUE
15 RMR.wet FALSE
16 Rel.GR.dry FALSE
17 Rel.GR.wet TRUE
18 Rolled.dry TRUE
19 RtMass.diff FALSE
20 RtMass.dry FALSE
21 RtMass.wet FALSE
22 S.R.dry FALSE
23 S.R.wet FALSE
24 ShtDW.diff FALSE
25 ShtDW.dry FALSE
26 ShtDW.wet FALSE
27 ShtFrWt.diff FALSE
28 ShtFrWt.dry FALSE
29 ShtFrWt.wet FALSE
30 WC.dry FALSE
31 WC.wet FALSE
32 Wilted.dry TRUE
33 Wilted.wet FALSE
34 g0 TRUE
35 proline.diff TRUE
36 proline.dry TRUE
37 proline.wet FALSE
phes_wqtls<-as.character(anyqtls$phe[anyqtls$qtl.raw.05])
phes_wqtls #these are the traits with qtl at a=0.05 w/ raw phenotypes
[1] "ABAlyoph.dry" "C13.dry" "C13.wet" "FT.June"
[5] "FT.March" "GR.dry" "RMR.dry" "Rel.GR.wet"
[9] "Rolled.dry" "Wilted.dry" "g0" "proline.diff"
[13] "proline.dry"
qtlphes<-phes_wqtls #these are the traits w/qtls
#perms.raw<-scantwo(grid, method="hk",pheno.col=qtlphes,n.perm=10000, verbose=T, n.cluster=22)
#perms.wilted<-scantwo(grid, method="hk",pheno.col="Wilted.dry",n.perm=10000, verbose=T, n.cluster=20)
#save(perms.wilted,file="dd2014_wiltedperms.RData") #these were read in earlier
#save(perms.raw, file = "dd2014_10000s2perms.RData")
pens<-calc.penalties(perms.raw, alpha=0.1)
Wilted.dry<-calc.penalties(perms.wilted, alpha=0.1)
pens<-rbind(pens, Wilted.dry)
#make sure that there are qtl for each phenotype
for (i in qtlphes){
maxlod.raw<-scanone(grid, pheno.col=i, method="hk")
cat(i,max(maxlod.raw)[1,3],pens[i,1])
print(max(maxlod.raw)[1,3]>pens[i,1])
}
ABAlyoph.dry 3.635 2.231[1] TRUE
C13.dry 3.234 2.271[1] TRUE
C13.wet 6.265 2.281[1] TRUE
FT.June 51.52 2.252[1] TRUE
FT.March 18.04 2.24[1] TRUE
GR.dry 3.22 2.264[1] TRUE
RMR.dry 3.128 2.317[1] TRUE
Rel.GR.wet 4.713 2.268[1] TRUE
Rolled.dry 4.008 2.285[1] TRUE
Wilted.dry 2.854 2.394[1] TRUE
g0 6.257 2.27[1] TRUE
proline.diff 16.63 2.252[1] TRUE
proline.dry 17.51 2.255[1] TRUE
qtlphes<-qtlphes[-12]
#######################
#1.10 multiple QTL model generation and statistical analysis
stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
lowCImarker=character(), hiCImarker=character(),
lowCIpos=character(), hiCIpos=character())
statcols<-names(stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
model.out<-list()
for (i in qtlphes){
stepout<-stepwiseqtl(grid,pheno.col=i, penalties=pens[i,], max.qtl=8,method="hk",
refine.locations=T,additive.only=F, keeplodprofile=T,keeptrace=T,verbose=F)
plotLodProfile(stepout,main=i)
model.out[[i]]<-stepout
fit<-fitqtl(grid,pheno.col=i,qtl=stepout,formula=formula(stepout),get.ests=T,dropone=T)
nqtls<-nqtl(stepout)
if(nqtls>1){
ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
cis<-data.frame()
for (j in 1:nqtls){
ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
lowposition<-ciout[1,2]; highposition<-ciout[3,2]
cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
}
stats<-data.frame(rep(i,nqtls),stepout$chr,stepout$pos,fit$result.drop[1:length(stepout$pos),],ests_out,cis)
colnames(stats)<-statcols
}
else{
cis<-data.frame()
for (j in 1:nqtls){
ciout<-bayesint(stepout,qtl.index=j, expandtomarkers=T, prob=.9)
lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
lowposition<-ciout[1,2]; highposition<-ciout[3,2]
cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
}
ests_out<-summary(fit)$ests[2:(nqtls+1),1:3]
stats<-data.frame(c(i,stepout$chr[1],stepout$pos[1],as.numeric(fit$result.full[1,c(1,2,4,5,3,6,7)]),as.numeric(ests_out),cis[1,]))
colnames(stats)<-statcols; rownames(stats)<-stepout$name
}
stats_out<-rbind(stats_out,stats)
}
par(mfrow=c(4,1),mar = c(2,4,2,1))
write.csv(stats_out, file="dd2014_allqtlstatistics.csv")
print(stats_out) # this is the statistical output in a dataframe
phenotype chromosome position df type3SS LOD perc.var
2@15.5 ABAlyoph.dry 2 15.501 1 2.700e+02 3.635 5.160
2@74.4 C13.dry 2 74.357 1 3.728e+00 2.805 3.604
3@58.5 C13.dry 3 58.473 1 3.897e+00 2.930 3.767
5@84.2 C13.dry 5 84.177 1 3.656e+00 2.752 3.534
3@17.9 C13.wet 3 17.916 1 2.482e+00 3.520 3.626
3@75.5 C13.wet 3 75.535 1 2.643e+00 3.743 3.862
4@4.4 C13.wet 4 4.408 1 3.936e+00 5.506 5.751
4@42.4 C13.wet 4 42.395 1 2.499e+00 3.544 3.652
5@9.8 C13.wet 5 9.845 1 3.613e+00 5.070 5.279
5@69.8 C13.wet 5 69.820 1 3.143e+00 4.429 4.591
1@83.8 FT.June 1 83.846 1 2.868e+02 8.055 4.051
4@2.8 FT.June 4 2.791 2 4.186e+03 72.788 59.129
4@62.1 FT.June 4 62.073 2 3.653e+02 10.115 5.159
5@15.3 FT.June 5 15.267 1 1.278e+02 3.699 1.805
5@71.7 FT.June 5 71.677 1 3.055e+02 8.553 4.316
4@2.81 FT.March 4 2.791 1 1.088e+04 19.318 28.479
4@57.8 FT.March 4 57.796 1 1.174e+03 2.434 3.073
3@3.9 GR.dry 3 3.905 1 1.238e+02 3.220 4.555
3@19.0 RMR.dry 3 18.982 1 8.537e-03 3.575 19.792
4@1.5 RMR.dry 4 1.473 1 6.891e-03 2.958 15.976
3@53.6 Rel.GR.wet 3 53.591 1 5.140e-02 4.713 6.618
2@73.6 Rolled.dry 2 73.572 1 9.255e+01 4.008 5.588
4@2.1 Wilted.dry 4 2.132 1 7.095e+01 2.854 4.011
1@23.1 g0 1 23.064 1 7.455e-03 7.008 9.383
1@108.2 g0 1 108.185 2 5.234e-03 5.001 6.587
3@77.6 g0 3 77.635 2 5.524e-03 5.267 6.952
5@71.0 g0 5 71.042 1 2.475e-03 2.415 3.114
2@74.41 proline.dry 2 74.357 2 3.015e+05 20.534 22.811
3@50.2 proline.dry 3 50.173 2 7.678e+04 5.826 5.809
4@2.82 proline.dry 4 2.791 1 4.486e+04 3.462 3.394
5@12.8 proline.dry 5 12.797 1 2.955e+04 2.300 2.236
Fstat P.chi2 P.F effect.estimate effect.SE effect.t
2@15.5 270.0436 4.283e-05 4.593e-05 -0.985732 0.2384854 -4.133
2@74.4 13.0167 3.253e-04 3.578e-04 0.116991 0.0324265 3.608
3@58.5 13.6069 2.395e-04 2.645e-04 -0.114963 0.0311659 -3.689
5@84.2 12.7652 3.707e-04 4.071e-04 0.113008 0.0316299 3.573
3@17.9 16.2608 5.672e-05 6.874e-05 0.091062 0.0225821 4.032
3@75.5 17.3188 3.299e-05 4.045e-05 -0.096167 0.0231083 -4.162
4@4.4 25.7923 4.764e-07 6.400e-07 -0.114468 0.0225393 -5.079
4@42.4 16.3774 5.342e-05 6.483e-05 -0.091741 0.0226694 -4.047
5@9.8 23.6757 1.352e-06 1.776e-06 -0.108289 0.0222553 -4.866
5@69.8 20.5913 6.295e-06 7.997e-06 0.101628 0.0223961 4.538
1@83.8 38.3911 1.125e-09 1.714e-09 0.964291 0.1556299 6.196
4@2.8 280.2083 0.000e+00 0.000e+00 -3.676013 0.1552961 -23.671
4@62.1 24.4500 7.681e-11 1.243e-10 -0.847434 0.1551672 -5.461
5@15.3 17.1114 3.666e-05 4.471e-05 -0.636587 0.1538917 -4.137
5@71.7 40.9032 3.477e-10 5.432e-10 0.990904 0.1549361 6.396
4@2.81 104.9212 0.000e+00 0.000e+00 -6.562376 0.6406627 -10.243
4@57.8 11.3212 8.145e-04 8.838e-04 -2.222262 0.6604638 -3.365
3@3.9 123.8171 1.179e-04 1.254e-04 -0.632885 0.1629644 -3.884
3@19.0 17.9976 4.957e-05 8.217e-05 0.012219 0.0028802 4.242
4@1.5 14.5279 2.235e-04 3.411e-04 0.010970 0.0028781 3.812
3@53.6 0.0514 3.181e-06 3.478e-06 -0.013048 0.0027616 -4.725
2@73.6 92.5484 1.737e-05 1.873e-05 -0.555015 0.1277309 -4.345
4@2.1 70.9520 2.889e-04 3.051e-04 -0.477899 0.1308942 -3.651
1@23.1 33.4451 1.339e-08 1.950e-08 0.005726 0.0009902 5.783
1@108.2 11.7394 9.975e-06 1.269e-05 0.003417 0.0009268 3.686
3@77.6 12.3907 5.406e-06 6.966e-06 -0.002978 0.0009428 -3.159
5@71.0 11.1012 8.540e-04 9.783e-04 -0.003072 0.0009221 -3.332
2@74.41 53.9097 0.000e+00 0.000e+00 32.753466 3.2197066 10.173
3@50.2 13.7272 1.494e-06 1.916e-06 -13.250384 3.0803158 -4.302
4@2.82 16.0399 6.526e-05 7.732e-05 -12.004233 2.9973238 -4.005
5@12.8 10.5681 1.135e-03 1.274e-03 9.906987 3.0474950 3.251
lowCImarker hiCImarker lowCIpos hiCIpos
2@15.5 2_518639 C2_6997430 8.38 33.796
2@74.4 C2_12037234 2-18752604 57.664 86.92
3@58.5 3.GAPABX 3-21632379 44.183 89.501
5@84.2 C5_21106454 MSAT5.18 67.883 88.431
3@17.9 C3_2497708 3_7307956 13.874 26.475
3@75.5 C3_18396211 3_21728447 64.783 85.984
4@4.4 4_208623 C4_70965 1.473 6.311
4@42.4 C4_5538105 C4_14773912 31.209 70.164
5@9.8 C5_896146 C5_11137211 7.41 37.821
5@69.8 C5_21106454 5-26120843 67.883 90.755
1@83.8 C1_24316033 C1_25080434 80.81 86.852
4@2.8 4.FRI MSAT4.8 2.791 4.408
4@62.1 Msat4.19 4_12900428 60.332 62.073
5@15.3 C5_3913044 C5_6688659 13.198 21.081
5@71.7 MSAT5.13 C5_22635947 71.677 73.754
4@2.81 4_208623 MSAT4.8 1.473 4.408
4@57.8 4_2126308 MSAT4.30 28.971 86.59
3@3.9 3_166828 MSAT3.6 0 35.833
3@19.0 C3_3623963 3_7307956 15.611 26.475
4@1.5 4.F6N15 C4_885880 0 13.509
3@53.6 C3_10332905 C3_16836255 46.945 60.102
2@73.6 2-14003272 2_17844202 70.808 77.858
4@2.1 4.F6N15 C4_17272166 0 77.755
1@23.1 C1_6315998 C1_7667369 15.548 23.849
1@108.2 MSAT1.5 1_30116495 106.481 111.091
3@77.6 C3_20197905 C3_20876948 75.891 79.34
5@71.0 C5_19731984 MSAT5.18 63.974 88.431
2@74.41 2-15986145 C2_16916847 73.572 75.142
3@50.2 3.GAPABX C3_12021844 44.183 55.666
4@2.82 4.F6N15 C4_1416972 0 19.217
5@12.8 5-1213621 C5_26396744 2.763 84.825
#######################
#1.11 Processing and plotting the QTL statistics
#Read in all relevant data
statsout<-read.csv("dd2014_allqtlstatistics.csv",header=T) #statistics from Part 1
#######################
#1.12 Plot some phenotypes, determine correlations between phenotypes
phematrix<-cross$phe[,qtlphes]
distmatrix<-dist(t(phematrix))
hc<-hclust(distmatrix)
par(mfrow=c(1,1))
plot(hc)#plot the similarity of phenotypes
#reorder the phenotypes so that they are clustered by distance
statsout$phenotype <- factor(statsout$phenotype,
levels = names(phematrix)[hc$order])
#######################
#1.13plot the distribution of QTL
#
reverselog_trans <- function(base = exp(1)) {
trans <- function(y) -log(y, base)
inv <- function(y) base^(-y)
trans_new(paste0("reverselog-", format(base)), trans, inv,
log_breaks(base = base),
domain = c(1e-100, Inf))
}
ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
geom_point() + scale_y_continuous("t statistic for QTL point estimate")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position,y=P.F))+
geom_point() + scale_y_continuous(trans=reverselog_trans(10),"p.value of F statistic")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(.~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position))+
geom_histogram(binwidth=20) +
facet_grid(.~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution and density of QTL point estimates")