Lovell — Aug 1, 2014, 1:41 PM
####################################################
### Title: Complete analysis for drydown manuscript
### Author: JTLovell
### Date Created: 29-July 2014
### Version: 2.4
### Most recent update: 1:40PM, 31-July 2014
####################################################
###Outline:###
####################################################
# A1: set up the environment
# Part 1 (QTL analysis w/out covariates):
# 1.1: Load permutations
# 1.2: Load Cross
# 1.3: Plot map, information and other diagnostics
# 1.4: Load list of phenotypes w/ qtls
# 1.5: Plot correlations and nj tree of phenotypes
# 1.6: Calculate Penalties (and concatenate from different files)
# 1.7: Run stepwiseQTL, output results and save results
# 1.8: Make lod profile plots
# 1.9: Make qtl position plots
# Part 2 QTL fine mapping w/ gene expression
# 2.1: Load gene expression statistics
# 2.1a: plotting of genomic distribution of expression
# 2.1b: determination of GxE genes
# 2.2: Load other files needed for analysis
# 2.3: Calculate intervals (2q, 4p, 4q, 5q)
# 2.4: Plot intervals (and gene-cM correlations)
# 2.5: Extract genes w/ significant expression differences (from ANOVA)
# 2.6: Run covariate scans w/ significant genes
# 2.7: Plot covariate scans
# 2.8: Summarize covariate analysis
# 2.9: Extract go terms from significant genes
# 2.10: Summarize candidate genes
# Part 3: Effect of cytoplasm
# 3.1: T.tests and varcomp analysis of cytoplasm
# 3.2: Plots and summary of cytoplasm stats
# 3.3: Bioinformatics of DNA sequence polymorphism in cytoplasm
# 3.4: Determination of candidate genes in cytoplasm
# 3.5: Cytoplasm main effect in QTLs (oneways)
# 3.6: Cytoplasm main effect in multi-QTL models
# 3.7: Cytoplasm interactive effects in QTLs (oneway)- compare w/ 3.3
# 3.8: Interaction plots of QTL*cyto and cyto main effect at proline
# 3.8: Bioinformatics of potential QTL-by-cytoplasm interactions
# Part 4: Epistasis
# 4.1: Extract epistasis components from stepwiseqtl
# 4.2: Interaction plots of epistasis
# 4.3: Scantwo of epistasis
# 4.4: fitqtl of epistasis
# Part 5: QTL*environment interactions
# 5.1: work on later...
####################################################
###Libraries:###
####################################################
##############################################
#A1 Set up the environment
rm(list=ls())
options(warn=-1)
pkg <- c("biomaRt","qtl","snow","rlecuyer","plyr","ggplot2","reshape","reshape2","qvalue","lme4","nlme","car","doBy","scales")
invisible(lapply(pkg, function(x) {cat(x,"..."); library(x, character.only=T, verbose=F, warn.conflicts=F,quietly=T)} ))
biomaRt ...qtl ...snow ...rlecuyer ...plyr ...ggplot2 ...reshape ...reshape2 ...qvalue ...lme4 ...
Attaching package: 'Matrix'
The following object is masked from 'package:reshape':
expand
nlme ...car ...doBy ...
Loading required package: splines
Attaching package: 'MASS'
The following object is masked from 'package:biomaRt':
select
scales ...
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] splines stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] scales_0.2.4 doBy_4.5-10 MASS_7.3-33 survival_2.37-7
[5] car_2.0-20 nlme_3.1-117 lme4_1.1-7 Rcpp_0.11.2
[9] Matrix_1.1-4 qvalue_1.38.0 reshape2_1.4 reshape_0.8.5
[13] ggplot2_1.0.0 plyr_1.8.1 rlecuyer_0.3-3 snow_0.3-13
[17] qtl_1.32-10 biomaRt_2.20.0 knitr_1.6
loaded via a namespace (and not attached):
[1] AnnotationDbi_1.26.0 Biobase_2.24.0 BiocGenerics_0.10.0
[4] colorspace_1.2-4 DBI_0.2-7 digest_0.6.4
[7] evaluate_0.5.5 formatR_0.10 GenomeInfoDb_1.0.2
[10] grid_3.1.0 gtable_0.1.2 IRanges_1.22.10
[13] lattice_0.20-29 minqa_1.2.3 munsell_0.4.2
[16] nloptr_1.0.0 nnet_7.3-8 parallel_3.1.0
[19] proto_0.3-10 RCurl_1.95-4.1 RSQLite_0.11.4
[22] stats4_3.1.0 stringr_0.6.2 tcltk_3.1.0
[25] tools_3.1.0 XML_3.98-1.1
setwd("~/Desktop/Drydown2014")
##############################################
##############################################
#Part 1 (QTL analysis w/out covariates):
##############################################
##############################################
##############################################
#1.1: Load permutations
load("dd2014_allqtldata.RData")
##############################################
#1.2: Load Cross
cross<-read.cross("csvs",dir="", genotypes=c("a","b"),
genfile="TKrils_map55.csv", phefile= "TKrils_full_phe.csv",
na.strings=c("NA","-", "#VALUE!","#NUM!"))
--Read the following data:
341 individuals
450 markers
38 phenotypes
--Cross type: bc
cross.2008<-read.cross("csvs",dir="", genotypes=c("A","H"),
genfile="McKay_revised_map_1_27_12.csv",phefile= "TKrils_full_phe.csv",
na.strings=c("NA","-", "#VALUE!","#NUM!"))
--Read the following data:
341 individuals
168 markers
38 phenotypes
--Cross type: bc
cross.2008<-convert2riself(cross.2008)
cross<-convert2riself(cross)
cross<-calc.genoprob(cross, step=1, stepwidth="max",error.prob=0.01, map.function="kosambi")
##############################################
#1.3: Plot map, information and other diagnostics
par(mfrow=c(1,1))
plot.map(cross.2008, main="2008 map")
plot.map(cross, main="new map")
plotInfo(cross.2008, main="missing information, 2008 map",col="red",lty=2)
plotInfo(cross, main="missing information, new map")
summary(cross)
RI strains via selfing
No. individuals: 341
No. phenotypes: 38
Percent phenotyped: 78.7
No. chromosomes: 5
Autosomes: 1 2 3 4 5
Total markers: 450
No. markers: 103 80 82 74 111
Percent genotyped: 70.4
Genotypes (%): AA:51.2 BB:48.8
##############################################
#1.4: Load list of phenotypes w/ qtls
qtlphes<-c("ABAlyoph.dry", "C13.dry","C13.wet","FT.June","GR.dry","RMR.dry",
"Rel.GR.wet","Rolled.dry","Wilted.dry","g0","proline.dry")
##############################################
#1.5: Plot correlations and nj tree of phenotypes
pheno.matrix<-as.data.frame(cross$phe)
pheno.matrix.qtl<-pheno.matrix[,qtlphes]
heatmap(cor(pheno.matrix.qtl, use="pairwise"), na.rm=T) #can make this look better
#print(cor(pheno.matrix.main[,-1], use="pairwise"))
##############################################
#1.6: Calculate Penalties (and concatenate from different files)
pens<-calc.penalties(perms.raw, alpha=0.05)
Wilted.dry<-calc.penalties(perms.wilted, alpha=0.05)
pens<-rbind(pens, Wilted.dry)
##############################################
#1.7: Run stepwiseQTL, output results and save results
#stepwiseqtl.summary- function found here and published on my website (johntlovell.wordpress.com)
if(!exists("stepwiseqtl.summary", mode="function")) source("stepwiseqtl.summary_function.R")
#inputs: cross-, phes-, max.qtls- pens- covar- type-
stats_out<-stepwiseqtl.summary(cross=cross, phes=qtlphes, max.qtls=8,
pens=pens, covar=NULL, type="stats", printout=T)
conducting stepwise QTL model selection
phenotype (1:11) -- ABAlyoph.dry
phenotype (2:11) -- C13.dry
phenotype (3:11) -- C13.wet
phenotype (4:11) -- FT.June
phenotype (5:11) -- GR.dry
phenotype (6:11) -- RMR.dry
phenotype (7:11) -- Rel.GR.wet
phenotype (8:11) -- Rolled.dry
phenotype (9:11) -- Wilted.dry
phenotype (10:11) -- g0
phenotype (11:11) -- proline.dry
par(mfrow=c(4,1),mar = c(2,4,2,1))
write.csv(stats_out, file="dd2014_allqtlstatistics_nocovar.csv", row.names=F)
##############################################
#1.8 LOD PRofile plots (see above)
##############################################
#1.9: Make qtl position plots
statsout<-read.csv("dd2014_allqtlstatistics_nocovar.csv",header=T)
head(statsout)
phenotype chromosome position df type3SS LOD perc.var Fstat
1 ABAlyoph.dry 2 15.501 1 270.044 3.635 5.160 270.04
2 C13.dry 3 58.473 1 4.620 3.234 4.466 4.62
3 C13.wet 3 17.916 1 2.482 3.520 3.626 16.26
4 C13.wet 3 75.535 1 2.643 3.743 3.862 17.32
5 C13.wet 4 4.408 1 3.936 5.506 5.751 25.79
6 C13.wet 4 42.395 1 2.499 3.544 3.652 16.38
P.chi2 P.F effect.estimate effect.SE effect.t lowCImarker
1 4.283e-05 4.593e-05 -0.98573 0.23849 -4.133 2_518639
2 1.138e-04 1.209e-04 -0.12470 0.03204 -3.892 3.GAPABX
3 5.672e-05 6.874e-05 0.09106 0.02258 4.032 C3_2497708
4 3.299e-05 4.045e-05 -0.09617 0.02311 -4.162 C3_18396211
5 4.764e-07 6.400e-07 -0.11447 0.02254 -5.079 4_208623
6 5.342e-05 6.483e-05 -0.09174 0.02267 -4.047 C4_5538105
hiCImarker lowCIpos hiCIpos
1 C2_6997430 8.380 33.796
2 3-21632379 44.183 89.501
3 3_7307956 13.874 26.475
4 3_21728447 64.783 85.984
5 C4_70965 1.473 6.311
6 C4_14773912 31.209 70.164
statsout<-statsout[complete.cases(statsout),]
#function to plot pvals
reverselog_trans <- function(base = exp(1)) {
trans <- function(y) -log(y, base); inv <- function(y) base^(-y)
trans_new(paste0("reverselog-", format(base)), trans, inv,
log_breaks(base = base), domain = c(1e-100, Inf))}
ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
geom_point() + scale_y_continuous("t statistic for QTL point estimate")+
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position,y=P.F))+
geom_point() +
geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
facet_grid(.~chromosome, scales="free_y", margins=F)+
scale_y_continuous(trans=reverselog_trans(10),"p.value of F statistic")+
theme_bw()+ ggtitle("distribution of QTL positions and effect sizes")
ggplot(statsout,aes(x=position))+
geom_histogram(binwidth=5) +
facet_grid(.~chromosome, scales="free_y", margins=F)+
theme_bw()+ ggtitle("distribution and density of QTL point estimates")
##############################################
##############################################
#Part 2 QTL fine mapping w/ gene expression
##############################################
##############################################
##############################################
#2.1: Load gene expression statistics, generate qvalue statistics and process
all.anova<-read.csv("dd2014_allgenes_geneexpressionstats.csv", header=T)
qvals<-data.frame(rownames(all.anova))
par(mfrow=c(5,1),mar = c(2,4,2,1))
for (i in c("p.geno","p.trt","p.gxe","dry_pval","wet_pval")){
q.out<-as.numeric(qvalue(all.anova[,i])$qvalue)
hist(q.out, main=paste("qvalue statistics calculated from", i))
qvals<-cbind(qvals,as.data.frame(q.out))
}
qvals<-qvals[,-1];colnames(qvals)<-c("sig.geno","sig.trt","sig.gxe","dry_qval","wet_qval")
all.anova<-cbind(all.anova,qvals)
summary(all.anova)
gene.id marker.id chromosome pos.bp
AT1G01010: 1 C3_3623963 : 311 Min. :1.00 Min. : 1251
AT1G01020: 1 C3_4648688 : 225 1st Qu.:1.00 1st Qu.: 5822841
AT1G01030: 1 C3_2497708 : 217 Median :3.00 Median :11920794
AT1G01040: 1 C3_19161491: 205 Mean :2.94 Mean :12591720
AT1G01050: 1 2-15986145 : 193 3rd Qu.:4.00 3rd Qu.:18724714
AT1G01060: 1 C4_9316828 : 189 Max. :5.00 Max. :30426839
(Other) :25618 (Other) :24284
pos.cm f.geno f.trt f.gxe
Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.00
1st Qu.: 22.1 1st Qu.: 0.2 1st Qu.: 0.3 1st Qu.: 0.09
Median : 51.5 Median : 0.8 Median : 1.9 Median : 0.42
Mean : 48.7 Mean : 3.5 Mean : 14.6 Mean : 1.01
3rd Qu.: 71.7 3rd Qu.: 2.7 3rd Qu.: 10.0 3rd Qu.: 1.26
Max. :111.1 Max. :882.7 Max. :2003.5 Max. :64.02
p.geno p.trt p.gxe AA.dry
Min. :0.000 Min. :0.0000 Min. :0.000 Min. : 6.68
1st Qu.:0.104 1st Qu.:0.0018 1st Qu.:0.264 1st Qu.: 6.99
Median :0.366 Median :0.1713 Median :0.519 Median : 7.14
Mean :0.403 Mean :0.2989 Mean :0.511 Mean : 7.26
3rd Qu.:0.677 3rd Qu.:0.5598 3rd Qu.:0.759 3rd Qu.: 7.39
Max. :1.000 Max. :0.9999 Max. :1.000 Max. :12.75
BB.dry AA.wet BB.wet dry_estAA
Min. : 6.65 Min. : 6.68 Min. : 6.67 Min. : 6.68
1st Qu.: 6.99 1st Qu.: 6.99 1st Qu.: 6.99 1st Qu.: 6.99
Median : 7.15 Median : 7.14 Median : 7.14 Median : 7.14
Mean : 7.26 Mean : 7.26 Mean : 7.26 Mean : 7.26
3rd Qu.: 7.39 3rd Qu.: 7.39 3rd Qu.: 7.39 3rd Qu.: 7.39
Max. :12.53 Max. :12.36 Max. :12.36 Max. :12.75
dry_estBB dry_pval wet_estAA wet_estBB
Min. : 6.65 Min. :0.000 Min. : 6.68 Min. : 6.67
1st Qu.: 6.99 1st Qu.:0.158 1st Qu.: 6.99 1st Qu.: 6.99
Median : 7.15 Median :0.420 Median : 7.14 Median : 7.14
Mean : 7.26 Mean :0.440 Mean : 7.26 Mean : 7.26
3rd Qu.: 7.39 3rd Qu.:0.705 3rd Qu.: 7.39 3rd Qu.: 7.39
Max. :12.53 Max. :1.000 Max. :12.36 Max. :12.36
wet_pval sig.geno sig.trt sig.gxe
Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
1st Qu.:0.162 1st Qu.:0.322 1st Qu.:0.004 1st Qu.:1.000
Median :0.422 Median :0.565 Median :0.190 Median :1.000
Mean :0.440 Mean :0.489 Mean :0.219 Mean :0.991
3rd Qu.:0.704 3rd Qu.:0.697 3rd Qu.:0.414 3rd Qu.:1.000
Max. :1.000 Max. :0.772 Max. :0.555 Max. :1.000
dry_qval wet_qval
Min. :0.000 Min. :0.000
1st Qu.:0.529 1st Qu.:0.546
Median :0.705 Median :0.712
Mean :0.623 Mean :0.630
3rd Qu.:0.788 3rd Qu.:0.791
Max. :0.839 Max. :0.843
##############################################
#2.1a: make plots of genomic distribution of pvalues
#set significance (based on qvalues) as a threshold variable
sig.geno.fac<- transform(all.anova, cut=cut(sig.geno, breaks=c(0,.05,1)))$cut
sig.trt.fac<- transform(all.anova, cut=cut(sig.trt, breaks=c(0,.05,1)))$cut
sig.gxe.fac<- transform(all.anova, cut=cut(sig.gxe, breaks=c(0,.05,1)))$cut
dry_pval.fac<- transform(all.anova, cut=cut(dry_pval, breaks=c(0,.05,1)))$cut
wet_pval.fac<- transform(all.anova, cut=cut(wet_pval, breaks=c(0,.05,1)))$cut
all.anova.toplot<-cbind(all.anova,sig.geno.fac,sig.trt.fac,sig.gxe.fac,dry_pval.fac,wet_pval.fac)
facstoplot<-list(c("sig.geno.fac","p.geno","Pvalues of genotype effect (typeIII)"),
c("sig.trt.fac","p.trt","Pvalues of environment effect (typeIII)"),
c("dry_pval.fac","dry_pval","Pvalues of genotype effect in dry (t.test)"),
c("wet_pval.fac","wet_pval","Pvalues of genotype effect in wet (t.test)"))
#threshold is at alpha=0.05 (via Q-values), colored in plots this way
for(i in facstoplot){
print(ggplot(all.anova.toplot,aes(y=get(i[2]),x=pos.bp,color=get(i[1])))+
geom_point()+theme_bw()+ggtitle(i[3])+
scale_y_continuous(trans=reverselog_trans(10))+ facet_grid(chromosome~.))}
##############################################
#2.1b Identify genes w/ significant G*E, output for later
head(all.anova)
gene.id marker.id chromosome pos.bp pos.cm f.geno f.trt f.gxe
1 AT1G01010 C1_103654 1 3631 0.186 3.6251 6.63512 7.83503
2 AT1G01020 C1_103654 1 5928 0.186 4.2390 1.87075 0.34134
3 AT1G01030 C1_103654 1 11649 0.186 2.1099 5.92443 1.01925
4 AT1G01040 C1_103654 1 23146 0.186 0.0727 1.53420 0.01366
5 AT1G01050 C1_103654 1 31170 0.186 0.3614 0.03060 0.03141
6 AT1G01060 C1_103654 1 33666 0.186 4.9961 0.06019 0.60216
p.geno p.trt p.gxe AA.dry BB.dry AA.wet BB.wet dry_estAA dry_estBB
1 0.05836 0.01073 0.005631 7.240 7.219 7.245 7.354 7.240 7.219
2 0.04081 0.17294 0.559722 7.198 7.139 7.217 7.184 7.198 7.139
3 0.14793 0.01582 0.313930 7.072 7.119 7.134 7.142 7.072 7.119
4 0.78772 0.21695 0.907088 7.624 7.619 7.606 7.604 7.624 7.619
5 0.54844 0.86131 0.859505 7.469 7.497 7.480 7.495 7.469 7.497
6 0.02652 0.80645 0.438682 7.438 7.551 7.407 7.641 7.438 7.551
dry_pval wet_estAA wet_estBB wet_pval sig.geno sig.trt sig.gxe dry_qval
1 0.49431 7.245 7.354 0.003923 0.2334 0.01946 0.6680 0.7342
2 0.04312 7.217 7.184 0.352691 0.1902 0.19130 0.9997 0.3092
3 0.11033 7.134 7.142 0.733678 0.3789 0.02728 0.9997 0.4637
4 0.79055 7.606 7.604 0.908446 0.7274 0.22526 0.9997 0.8065
5 0.55787 7.480 7.495 0.763103 0.6489 0.51776 0.9997 0.7545
6 0.27156 7.407 7.641 0.054034 0.1481 0.50217 0.9997 0.6282
wet_qval
1 0.08107
2 0.67949
3 0.79762
4 0.83166
5 0.80225
6 0.35175
#effect of g*e
ggplot(all.anova.toplot,aes(y=p.gxe,x=pos.bp,color=sig.gxe.fac))+
geom_point()+theme_bw()+ggtitle("Pvalues of GxE effect (typeIII)")+
scale_y_continuous(trans=reverselog_trans(10))+
facet_grid(chromosome~.)
gxe.genes<-all.anova[all.anova$sig.gxe<0.05,]
as.character(gxe.genes$gene.id)
[1] "AT1G05200" "AT1G23940" "AT1G27030" "AT1G48760" "AT1G56510"
[6] "AT1G62290" "AT1G62810" "AT1G63420" "AT1G67365" "AT1G72800"
[11] "AT1G73010" "AT1G76240" "AT2G15480" "AT2G16990" "AT2G23110"
[16] "AT2G35075" "AT3G47580" "AT4G01360" "AT4G15910" "AT4G19530"
[21] "AT4G21410" "AT4G22990" "AT5G09230" "AT5G09240" "AT5G12230"
[26] "AT5G25980" "AT5G41760" "AT5G42825" "AT5G44310"
##############################################
#2.2: Load other files needed for analysis
cmgenpos<-read.csv("gene_locations_new_McKay_map.csv",header=T, na.strings="NA") #physical position of all genes
geneexp<-read.csv("May_SNP_5_4_12.csv",header=T) #gene expression of all AT genes in drydown
idnames<-read.csv("dd2014_markernametoid.csv",header=T)
colnames(idnames)[2]<-"line"
geneexp<-merge(idnames,geneexp, by="line")
physpos.gene<-cmgenpos[,c(1,4:5)]
annotations<-read.table("Functional_annotation_list.txt",sep="\t",header=F)
colnames(annotations)<-c("gene","annotation")
##############################################
#2.3: Look at the distribution of QTL and effects to figure out where intervals should be
#look at the lod profiles and scanones if needed
#n <- names(model.out);lapply(model.out, function(x) plotLodProfile(x, main=n[substitute(x)[[3]]])
#par(mfrow=c(2,2));for (i in qtlphes) plot(scanone(cross, pheno.col=i,ch=5), main=i)
#make a dataframe with effects to help decide how to structure the regions
cross<-sim.geno(cross, n.draws=64, step=1, stepwidth="max",error.prob=0.01, map.function="kosambi")
effectscan.out<-data.frame()
for (i in qtlphes){
temp<-data.frame(effectscan(cross, pheno.col=i, draw=F),i)
temp$st.effect<-scale(temp$a,center=T,scale=T);effectscan.out<-rbind(effectscan.out,temp)
}
#general plots of qtl effects
names(effectscan.out)[3:4]<-c("effect","phenotype")
ggplot(effectscan.out, aes(x=pos, y=st.effect, color=phenotype))+ geom_line()+
theme_bw()+ggtitle("effect scans (of TSU allele) of all phenotypes")+ facet_wrap(~chr)
es.phys<-effectscan.out[effectscan.out$phenotype %in% c("C13.wet","C13.dry","ABAlyoph.dry","proline.dry","g0"),]
ggplot(es.phys, aes(x=pos, y=st.effect, color=phenotype))+geom_line()+theme_bw()+
facet_wrap(~chr)+ggtitle("effect scans (of TSU allele) of physiological phenotypes")
es.growth<-effectscan.out[effectscan.out$phenotype %in% c("GR.dry","RMR.dry","Rel.GR.wet","Rolled.dry","Wilted.dry"),]
ggplot(es.growth, aes(x=pos, y=st.effect, color=phenotype))+
geom_line()+theme_bw()+ facet_wrap(~chr)+ggtitle("effect scans (of TSU allele) of growth phenotypes")
##############################################
#2.4: Check intervals around four regions...
#check out the qtl effects in a region... here is a function
#region needs to be a vector of length 3 (chromosome, start pos, end pos)
#phes needs to be a vector of exactly 2 phenotypes
ploteff<-function(region,phes){
par(mfrow=c(3,1)); plot(scanone(cross, pheno.col=phes[1], chr=region[1]), main=paste("scanones on ch",region[1], "for phenotypes:", phes[1],phes[2]))
plot(scanone(cross, pheno.col=phes[2], chr=region[1]), add=T, lty=3, col="red")
rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
effectscan(cross, pheno.col=phes[1], draw=T, chr=region[1],main=paste("effect scan, ch=",region[1],"; phenotype=", phes[1]))
rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
effectscan(cross, pheno.col=phes[2], draw=T, chr=region[1], main=paste("effect scan, ch=",region[1],"; phenotype=", phes[2]))
rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
}
stats_out<-stepwiseqtl.summary(cross=cross, phes=qtlphes, max.qtls=8,
pens=pens, covar=NULL, type="model", printout=T)
conducting stepwise QTL model selection
phenotype (1:11) -- ABAlyoph.dry
phenotype (2:11) -- C13.dry
phenotype (3:11) -- C13.wet
phenotype (4:11) -- FT.June
phenotype (5:11) -- GR.dry
phenotype (6:11) -- RMR.dry
phenotype (7:11) -- Rel.GR.wet
phenotype (8:11) -- Rolled.dry
phenotype (9:11) -- Wilted.dry
phenotype (10:11) -- g0
phenotype (11:11) -- proline.dry
model.out<-stats_out
##############################################
#2.4.1 (reg2.1) --- aba-2.1 - Initially too wide- entire shoulder of peak
#look at wilted too:
statsout<-statsout[complete.cases(statsout),]
reg2.1<-statsout[statsout$chromosome==2 & statsout$position <20,c(2,16,17)]
phes2.1<-c("ABAlyoph.dry","Wilted.dry")
ploteff(region=reg2.1,phes=phes2.1)
newint<-data.frame(bayesint(model.out["ABAlyoph.dry"][[1]], qtl.index=1, prob=.75)[1:3,1:3])
reg.test<-reg2.1; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes2.1)
par(mfrow=c(1,1)); plotLodProfile(model.out["ABAlyoph.dry"][[1]], main="Lod Profile of ABA, red-90% CI, purple-75% CI")
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg2.1[2], xright=reg2.1[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)
#the narrower region captures the peak much better (without the shoulders)
reg2.1<-reg.test
##############################################
#2.4.2 (reg2.2) ---proline-2.2
reg2.2<-statsout[statsout$chromosome==2 & statsout$position >60 & statsout$phenotype=="proline.dry",c(2,16,17)]
phes2.2<-c("proline.dry","Rolled.dry")
ploteff(region=reg2.2,phes=phes2.2)
newint<-data.frame(bayesint(model.out["proline.dry"][[1]], qtl.index=1, prob=.99)[1:3,1:3])
reg.test<-reg2.2; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes2.2)
par(mfrow=c(1,1)); plotLodProfile(model.out["proline.dry"][[1]], main="Lod Profile of proline, purple-90% CI, blue-99% CI",ch=2)
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg2.2[2], xright=reg2.2[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)
#the wider region captures the peak much better (without the shoulders)
reg2.2<-reg.test
##############################################
#2.4.3 (reg4.1) ---d13C-4.1
reg4.1<-statsout[statsout$chromosome==4 & statsout$position <10 & statsout$phenotype=="C13.wet",c(2,16,17)]
phes4.1<-c("C13.wet","proline.dry")
ploteff(region=reg4.1,phes=phes4.1)
##############################################
#2.4.4(reg5.2) ---d13C-5.2 -initially too wide (incorporates 2 peaks)
reg5.2<-statsout[statsout$chromosome==5 & statsout$position >60 & statsout$phenotype=="C13.wet",c(2,16,17)]
phes5.2<-c("C13.wet","C13.dry")
ploteff(region=reg5.2,phes=phes5.2)
newint<-data.frame(bayesint(model.out["C13.wet"][[1]], qtl.index=6, prob=.99)[1:3,1:3])
reg.test<-reg5.2; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes5.2)
par(mfrow=c(1,1)); plotLodProfile(model.out["C13.wet"][[1]],ch=5, main="Lod Profile of d13C.wet, red-90% CI, purple-80% CI")
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg5.2[2], xright=reg5.2[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)
#this looks much better- only single peak in coverages
reg5.2<-reg.test
##############################################
#2.5 list out and plot the regions and focal phenotypes
all.regions<-list(reg2.1,reg2.2,reg4.1,reg5.2)
all.focalphes<-c("ABAlyoph.dry","proline.dry","C13.wet","C13.wet")
all.region.names<-c("reg2.1","reg2.2","reg4.1","reg5.2")
#function to subset the data
findgenes<-function(region){
reg.in<-as.numeric(region)
culled.bycm<-all.anova[all.anova$chromosome==reg.in[1] & all.anova$pos.cm>reg.in[2] & all.anova$pos.cm<reg.in[3],]
temp<-all.anova[
all.anova$chromosome==reg.in[1] & all.anova$pos.cm>reg.in[2] & all.anova$pos.cm<reg.in[3] |
all.anova$chromosome==reg.in[1] & all.anova$pos.bp>min(culled.bycm$pos.bp) & all.anova$pos.bp<max(culled.bycm$pos.bp),]
return(as.character(temp$gene.id))
}
exp.stats<-all.anova
exp.stats$gene.cat<-as.character(exp.stats$gene.id)
for (i in 1:4){
temp<-findgenes(all.regions[[i]])
exp.stats$gene.cat[exp.stats$gene.cat %in% temp]<-all.region.names[i]
}
exp.stats$gene.cat[!exp.stats$gene.cat %in% all.region.names]<-"other"
ggplot(exp.stats, aes(y=pos.cm, x=pos.bp, color=gene.cat))+
scale_color_manual(values=c("black",rainbow(4)))+
geom_point()+theme_bw()+ggtitle("physical gene position to cM marker position")+
facet_wrap(~chromosome)
##############################################
#2.6: Extract genes w/ significant expression differences (from ANOVA)
#2.7: Make dataframe with those genes only for covariate analysis
exp.covars.list<-list()
sig.genes.out<-list()
for (i in 1:4){
temp<-exp.stats[exp.stats$gene.cat==all.region.names[i],]
#extract 10 most signifiant genes
siggenes<-temp[temp$sig.geno<as.numeric(quantile(temp$sig.geno,(length(temp[,1])-(length(temp[,1])-10))/length(temp[,1]))),]
print(all.region.names[i])
sig.genes<-print(as.character(siggenes$gene.id))
sig.genes.out[[i]]<-sig.genes
geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
exp.covars.list[[i]]<-exp.covars
}
[1] "reg2.1"
[1] "AT2G03140" "AT2G03820" "AT2G04170" "AT2G04378" "AT2G04380"
[6] "AT2G05810" "AT2G05915" "AT2G07215" "AT2G11140" "AT2G11240"
[1] "reg2.2"
[1] "AT2G38530" "AT2G39110" "AT2G40960" "AT2G41440" "AT2G42170"
[6] "AT2G42490" "AT2G43210" "AT2G43530" "AT2G43680" "AT2G43980"
[1] "reg4.1"
[1] "AT4G00650" "AT4G00710" "AT4G00720" "AT4G00740" "AT4G00755"
[6] "AT4G00840" "AT4G00880" "AT4G00893" "AT4G00900" "AT4G00975"
[1] "reg5.2"
[1] "AT5G53700" "AT5G53930" "AT5G54710" "AT5G54720" "AT5G54730"
[6] "AT5G58575" "AT5G61410" "AT5G63020" "AT5G63030" "AT5G67480"
#2.8 Run scanone for w/ and w/o covars
outall<-data.frame(chr=numeric(), pos=numeric(),lod=numeric(),
phe=character(),covar=character(),
gene=character(),expression_env=character())
for (i in all.focalphes){
temp<-cbind(as.data.frame(scanone(cross,pheno.col=i)),i,"nocovar","nocovar","nocovar")
colnames(temp)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,temp)
}
for (j in 1:4){
for (i in colnames(exp.covars.list[j][[1]])[3:length(colnames(exp.covars.list[j][[1]]))]){
s1.out<-scanone(cross,pheno.col=all.focalphes[j],addcovar=exp.covars.list[[j]][,i])
s1.out<-cbind(as.data.frame(s1.out),as.character(all.focalphes[j]),as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
}
##############################################
#2.9: Plot covariate scans
#for all phenotypes, just the focal ch
#inputs-
# toplot: the full scanout-out object,
#region: the region and phenotype: the phenotype
#maxcm- maximum cm to plot (mincm same way)
#plotcovscan<-function(toplot, region, phenotype, maxcm, mincm, genes)
if(!exists("plotcovscan", mode="function")) source("plotcovscan_function.R")
#plot all regions
max.cm<-c(45,100,45,100)
min.cm<-c(0,45,0,45)
for (i in 1:4){
print(
plotcovscan(outall,region=all.regions[[i]],phenotype=all.focalphes[i],
genes=sig.genes.out[[i]],maxcm=max.cm[i], mincm=min.cm[i])
)
}
##############################################
#2.10: Summarize covariate analysis
for (i in 1:4) cat(all.focalphes[i],"-",all.region.names[i],": ", sig.genes.out[[i]], sep=" ", "\n")
ABAlyoph.dry - reg2.1 : AT2G03140 AT2G03820 AT2G04170 AT2G04378 AT2G04380 AT2G05810 AT2G05915 AT2G07215 AT2G11140 AT2G11240
proline.dry - reg2.2 : AT2G38530 AT2G39110 AT2G40960 AT2G41440 AT2G42170 AT2G42490 AT2G43210 AT2G43530 AT2G43680 AT2G43980
C13.wet - reg4.1 : AT4G00650 AT4G00710 AT4G00720 AT4G00740 AT4G00755 AT4G00840 AT4G00880 AT4G00893 AT4G00900 AT4G00975
C13.wet - reg5.2 : AT5G53700 AT5G53930 AT5G54710 AT5G54720 AT5G54730 AT5G58575 AT5G61410 AT5G63020 AT5G63030 AT5G67480
# ##############################################
# #2.9: Extract go terms from significant genes
# library(biomaRt)
# mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
# results <- getBM(attributes = c("refseq_dna", "go_molecular_function_id",
# "go_molecular_function__dm_name_1006"), filters = "refseq_dna",
# values = c("NM_030621"), mart = mart)
##############################################
#2.10: Summarize candidate genes
##############################################
##############################################
#Part 3: Effect of cytoplasm
##############################################
###############################################
#read in gene expression statistics w/ cytoplasm as a term
exp.stats.wcyto<-read.csv("dd2014_allgenes_geneexpressionstats_wcyto.csv", header=T)
allanova.cyto<-exp.stats.wcyto
qvals<-data.frame(rownames(allanova.cyto))
par(mfrow=c(5,1),mar = c(2,4,2,1))
for (i in c("p.geno","p.cyto","p.trt","p.gxe","p.gxc","p.cxe","p.gxcxe","drytsu.gen","drykas.gen")){
q.out<-as.numeric(qvalue(allanova.cyto[,i])$qvalue)
hist(q.out, main=paste("qvalue statistics calculated from", i))
qvals<-cbind(qvals,as.data.frame(q.out))
}
qvals<-qvals[,-1]
colnames(qvals)<-c("q.geno","q.cyto","q.trt","q.gxe","q.gxc","q.cxe","q.gxcxe","q.drytsu.gen","q.drykas.gen")
allanova.cyto<-cbind(allanova.cyto,qvals)
#3.1 Plot distribution of pvalues w/ cyto
#set significance (based on qvalues) as a threshold variable
#names of stats variables (p-values)
factors<-colnames(allanova.cyto)[-c(1:14)]
all.anova.toplot<-allanova.cyto
for (i in factors){
test<-data.frame(all.anova.toplot[,i])
test[test<0.05]<-"sig.05"
test[test!="sig.05"]<-"not.sig"
all.anova.toplot<-cbind(all.anova.toplot,test)
}
colnames(all.anova.toplot)[24:32]<-paste(factors,".sig",sep="")
#threshold is at alpha=0.05 (via Q-values), colored in plots this way
for(i in 1:length(factors)){
toplot<-all.anova.toplot[,c(1:5,i+5,i+23)]
colnames(toplot)[6:7]<-c("p.value","sig.qval")
print(ggplot(toplot,aes(y=p.value,x=pos.bp,color=sig.qval))+
geom_point()+theme_bw()+ggtitle(facstoplot[i])+
scale_y_continuous(trans=reverselog_trans(10))+ facet_grid(chromosome~.))
}
##############################################
#3.1: T.tests and varcomp analysis of cytoplasm
cyto.vc<-data.frame(cbind(cross$phe, covs))
vc.out<-data.frame()
for (i in 2:38){
data.in<-cyto.vc[,c(i,41)]; colnames(data.in)[1]<-"pheno.in"
lmer.out<-lmer(pheno.in~ 1 | cytoplasm, data=data.in)
out.ttest<-t.test(pheno.in~cytoplasm, data=data.in)$p.value
out<-cbind(colnames(cyto.vc)[i],data.frame(as.numeric(VarCorr(lmer.out))), attr(VarCorr(lmer.out), "sc")^2,out.ttest)
vc.out<-rbind(vc.out, out)
}
colnames(vc.out)[1:4]<-c("phenotype","vc.cyto","vc.residual","ttest.pvalue")
vc.out$prop.cyto<-vc.out$vc.cyto/(vc.out$vc.residual+vc.out$vc.cyto)
hist(vc.out$prop.cyto)
ggplot(vc.out, aes(x=prop.cyto,y=ttest.pvalue))+
geom_text(aes(label=phenotype))+
scale_y_continuous(trans=reverselog_trans(10))+
scale_x_continuous(expand=c(.02,.02))+
geom_hline(yintercept=0.05, lty=3)+
theme_bw()+ggtitle("")
##############################################
#3.2: Plots and summary of cytoplasm Scanones
cytocovar<-read.csv("dd2014_cytocovar.csv", header=T)
covs<-read.csv("dd2014_cytocovar.csv", header=T)
addcov<-NULL; intcov<-NULL #model w/out covariates
cyto<-as.data.frame(covs$cyto.num)
#oneway scans
for (i in qtlphes){
nocovar<-scanone(grid,pheno.col=i, method="hk")
addcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto)
intcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto, intcovar=cyto)
ylim<-c(0,max(max(nocovar$lod),max(addcov$lod),max(intcov$lod)))
plot(nocovar,main=i,ylim=ylim);plot(addcov, add=T, col="red",lty=2);plot(intcov, add=T, col="blue",lty=2)
}
#plot the difference between models
par(mfrow=c(1,1))
for (i in qtlphes){
nocovar<-scanone(grid,pheno.col=i, method="hk")
addcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto)
intcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto, intcovar=cyto)
ylim1<-c(min(min(addcov$lod-nocovar$lod),min(intcov$lod-nocovar$lod),min(intcov$lod-addcov$lod)))
ylim2<-c(max(max(addcov$lod-nocovar$lod),max(intcov$lod-nocovar$lod),max(intcov$lod-addcov$lod))+2)
plot(addcov-nocovar,main=i,
lty=2, col=rgb(1,0,0,alpha=.7), ylim=c(ylim1,ylim2))
plot(intcov-nocovar,col=rgb(0,0,1,alpha=.4),lty=1, add=T)
plot(intcov-addcov,col=rgb(0,1,0),lty=3, add=T)
abline(h=pens[i,1], lty=3)
legend("topright",legend= c("addcov-nocov", "intcov-nocov", "int-add", "threshold"),
col=c("red","blue","green","black"), lty=c(2,1,3,3))
print(i); print(max(addcov-nocovar));print(max(intcov-nocovar)); cat("\n")
}
[1] "ABAlyoph.dry"
chr pos lod
C3_5631875 3 19.7 0.739
chr pos lod
4_4804292 4 30.3 1.49
[1] "C13.dry"
chr pos lod
C2_16916847 2 75.1 0.237
chr pos lod
2.Frag50 2 79.1 2.56
[1] "C13.wet"
chr pos lod
C5_24599755 5 77.5 0.63
chr pos lod
At2g22200 2 47.7 1.59
[1] "FT.June"
chr pos lod
4.F6N15 4 0 0.432
chr pos lod
C5_896146 5 7.41 1.99
[1] "GR.dry"
chr pos lod
3_7307956 3 26.5 0.0595
chr pos lod
C4_18038808 4 81.4 2.36
[1] "RMR.dry"
chr pos lod
c3.loc6 3 6.99 0.787
chr pos lod
3_620734 3 1.44 1.06
[1] "Rel.GR.wet"
chr pos lod
c3.loc30 3 50.9 0.0748
chr pos lod
c5.loc26 5 54.7 2.56
[1] "Rolled.dry"
chr pos lod
c2.loc44 2 74.4 0.679
chr pos lod
4_4804292 4 30.3 1.88
[1] "Wilted.dry"
chr pos lod
c2.loc12 2 15.5 0.0353
chr pos lod
4_14385343 4 66.3 2.49
[1] "g0"
chr pos lod
1_2696684 1 9.35 0.0581
chr pos lod
C3_16998392 3 61 0.772
[1] "proline.dry"
chr pos lod
c2.loc44 2 74.4 4.79
chr pos lod
C2_16916847 2 75.1 5.36
cyto.phes<-c("proline.dry","C13.dry")
#######################
#3.3 multiple QTL model generation and statistical analysis w/ cytoplasm as a covariate (additive)
cov_stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
lowCImarker=character(), hiCImarker=character(),
lowCIpos=character(), hiCIpos=character())
statcols<-names(cov_stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
names(cyto)<-"cytoplasm"
##############################################
#3.4:manual model selection for QTL*covariate interactions
#d13C.dry interacting locus:2@79.1
model.out[[cyto.phes[2]]]
QTL object containing genotype probabilities.
name chr pos n.gen
Q1 3@58.5 3 58.473 2
Formula: y ~ Q1
pLOD: 0.659
model.in<-model.out[[cyto.phes[2]]]
model.in1<-makeqtl(cross, chr=3, pos=58.473) #from stepwise
model.in2<-addtoqtl(cross, qtl=model.in1, chr=2, pos=79.1)
formula.in1<- y ~cytoplasm + Q1 + Q2 + cytoplasm:Q2
fit1<-fitqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, covar=cyto,formula=formula(formula.in1),
method="hk")
ref.C13.dry<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, covar=cyto,formula=formula(formula.in1),method="hk")
pos: 58.47 79.08
Iteration 1
Q1 pos: 58.47 -> 58.47
LOD increase: 0
Q2 pos: 79.08 -> 77.07
LOD increase: 0.389
all pos: 58.47 79.08 -> 58.47 77.07
LOD increase at this iteration: 0.389
Iteration 2
Q2 pos: 77.07 -> 77.07
LOD increase: 0
Q1 pos: 58.47 -> 58.47
LOD increase: 0
all pos: 58.47 77.07 -> 58.47 77.07
LOD increase at this iteration: 0
overall pos: 58.47 79.08 -> 58.47 77.07
LOD increase overall: 0.389
fit2<-fitqtl(cross, pheno.col=cyto.phes[2], qtl=ref.C13.dry, covar=cyto,formula=formula(formula.in1),
method="hk")
plotLodProfile(ref.C13.dry)
summary(fit1)
fitqtl summary
Method: multiple imputation
Model: normal phenotype
Number of observations : 326
Full model result
----------------------------------
Model formula: y ~ cytoplasm + Q1 + Q2 + cytoplasm:Q2
df SS MS LOD %var Pvalue(Chi2) Pvalue(F)
Model 4 9.594 2.3986 6.89 9.274 2.174e-06 2.612e-06
Error 321 93.861 0.2924
Total 325 103.455
Drop one QTL at a time ANOVA table:
----------------------------------
df Type III SS LOD %var F value Pvalue(Chi2) Pvalue(F)
cytoplasm 2 3.350 2.483 3.239 5.729 0.003 0.003591
3@58.5 1 3.223 2.390 3.115 11.021 0.001 0.001004
2@79.1 2 4.721 3.474 4.564 8.073 0.000 0.000379
cytoplasm:2@79.1 1 2.842 2.112 2.748 9.721 0.002 0.001986
cytoplasm **
3@58.5 **
2@79.1 ***
cytoplasm:2@79.1 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit2)
fitqtl summary
Method: multiple imputation
Model: normal phenotype
Number of observations : 326
Full model result
----------------------------------
Model formula: y ~ cytoplasm + Q1 + Q2 + cytoplasm:Q2
df SS MS LOD %var Pvalue(Chi2) Pvalue(F)
Model 4 10.11 2.5272 7.279 9.771 9.349e-07 1.136e-06
Error 321 93.35 0.2908
Total 325 103.46
Drop one QTL at a time ANOVA table:
----------------------------------
df Type III SS LOD %var F value Pvalue(Chi2) Pvalue(F)
cytoplasm 2 3.305 2.463 3.194 5.682 0.003 0.003759
3@58.5 1 3.199 2.385 3.092 11.001 0.001 0.001015
2@77.1 2 5.236 3.863 5.061 9.002 0.000 0.000157
cytoplasm:2@77.1 1 2.766 2.067 2.674 9.512 0.002 0.002218
cytoplasm **
3@58.5 **
2@77.1 ***
cytoplasm:2@77.1 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##############################################
#3.5 plot interactions
par(mfrow=c(2,1),mar = c(2,4,2,1))
mname<-find.marker(cross, 2,76.3) #from manual fit C13.dry
cyto.eff<-as.numeric(covs$cyto.num)
effectplot(cross, pheno.col=cyto.phes[2], mname2="cytoplasm", mark2=cyto.eff, geno2=c("KAS","TSU"),
mname1=mname)
mname<-find.marker(cross, 2,74.357) #from manual fit proline.dry
effectplot(cross, pheno.col=cyto.phes[1], mname2="cytoplasm", mark2=cyto.eff, geno2=c("KAS","TSU"),
mname1=mname)
##############################################
#3.6 plot lod profiles-
#
ref.C13.dry.nocov<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2,
formula= y ~ Q1 + Q2 ,method="hk")
pos: 58.47 79.08
Iteration 1
Q2 pos: 79.08 -> 75.14
LOD increase: 0.86
Q1 pos: 58.47 -> 58.47
LOD increase: 0
all pos: 58.47 79.08 -> 58.47 75.14
LOD increase at this iteration: 0.86
Iteration 2
Q1 pos: 58.47 -> 58.47
LOD increase: 0
Q2 pos: 75.14 -> 75.14
LOD increase: 0
all pos: 58.47 75.14 -> 58.47 75.14
LOD increase at this iteration: 0
overall pos: 58.47 79.08 -> 58.47 75.14
LOD increase overall: 0.86
#
ref.proline.dry.nocov<-refineqtl(cross, pheno.col=cyto.phes[1], qtl=model.out[[cyto.phes[1]]],
formula=formula(model.out[[cyto.phes[1]]]),method="hk")
pos: 74.36 50.17 2.791
Iteration 1
Q3 pos: 2.791 -> 2.791
LOD increase: 0
Q1 pos: 74.36 -> 74.36
LOD increase: 0
Q2 pos: 50.17 -> 50.17
LOD increase: 0
all pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791
LOD increase at this iteration: 0
overall pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791
LOD increase overall: 0
#
formula.in1<- y ~cytoplasm + Q1 + Q2 + Q3 + cytoplasm:Q1
ref.proline.dry<-refineqtl(cross, pheno.col=cyto.phes[1], qtl=model.out[[cyto.phes[1]]],
covar=cyto,formula=formula.in1,method="hk")
pos: 74.36 50.17 2.791
Iteration 1
Q2 pos: 50.17 -> 50.17
LOD increase: 0
Q1 pos: 74.36 -> 74.36
LOD increase: 0
Q3 pos: 2.791 -> 2.791
LOD increase: 0
all pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791
LOD increase at this iteration: 0
overall pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791
LOD increase overall: 0
#
model.out[[cyto.phes[2]]]
QTL object containing genotype probabilities.
name chr pos n.gen
Q1 3@58.5 3 58.473 2
Formula: y ~ Q1
pLOD: 0.659
model.in<-model.out[[cyto.phes[2]]]
model.in1<-makeqtl(cross, chr=3, pos=58.473) #from stepwise
model.in2<-addtoqtl(cross, qtl=model.in1, chr=2, pos=79.1)
formula.in1<- y ~cytoplasm + Q1 + Q2 + cytoplasm:Q2
ref.C13.dry<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2,
covar=cyto,formula=formula(formula.in1),method="hk")
pos: 58.47 79.08
Iteration 1
Q2 pos: 79.08 -> 77.07
LOD increase: 0.389
Q1 pos: 58.47 -> 58.47
LOD increase: 0
all pos: 58.47 79.08 -> 58.47 77.07
LOD increase at this iteration: 0.389
Iteration 2
Q1 pos: 58.47 -> 58.47
LOD increase: 0
Q2 pos: 77.07 -> 77.07
LOD increase: 0
all pos: 58.47 77.07 -> 58.47 77.07
LOD increase at this iteration: 0
overall pos: 58.47 79.08 -> 58.47 77.07
LOD increase overall: 0.389
proline.dry.lp<-as.data.frame(attr(ref.proline.dry, "lodprofile")[[1]])
C13.dry.lp<-as.data.frame(attr(ref.C13.dry, "lodprofile")[[2]])
proline.dry.nocov.lp<-as.data.frame(attr(ref.proline.dry.nocov, "lodprofile")[[1]])
C13.dry.nocov.lp<-as.data.frame(attr(ref.C13.dry.nocov, "lodprofile")[[2]])
lps2.2cov<-cbind(proline.dry.lp, C13.dry.lp[,3],proline.dry.nocov.lp[,3],C13.dry.nocov.lp[,3])
colnames(lps2.2cov)<-c("chr","pos","proline.dry","C13.dry","proline.dry.nocov","C13.dry.nocov")
toplot.lps<-melt(lps2.2cov, id.vars=c("chr","pos"))
length(toplot.lps[,1])/4
[1] 128
phe<-rep(c("C13.dry","proline.dry","C13.dry","proline.dry"), each=128)
covariate<-rep(c("cyto.interaction", "no.covariate"), each=256)
toplot.lps<-cbind(toplot.lps, phe, covariate)
ggplot(toplot.lps, aes(x=pos, y=value, col=covariate))+
geom_line()+facet_grid(phe~., scales="free")+theme_bw()
##############################################
#3.7 calculate new condifence intervals for region
ciout.C13dry<-bayesint(ref.C13.dry,qtl.index=2, expandtomarkers=T, prob=.9)
ciout.prolinedry<-bayesint(model.out[[cyto.phes[1]]],qtl.index=1, expandtomarkers=T, prob=.99)
reg2.2cov<-reg2.2; reg2.2cov[2]<-ciout.C13dry[1,2]; reg2.2cov[3]<-ciout.C13dry[3,2]
##############################################
#3.8 find genes for this regions (w/out cytoplasm effect in gene model)
#set the region
all.regions<-list(reg2.2cov)
all.focalphes<-c("proline.dry","C13.dry")
all.region.names<-c("reg2.2cov")
#plot region
exp.stats<-all.anova
exp.stats$gene.cat<-as.character(exp.stats$gene.id)
reg2.2cov.genes<-findgenes(all.regions[[1]])
exp.stats$gene.cat[exp.stats$gene.cat %in% reg2.2cov.genes]<-all.region.names[1]
exp.stats$gene.cat[!exp.stats$gene.cat %in% all.region.names]<-"other"
ggplot(exp.stats, aes(y=pos.cm, x=pos.bp, color=gene.cat))+
scale_color_manual(values=c("black",rainbow(4)))+
geom_point()+theme_bw()+ggtitle("physical gene position to cM marker position")+
facet_wrap(~chromosome)
##############################################
#3.9 Extract genes w/ significant expression differences (from ANOVA)
temp<-exp.stats[exp.stats$gene.cat==all.region.names[1],]
#extract 10 most signifiant genes
thresh.geno<-as.numeric(quantile(temp$sig.geno,(length(temp[,1])-(length(temp[,1])-10))/length(temp[,1])))
thresh.trt<-as.numeric(quantile(temp$sig.trt,(length(temp[,1])-(length(temp[,1])-10))/length(temp[,1])))
siggenes<-temp[temp$sig.geno<thresh.geno,]
print(all.region.names[1])
[1] "reg2.2cov"
sig.genes<-print(as.character(siggenes$gene.id))
[1] "AT2G39890" "AT2G40960" "AT2G41440" "AT2G42170" "AT2G42490"
[6] "AT2G43210" "AT2G43530" "AT2G43680" "AT2G43980" "AT2G44290"
geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
##############################################
#3.10 Run scanone w/o covars
outall<-data.frame()
s1.out<-scanone(cross,pheno.col=cyto.phes[1],addcovar=NULL)
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[1],"nocovar",
"nocovar","nocovar")
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
for (i in colnames(exp.covars)[3:length(colnames(exp.covars))]){
s1.out<-scanone(cross,pheno.col=cyto.phes[1],addcovar=exp.covars[,i])
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[1],as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
#inputs-
# toplot: the full scanout-out object,
#region: the region and phenotype: the phenotype
#maxcm- maximum cm to plot (mincm same way)
#plotcovscan<-function(toplot, region, phenotype, maxcm, mincm, genes)
plotcovscan(outall, region=reg2.2cov, phenotype="proline.dry",
mincm=45, maxcm=100, genes=sig.genes)
##############################################
#3.10 Run scanone etc w/ cytoplasm covars
#subset dataframe with stats including cytoplasm effects
all.anova<-allanova.cyto
all.anova$gene.cat<-as.character(all.anova$gene.id)
reg2.2cov.genes<-findgenes(reg2.2cov) #needs input stats to have name ("all.anova")
all.anova$gene.cat[all.anova$gene.cat %in% reg2.2cov.genes]<-all.region.names[1]
all.anova$gene.cat[!all.anova$gene.cat %in% all.region.names]<-"other"
ggplot(all.anova, aes(y=pos.cm, x=pos.bp, color=gene.cat))+
scale_color_manual(values=c("black",rainbow(4)))+
geom_point()+theme_bw()+ggtitle("physical gene position to cM marker position")+
facet_wrap(~chromosome)
all.anova.region<-all.anova[all.anova$gene.cat=="reg2.2cov",]
length(all.anova.region[,1])
[1] 555
#extract the most signifiant genes for both cyto and geno
print(thresh.geno<-as.numeric(quantile(all.anova.region$q.geno,0.05)))
[1] 0.007253
print(thresh.cyto<-as.numeric(quantile(all.anova.region$q.cyto,0.05)))
[1] 1.338e-14
siggenes<-all.anova.region[all.anova.region$q.geno<thresh.geno &
all.anova.region$q.cyto<thresh.cyto,]
print(all.region.names[1])
[1] "reg2.2cov"
sig.genes<-print(as.character(siggenes$gene.id))
[1] "AT2G39800" "AT2G43500" "AT2G44210"
geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
#run scanones with cytoplasm as an interactive covariate
#for proline.dry
outall<-data.frame()
#plot(s1.out<-scanone(cross,pheno.col=cyto.phes[1],addcovar=NULL, intcovar=cyto))
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[1],"nocovar",
"nocovar","nocovar")
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
for (i in colnames(exp.covars)[3:length(colnames(exp.covars))]){
s1.out<-scanone(cross,pheno.col=cyto.phes[1],addcovar=exp.covars[,i], intcovar=cyto)
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[1],as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
Error: numbers of columns of arguments do not match
plotcovscan(outall, region=reg2.2cov, phenotype="proline.dry",
mincm=45, maxcm=100, genes=sig.genes)
#for C13.dry
outall<-data.frame()
plot(s1.out<-scanone(cross,pheno.col=cyto.phes[2],addcovar=NULL, intcovar=cyto))
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[2],"nocovar",
"nocovar","nocovar")
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
for (i in colnames(exp.covars)[3:length(colnames(exp.covars))]){
s1.out<-scanone(cross,pheno.col=cyto.phes[2],addcovar=exp.covars[,i], intcovar=cyto)
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[2],as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
plotcovscan(outall, region=reg2.2cov, phenotype="C13.dry",
mincm=45, maxcm=100, genes=sig.genes)
#this doesn't work- need to look for genes with interactions-
#extract the most signifiant genes for both cyto and geno
print(thresh.p.drykas.gen<-as.numeric(quantile(all.anova.region$drykas.gen,
1-(length(all.anova.region[,1])-10)/length(all.anova.region[,1]))))
[1] 0.005088
siggenes<-all.anova.region[all.anova.region$drykas.gen<thresh.p.drykas.gen,]
#13 genes... not too bad remove 3 worst
siggenes
gene.id marker.id chromosome pos.bp pos.cm p.geno p.cyto
9791 AT2G39690 2_16908453 2 16541084 78.49 2.362e-01 4.366e-01
9901 AT2G40711 C2_16916847 2 16981691 75.14 3.648e-04 2.909e-02
9977 AT2G41410 C2_17106201 2 17261728 75.59 1.600e-09 1.658e-07
9981 AT2G41440 C2_17106201 2 17271172 75.59 1.358e-27 3.764e-02
10169 AT2G43210 2.Frag50 2 17960711 79.08 2.583e-20 1.071e-04
10202 AT2G43530 2.T1024 2 18070056 79.52 1.232e-16 9.309e-01
10215 AT2G43680 C2_18135222 2 18108349 80.73 2.142e-28 5.535e-13
10245 AT2G43980 C2_18220776 2 18208253 76.29 7.595e-13 2.761e-02
10280 AT2G44290 2_18253446 2 18305201 80.21 5.923e-14 7.648e-01
10296 AT2G44480 2_18253446 2 18359734 80.21 1.429e-01 9.522e-01
p.trt p.gxe p.gxc p.cxe p.gxcxe drykas.gen drytsu.gen
9791 0.023770 0.969911 0.1354835 0.11084 0.0005061 3.395e-03 8.905e-02
9901 0.091700 0.136192 0.7149177 0.87056 0.1951509 8.903e-04 5.424e-02
9977 0.293411 0.772577 0.0448285 0.27265 0.8792595 2.210e-03 2.179e-03
9981 0.670700 0.003731 0.5084716 0.02554 0.6631865 2.506e-08 8.189e-12
10169 0.016626 0.613634 0.6381025 0.78517 0.6002974 3.358e-04 1.215e-08
10202 0.924136 0.163013 0.4486678 0.15479 0.6545268 8.229e-04 4.714e-07
10215 0.001021 0.005227 0.0033593 0.11767 0.8259121 3.299e-03 3.543e-17
10245 0.309986 0.099286 0.8496225 0.34699 0.6227202 1.329e-03 4.361e-08
10280 0.599573 0.842075 0.9358935 0.24363 0.0869445 8.919e-05 5.425e-04
10296 0.034743 0.183006 0.0005201 0.01168 0.0448691 1.991e-03 3.581e-01
q.geno q.cyto q.trt q.gxe q.gxc q.cxe q.gxcxe
9791 4.681e-01 3.575e-01 0.11961 0.9996 0.7295 0.9040 0.6466
9901 6.379e-03 4.560e-02 0.23359 0.9996 0.8803 0.9788 0.9996
9977 1.139e-07 7.330e-07 0.39617 0.9996 0.6292 0.9353 0.9996
9981 6.222e-25 5.667e-02 0.56396 0.5371 0.8513 0.7597 0.9996
10169 5.785e-18 3.022e-04 0.10116 0.9996 0.8732 0.9772 0.9996
10202 2.227e-14 5.361e-01 0.63772 0.9996 0.8370 0.9196 0.9996
10215 1.111e-25 4.872e-12 0.02289 0.6204 0.3978 0.9093 0.9996
10245 8.298e-11 4.367e-02 0.40613 0.9996 0.8913 0.9500 0.9996
10280 7.435e-12 4.874e-01 0.53935 0.9996 0.9004 0.9336 0.9913
10296 3.687e-01 5.414e-01 0.14332 0.9996 0.2548 0.6912 0.9913
q.drytsu.gen q.drykas.gen gene.cat
9791 5.023e-01 1.945e-01 reg2.2cov
9901 4.201e-01 9.864e-02 reg2.2cov
9977 7.089e-02 1.628e-01 reg2.2cov
9981 5.660e-09 3.397e-05 reg2.2cov
10169 3.421e-06 5.999e-02 reg2.2cov
10202 7.467e-05 9.561e-02 reg2.2cov
10215 1.154e-13 1.924e-01 reg2.2cov
10245 9.663e-06 1.259e-01 reg2.2cov
10280 2.622e-02 2.259e-02 reg2.2cov
10296 7.491e-01 1.538e-01 reg2.2cov
print(all.region.names[1])
[1] "reg2.2cov"
sig.genes<-print(as.character(siggenes$gene.id))
[1] "AT2G39690" "AT2G40711" "AT2G41410" "AT2G41440" "AT2G43210"
[6] "AT2G43530" "AT2G43680" "AT2G43980" "AT2G44290" "AT2G44480"
geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
names(idnames)[2]<-"line"
exp.covars<-merge(idnames,geneexp1, by="line", all=T)
outall<-data.frame()
plot(s1.out<-scanone(cross,pheno.col=cyto.phes[2],addcovar=NULL, intcovar=cyto))
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[2],"nocovar",
"nocovar","nocovar")
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-s1.out
for (i in colnames(exp.covars)[3:length(colnames(exp.covars))]){
s1.out<-scanone(cross,pheno.col=cyto.phes[2],addcovar=exp.covars[,i], intcovar=cyto)
s1.out<-cbind(as.data.frame(s1.out),cyto.phes[2],as.character(i),
unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
outall<-rbind(outall,s1.out)
}
plotcovscan(outall, region=reg2.2cov, phenotype="C13.dry",
mincm=45, maxcm=100, genes=sig.genes)
# #3.11: Bioinformatics of DNA sequence polymorphism in cytoplasm
#
# ##############################################
# ##############################################
# #Part 4: Epistasis
# ##############################################
# ##############################################
#
# ##############################################
# #4.1: Extract epistasis components from stepwiseqtl
#
epi.model<-model.out[["proline.dry"]]
statsout<-read.csv("dd2014_allqtlstatistics_nocovar.csv",header=T)
epi.stats<-statsout[statsout$phenotype=="proline.dry",]
epi.stats
phenotype chromosome position df type3SS LOD perc.var Fstat
22 proline.dry 2 74.357 2 287810 19.163 21.774 49.956
23 proline.dry 3 50.173 2 76294 5.611 5.772 13.243
24 proline.dry 4 2.791 1 45064 3.368 3.409 15.644
25 proline.dry NA NA 1 24261 1.833 1.835 8.422
P.chi2 P.F effect.estimate effect.SE effect.t lowCImarker
22 0.000e+00 0.000e+00 31.785 3.254 9.769 2-15986145
23 2.449e-06 2.991e-06 -13.039 3.126 -4.172 3.GAPABX
24 8.202e-05 9.432e-05 -12.032 3.042 -3.955 4.F6N15
25 3.664e-03 3.966e-03 -9.722 3.350 -2.902 <NA>
hiCImarker lowCIpos hiCIpos
22 C2_16916847 73.57 75.14
23 C3_15414721 44.18 57.16
24 C4_1416972 0.00 19.22
25 <NA> NA NA
#2@74.4*3@50.2
# ##############################################
# #4.2: Interaction plots of epistasis
#
par(mfrow=c(1,1))
effectplot(cross, pheno.col="proline.dry",
mname1=as.character(summary(epi.model)$name[1]),
mname2=as.character(summary(epi.model)$name[2]))
# ##############################################
# #4.3: Scantwo of epistasis
#
out.allS2.summary<-data.frame()
for (i in qtlphes){
print(i)
s2.nocov<-scantwo(cross, pheno.col=i, method="hk", verbose=F)
s2.cov<-scantwo(cross, pheno.col=i, method="hk", verbose=F, addcov=cyto, intcov=cyto)
plot(s2.cov, main=paste("scantwo of", i, "with cytoplasm covariate"))
plot(s2.nocov, main=paste("scantwo of", i, "without covariates"))
out<-as.data.frame(summary(s2.nocov)); out<-cbind(i,"nocovar",out); colnames(out)[1:2]<-c("phenotype","covariate")
out.cyto<-as.data.frame(summary(s2.cov)); out.cyto<-cbind(i,"cytoplasm",out.cyto); colnames(out.cyto)[1:2]<-c("phenotype","covariate")
out.allS2.summary<-rbind(out.allS2.summary,out)
out.allS2.summary<-rbind(out.allS2.summary,out.cyto)
}
[1] "ABAlyoph.dry"
[1] "C13.dry"
[1] "C13.wet"
[1] "FT.June"
[1] "GR.dry"
[1] "RMR.dry"
[1] "Rel.GR.wet"
[1] "Rolled.dry"
[1] "Wilted.dry"
[1] "g0"
[1] "proline.dry"
write.csv(out.allS2.summary, file="dd2014_allS2summaries.csv")
# ##############################################
# #4.4: fitqtl of epistasis
test<-fitqtl(cross, pheno.col="proline.dry", qtl=epi.model, formula=formula(epi.model), method="hk")
# ##############################################
# ##############################################
# #Part 5: QTL*environment interactions
# ##############################################
# ##############################################
#
# ##############################################
# #5.1: work on later...