dd2014_finalqtlscript

dd2014_finalqtlscript_draft4.R

Lovell — Jul 31, 2014, 7:38 PM

####################################################
### Title: Complete analysis for drydown manuscript
### Author: JTLovell
### Date Created: 29-July 2014

####################################################
###Outline:###
####################################################
# A1: set up the environment

# Part 1 (QTL analysis w/out covariates): 
#   1.1: Load permutations
#   1.2: Load Cross
#   1.3: Plot map, information and other diagnostics
#   1.4: Load list of phenotypes w/ qtls 
#   1.5: Plot correlations and nj tree of phenotypes
#   1.6: Calculate Penalties (and concatenate from different files)
#   1.7: Run stepwiseQTL, output results and save results
#   1.8: Make lod profile plots
#   1.9: Make qtl position plots
# Part 2 QTL fine mapping w/ gene expression
#   2.1: Load gene expression statistics
#     2.1a: plotting of genomic distribution of expression
#     2.1b: determination of GxE genes
#   2.2: Load other files needed for analysis
#   2.3: Calculate intervals (2q, 4p, 4q, 5q)
#   2.4: Plot intervals (and gene-cM correlations)
#   2.5: Extract genes w/ significant expression differences (from ANOVA)
#   2.6: Run covariate scans w/ significant genes
#   2.7: Plot covariate scans
#   2.8: Summarize covariate analysis
#   2.9: Extract go terms from significant genes
#   2.10: Summarize candidate genes
# Part 3: Effect of cytoplasm
#   3.1: T.tests and varcomp analysis of cytoplasm
#   3.2: Plots and summary of cytoplasm stats
#   3.3: Bioinformatics of DNA sequence polymorphism in cytoplasm
#   3.4: Determination of candidate genes in cytoplasm
#   3.5: Cytoplasm main effect in QTLs (oneways)
#   3.6: Cytoplasm main effect in multi-QTL models
#   3.7: Cytoplasm interactive effects in QTLs (oneway)- compare w/ 3.3
#   3.8: Interaction plots of QTL*cyto and cyto main effect at proline
#   3.8: Bioinformatics of potential QTL-by-cytoplasm interactions
# Part 4: Epistasis
#   4.1: Extract epistasis components from stepwiseqtl
#   4.2: Interaction plots of epistasis
#   4.3: Scantwo of epistasis
#   4.4: fitqtl of epistasis
# Part 5: QTL*environment interactions
#   5.1: work on later...



####################################################
###Libraries:###
####################################################



##############################################
#A1 Set up the environment
rm(list=ls())
options(warn=-1)
pkg <- c("biomaRt","qtl","snow","rlecuyer","plyr","ggplot2","reshape","reshape2","qvalue","lme4","nlme","car","doBy","scales")
invisible(lapply(pkg, function(x) {cat(x,"..."); library(x, character.only=T, verbose=F, warn.conflicts=F,quietly=T)} ))

biomaRt ...qtl ...snow ...rlecuyer ...plyr ...ggplot2 ...reshape ...reshape2 ...qvalue ...lme4 ...


Attaching package: 'Matrix'

The following object is masked from 'package:reshape':

    expand

nlme ...car ...doBy ...

Loading required package: splines

Attaching package: 'MASS'

The following object is masked from 'package:biomaRt':

    select

scales ...

sessionInfo()

R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] splines   stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] scales_0.2.4    doBy_4.5-10     MASS_7.3-33     survival_2.37-7
 [5] car_2.0-20      nlme_3.1-117    lme4_1.1-7      Rcpp_0.11.2    
 [9] Matrix_1.1-4    qvalue_1.38.0   reshape2_1.4    reshape_0.8.5  
[13] ggplot2_1.0.0   plyr_1.8.1      rlecuyer_0.3-3  snow_0.3-13    
[17] qtl_1.32-10     biomaRt_2.20.0  knitr_1.6      

loaded via a namespace (and not attached):
 [1] AnnotationDbi_1.26.0 Biobase_2.24.0       BiocGenerics_0.10.0 
 [4] colorspace_1.2-4     DBI_0.2-7            digest_0.6.4        
 [7] evaluate_0.5.5       formatR_0.10         GenomeInfoDb_1.0.2  
[10] grid_3.1.0           gtable_0.1.2         IRanges_1.22.10     
[13] lattice_0.20-29      minqa_1.2.3          munsell_0.4.2       
[16] nloptr_1.0.0         nnet_7.3-8           parallel_3.1.0      
[19] proto_0.3-10         RCurl_1.95-4.1       RSQLite_0.11.4      
[22] stats4_3.1.0         stringr_0.6.2        tcltk_3.1.0         
[25] tools_3.1.0          XML_3.98-1.1

setwd("~/Desktop/Drydown2014")
##############################################
##############################################
#Part 1 (QTL analysis w/out covariates): 
##############################################
##############################################


##############################################
#1.1: Load permutations
load("dd2014_allqtldata.RData")
##############################################
#1.2: Load Cross
cross<-read.cross("csvs",dir="", genotypes=c("a","b"),
                  genfile="TKrils_map55.csv", phefile= "TKrils_full_phe.csv",
                  na.strings=c("NA","-", "#VALUE!","#NUM!"))

 --Read the following data:
     341  individuals
     450  markers
     38  phenotypes
 --Cross type: bc

cross.2008<-read.cross("csvs",dir="", genotypes=c("A","H"),
                  genfile="McKay_revised_map_1_27_12.csv",phefile= "TKrils_full_phe.csv",
                  na.strings=c("NA","-", "#VALUE!","#NUM!"))

 --Read the following data:
     341  individuals
     168  markers
     38  phenotypes
 --Cross type: bc

cross.2008<-convert2riself(cross.2008)
cross<-convert2riself(cross)
cross<-calc.genoprob(cross, step=1, stepwidth="max",error.prob=0.01, map.function="kosambi")

##############################################
#1.3: Plot map, information and other diagnostics
par(mfrow=c(1,1))
plot.map(cross.2008, main="2008 map")

plot of chunk unnamed-chunk-1

plot.map(cross, main="new map")

plot of chunk unnamed-chunk-1

plotInfo(cross.2008, main="missing information, 2008 map",col="red",lty=2)

plot of chunk unnamed-chunk-1

plotInfo(cross, main="missing information, new map")

plot of chunk unnamed-chunk-1

summary(cross)

    RI strains via selfing

    No. individuals:    341 

    No. phenotypes:     38 
    Percent phenotyped: 78.7 

    No. chromosomes:    5 
        Autosomes:      1 2 3 4 5 

    Total markers:      450 
    No. markers:        103 80 82 74 111 
    Percent genotyped:  70.4 
    Genotypes (%):      AA:51.2  BB:48.8

##############################################
#1.4: Load list of phenotypes w/ qtls 
qtlphes<-c("ABAlyoph.dry", "C13.dry","C13.wet","FT.June","GR.dry","RMR.dry",
           "Rel.GR.wet","Rolled.dry","Wilted.dry","g0","proline.dry")
##############################################
#1.5: Plot correlations and nj tree of phenotypes
pheno.matrix<-as.data.frame(cross$phe)
pheno.matrix.qtl<-pheno.matrix[,qtlphes]
heatmap(cor(pheno.matrix.qtl, use="pairwise"), na.rm=T) #can make this look better

plot of chunk unnamed-chunk-1

#print(cor(pheno.matrix.main[,-1], use="pairwise"))

##############################################
#1.6: Calculate Penalties (and concatenate from different files)
pens<-calc.penalties(perms.raw, alpha=0.05)
Wilted.dry<-calc.penalties(perms.wilted, alpha=0.05)
pens<-rbind(pens, Wilted.dry)
##############################################
#1.7: Run stepwiseQTL, output results and save results
#stepwiseqtl.summary- function found here and published on my website (johntlovell.wordpress.com)
if(!exists("stepwiseqtl.summary", mode="function")) source("stepwiseqtl.summary_function.R")
#inputs: cross-, phes-, max.qtls- pens- covar- type-
stats_out<-stepwiseqtl.summary(cross=cross, phes=qtlphes, max.qtls=8, 
                               pens=pens, covar=NULL, type="stats", printout=T)

conducting stepwise QTL model selection 
phenotype (1:11) -- ABAlyoph.dry

phenotype (2:11) -- C13.dry

phenotype (3:11) -- C13.wet

phenotype (4:11) -- FT.June

plot of chunk unnamed-chunk-1

phenotype (5:11) -- GR.dry

phenotype (6:11) -- RMR.dry

phenotype (7:11) -- Rel.GR.wet

phenotype (8:11) -- Rolled.dry

plot of chunk unnamed-chunk-1

phenotype (9:11) -- Wilted.dry

phenotype (10:11) -- g0

phenotype (11:11) -- proline.dry

par(mfrow=c(4,1),mar = c(2,4,2,1))

plot of chunk unnamed-chunk-1

write.csv(stats_out, file="dd2014_allqtlstatistics_nocovar.csv", row.names=F)
##############################################
#1.8 LOD PRofile plots (see above)
##############################################
#1.9: Make qtl position plots
statsout<-read.csv("dd2014_allqtlstatistics_nocovar.csv",header=T) 
head(statsout)

     phenotype chromosome position df type3SS   LOD perc.var  Fstat
1 ABAlyoph.dry          2   15.501  1 270.044 3.635    5.160 270.04
2      C13.dry          3   58.473  1   4.620 3.234    4.466   4.62
3      C13.wet          3   17.916  1   2.482 3.520    3.626  16.26
4      C13.wet          3   75.535  1   2.643 3.743    3.862  17.32
5      C13.wet          4    4.408  1   3.936 5.506    5.751  25.79
6      C13.wet          4   42.395  1   2.499 3.544    3.652  16.38
     P.chi2       P.F effect.estimate effect.SE effect.t lowCImarker
1 4.283e-05 4.593e-05        -0.98573   0.23849   -4.133    2_518639
2 1.138e-04 1.209e-04        -0.12470   0.03204   -3.892    3.GAPABX
3 5.672e-05 6.874e-05         0.09106   0.02258    4.032  C3_2497708
4 3.299e-05 4.045e-05        -0.09617   0.02311   -4.162 C3_18396211
5 4.764e-07 6.400e-07        -0.11447   0.02254   -5.079    4_208623
6 5.342e-05 6.483e-05        -0.09174   0.02267   -4.047  C4_5538105
   hiCImarker lowCIpos hiCIpos
1  C2_6997430    8.380  33.796
2  3-21632379   44.183  89.501
3   3_7307956   13.874  26.475
4  3_21728447   64.783  85.984
5    C4_70965    1.473   6.311
6 C4_14773912   31.209  70.164

statsout<-statsout[complete.cases(statsout),]
#function to plot pvals
reverselog_trans <- function(base = exp(1)) {
  trans <- function(y) -log(y, base); inv <- function(y) base^(-y)
  trans_new(paste0("reverselog-", format(base)), trans, inv, 
            log_breaks(base = base), domain = c(1e-100, Inf))}

ggplot(statsout,aes(x=position,y=as.numeric(effect.t)))+
  geom_point() +   scale_y_continuous("t statistic for QTL point estimate")+
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(phenotype~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position,y=P.F))+
  geom_point() +
  geom_errorbarh(aes(xmax = hiCIpos, xmin = lowCIpos, height = 0))+
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  scale_y_continuous(trans=reverselog_trans(10),"p.value of F statistic")+
  theme_bw()+  ggtitle("distribution of QTL positions and effect sizes")

plot of chunk unnamed-chunk-1

ggplot(statsout,aes(x=position))+
  geom_histogram(binwidth=5) + 
  facet_grid(.~chromosome, scales="free_y", margins=F)+
  theme_bw()+  ggtitle("distribution and density of QTL point estimates")

plot of chunk unnamed-chunk-1

##############################################
##############################################
#Part 2 QTL fine mapping w/ gene expression
##############################################
##############################################

##############################################
#2.1: Load gene expression statistics, generate qvalue statistics and process
all.anova<-read.csv("dd2014_allgenes_geneexpressionstats.csv", header=T)
all.anova.wcyto<-read.csv("dd2014_allgenes_geneexpressionstats_wcyto.csv", header=T)
qvals<-data.frame(rownames(all.anova))
par(mfrow=c(5,1),mar = c(2,4,2,1))
for (i in c("p.geno","p.trt","p.gxe","dry_pval","wet_pval")){
  q.out<-as.numeric(qvalue(all.anova[,i])$qvalue)
  hist(q.out, main=paste("qvalue statistics calculated from", i))
  qvals<-cbind(qvals,as.data.frame(q.out))
}

plot of chunk unnamed-chunk-1

qvals<-qvals[,-1];colnames(qvals)<-c("sig.geno","sig.trt","sig.gxe","dry_qval","wet_qval")
all.anova<-cbind(all.anova,qvals)
summary(all.anova)

      gene.id            marker.id       chromosome       pos.bp        
 AT1G01010:    1   C3_3623963 :  311   Min.   :1.00   Min.   :    1251  
 AT1G01020:    1   C3_4648688 :  225   1st Qu.:1.00   1st Qu.: 5822841  
 AT1G01030:    1   C3_2497708 :  217   Median :3.00   Median :11920794  
 AT1G01040:    1   C3_19161491:  205   Mean   :2.94   Mean   :12591720  
 AT1G01050:    1   2-15986145 :  193   3rd Qu.:4.00   3rd Qu.:18724714  
 AT1G01060:    1   C4_9316828 :  189   Max.   :5.00   Max.   :30426839  
 (Other)  :25618   (Other)    :24284                                    
     pos.cm          f.geno          f.trt            f.gxe      
 Min.   :  0.0   Min.   :  0.0   Min.   :   0.0   Min.   : 0.00  
 1st Qu.: 22.1   1st Qu.:  0.2   1st Qu.:   0.3   1st Qu.: 0.09  
 Median : 51.5   Median :  0.8   Median :   1.9   Median : 0.42  
 Mean   : 48.7   Mean   :  3.5   Mean   :  14.6   Mean   : 1.01  
 3rd Qu.: 71.7   3rd Qu.:  2.7   3rd Qu.:  10.0   3rd Qu.: 1.26  
 Max.   :111.1   Max.   :882.7   Max.   :2003.5   Max.   :64.02  

     p.geno          p.trt            p.gxe           AA.dry     
 Min.   :0.000   Min.   :0.0000   Min.   :0.000   Min.   : 6.68  
 1st Qu.:0.104   1st Qu.:0.0018   1st Qu.:0.264   1st Qu.: 6.99  
 Median :0.366   Median :0.1713   Median :0.519   Median : 7.14  
 Mean   :0.403   Mean   :0.2989   Mean   :0.511   Mean   : 7.26  
 3rd Qu.:0.677   3rd Qu.:0.5598   3rd Qu.:0.759   3rd Qu.: 7.39  
 Max.   :1.000   Max.   :0.9999   Max.   :1.000   Max.   :12.75  

     BB.dry          AA.wet          BB.wet        dry_estAA    
 Min.   : 6.65   Min.   : 6.68   Min.   : 6.67   Min.   : 6.68  
 1st Qu.: 6.99   1st Qu.: 6.99   1st Qu.: 6.99   1st Qu.: 6.99  
 Median : 7.15   Median : 7.14   Median : 7.14   Median : 7.14  
 Mean   : 7.26   Mean   : 7.26   Mean   : 7.26   Mean   : 7.26  
 3rd Qu.: 7.39   3rd Qu.: 7.39   3rd Qu.: 7.39   3rd Qu.: 7.39  
 Max.   :12.53   Max.   :12.36   Max.   :12.36   Max.   :12.75  

   dry_estBB        dry_pval       wet_estAA       wet_estBB    
 Min.   : 6.65   Min.   :0.000   Min.   : 6.68   Min.   : 6.67  
 1st Qu.: 6.99   1st Qu.:0.158   1st Qu.: 6.99   1st Qu.: 6.99  
 Median : 7.15   Median :0.420   Median : 7.14   Median : 7.14  
 Mean   : 7.26   Mean   :0.440   Mean   : 7.26   Mean   : 7.26  
 3rd Qu.: 7.39   3rd Qu.:0.705   3rd Qu.: 7.39   3rd Qu.: 7.39  
 Max.   :12.53   Max.   :1.000   Max.   :12.36   Max.   :12.36  

    wet_pval        sig.geno        sig.trt         sig.gxe     
 Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
 1st Qu.:0.162   1st Qu.:0.322   1st Qu.:0.004   1st Qu.:1.000  
 Median :0.422   Median :0.565   Median :0.190   Median :1.000  
 Mean   :0.440   Mean   :0.489   Mean   :0.219   Mean   :0.991  
 3rd Qu.:0.704   3rd Qu.:0.697   3rd Qu.:0.414   3rd Qu.:1.000  
 Max.   :1.000   Max.   :0.772   Max.   :0.555   Max.   :1.000  

    dry_qval        wet_qval    
 Min.   :0.000   Min.   :0.000  
 1st Qu.:0.529   1st Qu.:0.546  
 Median :0.705   Median :0.712  
 Mean   :0.623   Mean   :0.630  
 3rd Qu.:0.788   3rd Qu.:0.791  
 Max.   :0.839   Max.   :0.843

##############################################
#2.1a: make plots of genomic distribution of pvalues
#set significance (based on qvalues) as a threshold variable
sig.geno.fac<- transform(all.anova, cut=cut(sig.geno, breaks=c(0,.05,1)))$cut
sig.trt.fac<- transform(all.anova, cut=cut(sig.trt, breaks=c(0,.05,1)))$cut
sig.gxe.fac<- transform(all.anova, cut=cut(sig.gxe, breaks=c(0,.05,1)))$cut
dry_pval.fac<- transform(all.anova, cut=cut(dry_pval, breaks=c(0,.05,1)))$cut
wet_pval.fac<- transform(all.anova, cut=cut(wet_pval, breaks=c(0,.05,1)))$cut
all.anova.toplot<-cbind(all.anova,sig.geno.fac,sig.trt.fac,sig.gxe.fac,dry_pval.fac,wet_pval.fac)
facstoplot<-list(c("sig.geno.fac","p.geno","Pvalues of genotype effect (typeIII)"),
                 c("sig.trt.fac","p.trt","Pvalues of environment effect (typeIII)"),
                 c("dry_pval.fac","dry_pval","Pvalues of genotype effect in dry (t.test)"),
                 c("wet_pval.fac","wet_pval","Pvalues of genotype effect in wet (t.test)"))
#threshold is at alpha=0.05 (via Q-values), colored in plots this way
for(i in facstoplot){
  print(ggplot(all.anova.toplot,aes(y=get(i[2]),x=pos.bp,color=get(i[1])))+
    geom_point()+theme_bw()+ggtitle(i[3])+
    scale_y_continuous(trans=reverselog_trans(10))+ facet_grid(chromosome~.))}

plot of chunk unnamed-chunk-1

##############################################
#2.1b Identify genes w/ significant G*E, output for later
head(all.anova)

    gene.id marker.id chromosome pos.bp pos.cm f.geno   f.trt   f.gxe
1 AT1G01010 C1_103654          1   3631  0.186 3.6251 6.63512 7.83503
2 AT1G01020 C1_103654          1   5928  0.186 4.2390 1.87075 0.34134
3 AT1G01030 C1_103654          1  11649  0.186 2.1099 5.92443 1.01925
4 AT1G01040 C1_103654          1  23146  0.186 0.0727 1.53420 0.01366
5 AT1G01050 C1_103654          1  31170  0.186 0.3614 0.03060 0.03141
6 AT1G01060 C1_103654          1  33666  0.186 4.9961 0.06019 0.60216
   p.geno   p.trt    p.gxe AA.dry BB.dry AA.wet BB.wet dry_estAA dry_estBB
1 0.05836 0.01073 0.005631  7.240  7.219  7.245  7.354     7.240     7.219
2 0.04081 0.17294 0.559722  7.198  7.139  7.217  7.184     7.198     7.139
3 0.14793 0.01582 0.313930  7.072  7.119  7.134  7.142     7.072     7.119
4 0.78772 0.21695 0.907088  7.624  7.619  7.606  7.604     7.624     7.619
5 0.54844 0.86131 0.859505  7.469  7.497  7.480  7.495     7.469     7.497
6 0.02652 0.80645 0.438682  7.438  7.551  7.407  7.641     7.438     7.551
  dry_pval wet_estAA wet_estBB wet_pval sig.geno sig.trt sig.gxe dry_qval
1  0.49431     7.245     7.354 0.003923   0.2334 0.01946  0.6680   0.7342
2  0.04312     7.217     7.184 0.352691   0.1902 0.19130  0.9997   0.3092
3  0.11033     7.134     7.142 0.733678   0.3789 0.02728  0.9997   0.4637
4  0.79055     7.606     7.604 0.908446   0.7274 0.22526  0.9997   0.8065
5  0.55787     7.480     7.495 0.763103   0.6489 0.51776  0.9997   0.7545
6  0.27156     7.407     7.641 0.054034   0.1481 0.50217  0.9997   0.6282
  wet_qval
1  0.08107
2  0.67949
3  0.79762
4  0.83166
5  0.80225
6  0.35175

#effect of g*e
ggplot(all.anova.toplot,aes(y=p.gxe,x=pos.bp,color=sig.gxe.fac))+
  geom_point()+theme_bw()+ggtitle("Pvalues of GxE effect (typeIII)")+
  scale_y_continuous(trans=reverselog_trans(10))+
  facet_grid(chromosome~.)

plot of chunk unnamed-chunk-1

gxe.genes<-all.anova[all.anova$sig.gxe<0.05,]
as.character(gxe.genes$gene.id)

 [1] "AT1G05200" "AT1G23940" "AT1G27030" "AT1G48760" "AT1G56510"
 [6] "AT1G62290" "AT1G62810" "AT1G63420" "AT1G67365" "AT1G72800"
[11] "AT1G73010" "AT1G76240" "AT2G15480" "AT2G16990" "AT2G23110"
[16] "AT2G35075" "AT3G47580" "AT4G01360" "AT4G15910" "AT4G19530"
[21] "AT4G21410" "AT4G22990" "AT5G09230" "AT5G09240" "AT5G12230"
[26] "AT5G25980" "AT5G41760" "AT5G42825" "AT5G44310"

##############################################
#2.2: Load other files needed for analysis
cmgenpos<-read.csv("gene_locations_new_McKay_map.csv",header=T, na.strings="NA") #physical position of all genes
geneexp<-read.csv("May_SNP_5_4_12.csv",header=T) #gene expression of all AT genes in drydown
idnames<-read.csv("dd2014_markernametoid.csv",header=T)
colnames(idnames)[2]<-"line"
geneexp<-merge(idnames,geneexp, by="line")
physpos.gene<-cmgenpos[,c(1,4:5)]
annotations<-read.table("Functional_annotation_list.txt",sep="\t",header=F)
colnames(annotations)<-c("gene","annotation")
##############################################
#2.3: Look at the distribution of QTL and effects to figure out where intervals should be
#look at the lod profiles and scanones if needed
#n <- names(model.out);lapply(model.out, function(x) plotLodProfile(x, main=n[substitute(x)[[3]]])
#par(mfrow=c(2,2));for (i in qtlphes) plot(scanone(cross, pheno.col=i,ch=5), main=i)
#make a dataframe with effects to help decide how to structure the regions
cross<-sim.geno(cross, n.draws=64, step=1, stepwidth="max",error.prob=0.01, map.function="kosambi")
effectscan.out<-data.frame()
for (i in qtlphes){
  temp<-data.frame(effectscan(cross, pheno.col=i, draw=F),i)
  temp$st.effect<-scale(temp$a,center=T,scale=T);effectscan.out<-rbind(effectscan.out,temp)
}
#general plots of qtl effects
names(effectscan.out)[3:4]<-c("effect","phenotype")
ggplot(effectscan.out, aes(x=pos, y=st.effect, color=phenotype))+ geom_line()+
  theme_bw()+ggtitle("effect scans (of TSU allele) of all phenotypes")+  facet_wrap(~chr)

plot of chunk unnamed-chunk-1

es.phys<-effectscan.out[effectscan.out$phenotype %in% c("C13.wet","C13.dry","ABAlyoph.dry","proline.dry","g0"),]
ggplot(es.phys, aes(x=pos, y=st.effect, color=phenotype))+geom_line()+theme_bw()+
  facet_wrap(~chr)+ggtitle("effect scans (of TSU allele) of physiological phenotypes")

plot of chunk unnamed-chunk-1

es.growth<-effectscan.out[effectscan.out$phenotype %in% c("GR.dry","RMR.dry","Rel.GR.wet","Rolled.dry","Wilted.dry"),]
ggplot(es.growth, aes(x=pos, y=st.effect, color=phenotype))+
  geom_line()+theme_bw()+  facet_wrap(~chr)+ggtitle("effect scans (of TSU allele) of growth phenotypes")

plot of chunk unnamed-chunk-1

##############################################
#2.4: Check intervals around four regions...
#check out the qtl effects in a region... here is a function
#region needs to be a vector of length 3 (chromosome, start pos, end pos)
#phes needs to be a vector of exactly 2 phenotypes
ploteff<-function(region,phes){
  par(mfrow=c(3,1)); plot(scanone(cross, pheno.col=phes[1], chr=region[1]), main=paste("scanones on ch",region[1], "for phenotypes:", phes[1],phes[2]))
  plot(scanone(cross, pheno.col=phes[2], chr=region[1]), add=T, lty=3, col="red")
  rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
  effectscan(cross, pheno.col=phes[1], draw=T, chr=region[1],main=paste("effect scan, ch=",region[1],"; phenotype=", phes[1]))
  rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
  effectscan(cross, pheno.col=phes[2], draw=T, chr=region[1], main=paste("effect scan, ch=",region[1],"; phenotype=", phes[2]))
  rect(xleft=region[2], xright=region[3],ytop=100,ybottom=-100, col=rgb(0, 1, 0, alpha=0.2), border=F)
}

stats_out<-stepwiseqtl.summary(cross=cross, phes=qtlphes, max.qtls=8, 
                               pens=pens, covar=NULL, type="model", printout=T)

conducting stepwise QTL model selection 
phenotype (1:11) -- ABAlyoph.dry

phenotype (2:11) -- C13.dry

phenotype (3:11) -- C13.wet

phenotype (4:11) -- FT.June

plot of chunk unnamed-chunk-1

phenotype (5:11) -- GR.dry

phenotype (6:11) -- RMR.dry

phenotype (7:11) -- Rel.GR.wet

phenotype (8:11) -- Rolled.dry

plot of chunk unnamed-chunk-1

phenotype (9:11) -- Wilted.dry

phenotype (10:11) -- g0

phenotype (11:11) -- proline.dry

model.out<-stats_out
##############################################
#2.4.1 (reg2.1) --- aba-2.1 - Initially too wide- entire shoulder of peak
#look at wilted too:
statsout<-statsout[complete.cases(statsout),]
reg2.1<-statsout[statsout$chromosome==2 & statsout$position <20,c(2,16,17)]
phes2.1<-c("ABAlyoph.dry","Wilted.dry")
ploteff(region=reg2.1,phes=phes2.1)

plot of chunk unnamed-chunk-1

newint<-data.frame(bayesint(model.out["ABAlyoph.dry"][[1]], qtl.index=1, prob=.75)[1:3,1:3])
reg.test<-reg2.1; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes2.1)

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1)); plotLodProfile(model.out["ABAlyoph.dry"][[1]], main="Lod Profile of ABA, red-90% CI, purple-75% CI") 
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg2.1[2], xright=reg2.1[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)

plot of chunk unnamed-chunk-1

#the narrower region captures the peak much better (without the shoulders)
reg2.1<-reg.test

##############################################
#2.4.2 (reg2.2) ---proline-2.2 
reg2.2<-statsout[statsout$chromosome==2 & statsout$position >60 & statsout$phenotype=="proline.dry",c(2,16,17)]
phes2.2<-c("proline.dry","Rolled.dry")
ploteff(region=reg2.2,phes=phes2.2)

plot of chunk unnamed-chunk-1

newint<-data.frame(bayesint(model.out["proline.dry"][[1]], qtl.index=1, prob=.99)[1:3,1:3])
reg.test<-reg2.2; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes2.2)

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1)); plotLodProfile(model.out["proline.dry"][[1]], main="Lod Profile of proline, purple-90% CI, blue-99% CI",ch=2) 
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg2.2[2], xright=reg2.2[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)

plot of chunk unnamed-chunk-1

#the wider region captures the peak much better (without the shoulders)
reg2.2<-reg.test

##############################################
#2.4.3 (reg4.1) ---d13C-4.1 
reg4.1<-statsout[statsout$chromosome==4 & statsout$position <10 & statsout$phenotype=="C13.wet",c(2,16,17)]
phes4.1<-c("C13.wet","proline.dry")
ploteff(region=reg4.1,phes=phes4.1)

plot of chunk unnamed-chunk-1

##############################################
#2.4.4(reg5.2) ---d13C-5.2 -initially too wide (incorporates 2 peaks)
reg5.2<-statsout[statsout$chromosome==5 & statsout$position >60 & statsout$phenotype=="C13.wet",c(2,16,17)]
phes5.2<-c("C13.wet","C13.dry")
ploteff(region=reg5.2,phes=phes5.2)

newint<-data.frame(bayesint(model.out["C13.wet"][[1]], qtl.index=6, prob=.99)[1:3,1:3])
reg.test<-reg5.2; reg.test[2]<-newint[1,2];reg.test[3]<-newint[3,2]
ploteff(region=reg.test,phes=phes5.2)

plot of chunk unnamed-chunk-1

par(mfrow=c(1,1)); plotLodProfile(model.out["C13.wet"][[1]],ch=5, main="Lod Profile of d13C.wet, red-90% CI, purple-80% CI") 
rect(xleft=reg.test[2], xright=reg.test[3],ytop=100,ybottom=-100, col=rgb(0, 0, 1, alpha=0.2), border=F)
rect(xleft=reg5.2[2], xright=reg5.2[3],ytop=100,ybottom=-100, col=rgb(1, 0, 0, alpha=0.2), border=F)

plot of chunk unnamed-chunk-1

#this looks much better- only single peak in coverages
reg5.2<-reg.test

##############################################
#2.5 list out and plot the regions and focal phenotypes
all.regions<-list(reg2.1,reg2.2,reg4.1,reg5.2)
all.focalphes<-c("ABAlyoph.dry","proline.dry","C13.wet","C13.wet")
all.region.names<-c("reg2.1","reg2.2","reg4.1","reg5.2")
#function to subset the data
findgenes<-function(region){
  reg.in<-as.numeric(region)
  culled.bycm<-all.anova[all.anova$chromosome==reg.in[1] & all.anova$pos.cm>reg.in[2] & all.anova$pos.cm<reg.in[3],]
  temp<-all.anova[
    all.anova$chromosome==reg.in[1] & all.anova$pos.cm>reg.in[2] & all.anova$pos.cm<reg.in[3]  |
      all.anova$chromosome==reg.in[1] & all.anova$pos.bp>min(culled.bycm$pos.bp) & all.anova$pos.bp<max(culled.bycm$pos.bp),]
  return(as.character(temp$gene.id))
}
exp.stats<-all.anova
exp.stats$gene.cat<-as.character(exp.stats$gene.id)
for (i in 1:4){
  temp<-findgenes(all.regions[[i]])
  exp.stats$gene.cat[exp.stats$gene.cat %in% temp]<-all.region.names[i]
}
exp.stats$gene.cat[!exp.stats$gene.cat %in% all.region.names]<-"other"
ggplot(exp.stats, aes(y=pos.cm, x=pos.bp, color=gene.cat))+
  scale_color_manual(values=c("black",rainbow(4)))+
  geom_point()+theme_bw()+ggtitle("physical gene position to cM marker position")+
  facet_wrap(~chromosome)

plot of chunk unnamed-chunk-1

##############################################
#2.6: Extract genes w/ significant expression differences (from ANOVA)
#2.7: Make dataframe with those genes only for covariate analysis
exp.covars.list<-list()
sig.genes.out<-list()
for (i in 1:4){
  temp<-exp.stats[exp.stats$gene.cat==all.region.names[i],]
  #extract 10 most signifiant genes
  siggenes<-temp[temp$sig.geno<as.numeric(quantile(temp$sig.geno,(length(temp[,1])-(length(temp[,1])-10))/length(temp[,1]))),]
  print(all.region.names[i])
  sig.genes<-print(as.character(siggenes$gene.id))
  sig.genes.out[[i]]<-sig.genes
  geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
  geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
  names(idnames)[2]<-"line"
  exp.covars<-merge(idnames,geneexp1, by="line", all=T)
  exp.covars.list[[i]]<-exp.covars
}

[1] "reg2.1"
 [1] "AT2G03140" "AT2G03820" "AT2G04170" "AT2G04378" "AT2G04380"
 [6] "AT2G05810" "AT2G05915" "AT2G07215" "AT2G11140" "AT2G11240"
[1] "reg2.2"
 [1] "AT2G38530" "AT2G39110" "AT2G40960" "AT2G41440" "AT2G42170"
 [6] "AT2G42490" "AT2G43210" "AT2G43530" "AT2G43680" "AT2G43980"
[1] "reg4.1"
 [1] "AT4G00650" "AT4G00710" "AT4G00720" "AT4G00740" "AT4G00755"
 [6] "AT4G00840" "AT4G00880" "AT4G00893" "AT4G00900" "AT4G00975"
[1] "reg5.2"
 [1] "AT5G53700" "AT5G53930" "AT5G54710" "AT5G54720" "AT5G54730"
 [6] "AT5G58575" "AT5G61410" "AT5G63020" "AT5G63030" "AT5G67480"

#2.8 Run scanone for w/ and w/o covars
outall<-data.frame(chr=numeric(), pos=numeric(),lod=numeric(),
                   phe=character(),covar=character(),
                   gene=character(),expression_env=character())
for (i in all.focalphes){
  temp<-cbind(as.data.frame(scanone(cross,pheno.col=i)),i,"nocovar","nocovar","nocovar")
  colnames(temp)[4:7]<-c("phe","covar","gene","expression_env")
  outall<-rbind(outall,temp)
}
for (j in 1:4){
  for (i in colnames(exp.covars.list[j][[1]])[3:length(colnames(exp.covars.list[j][[1]]))]){
    s1.out<-scanone(cross,pheno.col=all.focalphes[j],addcovar=exp.covars.list[[j]][,i])
    s1.out<-cbind(as.data.frame(s1.out),as.character(all.focalphes[j]),as.character(i),
                  unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
    colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
    outall<-rbind(outall,s1.out)
  }
}

##############################################
#2.9: Plot covariate scans
#for all phenotypes, just the focal ch
#inputs- 
# toplot: the full scanout-out object, 
#region: the region and phenotype: the phenotype
#maxcm- maximum cm to plot (mincm same way)
#plotcovscan<-function(toplot, region, phenotype, maxcm, mincm, genes)
if(!exists("plotcovscan", mode="function")) source("plotcovscan_function.R")
#plot all regions
max.cm<-c(45,100,45,100)
min.cm<-c(0,45,0,45)
for (i in 1:4){
  print(
  plotcovscan(outall,region=all.regions[[i]],phenotype=all.focalphes[i],
              genes=sig.genes.out[[i]],maxcm=max.cm[i], mincm=min.cm[i])
  )
}

plot of chunk unnamed-chunk-1

##############################################
#2.10: Summarize covariate analysis
for (i in 1:4) cat(all.focalphes[i],"-",all.region.names[i],":  ", sig.genes.out[[i]], sep=" ", "\n")

ABAlyoph.dry - reg2.1 :   AT2G03140 AT2G03820 AT2G04170 AT2G04378 AT2G04380 AT2G05810 AT2G05915 AT2G07215 AT2G11140 AT2G11240 
proline.dry - reg2.2 :   AT2G38530 AT2G39110 AT2G40960 AT2G41440 AT2G42170 AT2G42490 AT2G43210 AT2G43530 AT2G43680 AT2G43980 
C13.wet - reg4.1 :   AT4G00650 AT4G00710 AT4G00720 AT4G00740 AT4G00755 AT4G00840 AT4G00880 AT4G00893 AT4G00900 AT4G00975 
C13.wet - reg5.2 :   AT5G53700 AT5G53930 AT5G54710 AT5G54720 AT5G54730 AT5G58575 AT5G61410 AT5G63020 AT5G63030 AT5G67480

# ##############################################
# #2.9: Extract go terms from significant genes
# library(biomaRt)
# mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
# results <- getBM(attributes = c("refseq_dna", "go_molecular_function_id",
#                                 "go_molecular_function__dm_name_1006"), filters = "refseq_dna",
#                  values = c("NM_030621"), mart = mart)
##############################################
#2.10: Summarize candidate genes

##############################################
##############################################
#Part 3: Effect of cytoplasm
##############################################
###############################################
#read in gene expression statistics w/ cytoplasm as a term

##############################################
#3.1: T.tests and varcomp analysis of cytoplasm
cyto.vc<-data.frame(cbind(cross$phe, covs))
vc.out<-data.frame()
for (i in 2:38){
  data.in<-cyto.vc[,c(i,41)]; colnames(data.in)[1]<-"pheno.in"
  lmer.out<-lmer(pheno.in~ 1 | cytoplasm, data=data.in)
  out.ttest<-t.test(pheno.in~cytoplasm, data=data.in)$p.value
  out<-cbind(colnames(cyto.vc)[i],data.frame(as.numeric(VarCorr(lmer.out))), attr(VarCorr(lmer.out), "sc")^2,out.ttest)
  vc.out<-rbind(vc.out, out)
}
colnames(vc.out)[1:4]<-c("phenotype","vc.cyto","vc.residual","ttest.pvalue")
vc.out$prop.cyto<-vc.out$vc.cyto/(vc.out$vc.residual+vc.out$vc.cyto)
hist(vc.out$prop.cyto)

plot of chunk unnamed-chunk-1

ggplot(vc.out, aes(x=prop.cyto,y=ttest.pvalue))+
  geom_text(aes(label=phenotype))+
  scale_y_continuous(trans=reverselog_trans(10))+
  scale_x_continuous(expand=c(.02,.02))+
  geom_hline(yintercept=0.05, lty=3)+
  theme_bw()+ggtitle("")

plot of chunk unnamed-chunk-1

##############################################
#3.2: Plots and summary of cytoplasm Scanones
cytocovar<-read.csv("dd2014_cytocovar.csv", header=T)
covs<-read.csv("dd2014_cytocovar.csv", header=T)
addcov<-NULL; intcov<-NULL #model w/out covariates
cyto<-as.data.frame(covs$cyto.num)
#oneway scans
for (i in qtlphes){
  nocovar<-scanone(grid,pheno.col=i, method="hk")
  addcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto)
  intcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto, intcovar=cyto)
  ylim<-c(0,max(max(nocovar$lod),max(addcov$lod),max(intcov$lod)))
  plot(nocovar,main=i,ylim=ylim);plot(addcov, add=T, col="red",lty=2);plot(intcov, add=T, col="blue",lty=2)
}

plot of chunk unnamed-chunk-1

#plot the difference between models
par(mfrow=c(1,1))
for (i in qtlphes){
  nocovar<-scanone(grid,pheno.col=i, method="hk")
  addcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto)
  intcov<-scanone(grid,pheno.col=i, method="hk",addcovar=cyto, intcovar=cyto)
  ylim1<-c(min(min(addcov$lod-nocovar$lod),min(intcov$lod-nocovar$lod),min(intcov$lod-addcov$lod)))
  ylim2<-c(max(max(addcov$lod-nocovar$lod),max(intcov$lod-nocovar$lod),max(intcov$lod-addcov$lod))+2)
  plot(addcov-nocovar,main=i,
       lty=2, col=rgb(1,0,0,alpha=.7), ylim=c(ylim1,ylim2))
  plot(intcov-nocovar,col=rgb(0,0,1,alpha=.4),lty=1, add=T)
  plot(intcov-addcov,col=rgb(0,1,0),lty=3, add=T)
  abline(h=pens[i,1], lty=3)
  legend("topright",legend= c("addcov-nocov", "intcov-nocov", "int-add", "threshold"), 
         col=c("red","blue","green","black"), lty=c(2,1,3,3))
  print(i); print(max(addcov-nocovar));print(max(intcov-nocovar)); cat("\n")
}

plot of chunk unnamed-chunk-1

[1] "ABAlyoph.dry"
           chr  pos   lod
C3_5631875   3 19.7 0.739
          chr  pos  lod
4_4804292   4 30.3 1.49

plot of chunk unnamed-chunk-1

[1] "C13.dry"
            chr  pos   lod
C2_16916847   2 75.1 0.237
         chr  pos  lod
2.Frag50   2 79.1 2.56

plot of chunk unnamed-chunk-1

[1] "C13.wet"
            chr  pos  lod
C5_24599755   5 77.5 0.63
          chr  pos  lod
At2g22200   2 47.7 1.59

plot of chunk unnamed-chunk-1

[1] "FT.June"
        chr pos   lod
4.F6N15   4   0 0.432
          chr  pos  lod
C5_896146   5 7.41 1.99

plot of chunk unnamed-chunk-1

[1] "GR.dry"
          chr  pos    lod
3_7307956   3 26.5 0.0595
            chr  pos  lod
C4_18038808   4 81.4 2.36

plot of chunk unnamed-chunk-1

[1] "RMR.dry"
        chr  pos   lod
c3.loc6   3 6.99 0.787
         chr  pos  lod
3_620734   3 1.44 1.06

plot of chunk unnamed-chunk-1

[1] "Rel.GR.wet"
         chr  pos    lod
c3.loc30   3 50.9 0.0748
         chr  pos  lod
c5.loc26   5 54.7 2.56

plot of chunk unnamed-chunk-1

[1] "Rolled.dry"
         chr  pos   lod
c2.loc44   2 74.4 0.679
          chr  pos  lod
4_4804292   4 30.3 1.88

plot of chunk unnamed-chunk-1

[1] "Wilted.dry"
         chr  pos    lod
c2.loc12   2 15.5 0.0353
           chr  pos  lod
4_14385343   4 66.3 2.49

plot of chunk unnamed-chunk-1

[1] "g0"
          chr  pos    lod
1_2696684   1 9.35 0.0581
            chr pos   lod
C3_16998392   3  61 0.772

plot of chunk unnamed-chunk-1

[1] "proline.dry"
         chr  pos  lod
c2.loc44   2 74.4 4.79
            chr  pos  lod
C2_16916847   2 75.1 5.36

cyto.phes<-c("proline.dry","C13.dry")
#######################
#3.3  multiple QTL model generation and statistical analysis w/ cytoplasm as a covariate (additive)
cov_stats_out<-data.frame(phenotype=character(),chromosome=numeric(),position=numeric(),
                      df=numeric(),type3SS=numeric(),LOD=numeric(),perc.var=numeric(),Fstat=numeric(),P.chi2=numeric(),P.F=numeric(),
                      effect.estimate=numeric(),effect.SE=numeric(),effect.t=numeric(),
                      lowCImarker=character(), hiCImarker=character(),
                      lowCIpos=character(), hiCIpos=character())
statcols<-names(cov_stats_out)
par(mfrow=c(4,1),mar = c(2,4,2,1))
#this is the engine that generates statistics for each phenotype
names(cyto)<-"cytoplasm"

##############################################
#3.4:manual model selection for QTL*covariate interactions
#d13C.dry interacting locus:2@79.1
model.out[[cyto.phes[2]]]

  QTL object containing genotype probabilities. 

     name chr    pos n.gen
Q1 3@58.5   3 58.473     2

  Formula: y ~ Q1 

  pLOD:  0.659

model.in<-model.out[[cyto.phes[2]]]
model.in1<-makeqtl(cross, chr=3, pos=58.473) #from stepwise
model.in2<-addtoqtl(cross, qtl=model.in1, chr=2, pos=79.1)
formula.in1<- y ~cytoplasm + Q1 + Q2 + cytoplasm:Q2
fit1<-fitqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, covar=cyto,formula=formula(formula.in1),
       method="hk")
ref.C13.dry<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, covar=cyto,formula=formula(formula.in1),method="hk")

pos: 58.47 79.08 
Iteration 1 
 Q2 pos: 79.08 -> 76.29
    LOD increase:  0.155 
 Q1 pos: 58.47 -> 58.3
    LOD increase:  0.022 
all pos: 58.47 79.08 -> 58.3 76.29 
LOD increase at this iteration:  0.177 
Iteration 2 
 Q1 pos: 58.3 -> 58.3
    LOD increase:  0 
 Q2 pos: 76.29 -> 76.29
    LOD increase:  0 
all pos: 58.3 76.29 -> 58.3 76.29 
LOD increase at this iteration:  0 
overall pos: 58.47 79.08 -> 58.3 76.29 
LOD increase overall:  0.177

fit2<-fitqtl(cross, pheno.col=cyto.phes[2], qtl=ref.C13.dry, covar=cyto,formula=formula(formula.in1),
             method="hk")
plotLodProfile(ref.C13.dry)
summary(fit1)


        fitqtl summary

Method: multiple imputation 
Model:  normal phenotype
Number of observations : 326 

Full model result
----------------------------------  
Model formula: y ~ cytoplasm + Q1 + Q2 + cytoplasm:Q2 

       df     SS     MS   LOD  %var Pvalue(Chi2) Pvalue(F)
Model   4   9.56 2.3900 6.864 9.241      2.3e-06 2.762e-06
Error 321  93.90 0.2925                                   
Total 325 103.46                                          


Drop one QTL at a time ANOVA table: 
----------------------------------  
                 df Type III SS   LOD  %var F value Pvalue(Chi2) Pvalue(F)
cytoplasm         2       3.337 2.473 3.226   5.705        0.003  0.003676
3@58.5            1       3.163 2.345 3.057  10.814        0.001  0.001119
2@79.1            2       4.750 3.493 4.591   8.119        0.000  0.000363
cytoplasm:2@79.1  1       2.808 2.086 2.714   9.598        0.002  0.002120

cytoplasm        ** 
3@58.5           ** 
2@79.1           ***
cytoplasm:2@79.1 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(fit2)


        fitqtl summary

Method: multiple imputation 
Model:  normal phenotype
Number of observations : 326 

Full model result
----------------------------------  
Model formula: y ~ cytoplasm + Q1 + Q2 + cytoplasm:Q2 

       df      SS     MS   LOD  %var Pvalue(Chi2) Pvalue(F)
Model   4   9.795 2.4486 7.041 9.467    1.567e-06  1.89e-06
Error 321  93.661 0.2918                                   
Total 325 103.455                                          


Drop one QTL at a time ANOVA table: 
----------------------------------  
                 df Type III SS   LOD  %var F value Pvalue(Chi2) Pvalue(F)
cytoplasm         2       2.680 1.997 2.591   4.593        0.010  0.010797
3@58.3            1       3.142 2.336 3.037  10.770        0.001  0.001145
2@76.3            2       5.017 3.694 4.850   8.597        0.000  0.000231
cytoplasm:2@76.3  1       2.094 1.565 2.024   7.175        0.007  0.007773

cytoplasm        *  
3@58.3           ** 
2@76.3           ***
cytoplasm:2@76.3 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##############################################
#3.5 plot interactions
par(mfrow=c(2,1),mar = c(2,4,2,1))

plot of chunk unnamed-chunk-1

mname<-find.marker(cross, 2,76.3) #from manual fit C13.dry
cyto.eff<-as.numeric(covs$cyto.num)
effectplot(cross, pheno.col=cyto.phes[2], mname2="cytoplasm", mark2=cyto.eff, geno2=c("KAS","TSU"),
           mname1=mname)
mname<-find.marker(cross, 2,74.357) #from manual fit proline.dry
effectplot(cross, pheno.col=cyto.phes[1], mname2="cytoplasm", mark2=cyto.eff, geno2=c("KAS","TSU"),
           mname1=mname)

plot of chunk unnamed-chunk-1

##############################################
#3.6 plot lod profiles-
#
ref.C13.dry.nocov<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2,
                             formula= y ~ Q1 + Q2 ,method="hk")

pos: 58.47 79.08 
Iteration 1 
 Q2 pos: 79.08 -> 74.36
    LOD increase:  0.855 
 Q1 pos: 58.47 -> 59.13
    LOD increase:  0.044 
all pos: 58.47 79.08 -> 59.13 74.36 
LOD increase at this iteration:  0.898 
Iteration 2 
 Q1 pos: 59.13 -> 59.13
    LOD increase:  0 
 Q2 pos: 74.36 -> 74.36
    LOD increase:  0 
all pos: 59.13 74.36 -> 59.13 74.36 
LOD increase at this iteration:  0 
overall pos: 58.47 79.08 -> 59.13 74.36 
LOD increase overall:  0.898

#
ref.proline.dry.nocov<-refineqtl(cross, pheno.col=cyto.phes[1], qtl=model.out[[cyto.phes[1]]],
                                   formula=formula(model.out[[cyto.phes[1]]]),method="hk")

pos: 74.36 50.17 2.791 
Iteration 1 
 Q1 pos: 74.36 -> 74.36
    LOD increase:  0 
 Q2 pos: 50.17 -> 50.17
    LOD increase:  0 
 Q3 pos: 2.791 -> 2.791
    LOD increase:  0 
all pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791 
LOD increase at this iteration:  0 
overall pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791 
LOD increase overall:  0

#
formula.in1<- y ~cytoplasm + Q1 + Q2 + Q3 + cytoplasm:Q1
ref.proline.dry<-refineqtl(cross, pheno.col=cyto.phes[1], qtl=model.out[[cyto.phes[1]]],
                           covar=cyto,formula=formula.in1,method="hk")

pos: 74.36 50.17 2.791 
Iteration 1 
 Q1 pos: 74.36 -> 74.36
    LOD increase:  0 
 Q3 pos: 2.791 -> 2.791
    LOD increase:  0 
 Q2 pos: 50.17 -> 50.17
    LOD increase:  0 
all pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791 
LOD increase at this iteration:  0 
overall pos: 74.36 50.17 2.791 -> 74.36 50.17 2.791 
LOD increase overall:  0

#
model.out[[cyto.phes[2]]]

  QTL object containing genotype probabilities. 

     name chr    pos n.gen
Q1 3@58.5   3 58.473     2

  Formula: y ~ Q1 

  pLOD:  0.659

model.in<-model.out[[cyto.phes[2]]]
model.in1<-makeqtl(cross, chr=3, pos=58.473) #from stepwise
model.in2<-addtoqtl(cross, qtl=model.in1, chr=2, pos=79.1)
formula.in1<- y ~cytoplasm + Q1 + Q2 + cytoplasm:Q2
ref.C13.dry<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, 
                       covar=cyto,formula=formula(formula.in1),method="hk")

pos: 58.47 79.08 
Iteration 1 
 Q1 pos: 58.47 -> 58.47
    LOD increase:  0 
 Q2 pos: 79.08 -> 76.29
    LOD increase:  0.155 
all pos: 58.47 79.08 -> 58.47 76.29 
LOD increase at this iteration:  0.155 
Iteration 2 
 Q2 pos: 76.29 -> 76.29
    LOD increase:  0 
 Q1 pos: 58.47 -> 58.3
    LOD increase:  0.022 
all pos: 58.47 76.29 -> 58.3 76.29 
LOD increase at this iteration:  0.022 
Iteration 3 
 Q1 pos: 58.3 -> 58.3
    LOD increase:  0 
 Q2 pos: 76.29 -> 76.29
    LOD increase:  0 
all pos: 58.3 76.29 -> 58.3 76.29 
LOD increase at this iteration:  0 
overall pos: 58.47 79.08 -> 58.3 76.29 
LOD increase overall:  0.177

proline.dry.lp<-as.data.frame(attr(ref.proline.dry, "lodprofile")[[1]])
C13.dry.lp<-as.data.frame(attr(ref.C13.dry, "lodprofile")[[2]])
proline.dry.nocov.lp<-as.data.frame(attr(ref.proline.dry.nocov, "lodprofile")[[1]])
C13.dry.nocov.lp<-as.data.frame(attr(ref.C13.dry.nocov, "lodprofile")[[2]])

lps2.2cov<-cbind(proline.dry.lp, C13.dry.lp[,3],proline.dry.nocov.lp[,3],C13.dry.nocov.lp[,3])
colnames(lps2.2cov)<-c("chr","pos","proline.dry","C13.dry","proline.dry.nocov","C13.dry.nocov")
toplot.lps<-melt(lps2.2cov, id.vars=c("chr","pos"))
length(toplot.lps[,1])/4

[1] 128

phe<-rep(c("C13.dry","proline.dry","C13.dry","proline.dry"), each=128)
covariate<-rep(c("cyto.interaction", "no.covariate"), each=256)
toplot.lps<-cbind(toplot.lps, phe, covariate)
ggplot(toplot.lps, aes(x=pos, y=value, col=covariate))+
  geom_line()+facet_grid(phe~., scales="free")+theme_bw()

plot of chunk unnamed-chunk-1

##############################################
#3.7 calculate new condifence intervals for region
ciout.C13dry<-bayesint(ref.C13.dry,qtl.index=2, expandtomarkers=T, prob=.9)
ciout.prolinedry<-bayesint(model.out[[cyto.phes[1]]],qtl.index=1, expandtomarkers=T, prob=.99)
reg2.2cov<-reg2.2; reg2.2cov[2]<-ciout.C13dry[1,2]; reg2.2cov[3]<-ciout.C13dry[3,2]

##############################################
#3.8 find genes for this regions (w/out cytoplasm effect in gene model)
#set the region
all.regions<-list(reg2.2cov)
all.focalphes<-c("proline.dry","C13.dry")
all.region.names<-c("reg2.2cov")
#plot region
exp.stats<-all.anova
exp.stats$gene.cat<-as.character(exp.stats$gene.id)
reg2.2cov.genes<-findgenes(all.regions[[1]])
exp.stats$gene.cat[exp.stats$gene.cat %in% reg2.2cov.genes]<-all.region.names[1]
exp.stats$gene.cat[!exp.stats$gene.cat %in% all.region.names]<-"other"
ggplot(exp.stats, aes(y=pos.cm, x=pos.bp, color=gene.cat))+
  scale_color_manual(values=c("black",rainbow(4)))+
  geom_point()+theme_bw()+ggtitle("physical gene position to cM marker position")+
  facet_wrap(~chromosome)

plot of chunk unnamed-chunk-1

##############################################
#3.9 Extract genes w/ significant expression differences (from ANOVA)
exp.covars.list<-list()
sig.genes.out<-list()
for (i in 1){
  temp<-exp.stats[exp.stats$gene.cat==all.region.names[i],]
  #extract 10 most signifiant genes
  siggenes<-temp[temp$sig.geno<as.numeric(quantile(temp$sig.geno,(length(temp[,1])-(length(temp[,1])-10))/length(temp[,1]))),]
  print(all.region.names[i])
  sig.genes<-print(as.character(siggenes$gene.id))
  sig.genes.out[[i]]<-sig.genes
  geneexp.culled<-geneexp[,c("treatment","line",sig.genes)]
  geneexp1<-reshape(geneexp.culled, idvar="line", timevar="treatment",direction="wide")
  names(idnames)[2]<-"line"
  exp.covars<-merge(idnames,geneexp1, by="line", all=T)
  exp.covars.list[[i]]<-exp.covars
}

[1] "reg2.2cov"
 [1] "AT2G39890" "AT2G40960" "AT2G41440" "AT2G42170" "AT2G42490"
 [6] "AT2G43210" "AT2G43530" "AT2G43680" "AT2G43980" "AT2G44290"

geneexp.culled<-geneexp[,c("treatment","line","cyto","id",reg2.2cov.genes)]
length(reg2.2cov.genes)

[1] 623

for (i in 5:length(geneexp.culled)){
  exp.in<-geneexp.culled[,c(1:4,i)]
  names(exp.in)[5]<-"expression"
  anova.out<-aov(expression ~ treatment, data= exp.in)
}


# #2.8 Run scanone for w/ and w/o covars
# outall<-data.frame(chr=numeric(), pos=numeric(),lod=numeric(),
#                    phe=character(),covar=character(),
#                    gene=character(),expression_env=character())
# for (i in all.focalphes){
#   temp<-cbind(as.data.frame(scanone(cross,pheno.col=i)),i,"nocovar","nocovar","nocovar")
#   colnames(temp)[4:7]<-c("phe","covar","gene","expression_env")
#   outall<-rbind(outall,temp)
# }
# for (j in 1:4){
#   for (i in colnames(exp.covars.list[j][[1]])[3:length(colnames(exp.covars.list[j][[1]]))]){
#     s1.out<-scanone(cross,pheno.col=all.focalphes[j],addcovar=exp.covars.list[[j]][,i])
#     s1.out<-cbind(as.data.frame(s1.out),as.character(all.focalphes[j]),as.character(i),
#                   unlist(strsplit(i,"[.]"))[1],unlist(strsplit(i,"[.]"))[2])
#     colnames(s1.out)[4:7]<-c("phe","covar","gene","expression_env")
#     outall<-rbind(outall,s1.out)
#   }
# }
# 
# ref.C13.dry<-refineqtl(cross, pheno.col=cyto.phes[2], qtl=model.in2, covar=cyto,formula=formula(formula.in1),method="hk")
# 
# lowmarker<-rownames(ciout)[1]; highmarker<-rownames(ciout)[3]
# lowposition<-ciout[1,2]; highposition<-ciout[3,2]
# cis<-rbind(cis,cbind(lowmarker,highmarker,lowposition,highposition))
# 
#        method="hk")
# addint(cross, pheno.col=cyto.phes[2], qtl=model.in, covar=cyto,formula=formula(model.in),
#        method="hk")
# addpair(cross, pheno.col=cyto.phes[2], qtl=model.in, covar=cyto,formula=formula(model.in),
#         method="hk")
# 
# out1.c4r <- addqtl(hyper, qtl=revqtl, formula=y˜Q3)
# plot(out1.c4, out1.c4r, col=c("blue", "red"))
# plot(out1.c4r - out1.c4, ylim=c(-1.7, 1.7))
# abline(h=0, lty=2, col="gray")
# out2.c4r <- addpair(hyper, qtl=revqtl, formula=y˜Q3, chr=c(1,6,7,15))
# 
# 
# 
# ##############################################
# #3.3: Bioinformatics of DNA sequence polymorphism in cytoplasm
# 
# ##############################################
# #3.4: Determination of candidate genes in cytoplasm
# 
# ##############################################
# #3.5: Cytoplasm main effect in QTLs (oneways)
# 
# ##############################################
# #3.6: Cytoplasm main effect in multi-QTL models
# 
# ##############################################
# #3.7: Cytoplasm interactive effects in QTLs (oneway)- compare w/ 3.3
# 
# ##############################################
# #3.8: Interaction plots of QTL*cyto and cyto main effect at proline
# 
# ##############################################
# #3.8: Bioinformatics of potential QTL-by-cytoplasm interactions
# 
# ##############################################
# ##############################################
# #Part 4: Epistasis
# ##############################################
# ##############################################
# 
# ##############################################
# #4.1: Extract epistasis components from stepwiseqtl
# 
# ##############################################
# #4.2: Interaction plots of epistasis
# 
# ##############################################
# #4.3: Scantwo of epistasis
# 
# ##############################################
# #4.4: fitqtl of epistasis
# 
# ##############################################
# ##############################################
# #Part 5: QTL*environment interactions
# ##############################################
# ##############################################
# 
# ##############################################
# #5.1: work on later...