# Differential expression analysis of Hal Fil F1 population
# Script #2
# Author- JT Lovell
# Data - 25-March 2015
# Cis -trans test post-processing, plotting and generation of figures for MS

# Table of contents:
#1. Get data in shape, QC
#2. 
#3. 
#4. 

setwd("~/Library/Mobile Documents/com~apple~CloudDocs/ph2015_eqtl")
library(qvalue)
# Part 1: Set up data
# 1.1: Read in the data
ct.stats<-read.csv("ct2015_lmstats.csv")
# resid<-read.csv("ct2015_allexpresiduals.csv")
# norm.counts<-read.csv("ct2015_deseqf012_normcounts.csv")
# time.stats<-read.csv("ct2015_timetrtstats.csv")

# 1.2: Generate adjusted p-values
for(i in colnames(ct.stats)[which(grepl("p_", colnames(ct.stats)))]){
  dat<-ct.stats[,i]
  qs<-data.frame(qvalue(as.numeric(dat))$qvalue)
  colnames(qs)<-gsub("p_","q_",i)
  ct.stats<-cbind(ct.stats,qs)
}
# 1.3: Generate q-value ranks of genes for plotting
for(i in colnames(ct.stats)[which(grepl("p_", colnames(ct.stats)))]){
  dat<-ct.stats[,i]
  rs<-data.frame(rank(dat))
  colnames(rs)<-gsub("p_","rank_",i)
  ct.stats<-cbind(ct.stats,rs)
}
#1.4: Generate "best" trans column
ct.stats$p_best.trans<-with(ct.stats,ifelse(p_trans.hal<p_trans.fil, p_trans.hal,p_trans.fil))
ct.stats$q_best.trans<-with(ct.stats,ifelse(p_trans.hal<p_trans.fil, q_trans.hal,q_trans.fil))
ct.stats$rank_best.trans<-rank(ct.stats$p_best.trans)

ct.stats$p_best.tt<-with(ct.stats,ifelse(p_th.trt<p_tf.trt, p_th.trt,p_tf.trt))
ct.stats$q_best.tt<-with(ct.stats,ifelse(p_th.trt<p_tf.trt, q_th.trt,q_tf.trt))
ct.stats$rank_best.tt<-rank(ct.stats$p_best.tt)

# 1.5: basic QC... make sure p-value/qvalue distributions look ok
par(mfrow=c(2,1))
for(i in colnames(ct.stats)[grep("p_", colnames(ct.stats))]){
  j<-gsub("p_","q_",i)
  hist(ct.stats[,i],main=i,xlim=c(0,1),breaks=((max(ct.stats[,i])-min(ct.stats[,i]))*1000))
  hist(ct.stats[,j],main=j,xlim=c(0,1),breaks=((max(ct.stats[,j])-min(ct.stats[,j]))*1000))
  cat(gsub("p_","",i),": n. p<0.05 = ", length(ct.stats[,i][ct.stats[,i]<0.05]),
      "  ..  n. q<0.05 = ", length(ct.stats[,j][ct.stats[,j]<0.05]),
      "  ..  n. q<0.1 = ", length(ct.stats[,j][ct.stats[,j]<0.1]),
      "\n", sep="")
}

## intercept: n. p<0.05 = 7253  ..  n. q<0.05 = 8791  ..  n. q<0.1 = 10993

## cis: n. p<0.05 = 5753  ..  n. q<0.05 = 5215  ..  n. q<0.1 = 6385

## trans.fil: n. p<0.05 = 1412  ..  n. q<0.05 = 96  ..  n. q<0.1 = 186

## trans.hal: n. p<0.05 = 1199  ..  n. q<0.05 = 45  ..  n. q<0.1 = 88

## trt: n. p<0.05 = 831  ..  n. q<0.05 = 4  ..  n. q<0.1 = 15

## cis.trt: n. p<0.05 = 1124  ..  n. q<0.05 = 164  ..  n. q<0.1 = 260

## tf.trt: n. p<0.05 = 705  ..  n. q<0.05 = 0  ..  n. q<0.1 = 1

## th.trt: n. p<0.05 = 535  ..  n. q<0.05 = 0  ..  n. q<0.1 = 0

## best.trans: n. p<0.05 = 2416  ..  n. q<0.05 = 137  ..  n. q<0.1 = 266

## best.tt: n. p<0.05 = 1198  ..  n. q<0.05 = 0  ..  n. q<0.1 = 1
# length(ct.stats$q_th.trt[ct.stats$q_th.trt<0.05])
# length(ct.stats$p_best.trans[ct.stats$q_best.trans<0.05])
# 
# 
# with(ct.stats, plot(rank_cis,p_cis, pch="."))
# 
# # Part 2: Summary plots:
# # 2.1: Histograms and volcano plots
# # 2.2: Venn diagrams
# # 2.3: Ternary plots
# 
# # Part 3: Clustering
# # 3.1: All genes - categorize by significance
# # 3.2: Cis * treatment directional effects 
# # 3.1: 
# 
# # Part 2:
# # Part 2:
# 
# lmstats<-read.csv("ct2015_lmstats.csv")
# pdf("ct2015_statsfromresidmodel.pdf")
# par(mfrow=c(2,1))
# for( i in 1:8){
#   dat<-stats2.out[,c(i,i+8,i+16)]
#   hist(dat[,3], main=sn[i], xlab="pvalues", breaks=100)
#   plot(dat[,1], -log10(dat[,3]), ylab="-log10 Pvalue", xlab="effect estimate", main=sn[i])
# }   
# dev.off()
# r2<-summary(lm.out)[[8]]
# stats<-data.frame(ptime,ptt,r2)
# stats.out<-rbind(stats.out,stats)
# print(length(stats.out[,1]))
# }
# 
# }
# 
# stats2.out[stats2.out$p_th.trt<0.00001,]
# stats2.out[stats2.out$p_tf.trt<0.00001,]
# dds$Treatment <- relevel(dds$Treatment, "Dry")
# dds$generation <- relevel(dds$generation, "F0")
# dds$allele <- relevel(dds$allele, "fil")
# 
# dge <- DGEList(counts=counts.t)
# dge <- calcNormFactors(dge)
# v <- voom(dge,plot=TRUE)
# i="Pahalv11b035147m.g"
# dat<-all.raw[,c("Treatment","generation","allele","cis","trans.fil","trans.hal",
#                 "time.sampled","VWC_July",i)]
# dat$Pahalv11b035147m.g[which(dat$Pahalv11b035147m.g<100)]<-NA
# dat$qn<-quantnorm(dat[,i])
# 
# ggplot(dat, aes(x=time.sampled, y=qn,color=Treatment, shape=allele))+
#   geom_point()+theme_classic()+
#   geom_smooth(aes(group=Treatment), method="lm", fullrange=TRUE)
# colnames(dat)[9]<-"counts.knmm"
# dat$counts.knmm<-as.numeric(as.character(dat$counts.knmm))
# dat$qn<-quantnorm(dat$counts.knmm)
# nb1<-glm.nb(counts.knmm ~ cis + trans.fil + trans.hal + 
#               Treatment + time.sampled + Treatment*time.sampled +
#               cis*Treatment + trans.fil*Treatment+ trans.hal*Treatment,link="log",
#             data=dat)
# 
# library(MASS)
# library(ggplot2)
# lm.out<-data.frame()
# for(i in all.genes){
#   dat<-kd[,c("Treatment","generation","Allele","Cis","Trans_fil","Trans_hal",
#              "time.sampled","MD_LWP_July",i)]
#   colnames(dat)[9]<-"counts.knmm"
#   dat$counts.knmm<-as.numeric(as.character(dat$counts.knmm))
#   dat$qn<-quantnorm(dat$counts.knmm)
#   nb1<-glm.nb(counts.knmm~generation+Allele+Treatment+
#                 #MD_LWP_July+
#                 time.sampled+
#                 #generation*MD_LWP_July+Allele*MD_LWP_July+
#                 #generation*time.sampled+Allele*time.sampled+
#                 generation*Treatment+Allele*Treatment,link="log",
#               data=dat)
#   
#   hcc<-data.frame(t(hal.counts.cull))
#   fcc<-data.frame(t(fil.counts.cull))
#   
#   
#   
#   all.raw2<-all.raw[,grep("Pahalv", colnames(all.raw))]
#   raw.to.jmpgenomics<-data.frame(t(all.raw2))
#   raw.to.jmpgenomics[1:5,1:5]
#   write.csv(raw.to.jmpgenomics, file="ct2015_rawcounts_tojmpg.csv", row.names=T)
#   write.csv(design, file="ct2015_design_tojmpg.csv", row.names=F)
#   
#   #look at some particular genes
#   colnames(all.counts)
#   
#   xu<-data.frame(t(all.counts[c("Pahalv11b016018m", "Pahalv11b035147m","Pahalv11b011369m", "Pahalv11b055185m"),]))
#   xu$jgi.id<-rownames(xu)
#   # xu<-merge(info,xu,by="jgi.id")
#   test<-melt(xu, id.vars="jgi.id")
#   colnames(test)[2:3]<-c("gene","counts")
#   xu<-merge(info,test,by="jgi.id")
#   xu<-xu[-grep("FH",xu$id),]
#   xu$id<-as.character(xu$id)
#   
#   time.sampled <- as.character(xu$Tissue_Time_July)
#   time.sampled<-sapply(strsplit(time.sampled,":"),
#                        function(x) {
#                          x <- as.numeric(x)
#                          (x[1]-10)*60+(x[2]-58)
#                        }
#   )
#   
#   xu<-xu[,c("id","Treatment","counts","id","gene")]
#   xu$time<-time.sampled
#   pdf("xiaoyu.plots.pdf")
#   ggplot(xu, aes(x=id.1,y=counts,color=Treatment))+
#     geom_boxplot(position="dodge")+
#     facet_wrap(~gene, scales="free")+
#     theme_bw()
#   ggplot(xu, aes(x=time,y=counts,color=Treatment, shape=id.1))+
#     geom_point()+
#     facet_wrap(~gene, scales="free")+
#     theme_bw()
#   dev.off()
#