#!/usr/bin/env Rscript

# Tophat/cufflinks lung data DE analysis
# Author AEM
# REF https://bioconductor.org/packages/release/bioc/vignettes/cummeRbund/inst/doc/cummeRbund-example-workflow.pdf 
#     https://github.com/trinityrnaseq/RNASeq_Trinity_Tuxedo_Workshop/wiki/Tuxedo-Genome-Guided-Transcriptome-Assembly-Workshop
#     http://genomicsclass.github.io/book/pages/rnaseq_isoform_cummerbund.html
#     http://overviewdifferentialexpression.blogspot.com/2014/03/analysis-of-results-cuffset-instance.html

# Install packages & set library
#source("https://bioconductor.org/biocLite.R") 
#biocLite("cummeRbund", dependencies = TRUE) 
library(cummeRbund)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: RSQLite
## Loading required package: ggplot2
## Loading required package: reshape2
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## Loading required package: rtracklayer
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Gviz
## Loading required package: grid
## 
## Attaching package: 'cummeRbund'
## The following object is masked from 'package:GenomicRanges':
## 
##     promoters
## The following object is masked from 'package:IRanges':
## 
##     promoters
## The following object is masked from 'package:BiocGenerics':
## 
##     conditions
# Read-in cuffdiff data
setwd("/mnt/bd2/tuxedo/cuffdiff_results")
TC_genes = readCufflinks()


# QA check: no overdisperion appears
d = dispersionPlot(genes(TC_genes))
d
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis

# Outlier replicates
pBoxRep = csBoxplot(genes(TC_genes),replicates=T) 
pBoxRep

pDendro = csDendro(genes(TC_genes),replicates=T)

pDendro
## 'dendrogram' with 2 branches and 38 members total, at height 0.3498898
# Distribution of expression
csDensity(genes(TC_genes))
## Warning: Removed 11751 rows containing non-finite values (stat_density).

# Expression as scatterplot
csScatter(genes(TC_genes), 'R', 'DF')

# Relationship between conditions
pBox = csBoxplot(genes(TC_genes))
pBox

# Find DE genes
sigGeneIds = getSig(TC_genes,alpha=0.05,level="genes") 
length(sigGeneIds)
## [1] 590
# Sig gene list
sigGenes = getGenes(TC_genes,sigGeneIds)
## Warning: Closing open result set, pending rows
## Warning: Closing open result set, pending rows
sigGenes
## CuffGeneSet instance for  590  genes
##  
## Slots:
##   annotation
##   fpkm
##   repFpkm
##   diff
##   count
##   isoforms    CuffFeatureSet instance of size 2941 
##   TSS         CuffFeatureSet instance of size 1343 
##   CDS         CuffFeatureSet instance of size 728 
##   promoters       CuffFeatureSet instance of size 590 
##   splicing        CuffFeatureSet instance of size 1343 
##   relCDS      CuffFeatureSet instance of size 590
# Find DE features (iso)
sigFeatureIds = getSig(TC_genes,alpha=0.05,level="isoforms")
sigFeatureIds
##   [1] "TCONS_00004754" "TCONS_00004760" "TCONS_00004791" "TCONS_00004794"
##   [5] "TCONS_00004894" "TCONS_00012749" "TCONS_00012764" "TCONS_00012767"
##   [9] "TCONS_00012773" "TCONS_00012787" "TCONS_00016119" "TCONS_00016367"
##  [13] "TCONS_00016737" "TCONS_00016905" "TCONS_00016916" "TCONS_00017004"
##  [17] "TCONS_00017043" "TCONS_00017075" "TCONS_00017096" "TCONS_00017513"
##  [21] "TCONS_00020897" "TCONS_00024344" "TCONS_00024392" "TCONS_00024443"
##  [25] "TCONS_00024660" "TCONS_00024668" "TCONS_00024801" "TCONS_00024810"
##  [29] "TCONS_00025073" "TCONS_00026169" "TCONS_00026618" "TCONS_00027297"
##  [33] "TCONS_00030029" "TCONS_00030033" "TCONS_00030035" "TCONS_00031789"
##  [37] "TCONS_00032046" "TCONS_00033978" "TCONS_00034198" "TCONS_00034410"
##  [41] "TCONS_00035413" "TCONS_00039639" "TCONS_00040671" "TCONS_00040690"
##  [45] "TCONS_00043221" "TCONS_00043398" "TCONS_00043452" "TCONS_00043642"
##  [49] "TCONS_00043695" "TCONS_00043848" "TCONS_00045284" "TCONS_00046718"
##  [53] "TCONS_00047322" "TCONS_00052992" "TCONS_00053094" "TCONS_00053580"
##  [57] "TCONS_00055096" "TCONS_00058445" "TCONS_00059518" "TCONS_00059721"
##  [61] "TCONS_00061612" "TCONS_00061758" "TCONS_00062452" "TCONS_00063521"
##  [65] "TCONS_00064767" "TCONS_00065331" "TCONS_00065827" "TCONS_00066378"
##  [69] "TCONS_00066580" "TCONS_00066660" "TCONS_00068134" "TCONS_00069139"
##  [73] "TCONS_00070023" "TCONS_00072534" "TCONS_00073126" "TCONS_00073486"
##  [77] "TCONS_00074126" "TCONS_00074760" "TCONS_00074984" "TCONS_00075611"
##  [81] "TCONS_00075617" "TCONS_00075747" "TCONS_00076178" "TCONS_00076220"
##  [85] "TCONS_00077028" "TCONS_00079108" "TCONS_00079427" "TCONS_00080691"
##  [89] "TCONS_00081329" "TCONS_00083613" "TCONS_00083620" "TCONS_00084810"
##  [93] "TCONS_00086448" "TCONS_00086518" "TCONS_00086798" "TCONS_00088040"
##  [97] "TCONS_00088041" "TCONS_00088049" "TCONS_00088055" "TCONS_00088060"
## [101] "TCONS_00088838" "TCONS_00088891" "TCONS_00088921" "TCONS_00089180"
## [105] "TCONS_00090482" "TCONS_00093935" "TCONS_00093942" "TCONS_00094154"
## [109] "TCONS_00094260" "TCONS_00097873" "TCONS_00099864" "TCONS_00100542"
## [113] "TCONS_00100937" "TCONS_00100940" "TCONS_00100942" "TCONS_00101068"
## [117] "TCONS_00101436" "TCONS_00101804" "TCONS_00101972" "TCONS_00102164"
## [121] "TCONS_00102231" "TCONS_00102238" "TCONS_00102847" "TCONS_00103158"
## [125] "TCONS_00103159" "TCONS_00103160" "TCONS_00103753" "TCONS_00104524"
## [129] "TCONS_00106130" "TCONS_00106896" "TCONS_00109352" "TCONS_00113178"
## [133] "TCONS_00113195" "TCONS_00113475" "TCONS_00117978" "TCONS_00120872"
## [137] "TCONS_00122082" "TCONS_00123305" "TCONS_00123488" "TCONS_00123879"
## [141] "TCONS_00125042" "TCONS_00125043" "TCONS_00125045" "TCONS_00125339"
## [145] "TCONS_00129937" "TCONS_00130509" "TCONS_00130527" "TCONS_00130901"
## [149] "TCONS_00130906" "TCONS_00131325" "TCONS_00131340" "TCONS_00131542"
## [153] "TCONS_00133545" "TCONS_00135005" "TCONS_00135030" "TCONS_00139042"
## [157] "TCONS_00139203" "TCONS_00139470" "TCONS_00139843" "TCONS_00139863"
## [161] "TCONS_00139875" "TCONS_00139927" "TCONS_00140689" "TCONS_00140690"
## [165] "TCONS_00140694" "TCONS_00142444" "TCONS_00143480" "TCONS_00148913"
## [169] "TCONS_00149115" "TCONS_00149201" "TCONS_00149202" "TCONS_00149258"
## [173] "TCONS_00149348" "TCONS_00149373" "TCONS_00149591" "TCONS_00149592"
## [177] "TCONS_00149633" "TCONS_00149683" "TCONS_00149789" "TCONS_00155597"
## [181] "TCONS_00158861" "TCONS_00159179" "TCONS_00160783" "TCONS_00160806"
## [185] "TCONS_00161381" "TCONS_00161506" "TCONS_00165758" "TCONS_00166834"
## [189] "TCONS_00167535" "TCONS_00167999" "TCONS_00168207" "TCONS_00168300"
## [193] "TCONS_00174886" "TCONS_00174887" "TCONS_00174972" "TCONS_00175066"
## [197] "TCONS_00175336" "TCONS_00175459" "TCONS_00178583" "TCONS_00178674"
## [201] "TCONS_00179366" "TCONS_00180911" "TCONS_00181333" "TCONS_00181431"
## [205] "TCONS_00181476" "TCONS_00181545" "TCONS_00181547" "TCONS_00181752"
## [209] "TCONS_00181771"
length(sigFeatureIds)
## [1] 209
# Scatterplot matrix of gene and isoform level expression
csScatterMatrix(genes(TC_genes))

csScatterMatrix(isoforms(TC_genes))

# MA plot comparing two conditions
MAplot(genes(TC_genes),"DF","R")
## Warning: Removed 3932 rows containing missing values (geom_point).

MAplot(isoforms(TC_genes),"DF","R")
## Warning: Removed 7402 rows containing missing values (geom_point).

# Volcano plot: These look kid of odd b/c the fpkm's are
# so close to 0, not sure if we we want to cutoff outliers?
csVolcanoMatrix(genes(TC_genes))

csVolcanoMatrix(isoforms(TC_genes))

# Heatmap
gene_diff_data = diffData(genes(TC_genes))
sig_gene_data = subset(gene_diff_data,(significant=='yes' | p_value < 0.01))
sig_genes = getGenes(TC_genes, sig_gene_data$gene_id)
## Warning: Closing open result set, pending rows

## Warning: Closing open result set, pending rows
csHeatmap(sig_genes, cluster='both')
## Using tracking_id, sample_name as id variables
## No id variables; using all as measure variables