Dependencies: This document depends on the following packages:

library(devtools)
library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colnames,
##     do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff,
##     sort, table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-17. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
library(broom)

module 2, quiz question #1

Download the data

con = url("http://bowtie-bio.sourceforge.net/recount/ExpressionSets/montpick_eset.RData")
load(file = con)
close(con)
mp = montpick.eset
pdata = pData(mp) # Phenotype data set
edata = as.data.frame(exprs(mp)) # Experimental data set
fdata = fData(mp) # Feature data set
ls()
## [1] "con"           "edata"         "fdata"         "montpick.eset"
## [5] "mp"            "pdata"
dim(edata)
## [1] 52580   129
edata
summary(edata)
hist(log2(edata[,1] + 1),
     breaks = 100,
     col = 2,
     xlim = c(0, 15),
     ylim = c(0, 300))

Calculate singular vectors & percent variance explained

Here we calculate the singular vectors for the three conditions. a) NO transformations? b) log2(data + 1) transform? c) log2(data + 1) transform and subtract row means?

svd_a = svd(edata)
svd_b = svd(log2(edata + 1))

edata = log2(edata + 1)
edata_centered = edata - rowMeans(edata)

svd_c = svd(edata_centered)
names(svd_a)
## [1] "d" "u" "v"

Now, look at the percent variance explained

The percent of variance explained is given by the formula = d_ii / {∑_j d_jj^2}

plot(svd1\(d^2/sum(svd1\)d^2),ylab=“Percent Variance Explained”,col=2)

par(pch = 19)
par(mfrow=c(1,3))

# NO Transformations
plot(svd_a$d^2 / sum(svd_a$d^2),
     main = "%Var w NO Transform",
     ylim = c(0, 1),
     ylab = "Percent Variance Explained",
     col = 2)
# Log2 Transformation
plot(svd_b$d^2 / sum(svd_b$d^2),
     main = "%Var w Log2 Transform",
     ylim = c(0, 1),
     ylab = "Percent Variance Explained",
     col = 2)

# Log2 Transformation and Subtract rowMeans
plot(svd_c$d^2 / sum(svd_c$d^2),
     main = "%Var w Log2 & Subtract rowMeans",
     ylim = c(0, 1),
     ylab = "Percent Variance Explained",
     col = 2)

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] broom_0.4.2         sva_3.22.0          genefilter_1.56.0  
## [4] mgcv_1.8-17         nlme_3.1-131        Biobase_2.34.0     
## [7] BiocGenerics_0.20.0 devtools_1.13.2    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.11         plyr_1.8.4           bitops_1.0-6        
##  [4] tools_3.3.1          digest_0.6.12        tibble_1.3.3        
##  [7] annotate_1.52.1      evaluate_0.10        memoise_1.1.0       
## [10] RSQLite_1.1-2        lattice_0.20-35      rlang_0.1.1         
## [13] psych_1.7.5          Matrix_1.2-8         DBI_0.6-1           
## [16] yaml_2.1.14          withr_1.0.2          stringr_1.2.0       
## [19] dplyr_0.5.0          knitr_1.16           S4Vectors_0.12.1    
## [22] IRanges_2.8.1        stats4_3.3.1         rprojroot_1.2       
## [25] grid_3.3.1           R6_2.2.1             AnnotationDbi_1.36.2
## [28] XML_3.98-1.7         survival_2.41-2      foreign_0.8-68      
## [31] rmarkdown_1.5        tidyr_0.6.3          reshape2_1.4.2      
## [34] magrittr_1.5         backports_1.1.0      htmltools_0.3.6     
## [37] splines_3.3.1        mnormt_1.5-5         assertthat_0.2.0    
## [40] xtable_1.8-2         stringi_1.1.5        RCurl_1.95-4.8

EOF