This document depends on the following packages:
library(devtools)
library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colnames,
## do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff,
## sort, table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(broom)
module 2, quiz question #5
con = url("http://bowtie-bio.sourceforge.net/recount/ExpressionSets/montpick_eset.RData")
load(file = con)
close(con)
mp = montpick.eset
pdata = pData(mp)
edata = as.data.frame(exprs(mp))
fdata = fData(mp)
ls()
## [1] "con" "edata" "fdata" "montpick.eset"
## [5] "mp" "pdata"
Perform the log2(data + 1) transform. Then fit a regression model to each sample using population as the outcome. Do this using the lm.fit function (hint: don’t forget the intercept). What is the dimension of the residual matrix, the effects matrix and the coefficients matrix?
A. Residual matrix: 52580 x 129 Effects matrix: 52580 x129 Coefficients matrix: 2 x 52580
B. Residual matrix: 129 x 52580 Effects matrix: 129 x 52580 Coefficients matrix: 2 x 52580
C. Residual matrix: 52580 x 129 Effects matrix: 129 x 52580 Coefficients matrix: 2 x 52580
D. Residual matrix: 52580 x 129 Effects matrix: 52580 x129 Coefficients matrix: 52580 x 2
names(pdata)
## [1] "sample.id" "num.tech.reps" "population" "study"
edata = as.matrix(edata)
edata = log2(edata + 1)
mod = model.matrix(~ pdata$population)
fit = lm.fit(mod, t(edata))
names(fit)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
fit$coefficients[,1]
## (Intercept) pdata$populationYRI
## 0.08333333 0.00608587
tidy(lm(as.numeric(edata[1,]) ~ pdata$population))
## term estimate std.error statistic p.value
## 1 (Intercept) 0.08333333 0.04470046 1.86426110 0.06459378
## 2 pdata$populationYRI 0.00608587 0.06111986 0.09957271 0.92084057
par(mfrow = c(1, 2))
hist(fit$coefficients[1, ], breaks = 10, col = 2, ylim = c(0, 50000), xlab = "Intercept")
hist(fit$coefficients[2, ], breaks = 10, col = 2, ylim = c(0, 50000), xlab = "PopulationYRI")
abline(v = 0, lwd = 2, col = 1)
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] broom_0.4.2 Biobase_2.34.0 BiocGenerics_0.20.0
## [4] devtools_1.13.2
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.11 knitr_1.16 magrittr_1.5 mnormt_1.5-5
## [5] lattice_0.20-35 R6_2.2.1 rlang_0.1.1 stringr_1.2.0
## [9] plyr_1.8.4 dplyr_0.5.0 tools_3.3.1 grid_3.3.1
## [13] nlme_3.1-131 psych_1.7.5 DBI_0.6-1 withr_1.0.2
## [17] htmltools_0.3.6 yaml_2.1.14 rprojroot_1.2 digest_0.6.12
## [21] assertthat_0.2.0 tibble_1.3.3 tidyr_0.6.3 reshape2_1.4.2
## [25] memoise_1.1.0 evaluate_0.10 rmarkdown_1.5 stringi_1.1.5
## [29] backports_1.1.0 foreign_0.8-68