Gene expression data
require(Biobase)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
if ( !exists("OMPATH") ) {
if ( file.exists("Parameters.R") )
source( "Parameters.R" )
else if ( Sys.getenv("OMPATH") != "" )
OMPATH <- Sys.getenv("OMPATH")
else stop( "OMPATH must be set" )
}
Creating an ExpressionSet from Multiple Parts
#Container for high-throughput assays and experimental metadata. ExpressionSet class is derived from eSet, and requires a matrix named exprs as assayData member.
#An expression set is a data object consisting of three entities:
#1 expression matrix (`exprs`)
#2 the phenotye data (`pData`)
#3 feature data (`fData`)
Read in dataset
EXP <- read.table("data/ESet_exprs.xls", header= TRUE, as.is = TRUE)
PDAT <- read.table("data/ESet_pData.xls", header= TRUE, as.is = TRUE)
FDAT<- read.table("data/ESet_fData.xls", header= TRUE, as.is = TRUE)
ESET <- ExpressionSet(assayData=as.matrix(EXP),phenoData=AnnotatedDataFrame(PDAT),featureData = AnnotatedDataFrame(FDAT))
head(ESET@assayData$exprs)
## sample01 sample02 sample03 sample04 sample05 sample06
## gene001 0.5709374 0.4914444 9.0142833 0.4890737 0.9290841 0.5477738
## gene002 0.7943926 1.2928948 3.7151274 0.4710981 0.3107858 0.3702049
## gene003 4.7526783 0.7813814 0.7670947 0.3911991 0.5300689 2.7920750
## gene004 1.0730536 0.7064219 1.7214967 0.3490594 0.9715704 2.1192480
## gene005 1.1380175 0.3861156 0.6607763 0.6458684 1.9555979 0.2210942
## gene006 5.5570366 0.9559710 0.6211101 1.3926093 0.1919450 0.9092389
## sample07 sample08 sample09 sample10
## gene001 2.9271003 0.4827680 1.4280121 0.3627236
## gene002 0.9730236 0.2142863 0.5178808 0.4532489
## gene003 0.9672190 0.5000263 2.3518499 1.3493105
## gene004 0.2195736 1.1262003 3.1674797 5.1502843
## gene005 2.2042457 0.2554549 1.3182097 2.9583066
## gene006 0.8099893 1.8039572 1.1550050 0.5354930
gMedian <- apply(ESET@assayData$exprs, 1, median) #by row
sMedian <- apply(ESET@assayData$exprs, 2, median) #by col
gMedian<-as.vector(gMedian) #100
sMedian<-as.vector(sMedian) #10
#to homework 1_Eset in my folder
OUT <- list(ESET=ESET,
gMedian=gMedian,
sMedian=sMedian)
OUT
## $ESET
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 100 features, 10 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: sample01 sample02 ... sample10 (10 total)
## varLabels: sDescription1 sDescription2 sDescription3
## varMetadata: labelDescription
## featureData
## featureNames: gene001 gene002 ... gene100 (100 total)
## fvarLabels: gDescription1 gDescription2
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
##
## $gMedian
## [1] 0.5593556 0.4944894 0.8743002 1.0996270 0.8993969 0.9326049 0.6116994
## [8] 0.4783166 1.2079784 1.1013008 1.3813021 1.2586186 1.2995820 0.9314463
## [15] 1.0111228 2.1473925 0.9054838 0.5179156 0.9399881 0.7274695 0.6121366
## [22] 1.1145169 0.9422735 1.3177968 0.7301309 1.1729574 1.2257000 1.1151584
## [29] 0.9938840 0.9692458 2.2991819 0.6735898 1.1406925 1.2501420 0.9672506
## [36] 0.9453710 0.8615181 1.1858280 0.9926621 0.8600166 0.6300623 1.0618184
## [43] 1.5525065 0.9382182 0.8927044 1.0417579 0.5969019 1.3070250 2.1395198
## [50] 0.8776800 1.9765331 0.9016377 1.4697224 1.1811412 1.5220523 0.6511810
## [57] 1.6324440 0.8925207 1.0122151 0.9752057 1.9531188 1.4684316 1.1089660
## [64] 1.0099235 0.7542320 1.3510494 1.5125574 0.8273333 0.9963571 2.0968497
## [71] 0.9312484 0.6956434 0.7976390 0.6819794 1.2090997 0.9018078 1.0172295
## [78] 1.0761175 1.3254350 0.8333492 1.0279081 1.4194976 0.7838568 0.6815782
## [85] 0.9637676 0.9822416 1.5212472 0.8257384 0.7235211 0.9331107 1.0758978
## [92] 1.6612200 2.3693938 0.5475130 1.1054799 0.9914648 1.3735738 0.6715934
## [99] 1.1406047 1.0275897
##
## $sMedian
## [1] 1.0637438 0.7978973 1.0366329 0.9965556 1.1794886 0.9165416 0.8986762
## [8] 1.2569936 1.1097286 0.9765686
saveRDS(OUT,file=file.path("homework_Hsu","homework1_ESet.RDS"))
Bayes Thereom
#This example is taken from Spiegelhalter et al. (2004). Suppose a new HIV test is claimed to have "95% sensitivity and 98% specificity." In a population with an HIV prevalence of 1/1000, what is the probability that a patient testing positive actually has HIV? Report your answer as follows
ANS<-(0.95*0.001)/((0.95*0.001)+(0.02*(1-0.001)))
saveRDS(ANS,file=file.path("homework_Hsu", "homework1_Bayes.RDS"))