Gene expression data

require(Biobase)

## Loading required package: Biobase

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

if ( !exists("OMPATH") ) {
    if ( file.exists("Parameters.R") )
        source( "Parameters.R" )
    else if ( Sys.getenv("OMPATH") != "" )
        OMPATH <- Sys.getenv("OMPATH")
    else stop( "OMPATH must be set" )
}

Creating an ExpressionSet from Multiple Parts

#Container for high-throughput assays and experimental metadata. ExpressionSet class is derived from eSet, and requires a matrix named exprs as assayData member.

#An expression set is a data object consisting of three entities: 
#1 expression matrix (`exprs`)
#2 the phenotye data (`pData`)
#3 feature data (`fData`)

Read in dataset

EXP <- read.table("data/ESet_exprs.xls", header= TRUE, as.is = TRUE)
PDAT <- read.table("data/ESet_pData.xls", header= TRUE, as.is = TRUE)
FDAT<- read.table("data/ESet_fData.xls", header= TRUE, as.is = TRUE)

ESET <- ExpressionSet(assayData=as.matrix(EXP),phenoData=AnnotatedDataFrame(PDAT),featureData = AnnotatedDataFrame(FDAT))
  head(ESET@assayData$exprs)

##          sample01  sample02  sample03  sample04  sample05  sample06
## gene001 0.5709374 0.4914444 9.0142833 0.4890737 0.9290841 0.5477738
## gene002 0.7943926 1.2928948 3.7151274 0.4710981 0.3107858 0.3702049
## gene003 4.7526783 0.7813814 0.7670947 0.3911991 0.5300689 2.7920750
## gene004 1.0730536 0.7064219 1.7214967 0.3490594 0.9715704 2.1192480
## gene005 1.1380175 0.3861156 0.6607763 0.6458684 1.9555979 0.2210942
## gene006 5.5570366 0.9559710 0.6211101 1.3926093 0.1919450 0.9092389
##          sample07  sample08  sample09  sample10
## gene001 2.9271003 0.4827680 1.4280121 0.3627236
## gene002 0.9730236 0.2142863 0.5178808 0.4532489
## gene003 0.9672190 0.5000263 2.3518499 1.3493105
## gene004 0.2195736 1.1262003 3.1674797 5.1502843
## gene005 2.2042457 0.2554549 1.3182097 2.9583066
## gene006 0.8099893 1.8039572 1.1550050 0.5354930

gMedian <- apply(ESET@assayData$exprs, 1, median) #by row
sMedian <- apply(ESET@assayData$exprs, 2, median) #by col

gMedian<-as.vector(gMedian) #100
sMedian<-as.vector(sMedian) #10


#to homework 1_Eset in my folder
OUT <- list(ESET=ESET,
            gMedian=gMedian,
            sMedian=sMedian)
OUT

## $ESET
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 100 features, 10 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: sample01 sample02 ... sample10 (10 total)
##   varLabels: sDescription1 sDescription2 sDescription3
##   varMetadata: labelDescription
## featureData
##   featureNames: gene001 gene002 ... gene100 (100 total)
##   fvarLabels: gDescription1 gDescription2
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:  
## 
## $gMedian
##   [1] 0.5593556 0.4944894 0.8743002 1.0996270 0.8993969 0.9326049 0.6116994
##   [8] 0.4783166 1.2079784 1.1013008 1.3813021 1.2586186 1.2995820 0.9314463
##  [15] 1.0111228 2.1473925 0.9054838 0.5179156 0.9399881 0.7274695 0.6121366
##  [22] 1.1145169 0.9422735 1.3177968 0.7301309 1.1729574 1.2257000 1.1151584
##  [29] 0.9938840 0.9692458 2.2991819 0.6735898 1.1406925 1.2501420 0.9672506
##  [36] 0.9453710 0.8615181 1.1858280 0.9926621 0.8600166 0.6300623 1.0618184
##  [43] 1.5525065 0.9382182 0.8927044 1.0417579 0.5969019 1.3070250 2.1395198
##  [50] 0.8776800 1.9765331 0.9016377 1.4697224 1.1811412 1.5220523 0.6511810
##  [57] 1.6324440 0.8925207 1.0122151 0.9752057 1.9531188 1.4684316 1.1089660
##  [64] 1.0099235 0.7542320 1.3510494 1.5125574 0.8273333 0.9963571 2.0968497
##  [71] 0.9312484 0.6956434 0.7976390 0.6819794 1.2090997 0.9018078 1.0172295
##  [78] 1.0761175 1.3254350 0.8333492 1.0279081 1.4194976 0.7838568 0.6815782
##  [85] 0.9637676 0.9822416 1.5212472 0.8257384 0.7235211 0.9331107 1.0758978
##  [92] 1.6612200 2.3693938 0.5475130 1.1054799 0.9914648 1.3735738 0.6715934
##  [99] 1.1406047 1.0275897
## 
## $sMedian
##  [1] 1.0637438 0.7978973 1.0366329 0.9965556 1.1794886 0.9165416 0.8986762
##  [8] 1.2569936 1.1097286 0.9765686

saveRDS(OUT,file=file.path("homework_Hsu","homework1_ESet.RDS"))

Bayes Thereom

#This example is taken from Spiegelhalter et al. (2004). Suppose a new HIV test is claimed to have "95% sensitivity and 98% specificity." In a population with an HIV prevalence of 1/1000, what is the probability that a patient testing positive actually has HIV? Report your answer as follows 

ANS<-(0.95*0.001)/((0.95*0.001)+(0.02*(1-0.001)))
saveRDS(ANS,file=file.path("homework_Hsu", "homework1_Bayes.RDS"))

BS831HW1Hsu - Gene expression

Michelle C. Hsu

April 1, 2019

Gene expression data

Creating an ExpressionSet from Multiple Parts

Read in dataset

Bayes Thereom