Part 1 - set up
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("Glimma")
#BiocManager::install("limma")
#BiocManager::install("edgeR")
#BiocManager::install("Homo.sapiens")
#BiocManager::install("stringr")
library(limma)
library(Glimma)
library(edgeR)
library(Homo.sapiens)#for human genes
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: OrganismDbi
## Loading required package: GenomicFeatures
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GO.db
##
## Loading required package: org.Hs.eg.db
##
## Loading required package: TxDb.Hsapiens.UCSC.hg19.knownGene
library(stringr)
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final')
Part 2 - Unzip
# unzip whitemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteMale')
dir.create("SampleFiles")
## Warning in dir.create("SampleFiles"): 'SampleFiles'å·²å˜åœ¨
filepath <- dir(path ="./gdc_download_20201111_071754.200344",full.names = TRUE)
for(wd in filepath){
files <-dir(path = wd,pattern="gz$") #
fromfilepath <- paste(wd,"/",files,sep ="")
tofilepath <- paste("./SampleFiles/",files,sep ="")
file.copy(fromfilepath,tofilepath)
}
# unzip whitefemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteFemale')
dir.create("SampleFiles2")
## Warning in dir.create("SampleFiles2"): 'SampleFiles2'å·²å˜åœ¨
filepath <- dir(path ="./gdc_download_20201111_071940.367741",full.names = TRUE)
for(wd in filepath){
files <-dir(path = wd,pattern="gz$")
fromfilepath <- paste(wd,"/",files,sep ="")
tofilepath <- paste("./SampleFiles2/",files,sep ="")
file.copy(fromfilepath,tofilepath)
}
# unzip asianfemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/AsianFemale')
dir.create("SampleFiles3")
## Warning in dir.create("SampleFiles3"): 'SampleFiles3'å·²å˜åœ¨
filepath <- dir(path ="./gdc_download_20201111_071444.004912",full.names = TRUE)
for(wd in filepath){
files <-dir(path = wd,pattern="gz$")
fromfilepath <- paste(wd,"/",files,sep ="")
tofilepath <- paste("./SampleFiles3/",files,sep ="")
file.copy(fromfilepath,tofilepath)
}
# unzip asianmale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/AsianMale')
dir.create("SampleFiles4")
## Warning in dir.create("SampleFiles4"): 'SampleFiles4'å·²å˜åœ¨
filepath <- dir(path ="./gdc_download_20201111_071307.660905",full.names = TRUE)
for(wd in filepath){
files <-dir(path = wd,pattern="gz$")
fromfilepath <- paste(wd,"/",files,sep ="")
tofilepath <- paste("./SampleFiles4/",files,sep ="")
file.copy(fromfilepath,tofilepath)
}
# Check Files and manuanlly unzip desired data
Part 3 - Rename and transfer files to working directory
#tarDir <- "/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups"
#dir.create(tarDir)
#setwd("/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups")
#old_files1 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteMale/SampleFiles", pattern = ".htseq.counts", full.names = TRUE)
#new_files1 <- c("WM01.htseq.counts",
#"WM02.htseq.counts",
#"WM03.htseq.counts",
#"WM04.htseq.counts",
#"WM05.htseq.counts",
#"WM06.htseq.counts",
#"WM07.htseq.counts",
#"WM08.htseq.counts",
#"WM09.htseq.counts",
#"WM10.htseq.counts",
#"WM11.htseq.counts",
#"WM12.htseq.counts",
#"WM13.htseq.counts",
#"WM14.htseq.counts",
#"WM15.htseq.counts",
#"WM16.htseq.counts",
#"WM17.htseq.counts",
#"WM18.htseq.counts",
#"WM19.htseq.counts",
#"WM20.htseq.counts",
#"WM21.htseq.counts",
#"WM22.htseq.counts",
#"WM23.htseq.counts",
#"WM24.htseq.counts",
#"WM25.htseq.counts",
#"WM26.htseq.counts",
#"WM27.htseq.counts",
#"WM28.htseq.counts",
#"WM29.htseq.counts",
#"WM30.htseq.counts")
#file.copy(from = old_files1, to = new_files1)
#old_files2 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteFemale/SampleFiles2", pattern = ".htseq.counts", full.names = TRUE)
#new_files2 <- c("WF01.htseq.counts",
#"WF02.htseq.counts",
#"WF03.htseq.counts",
#"WF04.htseq.counts")
#file.copy(from = old_files2, to = new_files2)
#old_files3 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/AsianFemale/SampleFiles3", pattern = ".htseq.counts", full.names = TRUE)
#new_files3 <- c("AF01.htseq.counts",
#"AF02.htseq.counts",
#"AF03.htseq.counts",
#"AF04.htseq.counts")
#file.copy(from = old_files3, to = new_files3)
#old_files4 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/AsianMale/SampleFiles4", pattern = ".htseq.counts", full.names = TRUE)
#new_files4 <- c("AM01.htseq.counts",
#"AM02.htseq.counts",
#"AM03.htseq.counts",
#"AM04.htseq.counts",
#"AM05.htseq.counts",
#"AM06.htseq.counts",
#"AM07.htseq.counts",
#"AM08.htseq.counts",
#"AM09.htseq.counts",
#"AM10.htseq.counts",
#"AM11.htseq.counts",
#"AM12.htseq.counts",
#"AM13.htseq.counts",
#"AM14.htseq.counts",
#"AM15.htseq.counts",
#"AM16.htseq.counts",
#"AM17.htseq.counts",
#"AM18.htseq.counts",
#"AM19.htseq.counts",
#"AM20.htseq.counts",
#"AM21.htseq.counts",
#"AM22.htseq.counts",
#"AM23.htseq.counts",
#"AM24.htseq.counts",
#"AM25.htseq.counts",
#"AM26.htseq.counts",
#"AM27.htseq.counts",
#"AM28.htseq.counts",
#"AM29.htseq.counts",
#"AM30.htseq.counts")
#file.copy(from = old_files4, to = new_files4)
Part 4 - Reading in counts data via readDGE
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups')
files <- c("WM01.htseq.counts",
"WM02.htseq.counts",
"WM03.htseq.counts",
"WM04.htseq.counts",
"WM05.htseq.counts",
"WM06.htseq.counts",
"WM07.htseq.counts",
"WM08.htseq.counts",
"WM09.htseq.counts",
"WM10.htseq.counts",
"WM11.htseq.counts",
"WM12.htseq.counts",
"WM13.htseq.counts",
"WM14.htseq.counts",
"WM15.htseq.counts",
"WM16.htseq.counts",
"WM17.htseq.counts",
"WM18.htseq.counts",
"WM19.htseq.counts",
"WM20.htseq.counts",
"WM21.htseq.counts",
"WM22.htseq.counts",
"WM23.htseq.counts",
"WM24.htseq.counts",
"WM25.htseq.counts",
"WM26.htseq.counts",
"WM27.htseq.counts",
"WM28.htseq.counts",
"WM29.htseq.counts",
"WM30.htseq.counts","WF01.htseq.counts",
"WF02.htseq.counts",
"WF03.htseq.counts",
"WF04.htseq.counts","AF01.htseq.counts",
"AF02.htseq.counts",
"AF03.htseq.counts",
"AF04.htseq.counts",
"AM01.htseq.counts",
"AM02.htseq.counts",
"AM03.htseq.counts",
"AM04.htseq.counts",
"AM05.htseq.counts",
"AM06.htseq.counts",
"AM07.htseq.counts",
"AM08.htseq.counts",
"AM09.htseq.counts",
"AM10.htseq.counts",
"AM11.htseq.counts",
"AM12.htseq.counts",
"AM13.htseq.counts",
"AM14.htseq.counts",
"AM15.htseq.counts",
"AM16.htseq.counts",
"AM17.htseq.counts",
"AM18.htseq.counts",
"AM19.htseq.counts",
"AM20.htseq.counts",
"AM21.htseq.counts",
"AM22.htseq.counts",
"AM23.htseq.counts",
"AM24.htseq.counts",
"AM25.htseq.counts",
"AM26.htseq.counts",
"AM27.htseq.counts",
"AM28.htseq.counts",
"AM29.htseq.counts",
"AM30.htseq.counts")
#for(i in paste(files, ".gz", sep=""))
# R.utils::gunzip(i, overwrite=TRUE)
x <- readDGE(files, columns=c(1,2))
## Meta tags detected: __no_feature, __ambiguous, __too_low_aQual, __not_aligned, __alignment_not_unique
Part 5 - Sorting Data
# strip the suffix
samplenames <- substring(colnames(x), 1, 4)
samplenames
## [1] "WM01" "WM02" "WM03" "WM04" "WM05" "WM06" "WM07" "WM08" "WM09" "WM10"
## [11] "WM11" "WM12" "WM13" "WM14" "WM15" "WM16" "WM17" "WM18" "WM19" "WM20"
## [21] "WM21" "WM22" "WM23" "WM24" "WM25" "WM26" "WM27" "WM28" "WM29" "WM30"
## [31] "WF01" "WF02" "WF03" "WF04" "AF01" "AF02" "AF03" "AF04" "AM01" "AM02"
## [41] "AM03" "AM04" "AM05" "AM06" "AM07" "AM08" "AM09" "AM10" "AM11" "AM12"
## [51] "AM13" "AM14" "AM15" "AM16" "AM17" "AM18" "AM19" "AM20" "AM21" "AM22"
## [61] "AM23" "AM24" "AM25" "AM26" "AM27" "AM28" "AM29" "AM30"
colnames(x) <- samplenames
# Group by Race
group <- as.factor(rep(c("White","Asian"),c(34,34)))
x$samples$group <- group
# Group by gender
gender <- as.factor(rep(c("Male","Female","Male"),c(30,8,30)))
x$samples$gender<-gender
# Look for the geneid corresponding gene-symbols and locations of chromosome
geneid <- rownames(x)
#geneid
# Remove the decimal point at ENSEMBL
gene <- str_match(geneid, "(\\w*).*")
geneid <- gene[,2]
# go through Homo pacakge and get symbol and chromosome location
genes <- select(Homo.sapiens,
keys=geneid,
columns=c("SYMBOL","TXCHROM"),
keytype="ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
head(genes)
## ENSEMBL SYMBOL TXCHROM
## 1 ENSG00000000005 TNMD chrX
## 2 ENSG00000000419 DPM1 chr20
## 3 ENSG00000000457 SCYL3 chr1
## 4 ENSG00000000460 C1orf112 chr1
## 5 ENSG00000000938 FGR chr1
## 6 ENSG00000000971 CFH chr1
# remove the overlapped genes
genes <- genes[!duplicated(genes$ENSEMBL),]
# import genes to DGEList
x$genes <- genes
#x
Part 6 - Data preprocessing
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)
# Get the average and median of the sample library size
L <- mean(x$samples$lib.size) * 1e-6
M <- median(x$samples$lib.size) * 1e-6
c(L, M)
## [1] 80.27711 75.63755
# see the summary
summary(lcpm)
## WM01 WM02 WM03 WM04
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-4.6226 Median :-4.5340 Median :-4.5704
## Mean :-2.6314 Mean :-2.5396 Mean :-2.3890 Mean :-2.4082
## 3rd Qu.:-0.4149 3rd Qu.:-0.4529 3rd Qu.:-0.2724 3rd Qu.:-0.1645
## Max. :17.7454 Max. :17.9583 Max. :17.4635 Max. :17.8388
## WM05 WM06 WM07 WM08
## Min. :-5.32692 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.32692 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-4.80685 Median :-4.7634 Median :-4.6601 Median :-4.8768
## Mean :-2.41955 Mean :-2.6985 Mean :-2.4729 Mean :-2.6704
## 3rd Qu.:-0.06668 3rd Qu.:-0.9969 3rd Qu.:-0.2688 3rd Qu.:-0.9503
## Max. :18.52531 Max. :17.9985 Max. :17.8434 Max. :18.1149
## WM09 WM10 WM11 WM12
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-4.7118 Median :-5.3269 Median :-5.3269
## Mean :-2.5955 Mean :-2.5468 Mean :-2.6742 Mean :-2.6826
## 3rd Qu.:-0.6431 3rd Qu.:-0.4083 3rd Qu.:-0.7364 3rd Qu.:-0.9285
## Max. :17.5749 Max. :17.5749 Max. :17.9109 Max. :17.8394
## WM13 WM14 WM15 WM16
## Min. :-5.327 Min. :-5.32692 Min. :-5.3269 Min. :-5.327
## 1st Qu.:-5.327 1st Qu.:-5.32692 1st Qu.:-5.3269 1st Qu.:-5.327
## Median :-4.655 Median :-4.63508 Median :-5.3269 Median :-5.327
## Mean :-2.560 Mean :-2.43548 Mean :-2.5495 Mean :-2.655
## 3rd Qu.:-0.446 3rd Qu.:-0.01343 3rd Qu.:-0.4172 3rd Qu.:-0.967
## Max. :17.770 Max. :17.94493 Max. :17.5603 Max. :17.344
## WM17 WM18 WM19 WM20
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-4.6507 Median :-4.7548 Median :-4.8064 Median :-4.6185
## Mean :-2.5070 Mean :-2.4360 Mean :-2.5228 Mean :-2.2647
## 3rd Qu.:-0.5244 3rd Qu.:-0.1169 3rd Qu.:-0.5684 3rd Qu.: 0.3118
## Max. :17.8456 Max. :18.1089 Max. :17.7854 Max. :17.7193
## WM21 WM22 WM23 WM24
## Min. :-5.3269 Min. :-5.3269 Min. :-5.327 Min. :-5.327
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.327 1st Qu.:-5.327
## Median :-4.6693 Median :-4.9014 Median :-5.327 Median :-4.714
## Mean :-2.4430 Mean :-2.5493 Mean :-2.802 Mean :-2.456
## 3rd Qu.:-0.1947 3rd Qu.:-0.6824 3rd Qu.:-1.163 3rd Qu.:-0.202
## Max. :17.8367 Max. :17.6007 Max. :18.095 Max. :17.655
## WM25 WM26 WM27 WM28
## Min. :-5.3269 Min. :-5.327 Min. :-5.3269 Min. :-5.327
## 1st Qu.:-5.3269 1st Qu.:-5.327 1st Qu.:-5.3269 1st Qu.:-5.327
## Median :-5.3269 Median :-4.818 Median :-4.5844 Median :-4.591
## Mean :-2.4899 Mean :-2.672 Mean :-2.5616 Mean :-2.942
## 3rd Qu.:-0.1856 3rd Qu.:-1.003 3rd Qu.:-0.6317 3rd Qu.:-1.467
## Max. :17.3813 Max. :17.897 Max. :18.2407 Max. :19.398
## WM29 WM30 WF01 WF02
## Min. :-5.326917 Min. :-5.327 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.326917 1st Qu.:-5.327 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-4.805374 Median :-4.803 Median :-4.8442 Median :-5.3269
## Mean :-2.351516 Mean :-2.692 Mean :-2.5755 Mean :-2.5922
## 3rd Qu.: 0.001993 3rd Qu.:-1.048 3rd Qu.:-0.4983 3rd Qu.:-0.5136
## Max. :17.551316 Max. :17.732 Max. :17.8535 Max. :17.7878
## WF03 WF04 AF01 AF02
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-5.3269 Median :-5.3269 Median :-4.6169
## Mean :-2.5155 Mean :-2.6229 Mean :-2.7296 Mean :-2.3594
## 3rd Qu.:-0.3728 3rd Qu.:-0.5169 3rd Qu.:-0.8329 3rd Qu.: 0.2209
## Max. :17.5632 Max. :17.7754 Max. :18.2142 Max. :17.6560
## AF03 AF04 AM01 AM02
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-4.6721 Median :-4.8093 Median :-4.8828
## Mean :-2.5815 Mean :-2.5082 Mean :-2.5406 Mean :-2.6353
## 3rd Qu.:-0.6028 3rd Qu.:-0.4616 3rd Qu.:-0.6487 3rd Qu.:-0.8039
## Max. :17.5836 Max. :17.7331 Max. :17.8091 Max. :18.0378
## AM03 AM04 AM05 AM06
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-5.3269 Median :-4.9834 Median :-5.3269
## Mean :-2.6791 Mean :-2.5671 Mean :-2.6703 Mean :-2.5643
## 3rd Qu.:-0.6953 3rd Qu.:-0.6244 3rd Qu.:-0.8173 3rd Qu.:-0.5037
## Max. :18.1162 Max. :17.6917 Max. :17.9144 Max. :17.6848
## AM07 AM08 AM09 AM10
## Min. :-5.32692 Min. :-5.3269 Min. :-5.327 Min. :-5.3269
## 1st Qu.:-5.32692 1st Qu.:-5.3269 1st Qu.:-5.327 1st Qu.:-5.3269
## Median :-4.88125 Median :-4.6396 Median :-5.327 Median :-5.3269
## Mean :-2.36136 Mean :-2.4553 Mean :-2.737 Mean :-2.6980
## 3rd Qu.:-0.00175 3rd Qu.:-0.2675 3rd Qu.:-1.244 3rd Qu.:-0.9101
## Max. :17.56140 Max. :17.6274 Max. :17.758 Max. :18.0273
## AM11 AM12 AM13 AM14
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.3269
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269
## Median :-5.3269 Median :-5.3269 Median :-4.8546 Median :-5.3269
## Mean :-2.7170 Mean :-2.7597 Mean :-2.7981 Mean :-2.6732
## 3rd Qu.:-0.7161 3rd Qu.:-0.9733 3rd Qu.:-0.9787 3rd Qu.:-0.9632
## Max. :17.8248 Max. :17.9879 Max. :18.4898 Max. :17.7811
## AM15 AM16 AM17 AM18
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.327
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.327
## Median :-4.7736 Median :-5.3269 Median :-4.7915 Median :-5.327
## Mean :-2.5800 Mean :-2.5495 Mean :-2.5335 Mean :-2.879
## 3rd Qu.:-0.4899 3rd Qu.:-0.5002 3rd Qu.:-0.3886 3rd Qu.:-1.225
## Max. :17.9565 Max. :17.6605 Max. :18.1492 Max. :18.435
## AM19 AM20 AM21 AM22
## Min. :-5.3269 Min. :-5.3269 Min. :-5.3269 Min. :-5.327
## 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.3269 1st Qu.:-5.327
## Median :-4.7207 Median :-4.6545 Median :-4.0121 Median :-5.327
## Mean :-2.5716 Mean :-2.3618 Mean :-2.3625 Mean :-3.130
## 3rd Qu.:-0.6817 3rd Qu.: 0.1628 3rd Qu.:-0.5834 3rd Qu.:-1.757
## Max. :17.7992 Max. :18.3595 Max. :18.6399 Max. :18.478
## AM23 AM24 AM25 AM26
## Min. :-5.327 Min. :-5.3269 Min. :-5.327 Min. :-5.3269
## 1st Qu.:-5.327 1st Qu.:-5.3269 1st Qu.:-5.327 1st Qu.:-5.3269
## Median :-5.327 Median :-5.3269 Median :-4.436 Median :-4.6663
## Mean :-2.964 Mean :-2.6495 Mean :-2.386 Mean :-2.5526
## 3rd Qu.:-1.422 3rd Qu.:-0.5852 3rd Qu.:-0.226 3rd Qu.:-0.3635
## Max. :18.269 Max. :17.9668 Max. :17.729 Max. :18.0451
## AM27 AM28 AM29 AM30
## Min. :-5.327 Min. :-5.3269 Min. :-5.327 Min. :-5.3269
## 1st Qu.:-5.327 1st Qu.:-5.3269 1st Qu.:-5.327 1st Qu.:-5.3269
## Median :-5.327 Median :-5.3269 Median :-5.327 Median :-4.7689
## Mean :-2.800 Mean :-2.7496 Mean :-2.820 Mean :-2.4558
## 3rd Qu.:-1.028 3rd Qu.:-0.9491 3rd Qu.:-1.140 3rd Qu.:-0.2777
## Max. :17.949 Max. :18.0113 Max. :18.695 Max. :17.5229
# to filter genes, while keeping as many genes as possible with worthwhile counts.
keep.exprs <- filterByExpr(x, group=group)
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
dim(x)
## [1] 20138 68
# Compare the data before and after filtering
lcpm.cutoff <- log2(5/M + 2/L)
library(RColorBrewer)
nsamples <- ncol(x)
col <- brewer.pal(nsamples, "Paired")
## Warning in brewer.pal(nsamples, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
par(mfrow=c(1,2))
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="A. Raw data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")
lcpm <- cpm(x, log=TRUE)
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="B. Filtered data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")
# normalization
x <- calcNormFactors(x, method = "TMM")
x$samples$norm.factors
## [1] 0.9906711 1.1128612 1.2862404 1.1637129 0.9685468 0.8655872 1.1624764
## [8] 0.9960807 1.1034987 1.1138525 0.9253751 1.0598805 0.9132135 1.1127265
## [15] 0.9570849 1.1136734 1.1194157 1.1416051 1.1210267 1.3611276 1.1907137
## [22] 1.0514294 0.7971595 1.1937833 1.1827919 1.0175912 1.0532701 0.3703736
## [29] 1.3687501 0.9834462 1.0066632 1.0079499 1.2806465 1.0120290 0.8116220
## [36] 1.2169187 1.1680961 1.2434374 1.0576294 0.9613168 0.8563864 1.1025868
## [43] 0.9498651 1.1407080 1.2936701 1.2823856 1.0368950 0.9188302 1.0105678
## [50] 0.8765149 0.6811246 0.9871650 0.9549061 1.2145905 1.0973692 0.7117738
## [57] 1.1288755 1.2203519 0.7962916 0.3952453 0.5630903 0.8561179 1.2654744
## [64] 0.9873156 0.8244647 0.9445294 0.7698379 1.2427766
# To give a better visual representation of the effects of normalisation, the data was duplicated then adjusted so that the counts of the first sample are reduced to 5% of their original values, and in the second sample they are inflated to be 5-times larger.
x2 <- x
x2$samples$norm.factors <- 1
x2$counts[,1] <- ceiling(x2$counts[,1]*0.05)
x2$counts[,2] <- x2$counts[,2]*5
par(mfrow=c(1,2))
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="A. Example: Unnormalised data",ylab="Log-cpm")
x2 <- calcNormFactors(x2)
x2$samples$norm.factors
## [1] 0.06240712 5.50619855 1.31601164 1.16928053 1.02167803 0.86378941
## [7] 1.19567682 1.01974874 1.10916962 1.13109466 0.90931476 1.04943861
## [13] 0.95473805 1.15803559 1.04964202 1.11633889 1.13669152 1.17265012
## [19] 1.13926357 1.41574855 1.20762870 1.08110764 0.81483561 1.21823370
## [25] 1.19402358 1.04324607 1.06618421 0.38185077 1.41023715 1.01427805
## [31] 1.00653393 1.02269599 1.29978858 0.99161931 0.81675714 1.24670532
## [37] 1.17386503 1.25571264 1.04720965 0.98072175 0.84671839 1.09670615
## [43] 0.96640510 1.13301635 1.32622946 1.35960194 1.01892413 0.92254800
## [49] 1.00068939 0.89716104 0.70388504 1.00144044 0.98062456 1.26415319
## [55] 1.11233774 0.71553871 1.13762498 1.23488358 0.80335275 0.39924271
## [61] 0.57635838 0.88887026 1.29163654 1.03118686 0.82952801 0.96506159
## [67] 0.78340100 1.32116348
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="B. Example: Normalised data",ylab="Log-cpm")
Part 7 - Unsupervised clustering of samples
# By Race
lcpm <- cpm(x, log=TRUE)
par(mfrow=c(1,2))
col.group <- group
levels(col.group) <- brewer.pal(nlevels(col.group), "Set1")
## Warning in brewer.pal(nlevels(col.group), "Set1"): minimal value for n is 3, returning requested palette with 3 different levels
col.group <- as.character(col.group)
plotMDS(lcpm, labels=group, col=col.group)
title(main="A. Sample groups")
# By gender
lcpm <- cpm(x, log=TRUE)
par(mfrow=c(1,2))
col.gender <- gender
levels(col.gender) <- brewer.pal(nlevels(col.gender), "Set1")
## Warning in brewer.pal(nlevels(col.gender), "Set1"): minimal value for n is 3, returning requested palette with 3 different levels
col.gender <- as.character(col.gender)
plotMDS(lcpm, labels=gender, col=col.gender)
title(main="B. Sample genders")
# The glMDSPlot function generates an html page (that is opened in a browser if launch=TRUE) with an MDS plot in the left panel and a barplot showing the proportion of variation explained by each dimension in the right panel.
glMDSPlot(lcpm, groups=group,genders=gender)
Part 8 - Differential expression analysis
# Creating a design matrix and contrasts
design <- model.matrix(~0+group+gender)
colnames(design) <- gsub("group", "", colnames(design))
design
## Asian White genderMale
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 1 1
## 5 0 1 1
## 6 0 1 1
## 7 0 1 1
## 8 0 1 1
## 9 0 1 1
## 10 0 1 1
## 11 0 1 1
## 12 0 1 1
## 13 0 1 1
## 14 0 1 1
## 15 0 1 1
## 16 0 1 1
## 17 0 1 1
## 18 0 1 1
## 19 0 1 1
## 20 0 1 1
## 21 0 1 1
## 22 0 1 1
## 23 0 1 1
## 24 0 1 1
## 25 0 1 1
## 26 0 1 1
## 27 0 1 1
## 28 0 1 1
## 29 0 1 1
## 30 0 1 1
## 31 0 1 0
## 32 0 1 0
## 33 0 1 0
## 34 0 1 0
## 35 1 0 0
## 36 1 0 0
## 37 1 0 0
## 38 1 0 0
## 39 1 0 1
## 40 1 0 1
## 41 1 0 1
## 42 1 0 1
## 43 1 0 1
## 44 1 0 1
## 45 1 0 1
## 46 1 0 1
## 47 1 0 1
## 48 1 0 1
## 49 1 0 1
## 50 1 0 1
## 51 1 0 1
## 52 1 0 1
## 53 1 0 1
## 54 1 0 1
## 55 1 0 1
## 56 1 0 1
## 57 1 0 1
## 58 1 0 1
## 59 1 0 1
## 60 1 0 1
## 61 1 0 1
## 62 1 0 1
## 63 1 0 1
## 64 1 0 1
## 65 1 0 1
## 66 1 0 1
## 67 1 0 1
## 68 1 0 1
## attr(,"assign")
## [1] 1 1 2
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
##
## attr(,"contrasts")$gender
## [1] "contr.treatment"
contr.matrix <- makeContrasts(
RaceVS= White - Asian,
levels = colnames(design))
contr.matrix
## Contrasts
## Levels RaceVS
## Asian -1
## White 1
## genderMale 0
# Removing heteroscedascity from count data
par(mfrow=c(1,2))
v <- voom(x, design, plot=TRUE)
v
## An object of class "EList"
## $genes
## ENSEMBL SYMBOL TXCHROM
## 2 ENSG00000000419 DPM1 chr20
## 3 ENSG00000000457 SCYL3 chr1
## 4 ENSG00000000460 C1orf112 chr1
## 5 ENSG00000000938 FGR chr1
## 6 ENSG00000000971 CFH chr1
## 20133 more rows ...
##
## $targets
## files group lib.size norm.factors gender
## WM01 WM01.htseq.counts White 38200220 0.9906711 Male
## WM02 WM02.htseq.counts White 70884638 1.1128612 Male
## WM03 WM03.htseq.counts White 140620491 1.2862404 Male
## WM04 WM04.htseq.counts White 135250062 1.1637129 Male
## WM05 WM05.htseq.counts White 89403687 0.9685468 Male
## 63 more rows ...
##
## $E
## Samples
## Tags WM01 WM02 WM03 WM04 WM05 WM06
## ENSG00000000419.11 5.112452 4.902788 5.702458 3.5199625 4.440694 5.517074
## ENSG00000000457.12 1.847779 2.778895 2.908048 4.4394135 3.463913 3.132169
## ENSG00000000460.15 1.669303 3.186872 2.691680 5.2823213 2.813506 3.385158
## ENSG00000000938.11 3.364711 3.731416 2.648154 -0.1665961 3.239691 4.281166
## ENSG00000000971.14 6.124140 5.204918 6.471263 1.3778954 5.434550 3.317002
## Samples
## Tags WM07 WM08 WM09 WM10 WM11 WM12
## ENSG00000000419.11 5.104507 4.585725 4.720851 4.840853 4.734839 5.531583
## ENSG00000000457.12 3.878056 3.974512 3.914860 2.750162 2.842638 5.169846
## ENSG00000000460.15 3.123671 1.739874 2.486408 3.149460 2.607313 4.289612
## ENSG00000000938.11 2.324897 3.419306 2.054731 2.848931 3.839503 1.886268
## ENSG00000000971.14 6.316593 6.410086 2.868994 3.827895 5.964925 3.300104
## Samples
## Tags WM13 WM14 WM15 WM16 WM17 WM18
## ENSG00000000419.11 4.382119 4.674764 4.801646 4.695444 4.181286 3.705288
## ENSG00000000457.12 4.228433 3.387291 2.733078 3.398690 4.012883 3.066093
## ENSG00000000460.15 4.163366 3.011345 1.377316 3.190493 4.088282 1.390401
## ENSG00000000938.11 2.027298 2.252827 3.300740 3.221140 1.855113 1.993779
## ENSG00000000971.14 3.778249 4.755819 7.316368 5.979005 2.699563 2.816733
## Samples
## Tags WM19 WM20 WM21 WM22 WM23 WM24
## ENSG00000000419.11 5.116220 3.725857 4.912533 5.097922 4.584477 4.361430
## ENSG00000000457.12 3.956813 2.611544 3.428323 3.005259 3.272594 3.677833
## ENSG00000000460.15 5.922911 3.517059 3.194669 3.435043 1.812065 3.773162
## ENSG00000000938.11 1.578752 1.165460 2.503378 1.538305 2.710715 1.980041
## ENSG00000000971.14 4.767323 3.762240 5.939669 4.424408 5.984270 3.807775
## Samples
## Tags WM25 WM26 WM27 WM28 WM29 WM30
## ENSG00000000419.11 4.898056 4.792862 5.546109 4.2631967 5.170252 5.434577
## ENSG00000000457.12 3.731335 2.459557 2.984828 3.9067157 3.881153 3.025893
## ENSG00000000460.15 2.351077 2.577020 2.344328 3.6229540 3.425462 3.907418
## ENSG00000000938.11 3.016312 2.786643 2.531015 0.7121162 3.523467 1.067629
## ENSG00000000971.14 5.309299 5.344066 2.221471 5.5536482 5.490555 3.938092
## Samples
## Tags WF01 WF02 WF03 WF04 AF01 AF02
## ENSG00000000419.11 5.0841573 4.927416 5.071312 5.894614 4.252053 4.918583
## ENSG00000000457.12 3.7194016 3.153973 3.374131 3.015404 3.129501 3.280771
## ENSG00000000460.15 4.0624242 2.823269 3.786885 3.458271 1.903642 2.920146
## ENSG00000000938.11 0.3284319 3.944378 1.705485 3.051963 2.117766 3.776446
## ENSG00000000971.14 1.2136617 6.032502 6.049270 5.856737 2.340533 5.120693
## Samples
## Tags AF03 AF04 AM01 AM02 AM03 AM04
## ENSG00000000419.11 5.861504 4.46902368 4.283468 4.526542 4.654500 4.362484
## ENSG00000000457.12 2.784974 4.54078885 3.998478 3.674677 2.831129 3.660270
## ENSG00000000460.15 2.638460 5.08530326 1.714626 1.082830 2.386934 2.145565
## ENSG00000000938.11 2.080634 -0.02114107 3.366601 2.191678 4.743130 2.982383
## ENSG00000000971.14 3.215963 0.57781471 5.876951 1.406668 5.959805 6.846107
## Samples
## Tags AM05 AM06 AM07 AM08 AM09 AM10
## ENSG00000000419.11 4.491436 4.673514 4.294396 4.000286 5.048078 4.116208
## ENSG00000000457.12 3.838001 3.563122 3.430802 4.140950 3.684263 3.408600
## ENSG00000000460.15 1.089240 2.427980 2.009989 1.075892 2.710034 1.839518
## ENSG00000000938.11 3.494826 3.156267 2.092659 1.704879 3.914194 2.355748
## ENSG00000000971.14 7.388289 3.856205 6.850257 6.491152 6.373695 6.889500
## Samples
## Tags AM11 AM12 AM13 AM14 AM15 AM16
## ENSG00000000419.11 4.534507 4.163513 3.9115709 4.469462 3.510425 4.484946
## ENSG00000000457.12 2.598466 2.922363 3.3344022 3.363438 4.132262 3.864419
## ENSG00000000460.15 1.363704 1.523146 0.7725716 2.764862 2.113561 1.256073
## ENSG00000000938.11 1.904519 2.368272 3.9636257 2.409309 3.591556 2.969818
## ENSG00000000971.14 6.675617 6.177294 5.4077816 6.655467 7.341050 7.329022
## Samples
## Tags AM17 AM18 AM19 AM20 AM21 AM22
## ENSG00000000419.11 5.556262 4.411512 4.388391 4.957028 4.192049 3.193876
## ENSG00000000457.12 4.935219 3.399239 4.348918 3.371500 3.689088 3.582171
## ENSG00000000460.15 5.344403 2.133149 2.540951 3.479324 3.138922 1.873029
## ENSG00000000938.11 2.109927 1.425988 4.004665 2.640741 3.471760 1.987258
## ENSG00000000971.14 7.529107 5.966683 8.413037 3.465087 3.632331 3.198195
## Samples
## Tags AM23 AM24 AM25 AM26 AM27 AM28
## ENSG00000000419.11 4.464370 3.897764 4.854980 4.485652 4.564356 4.4484236
## ENSG00000000457.12 3.570924 3.897764 3.967465 3.218172 3.296920 3.4541650
## ENSG00000000460.15 1.400412 2.791900 4.582040 2.173138 2.076433 0.9448575
## ENSG00000000938.11 2.079144 3.455461 1.725644 1.877362 2.577953 2.7491020
## ENSG00000000971.14 4.507940 5.853690 4.607197 5.257700 4.567915 6.1194068
## Samples
## Tags AM29 AM30
## ENSG00000000419.11 4.412534 4.294323
## ENSG00000000457.12 3.601820 4.056226
## ENSG00000000460.15 1.305810 1.945671
## ENSG00000000938.11 4.077250 3.342629
## ENSG00000000971.14 6.229582 7.476813
## 20133 more rows ...
##
## $weights
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 1.7271550 2.159817 2.404807 2.396558 2.272489 2.172735 2.219394 2.341862
## [2,] 1.0515474 1.463765 1.996918 1.969175 1.649946 1.482371 1.554073 1.810478
## [3,] 0.9455721 1.297622 1.834342 1.805553 1.472147 1.314804 1.380121 1.631805
## [4,] 0.7992185 1.040287 1.499423 1.468796 1.172917 1.052880 1.101406 1.307110
## [5,] 1.7822841 2.196873 2.416737 2.410990 2.297776 2.210069 2.249431 2.361110
## [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
## [1,] 2.192091 2.244076 1.9081492 2.318517 2.0776748 2.171932 2.0256700 2.123756
## [2,] 1.510539 1.598102 1.1861570 1.752328 1.3574779 1.481210 1.2971903 1.412577
## [3,] 1.340410 1.423399 1.0571265 1.574171 1.2029668 1.313749 1.1508470 1.251160
## [4,] 1.071929 1.134073 0.8734815 1.256606 0.9727464 1.052095 0.9365379 1.006979
## [5,] 2.225838 2.274533 1.9565721 2.340814 2.1209624 2.209249 2.0682361 2.160644
## [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## [1,] 2.190840 2.291708 2.323201 2.254134 2.237275 2.375245 2.249889 2.277328
## [2,] 1.508709 1.691231 1.763189 1.616310 1.585877 1.899443 1.608606 1.658900
## [3,] 1.338746 1.512093 1.584717 1.440725 1.411366 1.726638 1.433533 1.480518
## [4,] 1.070692 1.204974 1.265831 1.147705 1.124925 1.393925 1.141935 1.179634
## [5,] 2.224838 2.315311 2.344580 2.283680 2.267616 2.390428 2.280415 2.301485
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
## [1,] 2.0830919 2.300478 2.0880302 2.128446 2.381293 2.275158 2.406526 2.2617111
## [2,] 1.3638913 1.711018 1.3697526 1.418644 1.918718 1.654880 1.677245 1.2982546
## [3,] 1.2085524 1.532698 1.2136833 1.256468 1.746287 1.476760 1.957644 1.5791142
## [4,] 0.9767426 1.221642 0.9803921 1.010735 1.412943 1.176618 1.043436 0.8418882
## [5,] 2.1265209 2.324195 2.1307505 2.164829 2.395610 2.299822 1.761638 1.3762635
## [,33] [,34] [,35] [,36] [,37] [,38] [,39]
## [1,] 2.3617622 2.2922838 2.1386956 2.233412 2.0735481 2.288444 2.194570
## [2,] 1.5217761 1.3533932 1.4226904 1.567849 1.3421819 1.672473 1.845229
## [3,] 1.8155449 1.6405632 1.1320911 1.249433 1.0719335 1.340572 1.100302
## [4,] 0.9516473 0.8677584 0.9245998 1.006537 0.8837216 1.072927 1.386273
## [5,] 1.6071592 1.4349997 1.8096468 1.946898 1.7239932 2.037406 2.438268
## [,40] [,41] [,42] [,43] [,44] [,45] [,46]
## [1,] 2.235121 1.9006633 1.8977935 2.341440 1.8998101 2.344254 2.108275
## [2,] 1.910239 1.4780827 1.4752063 2.100857 1.4772274 2.107234 1.723569
## [3,] 1.150167 0.8866288 0.8853202 1.339853 0.8862398 1.347180 1.018062
## [4,] 1.451459 1.0827918 1.0807688 1.678127 1.0821903 1.685836 1.273833
## [5,] 2.445454 2.3509527 2.3499119 2.450091 2.3506434 2.449930 2.419057
## [,47] [,48] [,49] [,50] [,51] [,52] [,53]
## [1,] 1.9074658 2.114940 1.9963978 2.0581195 1.9925522 1.9314207 2.090758
## [2,] 1.4849071 1.731834 1.5849638 1.6601117 1.5807257 1.5109247 1.701927
## [3,] 0.8897306 1.023174 0.9391901 0.9799751 0.9369952 0.9022556 1.004684
## [4,] 1.0875906 1.281121 1.1609891 1.2197271 1.1576892 1.1058755 1.254787
## [5,] 2.3534143 2.420556 2.3862990 2.4057710 2.3851727 2.3626997 2.415092
## [,54] [,55] [,56] [,57] [,58] [,59] [,60]
## [1,] 2.033653 2.192418 1.8521565 2.126996 2.092772 1.9276754 1.4782346
## [2,] 1.627199 1.842287 1.4296738 1.746827 1.704409 1.5065572 1.1089540
## [3,] 0.961638 1.098106 0.8645017 1.032669 1.006218 0.9001366 0.7289766
## [4,] 1.193911 1.383291 1.0487209 1.294363 1.256969 1.1028072 0.8506770
## [5,] 2.397357 2.437936 2.3328457 2.423257 2.415550 2.3611518 2.1225951
## [,61] [,62] [,63] [,64] [,65] [,66] [,67]
## [1,] 1.7593647 1.6659536 2.281417 1.968386 1.7534269 1.7590386 1.9681370
## [2,] 1.3391212 1.2558355 1.984046 1.554184 1.3337914 1.3388284 1.5539108
## [3,] 0.8253801 0.7903577 1.216053 0.923228 0.8231370 0.8252570 0.9230862
## [4,] 0.9895267 0.9372441 1.534009 1.137039 0.9860733 0.9893371 1.1368272
## [5,] 2.2913566 2.2421720 2.449674 2.377845 2.2888361 2.2912183 2.3777438
## [,68]
## [1,] 2.229283
## [2,] 1.899877
## [3,] 1.142132
## [4,] 1.441377
## [5,] 2.444328
## 20133 more rows ...
##
## $design
## Asian White genderMale
## 1 0 1 1
## 2 0 1 1
## 3 0 1 1
## 4 0 1 1
## 5 0 1 1
## 63 more rows ...
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit, main="Final model: Mean-variance trend")
# Examining the number of DE genes
summary(decideTests(efit, p.value = 0.05))# significance: p.value=0.05
## RaceVS
## Down 2451
## NotSig 15000
## Up 2687
tfit <- treat(vfit, lfc=log2(1.5))# get more differentially expressed genes
dt <- decideTests(tfit, p.value=0.05)
summary(dt)
## RaceVS
## Down 84
## NotSig 19906
## Up 148
tfit <- treat(vfit, lfc=log2(1.5))
dt <- decideTests(tfit, p.value = 0.05)
summary(dt)
## RaceVS
## Down 84
## NotSig 19906
## Up 148
# Examining individual DE genes from top to bottom
RaceVS <- topTreat(tfit, coef=1, n=Inf)
head(RaceVS)
## ENSEMBL SYMBOL TXCHROM logFC AveExpr
## ENSG00000207425.1 ENSG00000207425 <NA> <NA> 4.003742 -2.6870360
## ENSG00000115414.17 ENSG00000115414 FN1 chr2 3.150491 7.5899529
## ENSG00000269918.1 ENSG00000269918 <NA> <NA> -2.401672 0.3975525
## ENSG00000221947.6 ENSG00000221947 XKR9 chr8 2.657988 -0.7579479
## ENSG00000226278.1 ENSG00000226278 <NA> <NA> -5.067213 -1.5758057
## ENSG00000247627.2 ENSG00000247627 MTND4P12 <NA> -3.477716 4.5334316
## t P.Value adj.P.Val
## ENSG00000207425.1 7.281302 2.083745e-10 4.196245e-06
## ENSG00000115414.17 5.961984 4.799089e-08 4.832203e-04
## ENSG00000269918.1 -5.852638 7.450607e-08 5.001344e-04
## ENSG00000221947.6 5.388856 4.681350e-07 2.129847e-03
## ENSG00000226278.1 -5.358541 5.288129e-07 2.129847e-03
## ENSG00000247627.2 -5.294286 6.768272e-07 2.271658e-03
# plotMD function
plotMD(tfit, column=1, status=dt[,1], main=colnames(tfit)[1],
xlim=c(-8,13))
glMDPlot(tfit, coef=1, status=dt, main=colnames(tfit)[1],
side.main="ENSEMBL", counts=lcpm, groups=group, launch=FALSE)
# get heatmap of log-CPM values
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
RaceVS.topgenes <- RaceVS$ENSEMBL[1:100]
i <- which(v$genes$ENSEMBL %in% RaceVS.topgenes)
mycol <- colorpanel(1000,"blue","white","red")
heatmap.2(lcpm[i,], scale="row",
labRow=v$genes$SYMBOL[i], labCol=group,
col=mycol, trace="none", density.info="none",
lhei=NULL, dendrogram="column")
Part 9 - Gene set testing with camera
load('/Users/dongzeyuan/Desktop/zeyuan-510final/human_c2_v5p2.rdata')
#load(system.file("extdata", "human_c2_v5p2.rda", package = "RNAseq123"))
idx <- ids2indices(Hs.c2,id=rownames(v$genes))
cam.RaceVS <- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.RaceVS,60)# look for appropriate hallmark gene sets
## NGenes
## KANG_IMMORTALIZED_BY_TERT_DN 83
## BIOCARTA_GABA_PATHWAY 8
## ST_G_ALPHA_S_PATHWAY 12
## HOFMANN_CELL_LYMPHOMA_DN 31
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES 22
## REACTOME_GABA_A_RECEPTOR_ACTIVATION 9
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP 25
## REACTOME_CA_DEPENDENT_EVENTS 21
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP 57
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN 7
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP 5
## BIOCARTA_TOB1_PATHWAY 20
## ST_GA12_PATHWAY 18
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP 62
## CHENG_RESPONSE_TO_NICKEL_ACETATE 26
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP 155
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP 42
## BARIS_THYROID_CANCER_DN 48
## KEGG_CALCIUM_SIGNALING_PATHWAY 140
## BROWNE_HCMV_INFECTION_14HR_DN 219
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN 28
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP 22
## HOLLMANN_APOPTOSIS_VIA_CD40_UP 115
## SHIRAISHI_PLZF_TARGETS_UP 9
## FARMER_BREAST_CANCER_CLUSTER_4 10
## ZEILSTRA_CD44_TARGETS_DN 7
## DASU_IL6_SIGNALING_UP 46
## BROWNE_HCMV_INFECTION_8HR_UP 80
## LOPES_METHYLATED_IN_COLON_CANCER_UP 24
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN 244
## BRIDEAU_IMPRINTED_GENES 39
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3 2
## KEGG_DILATED_CARDIOMYOPATHY 64
## BIOCARTA_NOS1_PATHWAY 16
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB 11
## BIOCARTA_THELPER_PATHWAY 14
## OXFORD_RALB_TARGETS_UP 2
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP 49
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN 71
## KEGG_ALZHEIMERS_DISEASE 127
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3 170
## KEGG_MELANOGENESIS 77
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN 51
## YAGI_AML_SURVIVAL 87
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND 8
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS 29
## PETRETTO_BLOOD_PRESSURE_UP 6
## LI_PROSTATE_CANCER_EPIGENETIC 25
## WANG_BARRETTS_ESOPHAGUS_DN 22
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG 13
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP 47
## WANG_MLL_TARGETS 159
## PID_IL12_STAT4_PATHWAY 30
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION 31
## NIELSEN_LEIOMYOSARCOMA_UP 13
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP 48
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP 9
## CHANG_IMMORTALIZED_BY_HPV31_UP 36
## VANASSE_BCL2_TARGETS_UP 26
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING 141
## Direction
## KANG_IMMORTALIZED_BY_TERT_DN Up
## BIOCARTA_GABA_PATHWAY Up
## ST_G_ALPHA_S_PATHWAY Up
## HOFMANN_CELL_LYMPHOMA_DN Up
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES Up
## REACTOME_GABA_A_RECEPTOR_ACTIVATION Up
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP Up
## REACTOME_CA_DEPENDENT_EVENTS Up
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP Up
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN Up
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP Up
## BIOCARTA_TOB1_PATHWAY Up
## ST_GA12_PATHWAY Up
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP Up
## CHENG_RESPONSE_TO_NICKEL_ACETATE Up
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP Up
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP Up
## BARIS_THYROID_CANCER_DN Up
## KEGG_CALCIUM_SIGNALING_PATHWAY Up
## BROWNE_HCMV_INFECTION_14HR_DN Up
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN Up
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP Up
## HOLLMANN_APOPTOSIS_VIA_CD40_UP Up
## SHIRAISHI_PLZF_TARGETS_UP Up
## FARMER_BREAST_CANCER_CLUSTER_4 Up
## ZEILSTRA_CD44_TARGETS_DN Up
## DASU_IL6_SIGNALING_UP Up
## BROWNE_HCMV_INFECTION_8HR_UP Up
## LOPES_METHYLATED_IN_COLON_CANCER_UP Up
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN Up
## BRIDEAU_IMPRINTED_GENES Up
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3 Up
## KEGG_DILATED_CARDIOMYOPATHY Up
## BIOCARTA_NOS1_PATHWAY Up
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB Up
## BIOCARTA_THELPER_PATHWAY Up
## OXFORD_RALB_TARGETS_UP Down
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP Up
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN Up
## KEGG_ALZHEIMERS_DISEASE Up
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3 Up
## KEGG_MELANOGENESIS Up
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN Up
## YAGI_AML_SURVIVAL Up
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND Down
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS Up
## PETRETTO_BLOOD_PRESSURE_UP Up
## LI_PROSTATE_CANCER_EPIGENETIC Up
## WANG_BARRETTS_ESOPHAGUS_DN Up
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG Up
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP Up
## WANG_MLL_TARGETS Up
## PID_IL12_STAT4_PATHWAY Up
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION Up
## NIELSEN_LEIOMYOSARCOMA_UP Up
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP Up
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP Up
## CHANG_IMMORTALIZED_BY_HPV31_UP Up
## VANASSE_BCL2_TARGETS_UP Up
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING Up
## PValue
## KANG_IMMORTALIZED_BY_TERT_DN 0.0005577753
## BIOCARTA_GABA_PATHWAY 0.0009533798
## ST_G_ALPHA_S_PATHWAY 0.0012156076
## HOFMANN_CELL_LYMPHOMA_DN 0.0016398925
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES 0.0019591011
## REACTOME_GABA_A_RECEPTOR_ACTIVATION 0.0020135950
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP 0.0020701011
## REACTOME_CA_DEPENDENT_EVENTS 0.0023701152
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP 0.0026184082
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN 0.0027526153
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP 0.0027545525
## BIOCARTA_TOB1_PATHWAY 0.0028799548
## ST_GA12_PATHWAY 0.0029281430
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP 0.0031859966
## CHENG_RESPONSE_TO_NICKEL_ACETATE 0.0037154177
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP 0.0040024234
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP 0.0046875855
## BARIS_THYROID_CANCER_DN 0.0047916937
## KEGG_CALCIUM_SIGNALING_PATHWAY 0.0049527284
## BROWNE_HCMV_INFECTION_14HR_DN 0.0051969690
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN 0.0059129174
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP 0.0060851635
## HOLLMANN_APOPTOSIS_VIA_CD40_UP 0.0061785150
## SHIRAISHI_PLZF_TARGETS_UP 0.0062528755
## FARMER_BREAST_CANCER_CLUSTER_4 0.0062768235
## ZEILSTRA_CD44_TARGETS_DN 0.0063413522
## DASU_IL6_SIGNALING_UP 0.0066886588
## BROWNE_HCMV_INFECTION_8HR_UP 0.0068729429
## LOPES_METHYLATED_IN_COLON_CANCER_UP 0.0069392738
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN 0.0072064989
## BRIDEAU_IMPRINTED_GENES 0.0072453852
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3 0.0073120033
## KEGG_DILATED_CARDIOMYOPATHY 0.0073168212
## BIOCARTA_NOS1_PATHWAY 0.0076693385
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB 0.0076755853
## BIOCARTA_THELPER_PATHWAY 0.0077251403
## OXFORD_RALB_TARGETS_UP 0.0079015909
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP 0.0081876622
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN 0.0082522653
## KEGG_ALZHEIMERS_DISEASE 0.0085008789
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3 0.0085646776
## KEGG_MELANOGENESIS 0.0085690475
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN 0.0086201993
## YAGI_AML_SURVIVAL 0.0090649316
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND 0.0090896750
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS 0.0092489879
## PETRETTO_BLOOD_PRESSURE_UP 0.0095922098
## LI_PROSTATE_CANCER_EPIGENETIC 0.0096915556
## WANG_BARRETTS_ESOPHAGUS_DN 0.0102885624
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG 0.0104367740
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP 0.0107650189
## WANG_MLL_TARGETS 0.0113261064
## PID_IL12_STAT4_PATHWAY 0.0114246952
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION 0.0116866384
## NIELSEN_LEIOMYOSARCOMA_UP 0.0117180907
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP 0.0117801340
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP 0.0120054946
## CHANG_IMMORTALIZED_BY_HPV31_UP 0.0120914323
## VANASSE_BCL2_TARGETS_UP 0.0122333161
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING 0.0132544878
## FDR
## KANG_IMMORTALIZED_BY_TERT_DN 0.73499
## BIOCARTA_GABA_PATHWAY 0.73499
## ST_G_ALPHA_S_PATHWAY 0.73499
## HOFMANN_CELL_LYMPHOMA_DN 0.73499
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES 0.73499
## REACTOME_GABA_A_RECEPTOR_ACTIVATION 0.73499
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP 0.73499
## REACTOME_CA_DEPENDENT_EVENTS 0.73499
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP 0.73499
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN 0.73499
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP 0.73499
## BIOCARTA_TOB1_PATHWAY 0.73499
## ST_GA12_PATHWAY 0.73499
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP 0.73499
## CHENG_RESPONSE_TO_NICKEL_ACETATE 0.73499
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP 0.73499
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP 0.73499
## BARIS_THYROID_CANCER_DN 0.73499
## KEGG_CALCIUM_SIGNALING_PATHWAY 0.73499
## BROWNE_HCMV_INFECTION_14HR_DN 0.73499
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN 0.73499
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP 0.73499
## HOLLMANN_APOPTOSIS_VIA_CD40_UP 0.73499
## SHIRAISHI_PLZF_TARGETS_UP 0.73499
## FARMER_BREAST_CANCER_CLUSTER_4 0.73499
## ZEILSTRA_CD44_TARGETS_DN 0.73499
## DASU_IL6_SIGNALING_UP 0.73499
## BROWNE_HCMV_INFECTION_8HR_UP 0.73499
## LOPES_METHYLATED_IN_COLON_CANCER_UP 0.73499
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN 0.73499
## BRIDEAU_IMPRINTED_GENES 0.73499
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3 0.73499
## KEGG_DILATED_CARDIOMYOPATHY 0.73499
## BIOCARTA_NOS1_PATHWAY 0.73499
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB 0.73499
## BIOCARTA_THELPER_PATHWAY 0.73499
## OXFORD_RALB_TARGETS_UP 0.73499
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP 0.73499
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN 0.73499
## KEGG_ALZHEIMERS_DISEASE 0.73499
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3 0.73499
## KEGG_MELANOGENESIS 0.73499
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN 0.73499
## YAGI_AML_SURVIVAL 0.73499
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND 0.73499
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS 0.73499
## PETRETTO_BLOOD_PRESSURE_UP 0.73499
## LI_PROSTATE_CANCER_EPIGENETIC 0.73499
## WANG_BARRETTS_ESOPHAGUS_DN 0.73499
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG 0.73499
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP 0.73499
## WANG_MLL_TARGETS 0.73499
## PID_IL12_STAT4_PATHWAY 0.73499
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION 0.73499
## NIELSEN_LEIOMYOSARCOMA_UP 0.73499
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP 0.73499
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP 0.73499
## CHANG_IMMORTALIZED_BY_HPV31_UP 0.73499
## VANASSE_BCL2_TARGETS_UP 0.73499
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING 0.73499
# use camera function to perform a competitive test to assess whether the genes in a given set are highly ranked in terms of differential expression relative to genes that are not in the set.
barcodeplot(efit$t[,1], index=idx$KANG_IMMORTALIZED_BY_TERT_DN,
index2=idx$REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND, main="RaceVS")