Part 1 - set up

#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("Glimma")
#BiocManager::install("limma")
#BiocManager::install("edgeR")
#BiocManager::install("Homo.sapiens")
#BiocManager::install("stringr")
library(limma)
library(Glimma)
library(edgeR)
library(Homo.sapiens)#for human genes
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: OrganismDbi
## Loading required package: GenomicFeatures
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GO.db
## 
## Loading required package: org.Hs.eg.db
## 
## Loading required package: TxDb.Hsapiens.UCSC.hg19.knownGene
library(stringr)
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final')

Part 2 - Unzip

# unzip whitemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteMale')
dir.create("SampleFiles")
## Warning in dir.create("SampleFiles"): 'SampleFiles'已存在
filepath <- dir(path ="./gdc_download_20201111_071754.200344",full.names = TRUE)
for(wd in filepath){
  files <-dir(path = wd,pattern="gz$") #
  fromfilepath <- paste(wd,"/",files,sep ="")
  tofilepath <- paste("./SampleFiles/",files,sep ="")
  file.copy(fromfilepath,tofilepath)
}

# unzip whitefemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteFemale')
dir.create("SampleFiles2")
## Warning in dir.create("SampleFiles2"): 'SampleFiles2'已存在
filepath <- dir(path ="./gdc_download_20201111_071940.367741",full.names = TRUE)
for(wd in filepath){
  files <-dir(path = wd,pattern="gz$") 
  fromfilepath <- paste(wd,"/",files,sep ="")
  tofilepath <- paste("./SampleFiles2/",files,sep ="")
  file.copy(fromfilepath,tofilepath)
}

# unzip asianfemale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/AsianFemale')
dir.create("SampleFiles3")
## Warning in dir.create("SampleFiles3"): 'SampleFiles3'已存在
filepath <- dir(path ="./gdc_download_20201111_071444.004912",full.names = TRUE)
for(wd in filepath){
  files <-dir(path = wd,pattern="gz$") 
  fromfilepath <- paste(wd,"/",files,sep ="")
  tofilepath <- paste("./SampleFiles3/",files,sep ="")
  file.copy(fromfilepath,tofilepath)
}

# unzip asianmale data
setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/AsianMale')
dir.create("SampleFiles4")
## Warning in dir.create("SampleFiles4"): 'SampleFiles4'已存在
filepath <- dir(path ="./gdc_download_20201111_071307.660905",full.names = TRUE)
for(wd in filepath){
  files <-dir(path = wd,pattern="gz$") 
  fromfilepath <- paste(wd,"/",files,sep ="")
  tofilepath <- paste("./SampleFiles4/",files,sep ="")
  file.copy(fromfilepath,tofilepath)
}
# Check Files and manuanlly unzip desired data

Part 3 - Rename and transfer files to working directory

#tarDir <- "/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups"
#dir.create(tarDir)
      
#setwd("/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups")
#old_files1 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteMale/SampleFiles", pattern = ".htseq.counts", full.names = TRUE)
#new_files1 <- c("WM01.htseq.counts",
#"WM02.htseq.counts",
#"WM03.htseq.counts",
#"WM04.htseq.counts",
#"WM05.htseq.counts",
#"WM06.htseq.counts",
#"WM07.htseq.counts",
#"WM08.htseq.counts",
#"WM09.htseq.counts",
#"WM10.htseq.counts",
#"WM11.htseq.counts",
#"WM12.htseq.counts",
#"WM13.htseq.counts",
#"WM14.htseq.counts",
#"WM15.htseq.counts",
#"WM16.htseq.counts",
#"WM17.htseq.counts",
#"WM18.htseq.counts",
#"WM19.htseq.counts",
#"WM20.htseq.counts",
#"WM21.htseq.counts",
#"WM22.htseq.counts",
#"WM23.htseq.counts",
#"WM24.htseq.counts",
#"WM25.htseq.counts",
#"WM26.htseq.counts",
#"WM27.htseq.counts",
#"WM28.htseq.counts",
#"WM29.htseq.counts",
#"WM30.htseq.counts")

#file.copy(from = old_files1, to = new_files1)


#old_files2 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/WhiteFemale/SampleFiles2", pattern = ".htseq.counts", full.names = TRUE)
#new_files2 <- c("WF01.htseq.counts",
#"WF02.htseq.counts",
#"WF03.htseq.counts",
#"WF04.htseq.counts")

#file.copy(from = old_files2, to = new_files2)

#old_files3 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/AsianFemale/SampleFiles3", pattern = ".htseq.counts", full.names = TRUE)
#new_files3 <- c("AF01.htseq.counts",
#"AF02.htseq.counts",
#"AF03.htseq.counts",
#"AF04.htseq.counts")

#file.copy(from = old_files3, to = new_files3)


#old_files4 <- list.files("/Users/dongzeyuan/Desktop/zeyuan-510final/AsianMale/SampleFiles4", pattern = ".htseq.counts", full.names = TRUE)
#new_files4 <- c("AM01.htseq.counts",
#"AM02.htseq.counts",
#"AM03.htseq.counts",
#"AM04.htseq.counts",
#"AM05.htseq.counts",
#"AM06.htseq.counts",
#"AM07.htseq.counts",
#"AM08.htseq.counts",
#"AM09.htseq.counts",
#"AM10.htseq.counts",
#"AM11.htseq.counts",
#"AM12.htseq.counts",
#"AM13.htseq.counts",
#"AM14.htseq.counts",
#"AM15.htseq.counts",
#"AM16.htseq.counts",
#"AM17.htseq.counts",
#"AM18.htseq.counts",
#"AM19.htseq.counts",
#"AM20.htseq.counts",
#"AM21.htseq.counts",
#"AM22.htseq.counts",
#"AM23.htseq.counts",
#"AM24.htseq.counts",
#"AM25.htseq.counts",
#"AM26.htseq.counts",
#"AM27.htseq.counts",
#"AM28.htseq.counts",
#"AM29.htseq.counts",
#"AM30.htseq.counts")

#file.copy(from = old_files4, to = new_files4)

Part 4 - Reading in counts data via readDGE

setwd('/Users/dongzeyuan/Desktop/zeyuan-510final/TwoGroups')
files <- c("WM01.htseq.counts",
"WM02.htseq.counts",
"WM03.htseq.counts",
"WM04.htseq.counts",
"WM05.htseq.counts",
"WM06.htseq.counts",
"WM07.htseq.counts",
"WM08.htseq.counts",
"WM09.htseq.counts",
"WM10.htseq.counts",
"WM11.htseq.counts",
"WM12.htseq.counts",
"WM13.htseq.counts",
"WM14.htseq.counts",
"WM15.htseq.counts",
"WM16.htseq.counts",
"WM17.htseq.counts",
"WM18.htseq.counts",
"WM19.htseq.counts",
"WM20.htseq.counts",
"WM21.htseq.counts",
"WM22.htseq.counts",
"WM23.htseq.counts",
"WM24.htseq.counts",
"WM25.htseq.counts",
"WM26.htseq.counts",
"WM27.htseq.counts",
"WM28.htseq.counts",
"WM29.htseq.counts",
"WM30.htseq.counts","WF01.htseq.counts",
"WF02.htseq.counts",
"WF03.htseq.counts",
"WF04.htseq.counts","AF01.htseq.counts",
"AF02.htseq.counts",
"AF03.htseq.counts",
"AF04.htseq.counts",
"AM01.htseq.counts",
"AM02.htseq.counts",
"AM03.htseq.counts",
"AM04.htseq.counts",
"AM05.htseq.counts",
"AM06.htseq.counts",
"AM07.htseq.counts",
"AM08.htseq.counts",
"AM09.htseq.counts",
"AM10.htseq.counts",
"AM11.htseq.counts",
"AM12.htseq.counts",
"AM13.htseq.counts",
"AM14.htseq.counts",
"AM15.htseq.counts",
"AM16.htseq.counts",
"AM17.htseq.counts",
"AM18.htseq.counts",
"AM19.htseq.counts",
"AM20.htseq.counts",
"AM21.htseq.counts",
"AM22.htseq.counts",
"AM23.htseq.counts",
"AM24.htseq.counts",
"AM25.htseq.counts",
"AM26.htseq.counts",
"AM27.htseq.counts",
"AM28.htseq.counts",
"AM29.htseq.counts",
"AM30.htseq.counts")
#for(i in paste(files, ".gz", sep=""))
#  R.utils::gunzip(i, overwrite=TRUE)

x <- readDGE(files, columns=c(1,2))
## Meta tags detected: __no_feature, __ambiguous, __too_low_aQual, __not_aligned, __alignment_not_unique

Part 5 - Sorting Data

# strip the suffix
samplenames <-  substring(colnames(x), 1, 4)
samplenames
##  [1] "WM01" "WM02" "WM03" "WM04" "WM05" "WM06" "WM07" "WM08" "WM09" "WM10"
## [11] "WM11" "WM12" "WM13" "WM14" "WM15" "WM16" "WM17" "WM18" "WM19" "WM20"
## [21] "WM21" "WM22" "WM23" "WM24" "WM25" "WM26" "WM27" "WM28" "WM29" "WM30"
## [31] "WF01" "WF02" "WF03" "WF04" "AF01" "AF02" "AF03" "AF04" "AM01" "AM02"
## [41] "AM03" "AM04" "AM05" "AM06" "AM07" "AM08" "AM09" "AM10" "AM11" "AM12"
## [51] "AM13" "AM14" "AM15" "AM16" "AM17" "AM18" "AM19" "AM20" "AM21" "AM22"
## [61] "AM23" "AM24" "AM25" "AM26" "AM27" "AM28" "AM29" "AM30"
colnames(x) <- samplenames
# Group by Race
group <- as.factor(rep(c("White","Asian"),c(34,34)))
x$samples$group <- group 

# Group by gender
gender <- as.factor(rep(c("Male","Female","Male"),c(30,8,30)))
x$samples$gender<-gender
# Look for the geneid corresponding gene-symbols and locations of chromosome
geneid <- rownames(x)
#geneid
# Remove the decimal point at ENSEMBL
gene <- str_match(geneid, "(\\w*).*")
geneid <- gene[,2]
# go through Homo pacakge and get symbol and chromosome location
genes <- select(Homo.sapiens, 
                keys=geneid,
                columns=c("SYMBOL","TXCHROM"),
                keytype="ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
head(genes)
##           ENSEMBL   SYMBOL TXCHROM
## 1 ENSG00000000005     TNMD    chrX
## 2 ENSG00000000419     DPM1   chr20
## 3 ENSG00000000457    SCYL3    chr1
## 4 ENSG00000000460 C1orf112    chr1
## 5 ENSG00000000938      FGR    chr1
## 6 ENSG00000000971      CFH    chr1
# remove the overlapped genes
genes <- genes[!duplicated(genes$ENSEMBL),]
# import genes to DGEList
x$genes <- genes
#x

Part 6 - Data preprocessing

cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)
# Get the average and median of the sample library size 
L <- mean(x$samples$lib.size) * 1e-6
M <- median(x$samples$lib.size) * 1e-6
c(L, M)
## [1] 80.27711 75.63755
# see the summary
summary(lcpm)
##       WM01              WM02              WM03              WM04        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-4.6226   Median :-4.5340   Median :-4.5704  
##  Mean   :-2.6314   Mean   :-2.5396   Mean   :-2.3890   Mean   :-2.4082  
##  3rd Qu.:-0.4149   3rd Qu.:-0.4529   3rd Qu.:-0.2724   3rd Qu.:-0.1645  
##  Max.   :17.7454   Max.   :17.9583   Max.   :17.4635   Max.   :17.8388  
##       WM05               WM06              WM07              WM08        
##  Min.   :-5.32692   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.32692   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-4.80685   Median :-4.7634   Median :-4.6601   Median :-4.8768  
##  Mean   :-2.41955   Mean   :-2.6985   Mean   :-2.4729   Mean   :-2.6704  
##  3rd Qu.:-0.06668   3rd Qu.:-0.9969   3rd Qu.:-0.2688   3rd Qu.:-0.9503  
##  Max.   :18.52531   Max.   :17.9985   Max.   :17.8434   Max.   :18.1149  
##       WM09              WM10              WM11              WM12        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-4.7118   Median :-5.3269   Median :-5.3269  
##  Mean   :-2.5955   Mean   :-2.5468   Mean   :-2.6742   Mean   :-2.6826  
##  3rd Qu.:-0.6431   3rd Qu.:-0.4083   3rd Qu.:-0.7364   3rd Qu.:-0.9285  
##  Max.   :17.5749   Max.   :17.5749   Max.   :17.9109   Max.   :17.8394  
##       WM13             WM14               WM15              WM16       
##  Min.   :-5.327   Min.   :-5.32692   Min.   :-5.3269   Min.   :-5.327  
##  1st Qu.:-5.327   1st Qu.:-5.32692   1st Qu.:-5.3269   1st Qu.:-5.327  
##  Median :-4.655   Median :-4.63508   Median :-5.3269   Median :-5.327  
##  Mean   :-2.560   Mean   :-2.43548   Mean   :-2.5495   Mean   :-2.655  
##  3rd Qu.:-0.446   3rd Qu.:-0.01343   3rd Qu.:-0.4172   3rd Qu.:-0.967  
##  Max.   :17.770   Max.   :17.94493   Max.   :17.5603   Max.   :17.344  
##       WM17              WM18              WM19              WM20        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-4.6507   Median :-4.7548   Median :-4.8064   Median :-4.6185  
##  Mean   :-2.5070   Mean   :-2.4360   Mean   :-2.5228   Mean   :-2.2647  
##  3rd Qu.:-0.5244   3rd Qu.:-0.1169   3rd Qu.:-0.5684   3rd Qu.: 0.3118  
##  Max.   :17.8456   Max.   :18.1089   Max.   :17.7854   Max.   :17.7193  
##       WM21              WM22              WM23             WM24       
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.327   Min.   :-5.327  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.327   1st Qu.:-5.327  
##  Median :-4.6693   Median :-4.9014   Median :-5.327   Median :-4.714  
##  Mean   :-2.4430   Mean   :-2.5493   Mean   :-2.802   Mean   :-2.456  
##  3rd Qu.:-0.1947   3rd Qu.:-0.6824   3rd Qu.:-1.163   3rd Qu.:-0.202  
##  Max.   :17.8367   Max.   :17.6007   Max.   :18.095   Max.   :17.655  
##       WM25              WM26             WM27              WM28       
##  Min.   :-5.3269   Min.   :-5.327   Min.   :-5.3269   Min.   :-5.327  
##  1st Qu.:-5.3269   1st Qu.:-5.327   1st Qu.:-5.3269   1st Qu.:-5.327  
##  Median :-5.3269   Median :-4.818   Median :-4.5844   Median :-4.591  
##  Mean   :-2.4899   Mean   :-2.672   Mean   :-2.5616   Mean   :-2.942  
##  3rd Qu.:-0.1856   3rd Qu.:-1.003   3rd Qu.:-0.6317   3rd Qu.:-1.467  
##  Max.   :17.3813   Max.   :17.897   Max.   :18.2407   Max.   :19.398  
##       WM29                WM30             WF01              WF02        
##  Min.   :-5.326917   Min.   :-5.327   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.326917   1st Qu.:-5.327   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-4.805374   Median :-4.803   Median :-4.8442   Median :-5.3269  
##  Mean   :-2.351516   Mean   :-2.692   Mean   :-2.5755   Mean   :-2.5922  
##  3rd Qu.: 0.001993   3rd Qu.:-1.048   3rd Qu.:-0.4983   3rd Qu.:-0.5136  
##  Max.   :17.551316   Max.   :17.732   Max.   :17.8535   Max.   :17.7878  
##       WF03              WF04              AF01              AF02        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-5.3269   Median :-5.3269   Median :-4.6169  
##  Mean   :-2.5155   Mean   :-2.6229   Mean   :-2.7296   Mean   :-2.3594  
##  3rd Qu.:-0.3728   3rd Qu.:-0.5169   3rd Qu.:-0.8329   3rd Qu.: 0.2209  
##  Max.   :17.5632   Max.   :17.7754   Max.   :18.2142   Max.   :17.6560  
##       AF03              AF04              AM01              AM02        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-4.6721   Median :-4.8093   Median :-4.8828  
##  Mean   :-2.5815   Mean   :-2.5082   Mean   :-2.5406   Mean   :-2.6353  
##  3rd Qu.:-0.6028   3rd Qu.:-0.4616   3rd Qu.:-0.6487   3rd Qu.:-0.8039  
##  Max.   :17.5836   Max.   :17.7331   Max.   :17.8091   Max.   :18.0378  
##       AM03              AM04              AM05              AM06        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-5.3269   Median :-4.9834   Median :-5.3269  
##  Mean   :-2.6791   Mean   :-2.5671   Mean   :-2.6703   Mean   :-2.5643  
##  3rd Qu.:-0.6953   3rd Qu.:-0.6244   3rd Qu.:-0.8173   3rd Qu.:-0.5037  
##  Max.   :18.1162   Max.   :17.6917   Max.   :17.9144   Max.   :17.6848  
##       AM07               AM08              AM09             AM10        
##  Min.   :-5.32692   Min.   :-5.3269   Min.   :-5.327   Min.   :-5.3269  
##  1st Qu.:-5.32692   1st Qu.:-5.3269   1st Qu.:-5.327   1st Qu.:-5.3269  
##  Median :-4.88125   Median :-4.6396   Median :-5.327   Median :-5.3269  
##  Mean   :-2.36136   Mean   :-2.4553   Mean   :-2.737   Mean   :-2.6980  
##  3rd Qu.:-0.00175   3rd Qu.:-0.2675   3rd Qu.:-1.244   3rd Qu.:-0.9101  
##  Max.   :17.56140   Max.   :17.6274   Max.   :17.758   Max.   :18.0273  
##       AM11              AM12              AM13              AM14        
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269  
##  Median :-5.3269   Median :-5.3269   Median :-4.8546   Median :-5.3269  
##  Mean   :-2.7170   Mean   :-2.7597   Mean   :-2.7981   Mean   :-2.6732  
##  3rd Qu.:-0.7161   3rd Qu.:-0.9733   3rd Qu.:-0.9787   3rd Qu.:-0.9632  
##  Max.   :17.8248   Max.   :17.9879   Max.   :18.4898   Max.   :17.7811  
##       AM15              AM16              AM17              AM18       
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.327  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.327  
##  Median :-4.7736   Median :-5.3269   Median :-4.7915   Median :-5.327  
##  Mean   :-2.5800   Mean   :-2.5495   Mean   :-2.5335   Mean   :-2.879  
##  3rd Qu.:-0.4899   3rd Qu.:-0.5002   3rd Qu.:-0.3886   3rd Qu.:-1.225  
##  Max.   :17.9565   Max.   :17.6605   Max.   :18.1492   Max.   :18.435  
##       AM19              AM20              AM21              AM22       
##  Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.3269   Min.   :-5.327  
##  1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.3269   1st Qu.:-5.327  
##  Median :-4.7207   Median :-4.6545   Median :-4.0121   Median :-5.327  
##  Mean   :-2.5716   Mean   :-2.3618   Mean   :-2.3625   Mean   :-3.130  
##  3rd Qu.:-0.6817   3rd Qu.: 0.1628   3rd Qu.:-0.5834   3rd Qu.:-1.757  
##  Max.   :17.7992   Max.   :18.3595   Max.   :18.6399   Max.   :18.478  
##       AM23             AM24              AM25             AM26        
##  Min.   :-5.327   Min.   :-5.3269   Min.   :-5.327   Min.   :-5.3269  
##  1st Qu.:-5.327   1st Qu.:-5.3269   1st Qu.:-5.327   1st Qu.:-5.3269  
##  Median :-5.327   Median :-5.3269   Median :-4.436   Median :-4.6663  
##  Mean   :-2.964   Mean   :-2.6495   Mean   :-2.386   Mean   :-2.5526  
##  3rd Qu.:-1.422   3rd Qu.:-0.5852   3rd Qu.:-0.226   3rd Qu.:-0.3635  
##  Max.   :18.269   Max.   :17.9668   Max.   :17.729   Max.   :18.0451  
##       AM27             AM28              AM29             AM30        
##  Min.   :-5.327   Min.   :-5.3269   Min.   :-5.327   Min.   :-5.3269  
##  1st Qu.:-5.327   1st Qu.:-5.3269   1st Qu.:-5.327   1st Qu.:-5.3269  
##  Median :-5.327   Median :-5.3269   Median :-5.327   Median :-4.7689  
##  Mean   :-2.800   Mean   :-2.7496   Mean   :-2.820   Mean   :-2.4558  
##  3rd Qu.:-1.028   3rd Qu.:-0.9491   3rd Qu.:-1.140   3rd Qu.:-0.2777  
##  Max.   :17.949   Max.   :18.0113   Max.   :18.695   Max.   :17.5229
# to filter genes, while keeping as many genes as possible with worthwhile counts.
keep.exprs <- filterByExpr(x, group=group)  
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
dim(x)
## [1] 20138    68
# Compare the data before and after filtering
lcpm.cutoff <- log2(5/M + 2/L)
library(RColorBrewer)
nsamples <- ncol(x)
col <- brewer.pal(nsamples, "Paired")
## Warning in brewer.pal(nsamples, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
par(mfrow=c(1,2))
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="A. Raw data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")
lcpm <- cpm(x, log=TRUE)
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="B. Filtered data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")

# normalization
x <- calcNormFactors(x, method = "TMM")
x$samples$norm.factors
##  [1] 0.9906711 1.1128612 1.2862404 1.1637129 0.9685468 0.8655872 1.1624764
##  [8] 0.9960807 1.1034987 1.1138525 0.9253751 1.0598805 0.9132135 1.1127265
## [15] 0.9570849 1.1136734 1.1194157 1.1416051 1.1210267 1.3611276 1.1907137
## [22] 1.0514294 0.7971595 1.1937833 1.1827919 1.0175912 1.0532701 0.3703736
## [29] 1.3687501 0.9834462 1.0066632 1.0079499 1.2806465 1.0120290 0.8116220
## [36] 1.2169187 1.1680961 1.2434374 1.0576294 0.9613168 0.8563864 1.1025868
## [43] 0.9498651 1.1407080 1.2936701 1.2823856 1.0368950 0.9188302 1.0105678
## [50] 0.8765149 0.6811246 0.9871650 0.9549061 1.2145905 1.0973692 0.7117738
## [57] 1.1288755 1.2203519 0.7962916 0.3952453 0.5630903 0.8561179 1.2654744
## [64] 0.9873156 0.8244647 0.9445294 0.7698379 1.2427766
# To give a better visual representation of the effects of normalisation, the data was duplicated then adjusted so that the counts of the first sample are reduced to 5% of their original values, and in the second sample they are inflated to be 5-times larger.
x2 <- x
x2$samples$norm.factors <- 1
x2$counts[,1] <- ceiling(x2$counts[,1]*0.05)
x2$counts[,2] <- x2$counts[,2]*5
par(mfrow=c(1,2))
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="A. Example: Unnormalised data",ylab="Log-cpm")
x2 <- calcNormFactors(x2)  
x2$samples$norm.factors
##  [1] 0.06240712 5.50619855 1.31601164 1.16928053 1.02167803 0.86378941
##  [7] 1.19567682 1.01974874 1.10916962 1.13109466 0.90931476 1.04943861
## [13] 0.95473805 1.15803559 1.04964202 1.11633889 1.13669152 1.17265012
## [19] 1.13926357 1.41574855 1.20762870 1.08110764 0.81483561 1.21823370
## [25] 1.19402358 1.04324607 1.06618421 0.38185077 1.41023715 1.01427805
## [31] 1.00653393 1.02269599 1.29978858 0.99161931 0.81675714 1.24670532
## [37] 1.17386503 1.25571264 1.04720965 0.98072175 0.84671839 1.09670615
## [43] 0.96640510 1.13301635 1.32622946 1.35960194 1.01892413 0.92254800
## [49] 1.00068939 0.89716104 0.70388504 1.00144044 0.98062456 1.26415319
## [55] 1.11233774 0.71553871 1.13762498 1.23488358 0.80335275 0.39924271
## [61] 0.57635838 0.88887026 1.29163654 1.03118686 0.82952801 0.96506159
## [67] 0.78340100 1.32116348

lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="B. Example: Normalised data",ylab="Log-cpm")

Part 7 - Unsupervised clustering of samples

# By Race
lcpm <- cpm(x, log=TRUE)
par(mfrow=c(1,2))
col.group <- group
levels(col.group) <-  brewer.pal(nlevels(col.group), "Set1")
## Warning in brewer.pal(nlevels(col.group), "Set1"): minimal value for n is 3, returning requested palette with 3 different levels
col.group <- as.character(col.group)
plotMDS(lcpm, labels=group, col=col.group)
title(main="A. Sample groups")


# By gender
lcpm <- cpm(x, log=TRUE)
par(mfrow=c(1,2))

col.gender <- gender
levels(col.gender) <-  brewer.pal(nlevels(col.gender), "Set1")
## Warning in brewer.pal(nlevels(col.gender), "Set1"): minimal value for n is 3, returning requested palette with 3 different levels
col.gender <- as.character(col.gender)
plotMDS(lcpm, labels=gender, col=col.gender)
title(main="B. Sample genders")

# The glMDSPlot function generates an html page (that is opened in a browser if launch=TRUE) with an MDS plot in the left panel and a barplot showing the proportion of variation explained by each dimension in the right panel. 
glMDSPlot(lcpm, groups=group,genders=gender)

Part 8 - Differential expression analysis

# Creating a design matrix and contrasts
design <- model.matrix(~0+group+gender)
colnames(design) <- gsub("group", "", colnames(design))
design
##    Asian White genderMale
## 1      0     1          1
## 2      0     1          1
## 3      0     1          1
## 4      0     1          1
## 5      0     1          1
## 6      0     1          1
## 7      0     1          1
## 8      0     1          1
## 9      0     1          1
## 10     0     1          1
## 11     0     1          1
## 12     0     1          1
## 13     0     1          1
## 14     0     1          1
## 15     0     1          1
## 16     0     1          1
## 17     0     1          1
## 18     0     1          1
## 19     0     1          1
## 20     0     1          1
## 21     0     1          1
## 22     0     1          1
## 23     0     1          1
## 24     0     1          1
## 25     0     1          1
## 26     0     1          1
## 27     0     1          1
## 28     0     1          1
## 29     0     1          1
## 30     0     1          1
## 31     0     1          0
## 32     0     1          0
## 33     0     1          0
## 34     0     1          0
## 35     1     0          0
## 36     1     0          0
## 37     1     0          0
## 38     1     0          0
## 39     1     0          1
## 40     1     0          1
## 41     1     0          1
## 42     1     0          1
## 43     1     0          1
## 44     1     0          1
## 45     1     0          1
## 46     1     0          1
## 47     1     0          1
## 48     1     0          1
## 49     1     0          1
## 50     1     0          1
## 51     1     0          1
## 52     1     0          1
## 53     1     0          1
## 54     1     0          1
## 55     1     0          1
## 56     1     0          1
## 57     1     0          1
## 58     1     0          1
## 59     1     0          1
## 60     1     0          1
## 61     1     0          1
## 62     1     0          1
## 63     1     0          1
## 64     1     0          1
## 65     1     0          1
## 66     1     0          1
## 67     1     0          1
## 68     1     0          1
## attr(,"assign")
## [1] 1 1 2
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
## 
## attr(,"contrasts")$gender
## [1] "contr.treatment"
contr.matrix <- makeContrasts(
   RaceVS= White - Asian,

   levels = colnames(design))
contr.matrix
##             Contrasts
## Levels       RaceVS
##   Asian          -1
##   White           1
##   genderMale      0
# Removing heteroscedascity from count data
par(mfrow=c(1,2))
v <- voom(x, design, plot=TRUE)
v
## An object of class "EList"
## $genes
##           ENSEMBL   SYMBOL TXCHROM
## 2 ENSG00000000419     DPM1   chr20
## 3 ENSG00000000457    SCYL3    chr1
## 4 ENSG00000000460 C1orf112    chr1
## 5 ENSG00000000938      FGR    chr1
## 6 ENSG00000000971      CFH    chr1
## 20133 more rows ...
## 
## $targets
##                  files group  lib.size norm.factors gender
## WM01 WM01.htseq.counts White  38200220    0.9906711   Male
## WM02 WM02.htseq.counts White  70884638    1.1128612   Male
## WM03 WM03.htseq.counts White 140620491    1.2862404   Male
## WM04 WM04.htseq.counts White 135250062    1.1637129   Male
## WM05 WM05.htseq.counts White  89403687    0.9685468   Male
## 63 more rows ...
## 
## $E
##                     Samples
## Tags                     WM01     WM02     WM03       WM04     WM05     WM06
##   ENSG00000000419.11 5.112452 4.902788 5.702458  3.5199625 4.440694 5.517074
##   ENSG00000000457.12 1.847779 2.778895 2.908048  4.4394135 3.463913 3.132169
##   ENSG00000000460.15 1.669303 3.186872 2.691680  5.2823213 2.813506 3.385158
##   ENSG00000000938.11 3.364711 3.731416 2.648154 -0.1665961 3.239691 4.281166
##   ENSG00000000971.14 6.124140 5.204918 6.471263  1.3778954 5.434550 3.317002
##                     Samples
## Tags                     WM07     WM08     WM09     WM10     WM11     WM12
##   ENSG00000000419.11 5.104507 4.585725 4.720851 4.840853 4.734839 5.531583
##   ENSG00000000457.12 3.878056 3.974512 3.914860 2.750162 2.842638 5.169846
##   ENSG00000000460.15 3.123671 1.739874 2.486408 3.149460 2.607313 4.289612
##   ENSG00000000938.11 2.324897 3.419306 2.054731 2.848931 3.839503 1.886268
##   ENSG00000000971.14 6.316593 6.410086 2.868994 3.827895 5.964925 3.300104
##                     Samples
## Tags                     WM13     WM14     WM15     WM16     WM17     WM18
##   ENSG00000000419.11 4.382119 4.674764 4.801646 4.695444 4.181286 3.705288
##   ENSG00000000457.12 4.228433 3.387291 2.733078 3.398690 4.012883 3.066093
##   ENSG00000000460.15 4.163366 3.011345 1.377316 3.190493 4.088282 1.390401
##   ENSG00000000938.11 2.027298 2.252827 3.300740 3.221140 1.855113 1.993779
##   ENSG00000000971.14 3.778249 4.755819 7.316368 5.979005 2.699563 2.816733
##                     Samples
## Tags                     WM19     WM20     WM21     WM22     WM23     WM24
##   ENSG00000000419.11 5.116220 3.725857 4.912533 5.097922 4.584477 4.361430
##   ENSG00000000457.12 3.956813 2.611544 3.428323 3.005259 3.272594 3.677833
##   ENSG00000000460.15 5.922911 3.517059 3.194669 3.435043 1.812065 3.773162
##   ENSG00000000938.11 1.578752 1.165460 2.503378 1.538305 2.710715 1.980041
##   ENSG00000000971.14 4.767323 3.762240 5.939669 4.424408 5.984270 3.807775
##                     Samples
## Tags                     WM25     WM26     WM27      WM28     WM29     WM30
##   ENSG00000000419.11 4.898056 4.792862 5.546109 4.2631967 5.170252 5.434577
##   ENSG00000000457.12 3.731335 2.459557 2.984828 3.9067157 3.881153 3.025893
##   ENSG00000000460.15 2.351077 2.577020 2.344328 3.6229540 3.425462 3.907418
##   ENSG00000000938.11 3.016312 2.786643 2.531015 0.7121162 3.523467 1.067629
##   ENSG00000000971.14 5.309299 5.344066 2.221471 5.5536482 5.490555 3.938092
##                     Samples
## Tags                      WF01     WF02     WF03     WF04     AF01     AF02
##   ENSG00000000419.11 5.0841573 4.927416 5.071312 5.894614 4.252053 4.918583
##   ENSG00000000457.12 3.7194016 3.153973 3.374131 3.015404 3.129501 3.280771
##   ENSG00000000460.15 4.0624242 2.823269 3.786885 3.458271 1.903642 2.920146
##   ENSG00000000938.11 0.3284319 3.944378 1.705485 3.051963 2.117766 3.776446
##   ENSG00000000971.14 1.2136617 6.032502 6.049270 5.856737 2.340533 5.120693
##                     Samples
## Tags                     AF03        AF04     AM01     AM02     AM03     AM04
##   ENSG00000000419.11 5.861504  4.46902368 4.283468 4.526542 4.654500 4.362484
##   ENSG00000000457.12 2.784974  4.54078885 3.998478 3.674677 2.831129 3.660270
##   ENSG00000000460.15 2.638460  5.08530326 1.714626 1.082830 2.386934 2.145565
##   ENSG00000000938.11 2.080634 -0.02114107 3.366601 2.191678 4.743130 2.982383
##   ENSG00000000971.14 3.215963  0.57781471 5.876951 1.406668 5.959805 6.846107
##                     Samples
## Tags                     AM05     AM06     AM07     AM08     AM09     AM10
##   ENSG00000000419.11 4.491436 4.673514 4.294396 4.000286 5.048078 4.116208
##   ENSG00000000457.12 3.838001 3.563122 3.430802 4.140950 3.684263 3.408600
##   ENSG00000000460.15 1.089240 2.427980 2.009989 1.075892 2.710034 1.839518
##   ENSG00000000938.11 3.494826 3.156267 2.092659 1.704879 3.914194 2.355748
##   ENSG00000000971.14 7.388289 3.856205 6.850257 6.491152 6.373695 6.889500
##                     Samples
## Tags                     AM11     AM12      AM13     AM14     AM15     AM16
##   ENSG00000000419.11 4.534507 4.163513 3.9115709 4.469462 3.510425 4.484946
##   ENSG00000000457.12 2.598466 2.922363 3.3344022 3.363438 4.132262 3.864419
##   ENSG00000000460.15 1.363704 1.523146 0.7725716 2.764862 2.113561 1.256073
##   ENSG00000000938.11 1.904519 2.368272 3.9636257 2.409309 3.591556 2.969818
##   ENSG00000000971.14 6.675617 6.177294 5.4077816 6.655467 7.341050 7.329022
##                     Samples
## Tags                     AM17     AM18     AM19     AM20     AM21     AM22
##   ENSG00000000419.11 5.556262 4.411512 4.388391 4.957028 4.192049 3.193876
##   ENSG00000000457.12 4.935219 3.399239 4.348918 3.371500 3.689088 3.582171
##   ENSG00000000460.15 5.344403 2.133149 2.540951 3.479324 3.138922 1.873029
##   ENSG00000000938.11 2.109927 1.425988 4.004665 2.640741 3.471760 1.987258
##   ENSG00000000971.14 7.529107 5.966683 8.413037 3.465087 3.632331 3.198195
##                     Samples
## Tags                     AM23     AM24     AM25     AM26     AM27      AM28
##   ENSG00000000419.11 4.464370 3.897764 4.854980 4.485652 4.564356 4.4484236
##   ENSG00000000457.12 3.570924 3.897764 3.967465 3.218172 3.296920 3.4541650
##   ENSG00000000460.15 1.400412 2.791900 4.582040 2.173138 2.076433 0.9448575
##   ENSG00000000938.11 2.079144 3.455461 1.725644 1.877362 2.577953 2.7491020
##   ENSG00000000971.14 4.507940 5.853690 4.607197 5.257700 4.567915 6.1194068
##                     Samples
## Tags                     AM29     AM30
##   ENSG00000000419.11 4.412534 4.294323
##   ENSG00000000457.12 3.601820 4.056226
##   ENSG00000000460.15 1.305810 1.945671
##   ENSG00000000938.11 4.077250 3.342629
##   ENSG00000000971.14 6.229582 7.476813
## 20133 more rows ...
## 
## $weights
##           [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## [1,] 1.7271550 2.159817 2.404807 2.396558 2.272489 2.172735 2.219394 2.341862
## [2,] 1.0515474 1.463765 1.996918 1.969175 1.649946 1.482371 1.554073 1.810478
## [3,] 0.9455721 1.297622 1.834342 1.805553 1.472147 1.314804 1.380121 1.631805
## [4,] 0.7992185 1.040287 1.499423 1.468796 1.172917 1.052880 1.101406 1.307110
## [5,] 1.7822841 2.196873 2.416737 2.410990 2.297776 2.210069 2.249431 2.361110
##          [,9]    [,10]     [,11]    [,12]     [,13]    [,14]     [,15]    [,16]
## [1,] 2.192091 2.244076 1.9081492 2.318517 2.0776748 2.171932 2.0256700 2.123756
## [2,] 1.510539 1.598102 1.1861570 1.752328 1.3574779 1.481210 1.2971903 1.412577
## [3,] 1.340410 1.423399 1.0571265 1.574171 1.2029668 1.313749 1.1508470 1.251160
## [4,] 1.071929 1.134073 0.8734815 1.256606 0.9727464 1.052095 0.9365379 1.006979
## [5,] 2.225838 2.274533 1.9565721 2.340814 2.1209624 2.209249 2.0682361 2.160644
##         [,17]    [,18]    [,19]    [,20]    [,21]    [,22]    [,23]    [,24]
## [1,] 2.190840 2.291708 2.323201 2.254134 2.237275 2.375245 2.249889 2.277328
## [2,] 1.508709 1.691231 1.763189 1.616310 1.585877 1.899443 1.608606 1.658900
## [3,] 1.338746 1.512093 1.584717 1.440725 1.411366 1.726638 1.433533 1.480518
## [4,] 1.070692 1.204974 1.265831 1.147705 1.124925 1.393925 1.141935 1.179634
## [5,] 2.224838 2.315311 2.344580 2.283680 2.267616 2.390428 2.280415 2.301485
##          [,25]    [,26]     [,27]    [,28]    [,29]    [,30]    [,31]     [,32]
## [1,] 2.0830919 2.300478 2.0880302 2.128446 2.381293 2.275158 2.406526 2.2617111
## [2,] 1.3638913 1.711018 1.3697526 1.418644 1.918718 1.654880 1.677245 1.2982546
## [3,] 1.2085524 1.532698 1.2136833 1.256468 1.746287 1.476760 1.957644 1.5791142
## [4,] 0.9767426 1.221642 0.9803921 1.010735 1.412943 1.176618 1.043436 0.8418882
## [5,] 2.1265209 2.324195 2.1307505 2.164829 2.395610 2.299822 1.761638 1.3762635
##          [,33]     [,34]     [,35]    [,36]     [,37]    [,38]    [,39]
## [1,] 2.3617622 2.2922838 2.1386956 2.233412 2.0735481 2.288444 2.194570
## [2,] 1.5217761 1.3533932 1.4226904 1.567849 1.3421819 1.672473 1.845229
## [3,] 1.8155449 1.6405632 1.1320911 1.249433 1.0719335 1.340572 1.100302
## [4,] 0.9516473 0.8677584 0.9245998 1.006537 0.8837216 1.072927 1.386273
## [5,] 1.6071592 1.4349997 1.8096468 1.946898 1.7239932 2.037406 2.438268
##         [,40]     [,41]     [,42]    [,43]     [,44]    [,45]    [,46]
## [1,] 2.235121 1.9006633 1.8977935 2.341440 1.8998101 2.344254 2.108275
## [2,] 1.910239 1.4780827 1.4752063 2.100857 1.4772274 2.107234 1.723569
## [3,] 1.150167 0.8866288 0.8853202 1.339853 0.8862398 1.347180 1.018062
## [4,] 1.451459 1.0827918 1.0807688 1.678127 1.0821903 1.685836 1.273833
## [5,] 2.445454 2.3509527 2.3499119 2.450091 2.3506434 2.449930 2.419057
##          [,47]    [,48]     [,49]     [,50]     [,51]     [,52]    [,53]
## [1,] 1.9074658 2.114940 1.9963978 2.0581195 1.9925522 1.9314207 2.090758
## [2,] 1.4849071 1.731834 1.5849638 1.6601117 1.5807257 1.5109247 1.701927
## [3,] 0.8897306 1.023174 0.9391901 0.9799751 0.9369952 0.9022556 1.004684
## [4,] 1.0875906 1.281121 1.1609891 1.2197271 1.1576892 1.1058755 1.254787
## [5,] 2.3534143 2.420556 2.3862990 2.4057710 2.3851727 2.3626997 2.415092
##         [,54]    [,55]     [,56]    [,57]    [,58]     [,59]     [,60]
## [1,] 2.033653 2.192418 1.8521565 2.126996 2.092772 1.9276754 1.4782346
## [2,] 1.627199 1.842287 1.4296738 1.746827 1.704409 1.5065572 1.1089540
## [3,] 0.961638 1.098106 0.8645017 1.032669 1.006218 0.9001366 0.7289766
## [4,] 1.193911 1.383291 1.0487209 1.294363 1.256969 1.1028072 0.8506770
## [5,] 2.397357 2.437936 2.3328457 2.423257 2.415550 2.3611518 2.1225951
##          [,61]     [,62]    [,63]    [,64]     [,65]     [,66]     [,67]
## [1,] 1.7593647 1.6659536 2.281417 1.968386 1.7534269 1.7590386 1.9681370
## [2,] 1.3391212 1.2558355 1.984046 1.554184 1.3337914 1.3388284 1.5539108
## [3,] 0.8253801 0.7903577 1.216053 0.923228 0.8231370 0.8252570 0.9230862
## [4,] 0.9895267 0.9372441 1.534009 1.137039 0.9860733 0.9893371 1.1368272
## [5,] 2.2913566 2.2421720 2.449674 2.377845 2.2888361 2.2912183 2.3777438
##         [,68]
## [1,] 2.229283
## [2,] 1.899877
## [3,] 1.142132
## [4,] 1.441377
## [5,] 2.444328
## 20133 more rows ...
## 
## $design
##   Asian White genderMale
## 1     0     1          1
## 2     0     1          1
## 3     0     1          1
## 4     0     1          1
## 5     0     1          1
## 63 more rows ...
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit, main="Final model: Mean-variance trend")

# Examining the number of DE genes
summary(decideTests(efit, p.value = 0.05))# significance: p.value=0.05
##        RaceVS
## Down     2451
## NotSig  15000
## Up       2687
tfit <- treat(vfit, lfc=log2(1.5))# get more differentially expressed  genes
dt <- decideTests(tfit, p.value=0.05)
summary(dt)
##        RaceVS
## Down       84
## NotSig  19906
## Up        148
tfit <- treat(vfit, lfc=log2(1.5))
dt <- decideTests(tfit, p.value = 0.05)
summary(dt)
##        RaceVS
## Down       84
## NotSig  19906
## Up        148
# Examining individual DE genes from top to bottom
RaceVS <- topTreat(tfit, coef=1, n=Inf)
head(RaceVS)
##                            ENSEMBL   SYMBOL TXCHROM     logFC    AveExpr
## ENSG00000207425.1  ENSG00000207425     <NA>    <NA>  4.003742 -2.6870360
## ENSG00000115414.17 ENSG00000115414      FN1    chr2  3.150491  7.5899529
## ENSG00000269918.1  ENSG00000269918     <NA>    <NA> -2.401672  0.3975525
## ENSG00000221947.6  ENSG00000221947     XKR9    chr8  2.657988 -0.7579479
## ENSG00000226278.1  ENSG00000226278     <NA>    <NA> -5.067213 -1.5758057
## ENSG00000247627.2  ENSG00000247627 MTND4P12    <NA> -3.477716  4.5334316
##                            t      P.Value    adj.P.Val
## ENSG00000207425.1   7.281302 2.083745e-10 4.196245e-06
## ENSG00000115414.17  5.961984 4.799089e-08 4.832203e-04
## ENSG00000269918.1  -5.852638 7.450607e-08 5.001344e-04
## ENSG00000221947.6   5.388856 4.681350e-07 2.129847e-03
## ENSG00000226278.1  -5.358541 5.288129e-07 2.129847e-03
## ENSG00000247627.2  -5.294286 6.768272e-07 2.271658e-03
# plotMD function
plotMD(tfit, column=1, status=dt[,1], main=colnames(tfit)[1], 
       xlim=c(-8,13))

glMDPlot(tfit, coef=1, status=dt, main=colnames(tfit)[1],
         side.main="ENSEMBL", counts=lcpm, groups=group, launch=FALSE)
# get heatmap of log-CPM values
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
RaceVS.topgenes <- RaceVS$ENSEMBL[1:100]
i <- which(v$genes$ENSEMBL %in% RaceVS.topgenes)
mycol <- colorpanel(1000,"blue","white","red")
heatmap.2(lcpm[i,], scale="row",
   labRow=v$genes$SYMBOL[i], labCol=group, 
   col=mycol, trace="none", density.info="none", 
   lhei=NULL, dendrogram="column")

Part 9 - Gene set testing with camera

load('/Users/dongzeyuan/Desktop/zeyuan-510final/human_c2_v5p2.rdata')
#load(system.file("extdata", "human_c2_v5p2.rda", package = "RNAseq123"))
idx <- ids2indices(Hs.c2,id=rownames(v$genes))
cam.RaceVS <- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.RaceVS,60)# look for appropriate hallmark gene sets
##                                                                   NGenes
## KANG_IMMORTALIZED_BY_TERT_DN                                          83
## BIOCARTA_GABA_PATHWAY                                                  8
## ST_G_ALPHA_S_PATHWAY                                                  12
## HOFMANN_CELL_LYMPHOMA_DN                                              31
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES         22
## REACTOME_GABA_A_RECEPTOR_ACTIVATION                                    9
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP                           25
## REACTOME_CA_DEPENDENT_EVENTS                                          21
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP                 57
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN      7
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP                               5
## BIOCARTA_TOB1_PATHWAY                                                 20
## ST_GA12_PATHWAY                                                       18
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP                  62
## CHENG_RESPONSE_TO_NICKEL_ACETATE                                      26
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP                                 155
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP                                 42
## BARIS_THYROID_CANCER_DN                                               48
## KEGG_CALCIUM_SIGNALING_PATHWAY                                       140
## BROWNE_HCMV_INFECTION_14HR_DN                                        219
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN                                28
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP                 22
## HOLLMANN_APOPTOSIS_VIA_CD40_UP                                       115
## SHIRAISHI_PLZF_TARGETS_UP                                              9
## FARMER_BREAST_CANCER_CLUSTER_4                                        10
## ZEILSTRA_CD44_TARGETS_DN                                               7
## DASU_IL6_SIGNALING_UP                                                 46
## BROWNE_HCMV_INFECTION_8HR_UP                                          80
## LOPES_METHYLATED_IN_COLON_CANCER_UP                                   24
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN                                 244
## BRIDEAU_IMPRINTED_GENES                                               39
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3                            2
## KEGG_DILATED_CARDIOMYOPATHY                                           64
## BIOCARTA_NOS1_PATHWAY                                                 16
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB                         11
## BIOCARTA_THELPER_PATHWAY                                              14
## OXFORD_RALB_TARGETS_UP                                                 2
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP                          49
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN                            71
## KEGG_ALZHEIMERS_DISEASE                                              127
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3                                      170
## KEGG_MELANOGENESIS                                                    77
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN                     51
## YAGI_AML_SURVIVAL                                                     87
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND            8
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS                         29
## PETRETTO_BLOOD_PRESSURE_UP                                             6
## LI_PROSTATE_CANCER_EPIGENETIC                                         25
## WANG_BARRETTS_ESOPHAGUS_DN                                            22
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG                                    13
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP                      47
## WANG_MLL_TARGETS                                                     159
## PID_IL12_STAT4_PATHWAY                                                30
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION                         31
## NIELSEN_LEIOMYOSARCOMA_UP                                             13
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP                         48
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP                                  9
## CHANG_IMMORTALIZED_BY_HPV31_UP                                        36
## VANASSE_BCL2_TARGETS_UP                                               26
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING                     141
##                                                                   Direction
## KANG_IMMORTALIZED_BY_TERT_DN                                             Up
## BIOCARTA_GABA_PATHWAY                                                    Up
## ST_G_ALPHA_S_PATHWAY                                                     Up
## HOFMANN_CELL_LYMPHOMA_DN                                                 Up
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES            Up
## REACTOME_GABA_A_RECEPTOR_ACTIVATION                                      Up
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP                              Up
## REACTOME_CA_DEPENDENT_EVENTS                                             Up
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP                    Up
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN        Up
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP                                 Up
## BIOCARTA_TOB1_PATHWAY                                                    Up
## ST_GA12_PATHWAY                                                          Up
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP                     Up
## CHENG_RESPONSE_TO_NICKEL_ACETATE                                         Up
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP                                     Up
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP                                    Up
## BARIS_THYROID_CANCER_DN                                                  Up
## KEGG_CALCIUM_SIGNALING_PATHWAY                                           Up
## BROWNE_HCMV_INFECTION_14HR_DN                                            Up
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN                                   Up
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP                    Up
## HOLLMANN_APOPTOSIS_VIA_CD40_UP                                           Up
## SHIRAISHI_PLZF_TARGETS_UP                                                Up
## FARMER_BREAST_CANCER_CLUSTER_4                                           Up
## ZEILSTRA_CD44_TARGETS_DN                                                 Up
## DASU_IL6_SIGNALING_UP                                                    Up
## BROWNE_HCMV_INFECTION_8HR_UP                                             Up
## LOPES_METHYLATED_IN_COLON_CANCER_UP                                      Up
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN                                     Up
## BRIDEAU_IMPRINTED_GENES                                                  Up
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3                              Up
## KEGG_DILATED_CARDIOMYOPATHY                                              Up
## BIOCARTA_NOS1_PATHWAY                                                    Up
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB                            Up
## BIOCARTA_THELPER_PATHWAY                                                 Up
## OXFORD_RALB_TARGETS_UP                                                 Down
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP                             Up
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN                               Up
## KEGG_ALZHEIMERS_DISEASE                                                  Up
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3                                          Up
## KEGG_MELANOGENESIS                                                       Up
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN                        Up
## YAGI_AML_SURVIVAL                                                        Up
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND            Down
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS                            Up
## PETRETTO_BLOOD_PRESSURE_UP                                               Up
## LI_PROSTATE_CANCER_EPIGENETIC                                            Up
## WANG_BARRETTS_ESOPHAGUS_DN                                               Up
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG                                       Up
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP                         Up
## WANG_MLL_TARGETS                                                         Up
## PID_IL12_STAT4_PATHWAY                                                   Up
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION                            Up
## NIELSEN_LEIOMYOSARCOMA_UP                                                Up
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP                            Up
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP                                    Up
## CHANG_IMMORTALIZED_BY_HPV31_UP                                           Up
## VANASSE_BCL2_TARGETS_UP                                                  Up
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING                         Up
##                                                                         PValue
## KANG_IMMORTALIZED_BY_TERT_DN                                      0.0005577753
## BIOCARTA_GABA_PATHWAY                                             0.0009533798
## ST_G_ALPHA_S_PATHWAY                                              0.0012156076
## HOFMANN_CELL_LYMPHOMA_DN                                          0.0016398925
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES     0.0019591011
## REACTOME_GABA_A_RECEPTOR_ACTIVATION                               0.0020135950
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP                       0.0020701011
## REACTOME_CA_DEPENDENT_EVENTS                                      0.0023701152
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP             0.0026184082
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN 0.0027526153
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP                          0.0027545525
## BIOCARTA_TOB1_PATHWAY                                             0.0028799548
## ST_GA12_PATHWAY                                                   0.0029281430
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP              0.0031859966
## CHENG_RESPONSE_TO_NICKEL_ACETATE                                  0.0037154177
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP                              0.0040024234
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP                             0.0046875855
## BARIS_THYROID_CANCER_DN                                           0.0047916937
## KEGG_CALCIUM_SIGNALING_PATHWAY                                    0.0049527284
## BROWNE_HCMV_INFECTION_14HR_DN                                     0.0051969690
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN                            0.0059129174
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP             0.0060851635
## HOLLMANN_APOPTOSIS_VIA_CD40_UP                                    0.0061785150
## SHIRAISHI_PLZF_TARGETS_UP                                         0.0062528755
## FARMER_BREAST_CANCER_CLUSTER_4                                    0.0062768235
## ZEILSTRA_CD44_TARGETS_DN                                          0.0063413522
## DASU_IL6_SIGNALING_UP                                             0.0066886588
## BROWNE_HCMV_INFECTION_8HR_UP                                      0.0068729429
## LOPES_METHYLATED_IN_COLON_CANCER_UP                               0.0069392738
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN                              0.0072064989
## BRIDEAU_IMPRINTED_GENES                                           0.0072453852
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3                       0.0073120033
## KEGG_DILATED_CARDIOMYOPATHY                                       0.0073168212
## BIOCARTA_NOS1_PATHWAY                                             0.0076693385
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB                     0.0076755853
## BIOCARTA_THELPER_PATHWAY                                          0.0077251403
## OXFORD_RALB_TARGETS_UP                                            0.0079015909
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP                      0.0081876622
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN                        0.0082522653
## KEGG_ALZHEIMERS_DISEASE                                           0.0085008789
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3                                   0.0085646776
## KEGG_MELANOGENESIS                                                0.0085690475
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN                 0.0086201993
## YAGI_AML_SURVIVAL                                                 0.0090649316
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND       0.0090896750
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS                     0.0092489879
## PETRETTO_BLOOD_PRESSURE_UP                                        0.0095922098
## LI_PROSTATE_CANCER_EPIGENETIC                                     0.0096915556
## WANG_BARRETTS_ESOPHAGUS_DN                                        0.0102885624
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG                                0.0104367740
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP                  0.0107650189
## WANG_MLL_TARGETS                                                  0.0113261064
## PID_IL12_STAT4_PATHWAY                                            0.0114246952
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION                     0.0116866384
## NIELSEN_LEIOMYOSARCOMA_UP                                         0.0117180907
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP                     0.0117801340
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP                             0.0120054946
## CHANG_IMMORTALIZED_BY_HPV31_UP                                    0.0120914323
## VANASSE_BCL2_TARGETS_UP                                           0.0122333161
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING                  0.0132544878
##                                                                       FDR
## KANG_IMMORTALIZED_BY_TERT_DN                                      0.73499
## BIOCARTA_GABA_PATHWAY                                             0.73499
## ST_G_ALPHA_S_PATHWAY                                              0.73499
## HOFMANN_CELL_LYMPHOMA_DN                                          0.73499
## KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES     0.73499
## REACTOME_GABA_A_RECEPTOR_ACTIVATION                               0.73499
## HOFFMANN_IMMATURE_TO_MATURE_B_LYMPHOCYTE_UP                       0.73499
## REACTOME_CA_DEPENDENT_EVENTS                                      0.73499
## GARGALOVIC_RESPONSE_TO_OXIDIZED_PHOSPHOLIPIDS_BLUE_UP             0.73499
## TONKS_TARGETS_OF_RUNX1_RUNX1T1_FUSION_SUSTAINED_IN_GRANULOCYTE_DN 0.73499
## REACTOME_DESTABILIZATION_OF_MRNA_BY_KSRP                          0.73499
## BIOCARTA_TOB1_PATHWAY                                             0.73499
## ST_GA12_PATHWAY                                                   0.73499
## RICKMAN_TUMOR_DIFFERENTIATED_MODERATELY_VS_POORLY_UP              0.73499
## CHENG_RESPONSE_TO_NICKEL_ACETATE                                  0.73499
## CONCANNON_APOPTOSIS_BY_EPOXOMICIN_UP                              0.73499
## FIGUEROA_AML_METHYLATION_CLUSTER_1_UP                             0.73499
## BARIS_THYROID_CANCER_DN                                           0.73499
## KEGG_CALCIUM_SIGNALING_PATHWAY                                    0.73499
## BROWNE_HCMV_INFECTION_14HR_DN                                     0.73499
## EBAUER_TARGETS_OF_PAX3_FOXO1_FUSION_DN                            0.73499
## LIANG_HEMATOPOIESIS_STEM_CELL_NUMBER_SMALL_VS_HUGE_UP             0.73499
## HOLLMANN_APOPTOSIS_VIA_CD40_UP                                    0.73499
## SHIRAISHI_PLZF_TARGETS_UP                                         0.73499
## FARMER_BREAST_CANCER_CLUSTER_4                                    0.73499
## ZEILSTRA_CD44_TARGETS_DN                                          0.73499
## DASU_IL6_SIGNALING_UP                                             0.73499
## BROWNE_HCMV_INFECTION_8HR_UP                                      0.73499
## LOPES_METHYLATED_IN_COLON_CANCER_UP                               0.73499
## PURBEY_TARGETS_OF_CTBP1_NOT_SATB1_DN                              0.73499
## BRIDEAU_IMPRINTED_GENES                                           0.73499
## MIKKELSEN_IPS_LCP_WITH_H3K4ME3_AND_H3K27ME3                       0.73499
## KEGG_DILATED_CARDIOMYOPATHY                                       0.73499
## BIOCARTA_NOS1_PATHWAY                                             0.73499
## REACTOME_PKA_MEDIATED_PHOSPHORYLATION_OF_CREB                     0.73499
## BIOCARTA_THELPER_PATHWAY                                          0.73499
## OXFORD_RALB_TARGETS_UP                                            0.73499
## SETLUR_PROSTATE_CANCER_TMPRSS2_ERG_FUSION_UP                      0.73499
## GRAHAM_CML_DIVIDING_VS_NORMAL_QUIESCENT_DN                        0.73499
## KEGG_ALZHEIMERS_DISEASE                                           0.73499
## MIKKELSEN_NPC_HCP_WITH_H3K27ME3                                   0.73499
## KEGG_MELANOGENESIS                                                0.73499
## POMEROY_MEDULLOBLASTOMA_DESMOPLASIC_VS_CLASSIC_DN                 0.73499
## YAGI_AML_SURVIVAL                                                 0.73499
## REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND       0.73499
## REACTOME_POST_NMDA_RECEPTOR_ACTIVATION_EVENTS                     0.73499
## PETRETTO_BLOOD_PRESSURE_UP                                        0.73499
## LI_PROSTATE_CANCER_EPIGENETIC                                     0.73499
## WANG_BARRETTS_ESOPHAGUS_DN                                        0.73499
## LI_ADIPOGENESIS_BY_ACTIVATED_PPARG                                0.73499
## FLECHNER_PBL_KIDNEY_TRANSPLANT_REJECTED_VS_OK_UP                  0.73499
## WANG_MLL_TARGETS                                                  0.73499
## PID_IL12_STAT4_PATHWAY                                            0.73499
## KEGG_VASOPRESSIN_REGULATED_WATER_REABSORPTION                     0.73499
## NIELSEN_LEIOMYOSARCOMA_UP                                         0.73499
## CHARAFE_BREAST_CANCER_BASAL_VS_MESENCHYMAL_UP                     0.73499
## SILIGAN_TARGETS_OF_EWS_FLI1_FUSION_UP                             0.73499
## CHANG_IMMORTALIZED_BY_HPV31_UP                                    0.73499
## VANASSE_BCL2_TARGETS_UP                                           0.73499
## BONOME_OVARIAN_CANCER_SURVIVAL_OPTIMAL_DEBULKING                  0.73499
# use camera function to perform a competitive test to assess whether the genes in a given set are highly ranked in terms of differential expression relative to genes that are not in the set.
barcodeplot(efit$t[,1], index=idx$KANG_IMMORTALIZED_BY_TERT_DN, 
            index2=idx$REACTOME_REMOVAL_OF_THE_FLAP_INTERMEDIATE_FROM_THE_C_STRAND, main="RaceVS")