setwd("/Users/dongzeyuan/Desktop/TRGN_lab/")
library(limma)
library(edgeR)
counts <- read.table("ProstCa_030921_2.txt")
file<-"ProstCa_030921_2.txt"
dim(counts)
## [1] 58779 6
colnames(counts)<-c("C1_1","C1_2","C1_3","T1_1","T1_2","T1_3")#NEW
x<-DGEList(counts) #
group<-as.factor(c(rep("C1",3),rep("T1",3))) #NEW
x$samples$group<-group
library(limma)
library(Glimma)
library(edgeR)
library(Homo.sapiens)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: OrganismDbi
## Loading required package: GenomicFeatures
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GO.db
##
## Loading required package: org.Hs.eg.db
##
## Loading required package: TxDb.Hsapiens.UCSC.hg19.knownGene
#presentation
library(stringr)
setwd("/Users/dongzeyuan/Desktop/TRGN_lab/")
#install.packages("Homo.sapiens")
library(Homo.sapiens)
geneid <- rownames(x)
gene <- str_match(geneid, "(\\w*).*")
geneid <- gene[,2]
genes <- select(Homo.sapiens, keys=geneid,
columns=c("SYMBOL","TXCHROM"),
keytype="ENSEMBL")
## 'select()' returned many:many mapping between keys and columns
dim(genes)
## [1] 60213 3
genes <- genes[!duplicated(genes$ENSEMBL),]
x$genes <- genes
samplenames <- substring(colnames(x), 6, nchar(colnames(x)))
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)
L <- mean(x$samples$lib.size) * 1e-6
M <- median(x$samples$lib.size) * 1e-6
c(L, M)
## [1] 43.72306 40.43761
summary(lcpm)
## C1_1 C1_2 C1_3 T1_1
## Min. :-4.4503 Min. :-4.450 Min. :-4.450 Min. :-4.4503
## 1st Qu.:-4.4503 1st Qu.:-4.450 1st Qu.:-4.450 1st Qu.:-4.4503
## Median :-1.0828 Median :-2.849 Median :-2.514 Median :-1.9200
## Mean :-0.4579 Mean :-1.130 Mean :-0.967 Mean :-0.8838
## 3rd Qu.: 2.2291 3rd Qu.: 2.017 3rd Qu.: 2.257 3rd Qu.: 2.1005
## Max. :15.7358 Max. :15.899 Max. :16.464 Max. :16.3088
## T1_2 T1_3
## Min. :-4.4503 Min. :-4.4503
## 1st Qu.:-4.4503 1st Qu.:-4.4503
## Median : 0.6721 Median :-2.3416
## Mean : 0.2754 Mean :-0.9174
## 3rd Qu.: 3.1734 3rd Qu.: 2.0058
## Max. :15.5588 Max. :16.3109
keep.exprs <- filterByExpr(x)
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
dim(x)
## [1] 30027 6
lcpm.cutoff <- log2(5/M + 2/L)
library(RColorBrewer)
nsamples <- ncol(x)
col <- brewer.pal(nsamples, "Paired")
par(mfrow=c(1,2))
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="A. Raw data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")
lcpm <- cpm(x, log=TRUE)
plot(density(lcpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="B. Filtered data", xlab="Log-cpm")
abline(v=lcpm.cutoff, lty=3)
for (i in 2:nsamples){
den <- density(lcpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", samplenames, text.col=col, bty="n")

x <- calcNormFactors(x, method = "TMM")
x$samples$norm.factors
## [1] 1.0321479 0.9002265 1.0233137 0.9688055 1.3125091 0.8271010
x2 <- x
x2$samples$norm.factors <- 1
x2$counts[,1] <- ceiling(x2$counts[,1]*0.05)
x2$counts[,2] <- x2$counts[,2]*5
par(mfrow=c(1,2))
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="A. Example: Unnormalised data",ylab="Log-cpm")
x2 <- calcNormFactors(x2)
x2$samples$norm.factors
## [1] 0.0683339 5.7762475 1.2625805 1.2339692 1.5945590 1.0197978
lcpm <- cpm(x2, log=TRUE)
boxplot(lcpm, las=2, col=col, main="")
title(main="B. Example: Normalised data",ylab="Log-cpm")


design <- model.matrix(~0+group)
colnames(design) <- gsub("group", "", colnames(design))
design
## C1 T1
## 1 1 0
## 2 1 0
## 3 1 0
## 4 0 1
## 5 0 1
## 6 0 1
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
contr.matrix <- makeContrasts(
C1vsT1= C1 - T1,
levels = colnames(design))
contr.matrix
## Contrasts
## Levels C1vsT1
## C1 1
## T1 -1
par(mfrow=c(1,2))
v <- voom(x, design, plot=TRUE)
v
## An object of class "EList"
## $genes
## ENSEMBL SYMBOL TXCHROM
## 1 ENSG00000000003 TSPAN6 chrX
## 2 ENSG00000000005 TNMD chrX
## 3 ENSG00000000419 DPM1 chr20
## 4 ENSG00000000457 SCYL3 chr1
## 5 ENSG00000000460 C1orf112 chr1
## 30022 more rows ...
##
## $targets
## group lib.size norm.factors
## C1_1 C1 43410027 1.0321479
## C1_2 C1 57984280 0.9002265
## C1_3 C1 39514220 1.0233137
## T1_1 T1 53111013 0.9688055
## T1_2 T1 37219342 1.3125091
## T1_3 T1 27207086 0.8271010
##
## $E
## C1_1 C1_2 C1_3 T1_1 T1_2 T1_3
## ENSG00000000003.14 4.500357 4.850201 4.849885 4.7402280 4.529373 4.933662
## ENSG00000000005.5 -1.013692 -1.024700 -1.446319 -0.6224147 1.182899 -1.517983
## ENSG00000000419.12 3.041843 3.653174 3.727746 3.6386582 1.864168 4.282576
## ENSG00000000457.13 4.567772 4.645739 5.375620 4.8703674 4.077788 5.097501
## ENSG00000000460.16 3.021523 2.675740 3.428715 2.7016027 2.818193 2.944896
## 30022 more rows ...
##
## $weights
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 4.3771247 4.6329833 4.2740829 4.5676182 4.2003682 3.7596742
## [2,] 0.2440368 0.3078621 0.2274268 0.4766986 0.3471982 0.2671885
## [3,] 3.0855775 3.5718464 2.9251478 3.1786667 2.5825194 2.0985551
## [4,] 4.4684432 4.6956192 4.3726970 4.5352295 4.1557993 3.7030934
## [5,] 2.5848926 3.0686155 2.4332656 2.6636523 2.1106906 1.6864280
## 30022 more rows ...
##
## $design
## C1 T1
## 1 1 0
## 2 1 0
## 3 1 0
## 4 0 1
## 5 0 1
## 6 0 1
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"

vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
plotSA(efit)

summary(decideTests(efit,p.value = 0.05))
## C1vsT1
## Down 0
## NotSig 30027
## Up 0
tfit <- treat(vfit, lfc=log2(1.8))
dt <- decideTests(tfit, p.value = 0.05)
summary(dt)
## C1vsT1
## Down 0
## NotSig 30027
## Up 0
plotMD(tfit, column=1, status=dt[,1], main=colnames(tfit)[1],
xlim=c(-8,13))

C1vsT1 <- topTreat(tfit, coef=1, n=Inf)
head(C1vsT1)
## ENSEMBL SYMBOL TXCHROM logFC AveExpr
## ENSG00000273777.4 ENSG00000273855 <NA> <NA> -2.900723 2.845955
## ENSG00000272732.1 ENSG00000272804 KRTAP10-7 chr21 -2.625514 3.242335
## ENSG00000243365.3 ENSG00000243466 IGKV1-5 <NA> -4.150990 1.424784
## ENSG00000210140.1 ENSG00000210839 RNU6ATAC5P <NA> -2.683541 3.481430
## ENSG00000224271.5 ENSG00000224302 <NA> <NA> -2.524138 2.353195
## ENSG00000134612.12 ENSG00000134640 MTNR1B chr11 -2.796207 2.215427
## t P.Value adj.P.Val
## ENSG00000273777.4 -3.564315 0.001769231 0.9999805
## ENSG00000272732.1 -3.259206 0.003163032 0.9999805
## ENSG00000243365.3 -3.051043 0.004941342 0.9999805
## ENSG00000210140.1 -2.996491 0.005244118 0.9999805
## ENSG00000224271.5 -2.919336 0.006067087 0.9999805
## ENSG00000134612.12 -2.911848 0.006183691 0.9999805
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
C1vsT1.topgenes <- C1vsT1$ENSEMBL[1:100]
i <- which(v$genes$ENSEMBL %in% C1vsT1.topgenes)
mycol <- colorpanel(1000,"blue","white","red")
heatmap.2(lcpm[i,], scale="row",
labRow=v$genes$SYMBOL[i], labCol=colnames(x),
col=mycol, trace="none", density.info="none",
lhei=NULL, dendrogram="column")

load('/Users/dongzeyuan/Desktop/TRGN_lab/human_c2_v5p2.rdata')
idx <- ids2indices(Hs.c2,id=rownames(v$genes))
cam.RaceVS <- camera(v,idx,design,contrast=contr.matrix[,1])
head(cam.RaceVS,20)
## NGenes Direction PValue
## KEGG_PANCREATIC_CANCER 65 Up 0.0005183241
## BARRIER_CANCER_RELAPSE_NORMAL_SAMPLE_UP 26 Up 0.0006908885
## MAGRANGEAS_MULTIPLE_MYELOMA_IGG_VS_IGA_UP 18 Up 0.0011836084
## REACTOME_SIGNALING_BY_FGFR1_MUTANTS 25 Up 0.0017610200
## MIKHAYLOVA_OXIDATIVE_STRESS_RESPONSE_VIA_VHL_UP 6 Up 0.0020272148
## KANG_GIST_WITH_PDGFRA_DN 5 Down 0.0020852547
## PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP 70 Up 0.0021603387
## MIKKELSEN_MEF_ICP_WITH_H3K4ME3_AND_H3K27ME3 22 Up 0.0023700218
## WU_HBX_TARGETS_3_UP 18 Up 0.0026958433
## REACTOME_RNA_POL_I_TRANSCRIPTION_INITIATION 18 Up 0.0034709207
## WOO_LIVER_CANCER_RECURRENCE_DN 68 Up 0.0039063178
## WENDT_COHESIN_TARGETS_UP 21 Up 0.0044888897
## MIKKELSEN_IPS_ICP_WITH_H3K4ME3_AND_H327ME3 69 Up 0.0045691588
## REACTOME_SIGNALING_BY_FGFR1_FUSION_MUTANTS 15 Up 0.0049215854
## REACTOME_G_ALPHA_Z_SIGNALLING_EVENTS 36 Up 0.0050400801
## BHATI_G2M_ARREST_BY_2METHOXYESTRADIOL_UP 70 Up 0.0052058851
## REACTOME_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS 31 Up 0.0055022029
## MCMURRAY_TP53_HRAS_COOPERATION_RESPONSE_DN 43 Up 0.0058120290
## OXFORD_RALB_TARGETS_DN 8 Up 0.0064671374
## FONTAINE_FOLLICULAR_THYROID_ADENOMA_UP 47 Up 0.0065464775
## FDR
## KEGG_PANCREATIC_CANCER 0.8837534
## BARRIER_CANCER_RELAPSE_NORMAL_SAMPLE_UP 0.8837534
## MAGRANGEAS_MULTIPLE_MYELOMA_IGG_VS_IGA_UP 0.8837534
## REACTOME_SIGNALING_BY_FGFR1_MUTANTS 0.8837534
## MIKHAYLOVA_OXIDATIVE_STRESS_RESPONSE_VIA_VHL_UP 0.8837534
## KANG_GIST_WITH_PDGFRA_DN 0.8837534
## PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP 0.8837534
## MIKKELSEN_MEF_ICP_WITH_H3K4ME3_AND_H3K27ME3 0.8837534
## WU_HBX_TARGETS_3_UP 0.8837534
## REACTOME_RNA_POL_I_TRANSCRIPTION_INITIATION 0.8837534
## WOO_LIVER_CANCER_RECURRENCE_DN 0.8837534
## WENDT_COHESIN_TARGETS_UP 0.8837534
## MIKKELSEN_IPS_ICP_WITH_H3K4ME3_AND_H327ME3 0.8837534
## REACTOME_SIGNALING_BY_FGFR1_FUSION_MUTANTS 0.8837534
## REACTOME_G_ALPHA_Z_SIGNALLING_EVENTS 0.8837534
## BHATI_G2M_ARREST_BY_2METHOXYESTRADIOL_UP 0.8837534
## REACTOME_APOPTOTIC_CLEAVAGE_OF_CELLULAR_PROTEINS 0.8837534
## MCMURRAY_TP53_HRAS_COOPERATION_RESPONSE_DN 0.8837534
## OXFORD_RALB_TARGETS_DN 0.8837534
## FONTAINE_FOLLICULAR_THYROID_ADENOMA_UP 0.8837534
barcodeplot(efit$t[,1], index=idx$PUIFFE_INVASION_INHIBITED_BY_ASCITES_UP,
index2=idx$WOO_LIVER_CANCER_RECURRENCE_DN, main="C1vsT1")
