Author: Charleen D. Adams
library(org.Hs.eg.db)
I got the gene symbols and full gene names and double checked that the ENTREZID
column matched exactly to our results
rownames.
table(ann$ENTREZID==rownames(results))
##
## TRUE
## 50
The volcano plot displaying a measure of significance on the y-axis and fold-change on the x-axis.
signif <- -log10(results.annotated$FDR)
plot(results.annotated$logFC,signif,pch=16)
points(results.annotated[detags,"logFC"],-log10(results.annotated[detags,"FDR"]),pch=16,col="red")
Signficantly up and down DE genes are highlighted in red and blue respectively.
library(org.Hs.eg.db)
library(statmod)
design <- model.matrix(~0+groupER)
colnames(design) <- levels(groupER)
y <- DGEList(counts.keep,group=groupER, genes=counts.keep)
options(digits=3)
# Add the gene names to the DGEList object
y$genes$Symbol <- rownames(counts.keep)
y <- estimateDisp(y,design, robust=TRUE)
## estimation of QL dispersions
fit <- glmQLFit(y, design, robust=TRUE)
head(fit$coefficients)
## Missing NEG POS
## 57180 -6.60 -5.43 -6.22
## 54443 -5.37 -3.75 -4.78
## 573 -4.32 -4.40 -4.32
## 596 -3.01 -3.80 -3.09
## 332 -7.14 -4.53 -5.80
## 644 -5.53 -5.06 -4.90
plotQLDisp(fit)
#summary(fit$df.prior)
## Differential expression analysis
B.LvsP <- makeContrasts(NEG-POS, levels=design)
res <- glmQLFTest(fit, contrast=B.LvsP)
#topTags(res)
is.de <- decideTestsDGE(res)
plotMD(res, status=is.de, values=c(1,-1), col=c("red", "blue"),
legend="topright")
MD plot showing the log-fold change and average abundance of each gene. Genes with fold-changes significantly greater than 1.5 are highlighted
tr <- glmTreat(fit, contrast=B.LvsP, lfc=log2(1.5))
#topTags(tr)
is.de <- decideTestsDGE(tr)
is.de_tr=summary(is.de)
is.de_tr
## 1*NEG -1*POS
## Down 7
## NotSig 16
## Up 27
plotMD(tr, status=is.de, values=c(1,-1), col=c("red", "blue"),
legend="topright")
text(results.annotated$logCPM[1:10],results.annotated$logFC[1:10],labels = results.annotated$SYMBOL[1:10],col="blue")
library(gplots)
logCPM <- cpm(y, prior.count=2, log=TRUE)
rownames(logCPM) <- y$genes$Symbol
colnames(logCPM) <- paste(y$samples$group, 1:2, sep="-")
o <- order(tr$table$PValue)
logCPM <- logCPM[o[1:30],]
logCPM <- t(scale(t(logCPM)))
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM, col=col.pan, Rowv=TRUE, scale="none",
trace="none", dendrogram="both", cexRow=1, cexCol=1.4,
density.info="none",
margin=c(10,9), lhei=c(2,10), lwid=c(2,6))
go <- goana(tr, species="Hs")
topGO(go, n=10)
## Term Ont N Up Down
## GO:0048754 branching morphogenesis of an epithelial tube BP 5 0 4
## GO:0072503 cellular divalent inorganic cation homeostasis BP 3 0 3
## GO:0007565 female pregnancy BP 3 0 3
## GO:0044706 multi-multicellular organism process BP 3 0 3
## GO:0010720 positive regulation of cell development BP 3 0 3
## GO:0050769 positive regulation of neurogenesis BP 3 0 3
## GO:0045666 positive regulation of neuron differentiation BP 3 0 3
## GO:0060562 epithelial tube morphogenesis BP 6 1 4
## GO:0022612 gland morphogenesis BP 6 1 4
## GO:0061138 morphogenesis of a branching epithelium BP 6 1 4
## P.Up P.Down
## GO:0048754 1.000 0.00072
## GO:0072503 1.000 0.00179
## GO:0007565 1.000 0.00179
## GO:0044706 1.000 0.00179
## GO:0010720 1.000 0.00179
## GO:0050769 1.000 0.00179
## GO:0045666 1.000 0.00179
## GO:0060562 0.994 0.00205
## GO:0022612 0.994 0.00205
## GO:0061138 0.994 0.00205
keg <- kegga(tr, species="Hs")
topKEGG(keg, n=5, truncate=34)
## Pathway N Up Down P.Up P.Down
## path:hsa04915 Estrogen signaling pathway 6 0 3 1.000 0.0292
## path:hsa05224 Breast cancer 5 1 2 0.984 0.1380
## path:hsa01522 Endocrine resistance 5 1 2 0.984 0.1380
## path:hsa04261 Adrenergic signaling in cardiom... 1 0 1 1.000 0.1400
## path:hsa04933 AGE-RAGE signaling pathway in d... 1 0 1 1.000 0.1400
topKEGG=topKEGG(keg, n=5, truncate=34)
barcodeplot(res$t[,1], index=Hs.H.ind[["HALLMARK_ESTROGEN_RESPONSE_EARLY"]],
main="logFC: HALLMARK_ESTROGEN_RESPONSE_EARLY")