PAM50 differential expression (ER-/ER+) for 125 breast cancers

Author: Charleen D. Adams

library(org.Hs.eg.db)

I got the gene symbols and full gene names and double checked that the ENTREZID column matched exactly to our results rownames.

table(ann$ENTREZID==rownames(results))

## 
## TRUE 
##   50

The volcano plot displaying a measure of significance on the y-axis and fold-change on the x-axis.

signif <- -log10(results.annotated$FDR)
plot(results.annotated$logFC,signif,pch=16)
points(results.annotated[detags,"logFC"],-log10(results.annotated[detags,"FDR"]),pch=16,col="red")

MD plots

Signficantly up and down DE genes are highlighted in red and blue respectively.

library(org.Hs.eg.db)
library(statmod)
design <- model.matrix(~0+groupER)
colnames(design) <- levels(groupER)
y <- DGEList(counts.keep,group=groupER, genes=counts.keep)
options(digits=3)

# Add the gene names to the DGEList object
y$genes$Symbol <- rownames(counts.keep) 
y <- estimateDisp(y,design, robust=TRUE)

## estimation of QL dispersions
fit <- glmQLFit(y, design, robust=TRUE)
head(fit$coefficients)

##       Missing   NEG   POS
## 57180   -6.60 -5.43 -6.22
## 54443   -5.37 -3.75 -4.78
## 573     -4.32 -4.40 -4.32
## 596     -3.01 -3.80 -3.09
## 332     -7.14 -4.53 -5.80
## 644     -5.53 -5.06 -4.90

plotQLDisp(fit)

#summary(fit$df.prior)

## Differential expression analysis
B.LvsP <- makeContrasts(NEG-POS, levels=design)
res <- glmQLFTest(fit, contrast=B.LvsP)
#topTags(res)
is.de <- decideTestsDGE(res)
plotMD(res, status=is.de, values=c(1,-1), col=c("red", "blue"),
       legend="topright")

MD plot showing the log-fold change and average abundance of each gene. Genes with fold-changes significantly greater than 1.5 are highlighted

tr <- glmTreat(fit, contrast=B.LvsP, lfc=log2(1.5))
#topTags(tr)

is.de <- decideTestsDGE(tr)
is.de_tr=summary(is.de)
is.de_tr

##        1*NEG -1*POS
## Down              7
## NotSig           16
## Up               27

plotMD(tr, status=is.de, values=c(1,-1), col=c("red", "blue"),
       legend="topright")
text(results.annotated$logCPM[1:10],results.annotated$logFC[1:10],labels = results.annotated$SYMBOL[1:10],col="blue")

Heat map clustering

library(gplots)

logCPM <- cpm(y, prior.count=2, log=TRUE)
rownames(logCPM) <- y$genes$Symbol
colnames(logCPM) <- paste(y$samples$group, 1:2, sep="-")
o <- order(tr$table$PValue)
logCPM <- logCPM[o[1:30],]
logCPM <- t(scale(t(logCPM)))

col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM, col=col.pan, Rowv=TRUE, scale="none",
          trace="none", dendrogram="both", cexRow=1, cexCol=1.4, 
          density.info="none", 
          margin=c(10,9), lhei=c(2,10), lwid=c(2,6))

Gene ontology analysis

go <-  goana(tr, species="Hs")
topGO(go, n=10)

##                                                      Term Ont N Up Down
## GO:0048754  branching morphogenesis of an epithelial tube  BP 5  0    4
## GO:0072503 cellular divalent inorganic cation homeostasis  BP 3  0    3
## GO:0007565                               female pregnancy  BP 3  0    3
## GO:0044706           multi-multicellular organism process  BP 3  0    3
## GO:0010720        positive regulation of cell development  BP 3  0    3
## GO:0050769            positive regulation of neurogenesis  BP 3  0    3
## GO:0045666  positive regulation of neuron differentiation  BP 3  0    3
## GO:0060562                  epithelial tube morphogenesis  BP 6  1    4
## GO:0022612                            gland morphogenesis  BP 6  1    4
## GO:0061138        morphogenesis of a branching epithelium  BP 6  1    4
##             P.Up  P.Down
## GO:0048754 1.000 0.00072
## GO:0072503 1.000 0.00179
## GO:0007565 1.000 0.00179
## GO:0044706 1.000 0.00179
## GO:0010720 1.000 0.00179
## GO:0050769 1.000 0.00179
## GO:0045666 1.000 0.00179
## GO:0060562 0.994 0.00205
## GO:0022612 0.994 0.00205
## GO:0061138 0.994 0.00205

KEGG pathway analysis

keg <-  kegga(tr, species="Hs")
topKEGG(keg, n=5, truncate=34)

##                                          Pathway N Up Down  P.Up P.Down
## path:hsa04915         Estrogen signaling pathway 6  0    3 1.000 0.0292
## path:hsa05224                      Breast cancer 5  1    2 0.984 0.1380
## path:hsa01522               Endocrine resistance 5  1    2 0.984 0.1380
## path:hsa04261 Adrenergic signaling in cardiom... 1  0    1 1.000 0.1400
## path:hsa04933 AGE-RAGE signaling pathway in d... 1  0    1 1.000 0.1400

topKEGG=topKEGG(keg, n=5, truncate=34)

Barcode and enrichment plot to visualize gene sets

barcodeplot(res$t[,1], index=Hs.H.ind[["HALLMARK_ESTROGEN_RESPONSE_EARLY"]], 
            main="logFC: HALLMARK_ESTROGEN_RESPONSE_EARLY")