Although the clusters are unsupervised, the color bar at top displays ER status: purple is “Negative”, orange is “Positive”, blue and light blue are for indeterminate/missing)
heatmap.2(highly_variable_lcpm,
col=rev(morecols(50)),
trace="none",
main="Top 500 most variable genes across samples",
ColSideColors=col.ER,scale="row",
distfun=function(x) dist(x, method="euclidean"),
hclustfun=function(x) hclust(x, method="ward.D2"))
heatmap.2(logCPM, col=col.pan, Rowv=TRUE, scale="none",
trace="none", dendrogram="both", cexRow=1, cexCol=1.4,
density.info="none",
margin=c(10,9), lhei=c(2,10), lwid=c(2,6),
distfun=function(x) dist(x, method="euclidean"),
hclustfun=function(x) hclust(x, method="ward.D2"))
The genes for the GO and KEGG analyses are chosen based on FDR<0.05 and logfold change >1.5 (only displaying top 15 to save space)
## Gene ontology analysis
go <- goana(tr, species="Hs")
topGO(go, n=15)
## Term Ont N Up Down
## GO:0031424 keratinization BP 134 64 14
## GO:0043588 skin development BP 317 104 26
## GO:0070268 cornification BP 105 53 11
## GO:0008544 epidermis development BP 363 110 35
## GO:0032501 multicellular organismal process BP 6502 985 613
## GO:0030216 keratinocyte differentiation BP 206 74 19
## GO:0042995 cell projection CC 1967 215 266
## GO:0120025 plasma membrane bounded cell projection CC 1895 208 258
## GO:0048856 anatomical structure development BP 5386 822 493
## GO:0009888 tissue development BP 1781 328 184
## GO:0030855 epithelial cell differentiation BP 635 149 72
## GO:0009913 epidermal cell differentiation BP 256 79 26
## GO:0032502 developmental process BP 5778 864 522
## GO:0060429 epithelium development BP 1130 225 118
## GO:0003341 cilium movement BP 63 1 28
## P.Up P.Down
## GO:0031424 2.57e-24 2.20e-01
## GO:0043588 1.86e-22 5.53e-01
## GO:0070268 8.63e-22 2.52e-01
## GO:0008544 1.27e-20 1.96e-01
## GO:0032501 8.65e-20 2.43e-05
## GO:0030216 7.33e-19 3.48e-01
## GO:0042995 9.64e-01 6.30e-17
## GO:0120025 9.55e-01 8.85e-17
## GO:0048856 2.43e-16 3.72e-03
## GO:0009888 5.03e-16 8.05e-04
## GO:0030855 5.56e-16 3.98e-03
## GO:0009913 1.15e-15 1.64e-01
## GO:0032502 6.32e-15 7.58e-03
## GO:0060429 9.81e-15 4.95e-03
## GO:0003341 1.00e+00 1.49e-14
topGO=topGO(go, n=15)
The genes for the GO and KEGG analyses are chosen based on FDR<0.05 and logfold change >1.5 (only displaying top 15 to save space)
## KEGG pathway analysis
keg <- kegga(tr, species="Hs")
topKEGG(keg, n=15, truncate=34)
## Pathway N Up Down P.Up
## path:hsa00140 Steroid hormone biosynthesis 53 22 7 6.96e-08
## path:hsa04060 Cytokine-cytokine receptor inte... 259 58 12 2.42e-06
## path:hsa04110 Cell cycle 124 33 5 8.68e-06
## path:hsa04080 Neuroactive ligand-receptor int... 241 44 40 3.64e-03
## path:hsa04724 Glutamatergic synapse 110 13 23 5.86e-01
## path:hsa00040 Pentose and glucuronate interco... 32 13 4 4.47e-05
## path:hsa00983 Drug metabolism - other enzymes 76 22 8 6.77e-05
## path:hsa00053 Ascorbate and aldarate metaboli... 25 11 4 7.23e-05
## path:hsa05204 Chemical carcinogenesis 77 22 12 8.40e-05
## path:hsa00982 Drug metabolism - cytochrome P4... 67 19 9 2.78e-04
## path:hsa00830 Retinol metabolism 62 18 9 2.90e-04
## path:hsa04726 Serotonergic synapse 104 14 20 3.84e-01
## path:hsa05323 Rheumatoid arthritis 85 22 3 3.99e-04
## path:hsa04970 Salivary secretion 86 22 13 4.76e-04
## path:hsa04974 Protein digestion and absorptio... 83 21 11 7.41e-04
## P.Down
## path:hsa00140 1.47e-01
## path:hsa04060 9.93e-01
## path:hsa04110 9.80e-01
## path:hsa04080 1.84e-05
## path:hsa04724 2.94e-05
## path:hsa00040 2.73e-01
## path:hsa00983 2.95e-01
## path:hsa00053 1.49e-01
## path:hsa05204 2.43e-02
## path:hsa00982 1.01e-01
## path:hsa00830 6.89e-02
## path:hsa04726 3.13e-04
## path:hsa05323 9.76e-01
## path:hsa04970 2.47e-02
## path:hsa04974 8.12e-02
topKEGG=topKEGG(keg, n=15)
Significant results for the plotsmear and volcano plots are based on FDR <0.05 (not logfold changes). Note that as with the annotating of plots for the Hispanic population, the genes with the smallest FDRs are the ones that are upregulated. That’s why we only see gene labels for positive logFC changes. I could put labels on all genes, but the plot would be a big blue blob if I did that.
plotSmear(res, de.tags=detags)
N <- 20
text(results.annotated$logCPM[1:N],
results.annotated$logFC[1:N],labels = results.annotated$SYMBOL[1:N],col="blue")
## Volcano plot
signif <- -log10(results.annotated$FDR)
plot(results.annotated$logFC,signif,pch=16)
points(results.annotated[detags,"logFC"],-log10(results.annotated[detags,"FDR"]),pch=16,col="red")
keg_shared <- kegga(shared_pathway, species="Hs")
topKEGG(keg_shared, n=15)
## Pathway N DE P.DE
## path:hsa04974 Protein digestion and absorption 90 9 0.000261
## path:hsa04080 Neuroactive ligand-receptor interaction 277 16 0.000844
## path:hsa04978 Mineral absorption 51 6 0.001209
## path:hsa04657 IL-17 signaling pathway 93 8 0.001555
## path:hsa04972 Pancreatic secretion 96 8 0.001906
## path:hsa00980 Metabolism of xenobiotics by cytochrome P450 76 7 0.002055
## path:hsa05204 Chemical carcinogenesis 82 7 0.003174
## path:hsa00053 Ascorbate and aldarate metabolism 27 4 0.003491
## path:hsa04721 Synaptic vesicle cycle 63 6 0.003610
## path:hsa04915 Estrogen signaling pathway 137 9 0.005120
## path:hsa04970 Salivary secretion 90 7 0.005328
## path:hsa00910 Nitrogen metabolism 17 3 0.006963
## path:hsa00982 Drug metabolism - cytochrome P450 72 6 0.006971
## path:hsa00040 Pentose and glucuronate interconversions 34 4 0.008113
## path:hsa00983 Drug metabolism - other enzymes 79 6 0.010832
write.csv(keg_shared, "C:/Users/chaadams/Dropbox/COH/Hispanic tumours/Annotated ER Whites TCGA/shared_KEGG.csv")
go_shared <- goana(shared_pathway, species="Hs")
topGO(go_shared, n=15)
## Term Ont N DE P.DE
## GO:0070268 cornification BP 113 21 1.60e-12
## GO:0048856 anatomical structure development BP 5836 220 1.67e-10
## GO:0032502 developmental process BP 6266 231 3.56e-10
## GO:0005576 extracellular region CC 5012 194 5.38e-10
## GO:0032501 multicellular organismal process BP 7494 264 6.93e-10
## GO:0030855 epithelial cell differentiation BP 760 50 2.47e-09
## GO:0009888 tissue development BP 1968 95 3.07e-09
## GO:0019730 antimicrobial humoral response BP 108 17 3.14e-09
## GO:0030154 cell differentiation BP 4123 163 7.07e-09
## GO:0031424 keratinization BP 226 24 7.10e-09
## GO:0006811 ion transport BP 1610 81 9.19e-09
## GO:0060429 epithelium development BP 1264 68 1.30e-08
## GO:0006865 amino acid transport BP 137 18 2.06e-08
## GO:0007275 multicellular organism development BP 5351 197 2.64e-08
## GO:0048869 cellular developmental process BP 4322 166 3.89e-08
write.csv(go_shared, "C:/Users/chaadams/Dropbox/COH/Hispanic tumours/Annotated ER Whites TCGA/shared_go.csv")
dim(hisp)
## [1] 1294 11
#Number of significant genes common to both TCGA and Hispanics
length(shared_pathway)
## [1] 543
#Number of uniquely signficant genes in Hispanics
hisp_unique = subset(hisp, !(hisp$ENTREZID %in% shared_pathway))
dim(hisp_unique)
## [1] 751 11
#KEGG pathways unique to Hispanic set
hisp_unique_KEGG <- kegga(hisp_unique$ENTREZID, species="Hs")
topKEGG(hisp_unique_KEGG, n=15)
## Pathway N DE P.DE
## path:hsa04080 Neuroactive ligand-receptor interaction 277 23 7.81e-09
## path:hsa04020 Calcium signaling pathway 183 14 2.08e-05
## path:hsa04261 Adrenergic signaling in cardiomyocytes 144 11 1.67e-04
## path:hsa04024 cAMP signaling pathway 198 13 1.98e-04
## path:hsa05204 Chemical carcinogenesis 82 8 2.55e-04
## path:hsa05032 Morphine addiction 91 8 5.20e-04
## path:hsa00982 Drug metabolism - cytochrome P450 72 7 6.37e-04
## path:hsa04925 Aldosterone synthesis and secretion 96 8 7.45e-04
## path:hsa05033 Nicotine addiction 40 5 1.25e-03
## path:hsa04721 Synaptic vesicle cycle 63 6 1.74e-03
## path:hsa04022 cGMP-PKG signaling pathway 163 10 1.83e-03
## path:hsa04727 GABAergic synapse 88 7 2.09e-03
## path:hsa05414 Dilated cardiomyopathy (DCM) 90 7 2.38e-03
## path:hsa00591 Linoleic acid metabolism 29 4 2.70e-03
## path:hsa04972 Pancreatic secretion 96 7 3.43e-03
#GO sets unique to Hispanic set
hisp_unique_GO <- goana(hisp_unique$ENTREZID, species="Hs")
topGO(hisp_unique_GO , n=15)
## Term Ont N DE P.DE
## GO:0006936 muscle contraction BP 352 34 3.01e-12
## GO:0042391 regulation of membrane potential BP 438 37 1.75e-11
## GO:0098916 anterograde trans-synaptic signaling BP 667 46 6.24e-11
## GO:0007268 chemical synaptic transmission BP 667 46 6.24e-11
## GO:0099537 trans-synaptic signaling BP 670 46 7.25e-11
## GO:0099536 synaptic signaling BP 671 46 7.62e-11
## GO:0003008 system process BP 2111 96 1.07e-10
## GO:0003012 muscle system process BP 450 36 1.54e-10
## GO:0033275 actin-myosin filament sliding BP 39 11 9.65e-10
## GO:0030049 muscle filament sliding BP 39 11 9.65e-10
## GO:0071944 cell periphery CC 5614 188 7.36e-09
## GO:0005886 plasma membrane CC 5503 184 1.41e-08
## GO:0044459 plasma membrane part CC 2830 111 1.55e-08
## GO:0045202 synapse CC 880 49 1.88e-08
## GO:0043292 contractile fiber CC 230 22 2.66e-08
dim(whites)
## [1] 1643 14
#Number of significant genes common to both TCGA and Hispanics
length(shared_pathway)
## [1] 543
#Number of uniquely signficant genes in Hispanics
whites_unique = subset(whites, !(whites$ENTREZID %in% shared_pathway))
dim(whites_unique)
## [1] 1100 14
#KEGG pathways unique to TCGA set
whites_unique_KEGG <- kegga(whites_unique$ENTREZID, species="Hs")
topKEGG(whites_unique_KEGG, n=15)
## Pathway N
## path:hsa00140 Steroid hormone biosynthesis 59
## path:hsa00040 Pentose and glucuronate interconversions 34
## path:hsa00053 Ascorbate and aldarate metabolism 27
## path:hsa05204 Chemical carcinogenesis 82
## path:hsa00980 Metabolism of xenobiotics by cytochrome P450 76
## path:hsa00830 Retinol metabolism 67
## path:hsa04970 Salivary secretion 90
## path:hsa04726 Serotonergic synapse 115
## path:hsa00860 Porphyrin and chlorophyll metabolism 42
## path:hsa00982 Drug metabolism - cytochrome P450 72
## path:hsa00983 Drug metabolism - other enzymes 79
## path:hsa05412 Arrhythmogenic right ventricular cardiomyopathy (ARVC) 72
## path:hsa04080 Neuroactive ligand-receptor interaction 277
## path:hsa04927 Cortisol synthesis and secretion 64
## path:hsa04727 GABAergic synapse 88
## DE P.DE
## path:hsa00140 13 3.28e-06
## path:hsa00040 9 2.28e-05
## path:hsa00053 8 2.67e-05
## path:hsa05204 14 3.13e-05
## path:hsa00980 13 5.87e-05
## path:hsa00830 12 7.09e-05
## path:hsa04970 14 9.05e-05
## path:hsa04726 16 1.15e-04
## path:hsa00860 9 1.37e-04
## path:hsa00982 12 1.46e-04
## path:hsa00983 12 3.59e-04
## path:hsa05412 11 5.98e-04
## path:hsa04080 26 8.06e-04
## path:hsa04927 10 8.79e-04
## path:hsa04727 12 9.75e-04
#GO sets unique to TCGA set
whites_unique_GO <- goana(whites_unique$ENTREZID, species="Hs")
topGO(whites_unique_GO , n=15)
## Term Ont N DE P.DE
## GO:0032501 multicellular organismal process BP 7494 479 3.22e-12
## GO:0060429 epithelium development BP 1264 118 2.71e-11
## GO:0010817 regulation of hormone levels BP 510 62 9.28e-11
## GO:0009888 tissue development BP 1968 160 3.03e-10
## GO:0043588 skin development BP 413 53 3.13e-10
## GO:0018149 peptide cross-linking BP 59 18 3.14e-10
## GO:0007275 multicellular organism development BP 5351 353 7.26e-10
## GO:0008544 epidermis development BP 462 56 8.58e-10
## GO:0048856 anatomical structure development BP 5836 378 1.21e-09
## GO:0070268 cornification BP 113 24 1.45e-09
## GO:0030855 epithelial cell differentiation BP 760 77 2.94e-09
## GO:0007417 central nervous system development BP 932 88 6.60e-09
## GO:0009812 flavonoid metabolic process BP 15 9 7.32e-09
## GO:0048731 system development BP 4760 315 8.37e-09
## GO:0048513 animal organ development BP 3428 240 1.04e-08
grid.newpage()
venn.plot <- draw.pairwise.venn(area1 = 751,
area2 = 1100,
cross.area = 543,
fill = c("blue", "red"),
category = c("Hispanic", "TCGA"))