# source('http://bioconductor.org/biocLite.R')
# biocLite('mogene10sttranscriptcluster.db')
load("~/Dropbox/RonjonOct2012/ronSub.RData")
library(annotate)
library(mogene10sttranscriptcluster.db)
glucoC = c("Cyp11a1", "Hsd3b1", "Cyp21a1", "Cyp11b1", "Hsd11b1", "H6pd", "Nr3c1")
sym = getSYMBOL(featureNames(ron.eset), "mogene10sttranscriptcluster")
match.all = function(x, vec) {
ret = vector()
for (i in x) ret = c(ret, which(vec == i))
ret
}
glucoC.index = match.all(glucoC, sym)
glucoC.index
## 10585794 10500565 10450272 10429538 10361234 10518743 10458569
## 32096 22826 17577 15420 8244 24733 18422
sym[glucoC.index]
## 10585794 10500565 10450272 10429538 10361234 10518743 10458569
## "Cyp11a1" "Hsd3b1" "Cyp21a1" "Cyp11b1" "Hsd11b1" "H6pd" "Nr3c1"
library(gplots)
hmap = as.matrix(t(exprs(ron.eset[glucoC.index, ])))
type = samp$type
type = as.vector(type)
type[type == "ResidentDCs"] <- "blue"
type[type == "MigratoryDCs"] <- "green"
type[type == "NonLymphoid"] <- "yellow"
type[type == "Precursors"] <- "red"
heatmap.2(hmap, trace = "n", scale = "c", labCol = glucoC, labRow = samp$heatmap_order,
Rowv = F, Colv = F, col = "bluered", RowSideColors = type, margins = c(7,
3))
Here I am exploring a way to represent the expression of this pathway - CI or se bars or similar. I am first scaling the data so that each gene will be eqully weighted when averaging the pathway expression and so that when plotting the 0 line will have been the mean expression across all samples.
hmap = as.matrix(scale(t(exprs(ron.eset[glucoC.index, ]))))
samp2 = read.table("/Users/stephenhenderson/Dropbox/RonjonOct2012/samp2.txt",
header = T)
library(reshape2)
library(ggplot2)
colnames(hmap) = glucoC
hmap = transform(hmap, group = samp2$Group)
head(hmap)
## Cyp11a1 Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 0.52626 0.76902 0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 0.61426 1.06808 0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.76856 0.01182 0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516 0.53820 0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553 1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349 0.2401
## Cyp11b1 Hsd11b1 H6pd
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 1.2068 -0.01089 0.8880
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 1.5446 0.09510 1.5060
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.3367 0.58540 0.7750
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 1.3498 -0.08084 0.4626
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.9607 0.35317 1.0353
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.7863 0.13952 0.9375
## Nr3c1 group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 0.4280 A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz -0.2362 A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.1584 A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz -0.1517 A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz -0.4476 A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.3763 A
hmap1 = hmap[, -7]
hmap2 = hmap[, c(1:4, 8)]
head(hmap1)
## Cyp11a1 Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 0.52626 0.76902 0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 0.61426 1.06808 0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.76856 0.01182 0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516 0.53820 0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553 1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349 0.2401
## Cyp11b1 Hsd11b1 H6pd
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 1.2068 -0.01089 0.8880
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 1.5446 0.09510 1.5060
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.3367 0.58540 0.7750
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 1.3498 -0.08084 0.4626
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.9607 0.35317 1.0353
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.7863 0.13952 0.9375
## group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz A
head(hmap2)
## Cyp11a1 Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 0.52626 0.76902 0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 0.61426 1.06808 0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.76856 0.01182 0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516 0.53820 0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553 1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349 0.2401
## Cyp11b1 group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz 1.2068 A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz 1.5446 A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz 0.3367 A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 1.3498 A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.9607 A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.7863 A
# You can see from the plots and corrs that Hsd11b1 and H6pd are a less
# tightly kniot pairs of genes compared to the others
library(GGally)
## Loading required package: reshape
## Loading required package: plyr
## Attaching package: 'reshape'
## The following object(s) are masked from 'package:plyr':
##
## rename, round_any
## The following object(s) are masked from 'package:reshape2':
##
## colsplit, melt, recast
## Warning: replacing previous import 'rename' when loading 'reshape'
## Warning: replacing previous import 'round_any' when loading 'reshape'
ggpairs(hmap1[, 1:6])
ggpairs(hmap2[, 1:4])
Before I squish the genes together I just want to explore a few different plots to orientate myself…
hmap1.melt = melt(hmap1, id.var = "group")
hmap2.melt = melt(hmap2, id.var = "group")
head(hmap1.melt)
## group variable value
## 1 A Cyp11a1 0.52626
## 2 A Cyp11a1 0.61426
## 3 A Cyp11a1 0.76856
## 4 A Cyp11a1 0.33516
## 5 A Cyp11a1 0.39845
## 6 A Cyp11a1 0.02552
colnames(hmap1.melt) = c("group", "gene", "value")
colnames(hmap2.melt) = c("group", "gene", "value")
qplot(x = group, y = value, data = hmap1.melt, facets = ~gene, geom = "boxplot") +
theme_bw()
qplot(x = group, y = value, data = hmap2.melt, facets = ~gene, geom = "boxplot") +
theme_bw()
qplot(x = group, y = value, data = hmap2.melt, geom = "boxplot") + theme_bw()
It is all a bit too complex like this to use as the base for a plot… although the last there gives a glimpse of what we might cut up to use (it's not quite correct). I have a script for calculating se and 95% CI intervals
library(plyr)
library(doBy)
# gluco column is now averaging the values of the genes which should be
# approx equal scaled.
hmap1 = transform(hmap2, gluco = rowMeans(hmap1[, 1:6]))
hmap2 = transform(hmap2, gluco = rowMeans(hmap2[, 1:4]))
source("/Users/stephenhenderson/Dropbox/RonjonOct2012/summarySE.R")
hmap1SE = summarySE(hmap1, measurevar = "gluco", groupvars = "group")
hmap2SE = summarySE(hmap2, measurevar = "gluco", groupvars = "group")
hmap1SE = transform(hmap1SE, dummyvar = rep(0, 13))
hmap2SE = transform(hmap2SE, dummyvar = rep(0, 13))
hmap1SE
## group N gluco sd se ci dummyvar
## 1 A 11 0.38967 0.635770 0.191692 0.42712 0
## 2 B 3 -1.36787 0.228420 0.131878 0.56743 0
## 3 C 13 0.37835 0.670734 0.186028 0.40532 0
## 4 D 20 0.05418 0.707968 0.158306 0.33134 0
## 5 E 25 0.16944 0.281984 0.056397 0.11640 0
## 6 F 5 0.47715 0.316151 0.141387 0.39255 0
## 7 G 11 -0.54920 0.256650 0.077383 0.17242 0
## 8 H 3 -1.08339 0.482838 0.278767 1.19944 0
## 9 I 3 1.28051 0.006939 0.004006 0.01724 0
## 10 J 3 -0.45554 0.280085 0.161707 0.69577 0
## 11 K 3 -0.82880 0.093891 0.054208 0.23324 0
## 12 L 3 0.06646 0.076059 0.043913 0.18894 0
## 13 M 3 -1.23440 0.267296 0.154324 0.66400 0
hmap2SE
## group N gluco sd se ci dummyvar
## 1 A 11 0.4438 0.72823 0.21957 0.4892 0
## 2 B 3 -1.4689 0.24357 0.14063 0.6051 0
## 3 C 13 0.6433 0.78589 0.21797 0.4749 0
## 4 D 20 0.2099 0.81406 0.18203 0.3810 0
## 5 E 25 0.2805 0.37289 0.07458 0.1539 0
## 6 F 5 0.8267 0.34657 0.15499 0.4303 0
## 7 G 11 -1.2662 0.33981 0.10246 0.2283 0
## 8 H 3 -1.1047 0.19772 0.11415 0.4912 0
## 9 I 3 0.8082 0.05845 0.03374 0.1452 0
## 10 J 3 -0.5341 0.30872 0.17824 0.7669 0
## 11 K 3 -0.7348 0.11682 0.06745 0.2902 0
## 12 L 3 -0.4965 0.12660 0.07309 0.3145 0
## 13 M 3 -1.3566 0.22263 0.12854 0.5530 0
I can plot this using the linerange or errobar geom- from the ggplot2 package.
# These are just plot stylings
new_theme_empty <- theme_bw()
new_theme_empty$line <- element_blank()
new_theme_empty$axis.text.x <- element_blank()
new_theme_empty$plot.title <- element_blank()
new_theme_empty$axis.title <- element_blank()
new_theme_empty$strip.background <- element_rect(fill = NULL)
new_theme_empty$plot.margin <- structure(c(1, 1, 1, 1), unit = "lines", valid.unit = 3L,
class = "unit")
theme_set(new_theme_empty)
limits <- aes(ymax = gluco + ci, ymin = gluco - ci)
# this is averaging all 6 genes
p <- ggplot(hmap1SE, aes(x = dummyvar, y = gluco))
p + geom_pointrange(limits) + ylim(-2.3, 2.2) + facet_wrap(~group, nrow = 3) +
geom_hline(yintercept = 0, linetype = "dashed")
# this is averaging the more tightly correlated 4 genes
p <- ggplot(hmap2SE, aes(x = dummyvar, y = gluco))
p + geom_pointrange(limits) + ylim(-2.3, 2.3) + facet_wrap(~group, nrow = 3) +
geom_hline(yintercept = 0, linetype = "dashed")
I would say a few things:
This could be the basis for a plot where we just cut the panels up using illustrator and paste them to a tree like structure or similar developmental graph like in the Miller paper.
I would say that the tighter group of 4 genes is superior. Many more groups are significantly different form the mean usign these alone. I attach both plots glucoSDexprs(4 genes) and glucoSDexprsFull(6 genes)
The presentation would be slightly clearer than the Miller paper. You can interpret the Y-scale as standard deviations of the data. So group H for instance has a mean of ~1 std deviation below the overall mean of all samples. Whilst the basr are the 95% confidence intervals. So where the bars do not cross 0 you can say this group is significantly above or below the mean of the whole group.
I can produce slight variants on this Fig to make it easier for cutting i.e. without strips or enclosing boxes. And I can also replace A-B with proper label names.
I've updated the RPubs page for this work…