Ronjon …Mouse1

# source('http://bioconductor.org/biocLite.R')
# biocLite('mogene10sttranscriptcluster.db')

load("~/Dropbox/RonjonOct2012/ronSub.RData")
library(annotate)
library(mogene10sttranscriptcluster.db)
glucoC = c("Cyp11a1", "Hsd3b1", "Cyp21a1", "Cyp11b1", "Hsd11b1", "H6pd", "Nr3c1")
sym = getSYMBOL(featureNames(ron.eset), "mogene10sttranscriptcluster")

match.all = function(x, vec) {
    ret = vector()
    for (i in x) ret = c(ret, which(vec == i))
    ret
}

glucoC.index = match.all(glucoC, sym)
glucoC.index

## 10585794 10500565 10450272 10429538 10361234 10518743 10458569 
##    32096    22826    17577    15420     8244    24733    18422

sym[glucoC.index]

##  10585794  10500565  10450272  10429538  10361234  10518743  10458569 
## "Cyp11a1"  "Hsd3b1" "Cyp21a1" "Cyp11b1" "Hsd11b1"    "H6pd"   "Nr3c1"


library(gplots)
hmap = as.matrix(t(exprs(ron.eset[glucoC.index, ])))

type = samp$type
type = as.vector(type)
type[type == "ResidentDCs"] <- "blue"
type[type == "MigratoryDCs"] <- "green"
type[type == "NonLymphoid"] <- "yellow"
type[type == "Precursors"] <- "red"
heatmap.2(hmap, trace = "n", scale = "c", labCol = glucoC, labRow = samp$heatmap_order, 
    Rowv = F, Colv = F, col = "bluered", RowSideColors = type, margins = c(7, 
        3))

plot of chunk Ronjon

UPDATE

Here I am exploring a way to represent the expression of this pathway - CI or se bars or similar. I am first scaling the data so that each gene will be eqully weighted when averaging the pathway expression and so that when plotting the 0 line will have been the mean expression across all samples.

hmap = as.matrix(scale(t(exprs(ron.eset[glucoC.index, ]))))

samp2 = read.table("/Users/stephenhenderson/Dropbox/RonjonOct2012/samp2.txt", 
    header = T)
library(reshape2)
library(ggplot2)
colnames(hmap) = glucoC
hmap = transform(hmap, group = samp2$Group)
head(hmap)

##                                                 Cyp11a1   Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz  0.52626  0.76902  0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz  0.61426  1.06808  0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz  0.76856  0.01182  0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516  0.53820  0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553  1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349  0.2401
##                                                 Cyp11b1  Hsd11b1   H6pd
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz   1.2068 -0.01089 0.8880
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz   1.5446  0.09510 1.5060
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz   0.3367  0.58540 0.7750
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz  1.3498 -0.08084 0.4626
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz  0.9607  0.35317 1.0353
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz  0.7863  0.13952 0.9375
##                                                   Nr3c1 group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz   0.4280     A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz  -0.2362     A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz   0.1584     A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz -0.1517     A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz -0.4476     A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz  0.3763     A

hmap1 = hmap[, -7]
hmap2 = hmap[, c(1:4, 8)]
head(hmap1)

##                                                 Cyp11a1   Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz  0.52626  0.76902  0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz  0.61426  1.06808  0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz  0.76856  0.01182  0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516  0.53820  0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553  1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349  0.2401
##                                                 Cyp11b1  Hsd11b1   H6pd
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz   1.2068 -0.01089 0.8880
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz   1.5446  0.09510 1.5060
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz   0.3367  0.58540 0.7750
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz  1.3498 -0.08084 0.4626
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz  0.9607  0.35317 1.0353
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz  0.7863  0.13952 0.9375
##                                                 group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz      A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz      A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz      A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz     A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz     A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz     A

head(hmap2)

##                                                 Cyp11a1   Hsd3b1 Cyp21a1
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz  0.52626  0.76902  0.7014
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz  0.61426  1.06808  0.7085
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz  0.76856  0.01182  0.8504
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz 0.33516  0.53820  0.6032
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz 0.39845 -0.07553  1.1477
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz 0.02552 -0.07349  0.2401
##                                                 Cyp11b1 group
## GSM538255_EA07068_91074_MoGene_DC1.LN#1.CEL.gz   1.2068     A
## GSM538256_EA07068_91075_MoGene_DC1.LN#2.CEL.gz   1.5446     A
## GSM538257_EA07068_91076_MoGene_DC1.LN#3.CEL.gz   0.3367     A
## GSM538252_EA07068_91080_MoGene_DC1.MLN#1.CEL.gz  1.3498     A
## GSM538253_EA07068_91081_MoGene_DC1.MLN#2.CEL.gz  0.9607     A
## GSM538254_EA07068_91082_MoGene_DC1.MLN#3.CEL.gz  0.7863     A


# You can see from the plots and corrs that Hsd11b1 and H6pd are a less
# tightly kniot pairs of genes compared to the others
library(GGally)

## Loading required package: reshape

## Loading required package: plyr

## Attaching package: 'reshape'

## The following object(s) are masked from 'package:plyr':
## 
## rename, round_any

## The following object(s) are masked from 'package:reshape2':
## 
## colsplit, melt, recast

## Warning: replacing previous import 'rename' when loading 'reshape'

## Warning: replacing previous import 'round_any' when loading 'reshape'

ggpairs(hmap1[, 1:6])

plot of chunk samp2

ggpairs(hmap2[, 1:4])

plot of chunk samp2

Before I squish the genes together I just want to explore a few different plots to orientate myself…

hmap1.melt = melt(hmap1, id.var = "group")
hmap2.melt = melt(hmap2, id.var = "group")
head(hmap1.melt)

##   group variable   value
## 1     A  Cyp11a1 0.52626
## 2     A  Cyp11a1 0.61426
## 3     A  Cyp11a1 0.76856
## 4     A  Cyp11a1 0.33516
## 5     A  Cyp11a1 0.39845
## 6     A  Cyp11a1 0.02552

colnames(hmap1.melt) = c("group", "gene", "value")
colnames(hmap2.melt) = c("group", "gene", "value")

qplot(x = group, y = value, data = hmap1.melt, facets = ~gene, geom = "boxplot") + 
    theme_bw()

plot of chunk reshape

qplot(x = group, y = value, data = hmap2.melt, facets = ~gene, geom = "boxplot") + 
    theme_bw()

plot of chunk reshape

qplot(x = group, y = value, data = hmap2.melt, geom = "boxplot") + theme_bw()

plot of chunk reshape

It is all a bit too complex like this to use as the base for a plot… although the last there gives a glimpse of what we might cut up to use (it's not quite correct). I have a script for calculating se and 95% CI intervals

library(plyr)
library(doBy)
# gluco column is now averaging the values of the genes which should be
# approx equal scaled.
hmap1 = transform(hmap2, gluco = rowMeans(hmap1[, 1:6]))
hmap2 = transform(hmap2, gluco = rowMeans(hmap2[, 1:4]))
source("/Users/stephenhenderson/Dropbox/RonjonOct2012/summarySE.R")

hmap1SE = summarySE(hmap1, measurevar = "gluco", groupvars = "group")
hmap2SE = summarySE(hmap2, measurevar = "gluco", groupvars = "group")
hmap1SE = transform(hmap1SE, dummyvar = rep(0, 13))
hmap2SE = transform(hmap2SE, dummyvar = rep(0, 13))

hmap1SE

##    group  N    gluco       sd       se      ci dummyvar
## 1      A 11  0.38967 0.635770 0.191692 0.42712        0
## 2      B  3 -1.36787 0.228420 0.131878 0.56743        0
## 3      C 13  0.37835 0.670734 0.186028 0.40532        0
## 4      D 20  0.05418 0.707968 0.158306 0.33134        0
## 5      E 25  0.16944 0.281984 0.056397 0.11640        0
## 6      F  5  0.47715 0.316151 0.141387 0.39255        0
## 7      G 11 -0.54920 0.256650 0.077383 0.17242        0
## 8      H  3 -1.08339 0.482838 0.278767 1.19944        0
## 9      I  3  1.28051 0.006939 0.004006 0.01724        0
## 10     J  3 -0.45554 0.280085 0.161707 0.69577        0
## 11     K  3 -0.82880 0.093891 0.054208 0.23324        0
## 12     L  3  0.06646 0.076059 0.043913 0.18894        0
## 13     M  3 -1.23440 0.267296 0.154324 0.66400        0

hmap2SE

##    group  N   gluco      sd      se     ci dummyvar
## 1      A 11  0.4438 0.72823 0.21957 0.4892        0
## 2      B  3 -1.4689 0.24357 0.14063 0.6051        0
## 3      C 13  0.6433 0.78589 0.21797 0.4749        0
## 4      D 20  0.2099 0.81406 0.18203 0.3810        0
## 5      E 25  0.2805 0.37289 0.07458 0.1539        0
## 6      F  5  0.8267 0.34657 0.15499 0.4303        0
## 7      G 11 -1.2662 0.33981 0.10246 0.2283        0
## 8      H  3 -1.1047 0.19772 0.11415 0.4912        0
## 9      I  3  0.8082 0.05845 0.03374 0.1452        0
## 10     J  3 -0.5341 0.30872 0.17824 0.7669        0
## 11     K  3 -0.7348 0.11682 0.06745 0.2902        0
## 12     L  3 -0.4965 0.12660 0.07309 0.3145        0
## 13     M  3 -1.3566 0.22263 0.12854 0.5530        0

I can plot this using the linerange or errobar geom- from the ggplot2 package.

# These are just plot stylings
new_theme_empty <- theme_bw()
new_theme_empty$line <- element_blank()
new_theme_empty$axis.text.x <- element_blank()
new_theme_empty$plot.title <- element_blank()
new_theme_empty$axis.title <- element_blank()
new_theme_empty$strip.background <- element_rect(fill = NULL)
new_theme_empty$plot.margin <- structure(c(1, 1, 1, 1), unit = "lines", valid.unit = 3L, 
    class = "unit")
theme_set(new_theme_empty)




limits <- aes(ymax = gluco + ci, ymin = gluco - ci)

# this is averaging all 6 genes
p <- ggplot(hmap1SE, aes(x = dummyvar, y = gluco))
p + geom_pointrange(limits) + ylim(-2.3, 2.2) + facet_wrap(~group, nrow = 3) + 
    geom_hline(yintercept = 0, linetype = "dashed")

plot of chunk linerangeCI


# this is averaging the more tightly correlated 4 genes
p <- ggplot(hmap2SE, aes(x = dummyvar, y = gluco))
p + geom_pointrange(limits) + ylim(-2.3, 2.3) + facet_wrap(~group, nrow = 3) + 
    geom_hline(yintercept = 0, linetype = "dashed")

plot of chunk linerangeCI

I would say a few things:

This could be the basis for a plot where we just cut the panels up using illustrator and paste them to a tree like structure or similar developmental graph like in the Miller paper.
I would say that the tighter group of 4 genes is superior. Many more groups are significantly different form the mean usign these alone. I attach both plots glucoSDexprs(4 genes) and glucoSDexprsFull(6 genes)
The presentation would be slightly clearer than the Miller paper. You can interpret the Y-scale as standard deviations of the data. So group H for instance has a mean of ~1 std deviation below the overall mean of all samples. Whilst the basr are the 95% confidence intervals. So where the bars do not cross 0 you can say this group is significantly above or below the mean of the whole group.
I can produce slight variants on this Fig to make it easier for cutting i.e. without strips or enclosing boxes. And I can also replace A-B with proper label names.

I've updated the RPubs page for this work…