Seminar 3: Playing with Lattice and ggplot2 Graphics using the photorec dataset

library(lattice)
library(RColorBrewer)
library(gplots)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
library(ggplot2)
photoDat <- read.table("GSE4051_data.tsv")
str(photoDat, max.level = 0)
## 'data.frame':    29949 obs. of  39 variables:
sampleinfo <- readRDS("GSE4051_design.rds")
str(sampleinfo)
## 'data.frame':    39 obs. of  4 variables:
##  $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##  $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...

Let's make a heatmap of the 35 random probsets from our photorec dataset using heatmap.2 function of gplots. Note that we will need to reconfigure our dataframe as a matrix.

set.seed(1)
(op <- sample(1:nrow(photoDat), size = 35))
##  [1]  7952 11145 17156 27198  6040 26902 28287 19786 18837  1850  6167
## [12]  5286 20568 11499 23046 14899 21481 29690 11375 23269 27975  6350
## [23] 19503  3758  7997 11555   401 11442 26023 10184 14424 17938 14766
## [34]  5571 24751
sumDat <- photoDat[op, ]
str(sumDat)
## 'data.frame':    35 obs. of  39 variables:
##  $ Sample_20: num  8.3 8.25 6.91 6.79 6.4 ...
##  $ Sample_21: num  8.33 9.14 6.55 6.23 6.69 ...
##  $ Sample_22: num  8.43 8.19 6.59 6.69 6.14 ...
##  $ Sample_23: num  8.49 8.66 6.58 6.34 6.34 ...
##  $ Sample_16: num  8.51 6.66 7.75 7.25 5.5 ...
##  $ Sample_17: num  8.18 7.95 6.85 6.38 7.51 ...
##  $ Sample_6 : num  7.96 8.45 7.42 6.19 7.64 ...
##  $ Sample_24: num  8.34 7.49 7.17 6.84 5.83 ...
##  $ Sample_25: num  8.14 7.39 7.12 7.02 5.85 ...
##  $ Sample_26: num  8.45 6.94 7.46 7.43 6.32 ...
##  $ Sample_27: num  8.25 6.5 7.23 6.91 5.8 ...
##  $ Sample_14: num  8.46 6.99 7.14 6.78 6.29 ...
##  $ Sample_3 : num  8.53 7.14 7.23 6.88 6.15 ...
##  $ Sample_5 : num  8.45 6.71 7.36 6.92 6 ...
##  $ Sample_8 : num  8.62 6.66 7.9 6.97 5.95 ...
##  $ Sample_28: num  8.63 6.46 7.45 7.17 6.03 ...
##  $ Sample_29: num  8.58 7.84 6.72 6.91 6.31 ...
##  $ Sample_30: num  8.28 7.01 6.81 6.75 5.81 ...
##  $ Sample_31: num  8.47 6.88 7.18 6.89 5.79 ...
##  $ Sample_1 : num  8.66 6.81 7.22 6.56 6.03 ...
##  $ Sample_10: num  8.68 7.41 6.97 6.5 5.99 ...
##  $ Sample_4 : num  8.74 7.23 6.91 6.78 5.82 ...
##  $ Sample_7 : num  8.69 6.61 7.43 7.1 5.64 ...
##  $ Sample_32: num  9.7 7.62 6.96 6.89 6.82 ...
##  $ Sample_33: num  8.72 6.83 7.36 7.21 5.93 ...
##  $ Sample_34: num  8.58 6.73 7.63 7.19 5.93 ...
##  $ Sample_35: num  8.54 6.91 7.14 6.71 5.59 ...
##  $ Sample_13: num  8.73 6.83 7.14 6.46 6.25 ...
##  $ Sample_15: num  8.57 6.86 7.12 6.72 5.84 ...
##  $ Sample_18: num  8.96 6.95 6.96 6.31 6.44 ...
##  $ Sample_19: num  8.65 6.69 7.03 6.91 6.32 ...
##  $ Sample_36: num  10.41 7.9 6.57 6.87 6.09 ...
##  $ Sample_37: num  9.48 6.36 6.99 6.85 6.08 ...
##  $ Sample_38: num  10.14 7.24 6.95 6.99 6.13 ...
##  $ Sample_39: num  9.48 6.21 6.97 7.13 6.02 ...
##  $ Sample_11: num  9.09 6.23 7 6.4 6.22 ...
##  $ Sample_12: num  9.21 6.47 7.17 6.78 5.76 ...
##  $ Sample_2 : num  9.05 6.2 7.2 6.54 5.9 ...
##  $ Sample_9 : num  8.89 7.67 6.86 6.72 6.26 ...
sumDat <- as.matrix(t(sumDat))
rownames(sumDat) <- with(sampleinfo, paste(devStage, gType, sidChar, sep = "_"))
str(sumDat)
##  num [1:39, 1:35] 8.3 8.33 8.43 8.49 8.51 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
##   ..$ : chr [1:35] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...
feelingBlue <- colorRampPalette(brewer.pal(n = 7, "Blues"))
heatmap.2(sumDat, col = feelingBlue, trace = "none")

plot of chunk unnamed-chunk-1

Now let's plot Sample 16 against Sample 2 using the stat_binhex function in ggplot2

(overplotting <- ggplot(photoDat, aes(photoDat$Sample_16, photoDat$Sample_2)) + 
    stat_binhex() + theme_bw() + xlab("Expression of Sample 16") + ylab("Expression of Sample 2") + 
    ggtitle("Gene Expression Levels in Two Samples"))

plot of chunk unnamed-chunk-2

Working with a smaller subset of the dataset, we can look at the expression levels of Jenny's favourite eggBomb vs poisonFang probes by Genotype

mini_dat <- readRDS("GSE4051_MINI.rds")
str(mini_dat)
## 'data.frame':    39 obs. of  7 variables:
##  $ sidChar   : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##  $ sidNum    : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ crabHammer: num  10.22 10.02 9.64 9.65 8.58 ...
##  $ eggBomb   : num  7.46 6.89 6.72 6.53 6.47 ...
##  $ poisonFang: num  7.37 7.18 7.35 7.04 7.49 ...
splot <- ggplot(mini_dat, aes(x = eggBomb, y = poisonFang, color = devStage))
(splot <- splot + geom_point() + theme_bw() + xlab("Expression of eggBomb") + 
    ylab("Expression of poisonFang") + ggtitle("Scatterplot of Expression Levels by Genotype") + 
    facet_wrap(~gType))

plot of chunk unnamed-chunk-3

Let's re-shape the data a bit to look at density plots of gene expression by developmental stage

reshaped_dat <- with(mini_dat, data.frame(sidChar, sidNum, devStage, gType, 
    probeset = factor(rep(c("crabHammer", "eggBomb", "poisonFang"), each = nrow(mini_dat))), 
    geneExp = c(crabHammer, eggBomb, poisonFang)))
str(reshaped_dat)
## 'data.frame':    117 obs. of  6 variables:
##  $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ geneExp : num  10.22 10.02 9.64 9.65 8.58 ...
(densplot <- ggplot(reshaped_dat, aes(geneExp, color = devStage)) + stat_density(geom = "line", 
    position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-4