Seminar 3: Introduction to R Graphics (Lattice + ggplot2)

by Erica Acton

Lattice

Load data.

library(lattice)
library(grid)
library(hexbin)

kDat <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_MINI.rds")
str(kDat)
## 'data.frame':    39 obs. of  7 variables:
##  $ sidChar   : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##  $ sidNum    : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ crabHammer: num  10.22 10.02 9.64 9.65 8.58 ...
##  $ eggBomb   : num  7.46 6.89 6.72 6.53 6.47 ...
##  $ poisonFang: num  7.37 7.18 7.35 7.04 7.49 ...
table(kDat$devStage)
## 
##     E16      P2      P6     P10 4_weeks 
##       7       8       8       8       8
table(kDat$gType)
## 
##    wt NrlKO 
##    20    19
with(kDat, table(devStage, gType))
##          gType
## devStage  wt NrlKO
##   E16      4     3
##   P2       4     4
##   P6       4     4
##   P10      4     4
##   4_weeks  4     4

Scatterplots.

Plot gene expression of one gene against another.

xyplot(eggBomb ~ crabHammer, kDat)

plot of chunk unnamed-chunk-2

# You try!
xyplot(poisonFang ~ crabHammer, kDat)

plot of chunk unnamed-chunk-2

Plot the gene expression of one gene against two others at the same time.

xyplot(eggBomb + poisonFang ~ crabHammer, kDat, auto.key = TRUE)

plot of chunk unnamed-chunk-3

Separate the two genes instead via a grid.

xyplot(eggBomb + poisonFang ~ crabHammer, kDat, outer = TRUE, grid = TRUE)

plot of chunk unnamed-chunk-4

Add genotype information.

xyplot(eggBomb + poisonFang ~ crabHammer, kDat, outer = TRUE, grid = TRUE, groups = gType, 
    auto.key = TRUE)

plot of chunk unnamed-chunk-5

Reshape data.

nDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, crabHammer, 
    probeset = factor(rep(c("eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(eggBomb, 
        poisonFang)))
str(nDat)
## 'data.frame':    78 obs. of  7 variables:
##  $ sidChar   : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum    : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ crabHammer: num  10.22 10.02 9.64 9.65 8.58 ...
##  $ probeset  : Factor w/ 2 levels "eggBomb","poisonFang": 1 1 1 1 1 1 1 1 1 1 ...
##  $ geneExp   : num  7.46 6.89 6.72 6.53 6.47 ...

Remake previous plots with panel syntax.

xyplot(geneExp ~ crabHammer | probeset, nDat, grid = TRUE, groups = gType, auto.key = TRUE)

plot of chunk unnamed-chunk-7

# You try!
xyplot(geneExp ~ crabHammer | probeset, nDat, grid = TRUE, groups = devStage, 
    auto.key = TRUE)

plot of chunk unnamed-chunk-7

Stripplots

Futher data reshaping.

oDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, probeset = factor(rep(c("crabHammer", 
    "eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(crabHammer, eggBomb, 
    poisonFang)))
str(oDat)
## 'data.frame':    117 obs. of  6 variables:
##  $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ geneExp : num  10.22 10.02 9.64 9.65 8.58 ...

Univariate stripplot of 1 gene.

stripplot(~geneExp, oDat)

plot of chunk unnamed-chunk-9

Stripplot of 3 genes, and then 3 genes with jitter spacing.

stripplot(probeset ~ geneExp, oDat)

plot of chunk unnamed-chunk-10

stripplot(probeset ~ geneExp, oDat, jitter.data = TRUE)

plot of chunk unnamed-chunk-10

Stripplot of genes separated by panels.

stripplot(~geneExp | probeset, oDat, layout = c(nlevels(oDat$probeset), 1))

plot of chunk unnamed-chunk-11

Stripplot of genes separated by panels with genotype information.

stripplot(~geneExp | probeset, oDat, layout = c(nlevels(oDat$probeset), 1), 
    groups = gType, auto.key = TRUE)

plot of chunk unnamed-chunk-12

Gene expression over the course of development.

stripplot(geneExp ~ devStage, oDat)

plot of chunk unnamed-chunk-13

Gene expression over the course of development for each of the 3 genes.

stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset), 
    1))

plot of chunk unnamed-chunk-14

Gene expression over the course of development for each of the 3 genes with genotype information.

stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset), 
    1), groups = gType, auto.key = TRUE)

plot of chunk unnamed-chunk-15

Gene expression over the course of development for each of the 3 genes with genotype information and the addition of averages.

stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset), 
    1), groups = gType, auto.key = TRUE, grid = TRUE, type = c("p", "a"))

plot of chunk unnamed-chunk-16

Densityplots

Density plot of gene expression.

densityplot(~geneExp, oDat)

plot of chunk unnamed-chunk-17

Density plot of gene expression with genotype displayed in separate panels.

densityplot(~geneExp | gType, oDat, grid = TRUE)

plot of chunk unnamed-chunk-18

Density plot of gene expression with genotype displayed by colour.

densityplot(~geneExp, oDat, groups = gType, auto.key = TRUE)

plot of chunk unnamed-chunk-19

Density plot of gene expression with genotype displayed by colour displaying bandwidth.

Bw <- 0.2
n <- 400
densityplot(~geneExp, oDat, groups = gType, auto.key = TRUE, bw = Bw, n = n, 
    main = paste("bw =", Bw, ", n =", n))

plot of chunk unnamed-chunk-20

# You try!
Bw2 <- 0.4
n2 <- 200
densityplot(~geneExp, oDat, groups = devStage, auto.key = TRUE, bw = Bw2, n = n2, 
    main = paste("bw =", Bw, ", n =", n))

plot of chunk unnamed-chunk-20

Boxplots

Boxplot of gene expression and developmental stage.

bwplot(geneExp ~ devStage, oDat)

plot of chunk unnamed-chunk-21

Boxplot of gene expression and developmental stage by genotype

bwplot(geneExp ~ devStage | gType, oDat)

plot of chunk unnamed-chunk-22

Violin plot.

bwplot(geneExp ~ devStage | gType, oDat, panel = panel.violin)

plot of chunk unnamed-chunk-23

Heatmaps

Load larger dataset.

prDat <- read.table("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_data.tsv")
str(prDat, max.level = 0)
## 'data.frame':    29949 obs. of  39 variables:
prDes <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_design.rds")
str(prDes)
## 'data.frame':    39 obs. of  4 variables:
##  $ sidChar : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##  $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...

Choose a random sample size of 50 probes in a repeatable way. Convert to matrix and add meaningful labels.

set.seed(1)
(yo <- sample(1:nrow(prDat), size = 50))
##  [1]  7952 11145 17156 27198  6040 26902 28287 19786 18837  1850  6167
## [12]  5286 20568 11499 23046 14899 21481 29690 11375 23269 27975  6350
## [23] 19503  3758  7997 11555   401 11442 26023 10184 14424 17938 14766
## [34]  5571 24751 19997 23759  3229 21647 12302 24554 19353 23416 16540
## [45] 15842 23605   698 14271 21897 20713
hDat <- prDat[yo, ]
str(hDat)
## 'data.frame':    50 obs. of  39 variables:
##  $ Sample_20: num  8.3 8.25 6.91 6.79 6.4 ...
##  $ Sample_21: num  8.33 9.14 6.55 6.23 6.69 ...
##  $ Sample_22: num  8.43 8.19 6.59 6.69 6.14 ...
##  $ Sample_23: num  8.49 8.66 6.58 6.34 6.34 ...
##  $ Sample_16: num  8.51 6.66 7.75 7.25 5.5 ...
##  $ Sample_17: num  8.18 7.95 6.85 6.38 7.51 ...
##  $ Sample_6 : num  7.96 8.45 7.42 6.19 7.64 ...
##  $ Sample_24: num  8.34 7.49 7.17 6.84 5.83 ...
##  $ Sample_25: num  8.14 7.39 7.12 7.02 5.85 ...
##  $ Sample_26: num  8.45 6.94 7.46 7.43 6.32 ...
##  $ Sample_27: num  8.25 6.5 7.23 6.91 5.8 ...
##  $ Sample_14: num  8.46 6.99 7.14 6.78 6.29 ...
##  $ Sample_3 : num  8.53 7.14 7.23 6.88 6.15 ...
##  $ Sample_5 : num  8.45 6.71 7.36 6.92 6 ...
##  $ Sample_8 : num  8.62 6.66 7.9 6.97 5.95 ...
##  $ Sample_28: num  8.63 6.46 7.45 7.17 6.03 ...
##  $ Sample_29: num  8.58 7.84 6.72 6.91 6.31 ...
##  $ Sample_30: num  8.28 7.01 6.81 6.75 5.81 ...
##  $ Sample_31: num  8.47 6.88 7.18 6.89 5.79 ...
##  $ Sample_1 : num  8.66 6.81 7.22 6.56 6.03 ...
##  $ Sample_10: num  8.68 7.41 6.97 6.5 5.99 ...
##  $ Sample_4 : num  8.74 7.23 6.91 6.78 5.82 ...
##  $ Sample_7 : num  8.69 6.61 7.43 7.1 5.64 ...
##  $ Sample_32: num  9.7 7.62 6.96 6.89 6.82 ...
##  $ Sample_33: num  8.72 6.83 7.36 7.21 5.93 ...
##  $ Sample_34: num  8.58 6.73 7.63 7.19 5.93 ...
##  $ Sample_35: num  8.54 6.91 7.14 6.71 5.59 ...
##  $ Sample_13: num  8.73 6.83 7.14 6.46 6.25 ...
##  $ Sample_15: num  8.57 6.86 7.12 6.72 5.84 ...
##  $ Sample_18: num  8.96 6.95 6.96 6.31 6.44 ...
##  $ Sample_19: num  8.65 6.69 7.03 6.91 6.32 ...
##  $ Sample_36: num  10.41 7.9 6.57 6.87 6.09 ...
##  $ Sample_37: num  9.48 6.36 6.99 6.85 6.08 ...
##  $ Sample_38: num  10.14 7.24 6.95 6.99 6.13 ...
##  $ Sample_39: num  9.48 6.21 6.97 7.13 6.02 ...
##  $ Sample_11: num  9.09 6.23 7 6.4 6.22 ...
##  $ Sample_12: num  9.21 6.47 7.17 6.78 5.76 ...
##  $ Sample_2 : num  9.05 6.2 7.2 6.54 5.9 ...
##  $ Sample_9 : num  8.89 7.67 6.86 6.72 6.26 ...

hDat <- as.matrix(t(hDat))
rownames(hDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))
str(hDat)
##  num [1:39, 1:50] 8.3 8.33 8.43 8.49 8.51 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
##   ..$ : chr [1:50] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...

Make a heatmap. Immediately make a heatmap with better colours.

heatmap(hDat, Rowv = NA, Colv = NA, scale = "none", margins = c(5, 8))

plot of chunk unnamed-chunk-26

heatmap(hDat, Rowv = NA, Colv = NA, col = cm.colors(256), scale = "none", margins = c(5, 
    8))

plot of chunk unnamed-chunk-26

Load RColorBrewer and try the grey and blue/purple palettes.

library(RColorBrewer)
display.brewer.all()

plot of chunk unnamed-chunk-27

jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Greys"))
jBuPuFun <- colorRampPalette(brewer.pal(n = 9, "BuPu"))
heatmap(hDat, Rowv = NA, Colv = NA, col = jGraysFun(256), scale = "none", margins = c(5, 
    8))

plot of chunk unnamed-chunk-27

heatmap(hDat, Rowv = NA, Colv = NA, col = jBuPuFun(256), scale = "none", margins = c(5, 
    8))

plot of chunk unnamed-chunk-27

Add dendrograms.

heatmap(hDat, col = jBuPuFun(256), margins = c(5, 8))

plot of chunk unnamed-chunk-28

Scale according to column.

heatmap(hDat, col = jBuPuFun(256), margins = c(5, 8), scale = c("column"))

plot of chunk unnamed-chunk-29

Overplotting

Select 2 random, but repeatable, samples to plot against each other.

set.seed(924)
(yo <- sample(1:ncol(prDat), size = 2))
## [1] 25 24
y <- prDat[[yo[1]]]
z <- prDat[[yo[2]]]
str(y)
##  num [1:29949] 7.01 8.97 9.22 8.42 8.53 ...
str(z)
##  num [1:29949] 7.54 9.53 9.92 8.78 8.57 ...
xyplot(y ~ z, asp = 1)

plot of chunk unnamed-chunk-30

Explore the smoothscatter plotting function, or by specifying in 'panel' from lattice.

smoothScatter(y ~ z, asp = 1)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009

plot of chunk unnamed-chunk-31

xyplot(y ~ z, asp = 1, panel = panel.smoothScatter, nbin = 150)

plot of chunk unnamed-chunk-31

Explore hexagonal binning.

hexbinplot(y ~ z)

plot of chunk unnamed-chunk-32

Plot Matrix

Take a larger sample of columns. Create pairwise plots.

set.seed(3)
(yo <- sample(1:ncol(prDat), size = 4))
## [1]  7 31 15 12
pairDat <- subset(prDat, select = yo)
str(pairDat)
## 'data.frame':    29949 obs. of  4 variables:
##  $ Sample_6 : num  7.24 9.71 10.17 8.84 8.54 ...
##  $ Sample_19: num  7.21 9.21 9.59 8.31 8.31 ...
##  $ Sample_8 : num  7.11 8.24 9.13 8.13 8.33 ...
##  $ Sample_14: num  7.09 9.56 9.88 8.57 8.59 ...
pairs(pairDat)

plot of chunk unnamed-chunk-33

Add smoothscatter.

pairs(pairDat, panel = function(...) smoothScatter(..., add = TRUE))

plot of chunk unnamed-chunk-34

Explore splom() and splom() with smoothscatter. This is useful for high-volume scatterplotting.

splom(pairDat)

plot of chunk unnamed-chunk-35

splom(pairDat, panel = panel.smoothScatter, raster = TRUE)

plot of chunk unnamed-chunk-35

Explore hexplom().

hexplom(pairDat)

plot of chunk unnamed-chunk-36

ggplot2

Load and explore basic terminology.

library(ggplot2)
apropos("^geom_")
##  [1] "geom_abline"     "geom_area"       "geom_bar"       
##  [4] "geom_bin2d"      "geom_blank"      "geom_boxplot"   
##  [7] "geom_contour"    "geom_crossbar"   "geom_density"   
## [10] "geom_density2d"  "geom_dotplot"    "geom_errorbar"  
## [13] "geom_errorbarh"  "geom_freqpoly"   "geom_hex"       
## [16] "geom_histogram"  "geom_hline"      "geom_jitter"    
## [19] "geom_line"       "geom_linerange"  "geom_map"       
## [22] "geom_path"       "geom_point"      "geom_pointrange"
## [25] "geom_polygon"    "geom_quantile"   "geom_raster"    
## [28] "geom_rect"       "geom_ribbon"     "geom_rug"       
## [31] "geom_segment"    "geom_smooth"     "geom_step"      
## [34] "geom_text"       "geom_tile"       "geom_violin"    
## [37] "geom_vline"
apropos("^stat_")
##  [1] "stat_abline"      "stat_bin"         "stat_bin2d"      
##  [4] "stat_bindot"      "stat_binhex"      "stat_boxplot"    
##  [7] "stat_contour"     "stat_density"     "stat_density2d"  
## [10] "stat_ecdf"        "stat_function"    "stat_hline"      
## [13] "stat_identity"    "stat_qq"          "stat_quantile"   
## [16] "stat_smooth"      "stat_spoke"       "stat_sum"        
## [19] "stat_summary"     "stat_summary2d"   "stat_summary_hex"
## [22] "stat_unique"      "stat_vline"       "stat_ydensity"
apropos("^scale_")
##  [1] "scale_alpha"               "scale_alpha_continuous"   
##  [3] "scale_alpha_discrete"      "scale_alpha_identity"     
##  [5] "scale_alpha_manual"        "scale_area"               
##  [7] "scale_color_brewer"        "scale_color_continuous"   
##  [9] "scale_color_discrete"      "scale_color_gradient"     
## [11] "scale_color_gradient2"     "scale_color_gradientn"    
## [13] "scale_color_grey"          "scale_color_hue"          
## [15] "scale_color_identity"      "scale_color_manual"       
## [17] "scale_colour_brewer"       "scale_colour_continuous"  
## [19] "scale_colour_discrete"     "scale_colour_gradient"    
## [21] "scale_colour_gradient2"    "scale_colour_gradientn"   
## [23] "scale_colour_grey"         "scale_colour_hue"         
## [25] "scale_colour_identity"     "scale_colour_manual"      
## [27] "scale_fill_brewer"         "scale_fill_continuous"    
## [29] "scale_fill_discrete"       "scale_fill_gradient"      
## [31] "scale_fill_gradient2"      "scale_fill_gradientn"     
## [33] "scale_fill_grey"           "scale_fill_hue"           
## [35] "scale_fill_identity"       "scale_fill_manual"        
## [37] "scale_linetype"            "scale_linetype_continuous"
## [39] "scale_linetype_discrete"   "scale_linetype_identity"  
## [41] "scale_linetype_manual"     "scale_shape"              
## [43] "scale_shape_continuous"    "scale_shape_discrete"     
## [45] "scale_shape_identity"      "scale_shape_manual"       
## [47] "scale_size"                "scale_size_area"          
## [49] "scale_size_continuous"     "scale_size_discrete"      
## [51] "scale_size_identity"       "scale_size_manual"        
## [53] "scale_x_continuous"        "scale_x_date"             
## [55] "scale_x_datetime"          "scale_x_discrete"         
## [57] "scale_x_log10"             "scale_x_reverse"          
## [59] "scale_x_sqrt"              "scale_y_continuous"       
## [61] "scale_y_date"              "scale_y_datetime"         
## [63] "scale_y_discrete"          "scale_y_log10"            
## [65] "scale_y_reverse"           "scale_y_sqrt"

Load data. Add a few tables for info about data set.

kDat <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_MINI.rds")
str(kDat)
## 'data.frame':    39 obs. of  7 variables:
##  $ sidChar   : chr  "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##  $ sidNum    : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ crabHammer: num  10.22 10.02 9.64 9.65 8.58 ...
##  $ eggBomb   : num  7.46 6.89 6.72 6.53 6.47 ...
##  $ poisonFang: num  7.37 7.18 7.35 7.04 7.49 ...

table(kDat$devStage)
## 
##     E16      P2      P6     P10 4_weeks 
##       7       8       8       8       8
table(kDat$gType)
## 
##    wt NrlKO 
##    20    19
with(kDat, table(devStage, gType))
##          gType
## devStage  wt NrlKO
##   E16      4     3
##   P2       4     4
##   P6       4     4
##   P10      4     4
##   4_weeks  4     4

qplot

Make a (basic) qplot.

qplot(crabHammer, eggBomb, data = kDat)

plot of chunk unnamed-chunk-39

Make a scatterplot. Information is added to ggplot2 in layers. Such as labels, titles, and smoothing lines.

p <- ggplot(kDat, aes(x = crabHammer, y = eggBomb))
str(p)
## List of 9
##  $ data       :'data.frame': 39 obs. of  7 variables:
##   ..$ sidChar   : chr [1:39] "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
##   ..$ sidNum    : num [1:39] 20 21 22 23 16 17 6 24 25 26 ...
##   ..$ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##   ..$ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##   ..$ crabHammer: num [1:39] 10.22 10.02 9.64 9.65 8.58 ...
##   ..$ eggBomb   : num [1:39] 7.46 6.89 6.72 6.53 6.47 ...
##   ..$ poisonFang: num [1:39] 7.37 7.18 7.35 7.04 7.49 ...
##  $ layers     : list()
##  $ scales     :Reference class 'Scales' [package "ggplot2"] with 1 fields
##   ..$ scales: NULL
##   ..and 21 methods, of which 9 are possibly relevant:
##   ..  add, clone, find, get_scales, has_scale, initialize, input, n,
##   ..  non_position_scales
##  $ mapping    :List of 2
##   ..$ x: symbol crabHammer
##   ..$ y: symbol eggBomb
##  $ theme      : list()
##  $ coordinates:List of 1
##   ..$ limits:List of 2
##   .. ..$ x: NULL
##   .. ..$ y: NULL
##   ..- attr(*, "class")= chr [1:2] "cartesian" "coord"
##  $ facet      :List of 1
##   ..$ shrink: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "null" "facet"
##  $ plot_env   :<environment: R_GlobalEnv> 
##  $ labels     :List of 2
##   ..$ x: chr "crabHammer"
##   ..$ y: chr "eggBomb"
##  - attr(*, "class")= chr [1:2] "gg" "ggplot"

(p <- p + geom_point())

plot of chunk unnamed-chunk-40

(p <- p + stat_smooth())
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-40

(p <- p + theme_bw() + xlab("Expression of crabHammer") + ylab("Expression of eggBomb") + 
    ggtitle("Scatterplot for expression levels"))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-40

Reshape data.

nDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, crabHammer, 
    probeset = factor(rep(c("eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(eggBomb, 
        poisonFang)))
str(nDat)
## 'data.frame':    78 obs. of  7 variables:
##  $ sidChar   : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum    : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage  : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType     : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ crabHammer: num  10.22 10.02 9.64 9.65 8.58 ...
##  $ probeset  : Factor w/ 2 levels "eggBomb","poisonFang": 1 1 1 1 1 1 1 1 1 1 ...
##  $ geneExp   : num  7.46 6.89 6.72 6.53 6.47 ...

Gene expression of poisonFang and eggBomb against crabHammer using color.

(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point())

plot of chunk unnamed-chunk-42

Add a smoothing line.

(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point() + 
    stat_smooth(se = F))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-43

Change the line so that it no longer defining a line for each y-axis probeset, but for the entire set.

(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point() + 
    stat_smooth(se = F, aes(group = 1)))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-44

Use facetting to plot probesets in separate panels.

(p <- ggplot(nDat, aes(crabHammer, geneExp)) + geom_point() + facet_wrap(~probeset))

plot of chunk unnamed-chunk-45

Now color can be used to display genotype.

(p <- ggplot(nDat, aes(crabHammer, geneExp, color = gType)) + geom_point() + 
    facet_wrap(~probeset))

plot of chunk unnamed-chunk-46


# You try! Use color for developmental stage instead of genotype!
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = devStage)) + geom_point() + 
    facet_wrap(~probeset))

plot of chunk unnamed-chunk-46

Stripplots

Reshape data.

oDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, probeset = factor(rep(c("crabHammer", 
    "eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(crabHammer, eggBomb, 
    poisonFang)))
str(oDat)
## 'data.frame':    117 obs. of  6 variables:
##  $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum  : num  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ gType   : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
##  $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ geneExp : num  10.22 10.02 9.64 9.65 8.58 ...

Plot the expression level of each gene. Then add jitter for spacing.

(p <- ggplot(oDat, aes(geneExp, probeset)) + geom_point())

plot of chunk unnamed-chunk-48


(p <- ggplot(oDat, aes(geneExp, probeset)) + geom_point(position = position_jitter(height = 0.1)))

plot of chunk unnamed-chunk-48

Plot the expression level according to developmental stage.

(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_point())

plot of chunk unnamed-chunk-49

Separate our 3 genes of interest with panels.

(p <- p + facet_wrap(~probeset))

plot of chunk unnamed-chunk-50

Add genotype information with color.

(p <- p + aes(color = gType))

plot of chunk unnamed-chunk-51

Add in averages.

(p <- p + stat_summary(fun.y = mean, geom = "point", shape = 4, size = 4))

plot of chunk unnamed-chunk-52

Density plots

Test out the 2 kinds of density plots in ggplot2: geom_density and stat_density.

(p <- ggplot(oDat, aes(geneExp)) + geom_density())

plot of chunk unnamed-chunk-53


(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity"))

plot of chunk unnamed-chunk-53

Adding our points in:

(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity") + 
    geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-54

Change the bandwidth.

(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity", 
    adjust = 0.5) + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-55

Use panels to separate by genotype, or use color.

(p <- p + facet_wrap(~gType))

plot of chunk unnamed-chunk-56


(p <- ggplot(oDat, aes(geneExp, color = gType)) + stat_density(geom = "line", 
    position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-56


# You try!  Explore gene expression via developmental stage.
(p <- ggplot(oDat, aes(geneExp, color = devStage)) + stat_density(geom = "line", 
    position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-56

Boxplots

Make a boxplot.

(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_boxplot())

plot of chunk unnamed-chunk-57

Separate the genotypes with panels.

(p <- p + facet_wrap(~gType))

plot of chunk unnamed-chunk-58

Make a violin plot.

(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_violin())

plot of chunk unnamed-chunk-59

Overplotting and plot matrix

Pick two random samples to plot against each other from the large dataset. Put them in a data frame.

set.seed(2)
(yo <- sample(1:ncol(prDat), size = 2))
## [1]  8 27

bDat <- data.frame(y = prDat[[yo[1]]], z = prDat[[yo[2]]])
str(bDat)
## 'data.frame':    29949 obs. of  2 variables:
##  $ y: num  7.11 9.75 9.39 8.37 8.36 ...
##  $ z: num  7.15 9.22 10.06 8.35 8.45 ...

Plot samples against each other (scatterplot).

(p <- ggplot(bDat, aes(z, y)) + geom_point())

plot of chunk unnamed-chunk-61

Reduce the transparency of the data points (similar to smoothscatter).

(p <- ggplot(bDat, aes(z, y)) + geom_point(alpha = 0.1))

plot of chunk unnamed-chunk-62

Plot using the 2D density function.

(p <- ggplot(bDat, aes(z, y)) + stat_density2d())

plot of chunk unnamed-chunk-63

Use colors instead of lines to display the density.

(p <- ggplot(bDat, aes(z, y)) + stat_density2d(geom = "tile", contour = F, aes(fill = ..density..)) + 
    scale_fill_gradient(low = "white", high = "blue"))

plot of chunk unnamed-chunk-64

ggplot2 version of hexbin:

(p <- ggplot(bDat, aes(z, y)) + stat_binhex())

plot of chunk unnamed-chunk-65

Larger sample size to address overplotting using plotmatrix.

set.seed(3)
(yo <- sample(1:ncol(prDat), size = 4))
## [1]  7 31 15 12
pairDat <- subset(prDat, select = yo)
str(pairDat)
## 'data.frame':    29949 obs. of  4 variables:
##  $ Sample_6 : num  7.24 9.71 10.17 8.84 8.54 ...
##  $ Sample_19: num  7.21 9.21 9.59 8.31 8.31 ...
##  $ Sample_8 : num  7.11 8.24 9.13 8.13 8.33 ...
##  $ Sample_14: num  7.09 9.56 9.88 8.57 8.59 ...

(p <- plotmatrix(pairDat) + stat_binhex())
## This function is deprecated. For a replacement, see the ggpairs function in the GGally package. (Deprecated; last used in version 0.9.2)

plot of chunk unnamed-chunk-66

Heatmaps

Make a beautiful, beautiful heatmap after transforming data to tall format.

set.seed(1)
yo <- sample(1:nrow(prDat), size = 50)
hDat <- prDat[yo, ]
colnames(hDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))

prDatTall <- data.frame(sample = rep(colnames(hDat), each = nrow(hDat)), probe = rownames(hDat), 
    expression = unlist(hDat))

ggplot(prDatTall, aes(x = probe, y = sample, fill = expression)) + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1, vjust = 0.5)) + geom_tile() + scale_fill_gradient2(low = jBuPuFun(256)[1], 
    mid = jBuPuFun(256)[256/2], high = jBuPuFun(256)[256], midpoint = (max(prDatTall$expression) + 
        min(prDatTall$expression))/2, name = "Expression")

plot of chunk unnamed-chunk-67

Exercise:

The full photoRec dataset has 39 samples and 29949 probesets. Choose 2 … or 20 … or 200 random probesets/genes and look for gene expression differences between the two genotypes, wild type versus knockout. Make use of the graphing techniques discussed this week such as scatter plots, data heatmaps, correlation heatmaps, etc.

Take a subset of 77 random probes and open a can of plotting whoopass.

set.seed(777)
(exDat <- sample(1:nrow(prDat), size = 77))
##  [1] 20601 14741 10336 29798 20820   321 10331  5152 28425  7461 21940
## [12] 19768 17373 17806 25932  3111 12522 25967 10547 11668 11387 19223
## [23] 15610  5319   898 23149 14559 16715 29610 21003 24970 21745 24910
## [34]  9890  8771  9243 22404  8304 20691 19257   633  8418 17648  9910
## [45]  6484 29576 26871 27075 27651  4530 19893 10746  1213  4803 14646
## [56]  4624  2043 28614  8790   615 22292  5917 24049   177 26737  1909
## [67]  3355  7663 27973 18342  3234 23101 28447 12263  9180 27089  8409
subDat <- prDat[exDat, ]
str(subDat)
## 'data.frame':    77 obs. of  39 variables:
##  $ Sample_20: num  8.69 6.72 8.13 6.4 6.56 ...
##  $ Sample_21: num  8.75 6.49 7.54 7.34 6.6 ...
##  $ Sample_22: num  8.24 6.51 7.88 6.78 6.41 ...
##  $ Sample_23: num  8.28 6.45 7.95 6.93 6.33 ...
##  $ Sample_16: num  8.31 6.94 8.76 6.26 6.73 ...
##  $ Sample_17: num  9.2 6.42 7.75 6.92 6.3 ...
##  $ Sample_6 : num  9.28 6.72 7.79 7.25 6.55 ...
##  $ Sample_24: num  8.2 6.79 8.17 6.6 6.46 ...
##  $ Sample_25: num  8.2 6.69 8.2 6.72 6.49 ...
##  $ Sample_26: num  8.34 6.64 8.39 6.4 6.77 ...
##  $ Sample_27: num  8.39 6.84 8.26 6.6 6.51 ...
##  $ Sample_14: num  8.18 6.57 8.28 6.56 6.51 ...
##  $ Sample_3 : num  8.08 6.54 8.49 6.44 6.52 ...
##  $ Sample_5 : num  8.07 6.69 8.51 6.4 6.65 ...
##  $ Sample_8 : num  8.2 6.71 8.6 6.47 6.69 ...
##  $ Sample_28: num  7.69 6.82 8.32 6.34 6.62 ...
##  $ Sample_29: num  9.16 6.71 7.93 6.71 6.91 ...
##  $ Sample_30: num  8.32 6.61 7.96 6.8 6.44 ...
##  $ Sample_31: num  7.73 6.73 8.22 6.66 6.5 ...
##  $ Sample_1 : num  7.5 6.89 8.23 6.51 6.56 ...
##  $ Sample_10: num  7.84 6.63 7.99 7 6.36 ...
##  $ Sample_4 : num  7.63 6.75 8.3 6.56 6.45 ...
##  $ Sample_7 : num  7.99 6.89 8.39 6.39 6.87 ...
##  $ Sample_32: num  8.67 6.84 7.62 7.3 7.88 ...
##  $ Sample_33: num  8.49 6.93 8.24 6.44 6.72 ...
##  $ Sample_34: num  8.26 7.13 8.35 6.37 6.74 ...
##  $ Sample_35: num  8 6.73 8.32 6.87 6.58 ...
##  $ Sample_13: num  8.01 7.09 7.93 6.6 6.71 ...
##  $ Sample_15: num  7.87 7.24 8.13 6.46 6.37 ...
##  $ Sample_18: num  7.72 7.36 7.81 6.87 6.57 ...
##  $ Sample_19: num  7.93 7.1 8.23 6.48 6.72 ...
##  $ Sample_36: num  8.72 6.8 7.7 8.31 7.63 ...
##  $ Sample_37: num  8.82 6.59 8.06 7.42 7.03 ...
##  $ Sample_38: num  9.02 6.67 8.02 7.78 7.39 ...
##  $ Sample_39: num  8.87 6.57 8.06 7.54 7.37 ...
##  $ Sample_11: num  8.07 6.81 7.97 6.59 6.79 ...
##  $ Sample_12: num  7.97 7 8.31 6.51 6.71 ...
##  $ Sample_2 : num  8.3 6.93 8.07 6.69 7.05 ...
##  $ Sample_9 : num  8.11 6.77 7.73 6.81 6.59 ...

Make a heatmap with lattice.

hsubDat <- as.matrix(t(subDat))
rownames(hsubDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))

heatmap(hsubDat, col = jBuPuFun(256), scale = c("column"), margins = c(5, 8))

plot of chunk unnamed-chunk-69

Make a heatmap with ggplot2.

colnames(subDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))

prDatTall <- data.frame(sample = rep(colnames(subDat), each = nrow(subDat)), 
    probe = rownames(subDat), expression = unlist(subDat))

ggplot(prDatTall, aes(x = probe, y = sample, fill = expression)) + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1, vjust = 0.5)) + geom_tile() + scale_fill_gradient2(low = jBuPuFun(256)[1], 
    mid = jBuPuFun(256)[256/2], high = jBuPuFun(256)[256], midpoint = (max(prDatTall$expression) + 
        min(prDatTall$expression))/2, name = "Expression")

plot of chunk unnamed-chunk-70

Make a density plot with lattice.

resDat <- data.frame(sample = rep(names(subDat), each = 77), gene = LETTERS[1:77], 
    geneExp = unlist(subDat))

Bw3 <- 0.5
n3 <- 500
densityplot(~geneExp, resDat, auto.key = TRUE, bw = Bw3, n = n3, main = paste("bw =", 
    Bw3, ", n =", n3))

plot of chunk unnamed-chunk-71

Make a density plot with ggplot2.

(p <- ggplot(resDat, aes(geneExp)) + stat_density(geom = "line", position = "identity") + 
    geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))

plot of chunk unnamed-chunk-72

Be crazy and use splom().

splom(subDat, panel = panel.smoothScatter, raster = TRUE)

plot of chunk unnamed-chunk-73