by Erica Acton
Load data.
library(lattice)
library(grid)
library(hexbin)
kDat <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_MINI.rds")
str(kDat)
## 'data.frame': 39 obs. of 7 variables:
## $ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ crabHammer: num 10.22 10.02 9.64 9.65 8.58 ...
## $ eggBomb : num 7.46 6.89 6.72 6.53 6.47 ...
## $ poisonFang: num 7.37 7.18 7.35 7.04 7.49 ...
table(kDat$devStage)
##
## E16 P2 P6 P10 4_weeks
## 7 8 8 8 8
table(kDat$gType)
##
## wt NrlKO
## 20 19
with(kDat, table(devStage, gType))
## gType
## devStage wt NrlKO
## E16 4 3
## P2 4 4
## P6 4 4
## P10 4 4
## 4_weeks 4 4
Plot gene expression of one gene against another.
xyplot(eggBomb ~ crabHammer, kDat)
# You try!
xyplot(poisonFang ~ crabHammer, kDat)
Plot the gene expression of one gene against two others at the same time.
xyplot(eggBomb + poisonFang ~ crabHammer, kDat, auto.key = TRUE)
Separate the two genes instead via a grid.
xyplot(eggBomb + poisonFang ~ crabHammer, kDat, outer = TRUE, grid = TRUE)
Add genotype information.
xyplot(eggBomb + poisonFang ~ crabHammer, kDat, outer = TRUE, grid = TRUE, groups = gType,
auto.key = TRUE)
Reshape data.
nDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, crabHammer,
probeset = factor(rep(c("eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(eggBomb,
poisonFang)))
str(nDat)
## 'data.frame': 78 obs. of 7 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ crabHammer: num 10.22 10.02 9.64 9.65 8.58 ...
## $ probeset : Factor w/ 2 levels "eggBomb","poisonFang": 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 7.46 6.89 6.72 6.53 6.47 ...
Remake previous plots with panel syntax.
xyplot(geneExp ~ crabHammer | probeset, nDat, grid = TRUE, groups = gType, auto.key = TRUE)
# You try!
xyplot(geneExp ~ crabHammer | probeset, nDat, grid = TRUE, groups = devStage,
auto.key = TRUE)
Futher data reshaping.
oDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, probeset = factor(rep(c("crabHammer",
"eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(crabHammer, eggBomb,
poisonFang)))
str(oDat)
## 'data.frame': 117 obs. of 6 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 10.22 10.02 9.64 9.65 8.58 ...
Univariate stripplot of 1 gene.
stripplot(~geneExp, oDat)
Stripplot of 3 genes, and then 3 genes with jitter spacing.
stripplot(probeset ~ geneExp, oDat)
stripplot(probeset ~ geneExp, oDat, jitter.data = TRUE)
Stripplot of genes separated by panels.
stripplot(~geneExp | probeset, oDat, layout = c(nlevels(oDat$probeset), 1))
Stripplot of genes separated by panels with genotype information.
stripplot(~geneExp | probeset, oDat, layout = c(nlevels(oDat$probeset), 1),
groups = gType, auto.key = TRUE)
Gene expression over the course of development.
stripplot(geneExp ~ devStage, oDat)
Gene expression over the course of development for each of the 3 genes.
stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset),
1))
Gene expression over the course of development for each of the 3 genes with genotype information.
stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset),
1), groups = gType, auto.key = TRUE)
Gene expression over the course of development for each of the 3 genes with genotype information and the addition of averages.
stripplot(geneExp ~ devStage | probeset, oDat, layout = c(nlevels(oDat$probeset),
1), groups = gType, auto.key = TRUE, grid = TRUE, type = c("p", "a"))
Density plot of gene expression.
densityplot(~geneExp, oDat)
Density plot of gene expression with genotype displayed in separate panels.
densityplot(~geneExp | gType, oDat, grid = TRUE)
Density plot of gene expression with genotype displayed by colour.
densityplot(~geneExp, oDat, groups = gType, auto.key = TRUE)
Density plot of gene expression with genotype displayed by colour displaying bandwidth.
Bw <- 0.2
n <- 400
densityplot(~geneExp, oDat, groups = gType, auto.key = TRUE, bw = Bw, n = n,
main = paste("bw =", Bw, ", n =", n))
# You try!
Bw2 <- 0.4
n2 <- 200
densityplot(~geneExp, oDat, groups = devStage, auto.key = TRUE, bw = Bw2, n = n2,
main = paste("bw =", Bw, ", n =", n))
Boxplot of gene expression and developmental stage.
bwplot(geneExp ~ devStage, oDat)
Boxplot of gene expression and developmental stage by genotype
bwplot(geneExp ~ devStage | gType, oDat)
Violin plot.
bwplot(geneExp ~ devStage | gType, oDat, panel = panel.violin)
Load larger dataset.
prDat <- read.table("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_data.tsv")
str(prDat, max.level = 0)
## 'data.frame': 29949 obs. of 39 variables:
prDes <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_design.rds")
str(prDes)
## 'data.frame': 39 obs. of 4 variables:
## $ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
Choose a random sample size of 50 probes in a repeatable way. Convert to matrix and add meaningful labels.
set.seed(1)
(yo <- sample(1:nrow(prDat), size = 50))
## [1] 7952 11145 17156 27198 6040 26902 28287 19786 18837 1850 6167
## [12] 5286 20568 11499 23046 14899 21481 29690 11375 23269 27975 6350
## [23] 19503 3758 7997 11555 401 11442 26023 10184 14424 17938 14766
## [34] 5571 24751 19997 23759 3229 21647 12302 24554 19353 23416 16540
## [45] 15842 23605 698 14271 21897 20713
hDat <- prDat[yo, ]
str(hDat)
## 'data.frame': 50 obs. of 39 variables:
## $ Sample_20: num 8.3 8.25 6.91 6.79 6.4 ...
## $ Sample_21: num 8.33 9.14 6.55 6.23 6.69 ...
## $ Sample_22: num 8.43 8.19 6.59 6.69 6.14 ...
## $ Sample_23: num 8.49 8.66 6.58 6.34 6.34 ...
## $ Sample_16: num 8.51 6.66 7.75 7.25 5.5 ...
## $ Sample_17: num 8.18 7.95 6.85 6.38 7.51 ...
## $ Sample_6 : num 7.96 8.45 7.42 6.19 7.64 ...
## $ Sample_24: num 8.34 7.49 7.17 6.84 5.83 ...
## $ Sample_25: num 8.14 7.39 7.12 7.02 5.85 ...
## $ Sample_26: num 8.45 6.94 7.46 7.43 6.32 ...
## $ Sample_27: num 8.25 6.5 7.23 6.91 5.8 ...
## $ Sample_14: num 8.46 6.99 7.14 6.78 6.29 ...
## $ Sample_3 : num 8.53 7.14 7.23 6.88 6.15 ...
## $ Sample_5 : num 8.45 6.71 7.36 6.92 6 ...
## $ Sample_8 : num 8.62 6.66 7.9 6.97 5.95 ...
## $ Sample_28: num 8.63 6.46 7.45 7.17 6.03 ...
## $ Sample_29: num 8.58 7.84 6.72 6.91 6.31 ...
## $ Sample_30: num 8.28 7.01 6.81 6.75 5.81 ...
## $ Sample_31: num 8.47 6.88 7.18 6.89 5.79 ...
## $ Sample_1 : num 8.66 6.81 7.22 6.56 6.03 ...
## $ Sample_10: num 8.68 7.41 6.97 6.5 5.99 ...
## $ Sample_4 : num 8.74 7.23 6.91 6.78 5.82 ...
## $ Sample_7 : num 8.69 6.61 7.43 7.1 5.64 ...
## $ Sample_32: num 9.7 7.62 6.96 6.89 6.82 ...
## $ Sample_33: num 8.72 6.83 7.36 7.21 5.93 ...
## $ Sample_34: num 8.58 6.73 7.63 7.19 5.93 ...
## $ Sample_35: num 8.54 6.91 7.14 6.71 5.59 ...
## $ Sample_13: num 8.73 6.83 7.14 6.46 6.25 ...
## $ Sample_15: num 8.57 6.86 7.12 6.72 5.84 ...
## $ Sample_18: num 8.96 6.95 6.96 6.31 6.44 ...
## $ Sample_19: num 8.65 6.69 7.03 6.91 6.32 ...
## $ Sample_36: num 10.41 7.9 6.57 6.87 6.09 ...
## $ Sample_37: num 9.48 6.36 6.99 6.85 6.08 ...
## $ Sample_38: num 10.14 7.24 6.95 6.99 6.13 ...
## $ Sample_39: num 9.48 6.21 6.97 7.13 6.02 ...
## $ Sample_11: num 9.09 6.23 7 6.4 6.22 ...
## $ Sample_12: num 9.21 6.47 7.17 6.78 5.76 ...
## $ Sample_2 : num 9.05 6.2 7.2 6.54 5.9 ...
## $ Sample_9 : num 8.89 7.67 6.86 6.72 6.26 ...
hDat <- as.matrix(t(hDat))
rownames(hDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))
str(hDat)
## num [1:39, 1:50] 8.3 8.33 8.43 8.49 8.51 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
## ..$ : chr [1:50] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...
Make a heatmap. Immediately make a heatmap with better colours.
heatmap(hDat, Rowv = NA, Colv = NA, scale = "none", margins = c(5, 8))
heatmap(hDat, Rowv = NA, Colv = NA, col = cm.colors(256), scale = "none", margins = c(5,
8))
Load RColorBrewer and try the grey and blue/purple palettes.
library(RColorBrewer)
display.brewer.all()
jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Greys"))
jBuPuFun <- colorRampPalette(brewer.pal(n = 9, "BuPu"))
heatmap(hDat, Rowv = NA, Colv = NA, col = jGraysFun(256), scale = "none", margins = c(5,
8))
heatmap(hDat, Rowv = NA, Colv = NA, col = jBuPuFun(256), scale = "none", margins = c(5,
8))
Add dendrograms.
heatmap(hDat, col = jBuPuFun(256), margins = c(5, 8))
Scale according to column.
heatmap(hDat, col = jBuPuFun(256), margins = c(5, 8), scale = c("column"))
Select 2 random, but repeatable, samples to plot against each other.
set.seed(924)
(yo <- sample(1:ncol(prDat), size = 2))
## [1] 25 24
y <- prDat[[yo[1]]]
z <- prDat[[yo[2]]]
str(y)
## num [1:29949] 7.01 8.97 9.22 8.42 8.53 ...
str(z)
## num [1:29949] 7.54 9.53 9.92 8.78 8.57 ...
xyplot(y ~ z, asp = 1)
Explore the smoothscatter plotting function, or by specifying in 'panel' from lattice.
smoothScatter(y ~ z, asp = 1)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
xyplot(y ~ z, asp = 1, panel = panel.smoothScatter, nbin = 150)
Explore hexagonal binning.
hexbinplot(y ~ z)
Take a larger sample of columns. Create pairwise plots.
set.seed(3)
(yo <- sample(1:ncol(prDat), size = 4))
## [1] 7 31 15 12
pairDat <- subset(prDat, select = yo)
str(pairDat)
## 'data.frame': 29949 obs. of 4 variables:
## $ Sample_6 : num 7.24 9.71 10.17 8.84 8.54 ...
## $ Sample_19: num 7.21 9.21 9.59 8.31 8.31 ...
## $ Sample_8 : num 7.11 8.24 9.13 8.13 8.33 ...
## $ Sample_14: num 7.09 9.56 9.88 8.57 8.59 ...
pairs(pairDat)
Add smoothscatter.
pairs(pairDat, panel = function(...) smoothScatter(..., add = TRUE))
Explore splom() and splom() with smoothscatter. This is useful for high-volume scatterplotting.
splom(pairDat)
splom(pairDat, panel = panel.smoothScatter, raster = TRUE)
Explore hexplom().
hexplom(pairDat)
Load and explore basic terminology.
library(ggplot2)
apropos("^geom_")
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin2d" "geom_blank" "geom_boxplot"
## [7] "geom_contour" "geom_crossbar" "geom_density"
## [10] "geom_density2d" "geom_dotplot" "geom_errorbar"
## [13] "geom_errorbarh" "geom_freqpoly" "geom_hex"
## [16] "geom_histogram" "geom_hline" "geom_jitter"
## [19] "geom_line" "geom_linerange" "geom_map"
## [22] "geom_path" "geom_point" "geom_pointrange"
## [25] "geom_polygon" "geom_quantile" "geom_raster"
## [28] "geom_rect" "geom_ribbon" "geom_rug"
## [31] "geom_segment" "geom_smooth" "geom_step"
## [34] "geom_text" "geom_tile" "geom_violin"
## [37] "geom_vline"
apropos("^stat_")
## [1] "stat_abline" "stat_bin" "stat_bin2d"
## [4] "stat_bindot" "stat_binhex" "stat_boxplot"
## [7] "stat_contour" "stat_density" "stat_density2d"
## [10] "stat_ecdf" "stat_function" "stat_hline"
## [13] "stat_identity" "stat_qq" "stat_quantile"
## [16] "stat_smooth" "stat_spoke" "stat_sum"
## [19] "stat_summary" "stat_summary2d" "stat_summary_hex"
## [22] "stat_unique" "stat_vline" "stat_ydensity"
apropos("^scale_")
## [1] "scale_alpha" "scale_alpha_continuous"
## [3] "scale_alpha_discrete" "scale_alpha_identity"
## [5] "scale_alpha_manual" "scale_area"
## [7] "scale_color_brewer" "scale_color_continuous"
## [9] "scale_color_discrete" "scale_color_gradient"
## [11] "scale_color_gradient2" "scale_color_gradientn"
## [13] "scale_color_grey" "scale_color_hue"
## [15] "scale_color_identity" "scale_color_manual"
## [17] "scale_colour_brewer" "scale_colour_continuous"
## [19] "scale_colour_discrete" "scale_colour_gradient"
## [21] "scale_colour_gradient2" "scale_colour_gradientn"
## [23] "scale_colour_grey" "scale_colour_hue"
## [25] "scale_colour_identity" "scale_colour_manual"
## [27] "scale_fill_brewer" "scale_fill_continuous"
## [29] "scale_fill_discrete" "scale_fill_gradient"
## [31] "scale_fill_gradient2" "scale_fill_gradientn"
## [33] "scale_fill_grey" "scale_fill_hue"
## [35] "scale_fill_identity" "scale_fill_manual"
## [37] "scale_linetype" "scale_linetype_continuous"
## [39] "scale_linetype_discrete" "scale_linetype_identity"
## [41] "scale_linetype_manual" "scale_shape"
## [43] "scale_shape_continuous" "scale_shape_discrete"
## [45] "scale_shape_identity" "scale_shape_manual"
## [47] "scale_size" "scale_size_area"
## [49] "scale_size_continuous" "scale_size_discrete"
## [51] "scale_size_identity" "scale_size_manual"
## [53] "scale_x_continuous" "scale_x_date"
## [55] "scale_x_datetime" "scale_x_discrete"
## [57] "scale_x_log10" "scale_x_reverse"
## [59] "scale_x_sqrt" "scale_y_continuous"
## [61] "scale_y_date" "scale_y_datetime"
## [63] "scale_y_discrete" "scale_y_log10"
## [65] "scale_y_reverse" "scale_y_sqrt"
Load data. Add a few tables for info about data set.
kDat <- readRDS("/home/eacton/R/stat540-2014-ACTON-ERICA/GSE4051_MINI.rds")
str(kDat)
## 'data.frame': 39 obs. of 7 variables:
## $ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ crabHammer: num 10.22 10.02 9.64 9.65 8.58 ...
## $ eggBomb : num 7.46 6.89 6.72 6.53 6.47 ...
## $ poisonFang: num 7.37 7.18 7.35 7.04 7.49 ...
table(kDat$devStage)
##
## E16 P2 P6 P10 4_weeks
## 7 8 8 8 8
table(kDat$gType)
##
## wt NrlKO
## 20 19
with(kDat, table(devStage, gType))
## gType
## devStage wt NrlKO
## E16 4 3
## P2 4 4
## P6 4 4
## P10 4 4
## 4_weeks 4 4
Make a (basic) qplot.
qplot(crabHammer, eggBomb, data = kDat)
Make a scatterplot. Information is added to ggplot2 in layers. Such as labels, titles, and smoothing lines.
p <- ggplot(kDat, aes(x = crabHammer, y = eggBomb))
str(p)
## List of 9
## $ data :'data.frame': 39 obs. of 7 variables:
## ..$ sidChar : chr [1:39] "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## ..$ sidNum : num [1:39] 20 21 22 23 16 17 6 24 25 26 ...
## ..$ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## ..$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## ..$ crabHammer: num [1:39] 10.22 10.02 9.64 9.65 8.58 ...
## ..$ eggBomb : num [1:39] 7.46 6.89 6.72 6.53 6.47 ...
## ..$ poisonFang: num [1:39] 7.37 7.18 7.35 7.04 7.49 ...
## $ layers : list()
## $ scales :Reference class 'Scales' [package "ggplot2"] with 1 fields
## ..$ scales: NULL
## ..and 21 methods, of which 9 are possibly relevant:
## .. add, clone, find, get_scales, has_scale, initialize, input, n,
## .. non_position_scales
## $ mapping :List of 2
## ..$ x: symbol crabHammer
## ..$ y: symbol eggBomb
## $ theme : list()
## $ coordinates:List of 1
## ..$ limits:List of 2
## .. ..$ x: NULL
## .. ..$ y: NULL
## ..- attr(*, "class")= chr [1:2] "cartesian" "coord"
## $ facet :List of 1
## ..$ shrink: logi TRUE
## ..- attr(*, "class")= chr [1:2] "null" "facet"
## $ plot_env :<environment: R_GlobalEnv>
## $ labels :List of 2
## ..$ x: chr "crabHammer"
## ..$ y: chr "eggBomb"
## - attr(*, "class")= chr [1:2] "gg" "ggplot"
(p <- p + geom_point())
(p <- p + stat_smooth())
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
(p <- p + theme_bw() + xlab("Expression of crabHammer") + ylab("Expression of eggBomb") +
ggtitle("Scatterplot for expression levels"))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Reshape data.
nDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, crabHammer,
probeset = factor(rep(c("eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(eggBomb,
poisonFang)))
str(nDat)
## 'data.frame': 78 obs. of 7 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ crabHammer: num 10.22 10.02 9.64 9.65 8.58 ...
## $ probeset : Factor w/ 2 levels "eggBomb","poisonFang": 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 7.46 6.89 6.72 6.53 6.47 ...
Gene expression of poisonFang and eggBomb against crabHammer using color.
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point())
Add a smoothing line.
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point() +
stat_smooth(se = F))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Change the line so that it no longer defining a line for each y-axis probeset, but for the entire set.
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = probeset)) + geom_point() +
stat_smooth(se = F, aes(group = 1)))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
Use facetting to plot probesets in separate panels.
(p <- ggplot(nDat, aes(crabHammer, geneExp)) + geom_point() + facet_wrap(~probeset))
Now color can be used to display genotype.
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = gType)) + geom_point() +
facet_wrap(~probeset))
# You try! Use color for developmental stage instead of genotype!
(p <- ggplot(nDat, aes(crabHammer, geneExp, color = devStage)) + geom_point() +
facet_wrap(~probeset))
Reshape data.
oDat <- with(kDat, data.frame(sidChar, sidNum, devStage, gType, probeset = factor(rep(c("crabHammer",
"eggBomb", "poisonFang"), each = nrow(kDat))), geneExp = c(crabHammer, eggBomb,
poisonFang)))
str(oDat)
## 'data.frame': 117 obs. of 6 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 10.22 10.02 9.64 9.65 8.58 ...
Plot the expression level of each gene. Then add jitter for spacing.
(p <- ggplot(oDat, aes(geneExp, probeset)) + geom_point())
(p <- ggplot(oDat, aes(geneExp, probeset)) + geom_point(position = position_jitter(height = 0.1)))
Plot the expression level according to developmental stage.
(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_point())
Separate our 3 genes of interest with panels.
(p <- p + facet_wrap(~probeset))
Add genotype information with color.
(p <- p + aes(color = gType))
Add in averages.
(p <- p + stat_summary(fun.y = mean, geom = "point", shape = 4, size = 4))
Test out the 2 kinds of density plots in ggplot2: geom_density and stat_density.
(p <- ggplot(oDat, aes(geneExp)) + geom_density())
(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity"))
Adding our points in:
(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity") +
geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))
Change the bandwidth.
(p <- ggplot(oDat, aes(geneExp)) + stat_density(geom = "line", position = "identity",
adjust = 0.5) + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))
Use panels to separate by genotype, or use color.
(p <- p + facet_wrap(~gType))
(p <- ggplot(oDat, aes(geneExp, color = gType)) + stat_density(geom = "line",
position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))
# You try! Explore gene expression via developmental stage.
(p <- ggplot(oDat, aes(geneExp, color = devStage)) + stat_density(geom = "line",
position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))
Make a boxplot.
(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_boxplot())
Separate the genotypes with panels.
(p <- p + facet_wrap(~gType))
Make a violin plot.
(p <- ggplot(oDat, aes(devStage, geneExp)) + geom_violin())
Pick two random samples to plot against each other from the large dataset. Put them in a data frame.
set.seed(2)
(yo <- sample(1:ncol(prDat), size = 2))
## [1] 8 27
bDat <- data.frame(y = prDat[[yo[1]]], z = prDat[[yo[2]]])
str(bDat)
## 'data.frame': 29949 obs. of 2 variables:
## $ y: num 7.11 9.75 9.39 8.37 8.36 ...
## $ z: num 7.15 9.22 10.06 8.35 8.45 ...
Plot samples against each other (scatterplot).
(p <- ggplot(bDat, aes(z, y)) + geom_point())
Reduce the transparency of the data points (similar to smoothscatter).
(p <- ggplot(bDat, aes(z, y)) + geom_point(alpha = 0.1))
Plot using the 2D density function.
(p <- ggplot(bDat, aes(z, y)) + stat_density2d())
Use colors instead of lines to display the density.
(p <- ggplot(bDat, aes(z, y)) + stat_density2d(geom = "tile", contour = F, aes(fill = ..density..)) +
scale_fill_gradient(low = "white", high = "blue"))
ggplot2 version of hexbin:
(p <- ggplot(bDat, aes(z, y)) + stat_binhex())
Larger sample size to address overplotting using plotmatrix.
set.seed(3)
(yo <- sample(1:ncol(prDat), size = 4))
## [1] 7 31 15 12
pairDat <- subset(prDat, select = yo)
str(pairDat)
## 'data.frame': 29949 obs. of 4 variables:
## $ Sample_6 : num 7.24 9.71 10.17 8.84 8.54 ...
## $ Sample_19: num 7.21 9.21 9.59 8.31 8.31 ...
## $ Sample_8 : num 7.11 8.24 9.13 8.13 8.33 ...
## $ Sample_14: num 7.09 9.56 9.88 8.57 8.59 ...
(p <- plotmatrix(pairDat) + stat_binhex())
## This function is deprecated. For a replacement, see the ggpairs function in the GGally package. (Deprecated; last used in version 0.9.2)
Make a beautiful, beautiful heatmap after transforming data to tall format.
set.seed(1)
yo <- sample(1:nrow(prDat), size = 50)
hDat <- prDat[yo, ]
colnames(hDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))
prDatTall <- data.frame(sample = rep(colnames(hDat), each = nrow(hDat)), probe = rownames(hDat),
expression = unlist(hDat))
ggplot(prDatTall, aes(x = probe, y = sample, fill = expression)) + theme(axis.text.x = element_text(angle = 90,
hjust = 1, vjust = 0.5)) + geom_tile() + scale_fill_gradient2(low = jBuPuFun(256)[1],
mid = jBuPuFun(256)[256/2], high = jBuPuFun(256)[256], midpoint = (max(prDatTall$expression) +
min(prDatTall$expression))/2, name = "Expression")
The full photoRec dataset has 39 samples and 29949 probesets. Choose 2 … or 20 … or 200 random probesets/genes and look for gene expression differences between the two genotypes, wild type versus knockout. Make use of the graphing techniques discussed this week such as scatter plots, data heatmaps, correlation heatmaps, etc.
Take a subset of 77 random probes and open a can of plotting whoopass.
set.seed(777)
(exDat <- sample(1:nrow(prDat), size = 77))
## [1] 20601 14741 10336 29798 20820 321 10331 5152 28425 7461 21940
## [12] 19768 17373 17806 25932 3111 12522 25967 10547 11668 11387 19223
## [23] 15610 5319 898 23149 14559 16715 29610 21003 24970 21745 24910
## [34] 9890 8771 9243 22404 8304 20691 19257 633 8418 17648 9910
## [45] 6484 29576 26871 27075 27651 4530 19893 10746 1213 4803 14646
## [56] 4624 2043 28614 8790 615 22292 5917 24049 177 26737 1909
## [67] 3355 7663 27973 18342 3234 23101 28447 12263 9180 27089 8409
subDat <- prDat[exDat, ]
str(subDat)
## 'data.frame': 77 obs. of 39 variables:
## $ Sample_20: num 8.69 6.72 8.13 6.4 6.56 ...
## $ Sample_21: num 8.75 6.49 7.54 7.34 6.6 ...
## $ Sample_22: num 8.24 6.51 7.88 6.78 6.41 ...
## $ Sample_23: num 8.28 6.45 7.95 6.93 6.33 ...
## $ Sample_16: num 8.31 6.94 8.76 6.26 6.73 ...
## $ Sample_17: num 9.2 6.42 7.75 6.92 6.3 ...
## $ Sample_6 : num 9.28 6.72 7.79 7.25 6.55 ...
## $ Sample_24: num 8.2 6.79 8.17 6.6 6.46 ...
## $ Sample_25: num 8.2 6.69 8.2 6.72 6.49 ...
## $ Sample_26: num 8.34 6.64 8.39 6.4 6.77 ...
## $ Sample_27: num 8.39 6.84 8.26 6.6 6.51 ...
## $ Sample_14: num 8.18 6.57 8.28 6.56 6.51 ...
## $ Sample_3 : num 8.08 6.54 8.49 6.44 6.52 ...
## $ Sample_5 : num 8.07 6.69 8.51 6.4 6.65 ...
## $ Sample_8 : num 8.2 6.71 8.6 6.47 6.69 ...
## $ Sample_28: num 7.69 6.82 8.32 6.34 6.62 ...
## $ Sample_29: num 9.16 6.71 7.93 6.71 6.91 ...
## $ Sample_30: num 8.32 6.61 7.96 6.8 6.44 ...
## $ Sample_31: num 7.73 6.73 8.22 6.66 6.5 ...
## $ Sample_1 : num 7.5 6.89 8.23 6.51 6.56 ...
## $ Sample_10: num 7.84 6.63 7.99 7 6.36 ...
## $ Sample_4 : num 7.63 6.75 8.3 6.56 6.45 ...
## $ Sample_7 : num 7.99 6.89 8.39 6.39 6.87 ...
## $ Sample_32: num 8.67 6.84 7.62 7.3 7.88 ...
## $ Sample_33: num 8.49 6.93 8.24 6.44 6.72 ...
## $ Sample_34: num 8.26 7.13 8.35 6.37 6.74 ...
## $ Sample_35: num 8 6.73 8.32 6.87 6.58 ...
## $ Sample_13: num 8.01 7.09 7.93 6.6 6.71 ...
## $ Sample_15: num 7.87 7.24 8.13 6.46 6.37 ...
## $ Sample_18: num 7.72 7.36 7.81 6.87 6.57 ...
## $ Sample_19: num 7.93 7.1 8.23 6.48 6.72 ...
## $ Sample_36: num 8.72 6.8 7.7 8.31 7.63 ...
## $ Sample_37: num 8.82 6.59 8.06 7.42 7.03 ...
## $ Sample_38: num 9.02 6.67 8.02 7.78 7.39 ...
## $ Sample_39: num 8.87 6.57 8.06 7.54 7.37 ...
## $ Sample_11: num 8.07 6.81 7.97 6.59 6.79 ...
## $ Sample_12: num 7.97 7 8.31 6.51 6.71 ...
## $ Sample_2 : num 8.3 6.93 8.07 6.69 7.05 ...
## $ Sample_9 : num 8.11 6.77 7.73 6.81 6.59 ...
Make a heatmap with lattice.
hsubDat <- as.matrix(t(subDat))
rownames(hsubDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))
heatmap(hsubDat, col = jBuPuFun(256), scale = c("column"), margins = c(5, 8))
Make a heatmap with ggplot2.
colnames(subDat) <- with(prDes, paste(devStage, gType, sidChar, sep = "_"))
prDatTall <- data.frame(sample = rep(colnames(subDat), each = nrow(subDat)),
probe = rownames(subDat), expression = unlist(subDat))
ggplot(prDatTall, aes(x = probe, y = sample, fill = expression)) + theme(axis.text.x = element_text(angle = 90,
hjust = 1, vjust = 0.5)) + geom_tile() + scale_fill_gradient2(low = jBuPuFun(256)[1],
mid = jBuPuFun(256)[256/2], high = jBuPuFun(256)[256], midpoint = (max(prDatTall$expression) +
min(prDatTall$expression))/2, name = "Expression")
Make a density plot with lattice.
resDat <- data.frame(sample = rep(names(subDat), each = 77), gene = LETTERS[1:77],
geneExp = unlist(subDat))
Bw3 <- 0.5
n3 <- 500
densityplot(~geneExp, resDat, auto.key = TRUE, bw = Bw3, n = n3, main = paste("bw =",
Bw3, ", n =", n3))
Make a density plot with ggplot2.
(p <- ggplot(resDat, aes(geneExp)) + stat_density(geom = "line", position = "identity") +
geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))
Be crazy and use splom().
splom(subDat, panel = panel.smoothScatter, raster = TRUE)