library(lattice)
library(RColorBrewer)
library(gplots)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
library(ggplot2)
photoDat <- read.table("GSE4051_data.tsv")
str(photoDat, max.level = 0)
## 'data.frame': 29949 obs. of 39 variables:
sampleinfo <- readRDS("GSE4051_design.rds")
str(sampleinfo)
## 'data.frame': 39 obs. of 4 variables:
## $ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
Let's make a heatmap of the 35 random probsets from our photorec dataset using heatmap.2 function of gplots. Note that we will need to reconfigure our dataframe as a matrix.
set.seed(1)
(op <- sample(1:nrow(photoDat), size = 35))
## [1] 7952 11145 17156 27198 6040 26902 28287 19786 18837 1850 6167
## [12] 5286 20568 11499 23046 14899 21481 29690 11375 23269 27975 6350
## [23] 19503 3758 7997 11555 401 11442 26023 10184 14424 17938 14766
## [34] 5571 24751
sumDat <- photoDat[op, ]
str(sumDat)
## 'data.frame': 35 obs. of 39 variables:
## $ Sample_20: num 8.3 8.25 6.91 6.79 6.4 ...
## $ Sample_21: num 8.33 9.14 6.55 6.23 6.69 ...
## $ Sample_22: num 8.43 8.19 6.59 6.69 6.14 ...
## $ Sample_23: num 8.49 8.66 6.58 6.34 6.34 ...
## $ Sample_16: num 8.51 6.66 7.75 7.25 5.5 ...
## $ Sample_17: num 8.18 7.95 6.85 6.38 7.51 ...
## $ Sample_6 : num 7.96 8.45 7.42 6.19 7.64 ...
## $ Sample_24: num 8.34 7.49 7.17 6.84 5.83 ...
## $ Sample_25: num 8.14 7.39 7.12 7.02 5.85 ...
## $ Sample_26: num 8.45 6.94 7.46 7.43 6.32 ...
## $ Sample_27: num 8.25 6.5 7.23 6.91 5.8 ...
## $ Sample_14: num 8.46 6.99 7.14 6.78 6.29 ...
## $ Sample_3 : num 8.53 7.14 7.23 6.88 6.15 ...
## $ Sample_5 : num 8.45 6.71 7.36 6.92 6 ...
## $ Sample_8 : num 8.62 6.66 7.9 6.97 5.95 ...
## $ Sample_28: num 8.63 6.46 7.45 7.17 6.03 ...
## $ Sample_29: num 8.58 7.84 6.72 6.91 6.31 ...
## $ Sample_30: num 8.28 7.01 6.81 6.75 5.81 ...
## $ Sample_31: num 8.47 6.88 7.18 6.89 5.79 ...
## $ Sample_1 : num 8.66 6.81 7.22 6.56 6.03 ...
## $ Sample_10: num 8.68 7.41 6.97 6.5 5.99 ...
## $ Sample_4 : num 8.74 7.23 6.91 6.78 5.82 ...
## $ Sample_7 : num 8.69 6.61 7.43 7.1 5.64 ...
## $ Sample_32: num 9.7 7.62 6.96 6.89 6.82 ...
## $ Sample_33: num 8.72 6.83 7.36 7.21 5.93 ...
## $ Sample_34: num 8.58 6.73 7.63 7.19 5.93 ...
## $ Sample_35: num 8.54 6.91 7.14 6.71 5.59 ...
## $ Sample_13: num 8.73 6.83 7.14 6.46 6.25 ...
## $ Sample_15: num 8.57 6.86 7.12 6.72 5.84 ...
## $ Sample_18: num 8.96 6.95 6.96 6.31 6.44 ...
## $ Sample_19: num 8.65 6.69 7.03 6.91 6.32 ...
## $ Sample_36: num 10.41 7.9 6.57 6.87 6.09 ...
## $ Sample_37: num 9.48 6.36 6.99 6.85 6.08 ...
## $ Sample_38: num 10.14 7.24 6.95 6.99 6.13 ...
## $ Sample_39: num 9.48 6.21 6.97 7.13 6.02 ...
## $ Sample_11: num 9.09 6.23 7 6.4 6.22 ...
## $ Sample_12: num 9.21 6.47 7.17 6.78 5.76 ...
## $ Sample_2 : num 9.05 6.2 7.2 6.54 5.9 ...
## $ Sample_9 : num 8.89 7.67 6.86 6.72 6.26 ...
sumDat <- as.matrix(t(sumDat))
rownames(sumDat) <- with(sampleinfo, paste(devStage, gType, sidChar, sep = "_"))
str(sumDat)
## num [1:39, 1:35] 8.3 8.33 8.43 8.49 8.51 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
## ..$ : chr [1:35] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...
feelingBlue <- colorRampPalette(brewer.pal(n = 7, "Blues"))
heatmap.2(sumDat, col = feelingBlue, trace = "none")
Now let's plot Sample 16 against Sample 2 using the stat_binhex function in ggplot2
(overplotting <- ggplot(photoDat, aes(photoDat$Sample_16, photoDat$Sample_2)) +
stat_binhex() + theme_bw() + xlab("Expression of Sample 16") + ylab("Expression of Sample 2") +
ggtitle("Gene Expression Levels in Two Samples"))
Working with a smaller subset of the dataset, we can look at the expression levels of Jenny's favourite eggBomb vs poisonFang probes by Genotype
mini_dat <- readRDS("GSE4051_MINI.rds")
str(mini_dat)
## 'data.frame': 39 obs. of 7 variables:
## $ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage : Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ crabHammer: num 10.22 10.02 9.64 9.65 8.58 ...
## $ eggBomb : num 7.46 6.89 6.72 6.53 6.47 ...
## $ poisonFang: num 7.37 7.18 7.35 7.04 7.49 ...
splot <- ggplot(mini_dat, aes(x = eggBomb, y = poisonFang, color = devStage))
(splot <- splot + geom_point() + theme_bw() + xlab("Expression of eggBomb") +
ylab("Expression of poisonFang") + ggtitle("Scatterplot of Expression Levels by Genotype") +
facet_wrap(~gType))
Let's re-shape the data a bit to look at density plots of gene expression by developmental stage
reshaped_dat <- with(mini_dat, data.frame(sidChar, sidNum, devStage, gType,
probeset = factor(rep(c("crabHammer", "eggBomb", "poisonFang"), each = nrow(mini_dat))),
geneExp = c(crabHammer, eggBomb, poisonFang)))
str(reshaped_dat)
## 'data.frame': 117 obs. of 6 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
## $ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
## $ probeset: Factor w/ 3 levels "crabHammer","eggBomb",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 10.22 10.02 9.64 9.65 8.58 ...
(densplot <- ggplot(reshaped_dat, aes(geneExp, color = devStage)) + stat_density(geom = "line",
position = "identity") + geom_point(aes(y = 0.05), position = position_jitter(height = 0.005)))