Goal: get more familiar with the awesome ggplot2 and plyr packages by reproducing results that were originally obtained with the older lattice package and base R's aggregate().
Load the required libraries and data
library(lattice)
library(plyr)
library(ggplot2)
# Expression table (one row is later picked as "the gene"). read.table() is
# left to infer header and row names from the file layout.
# NOTE(review): presumably the header line has one fewer field than the data
# lines, which is what triggers that inference -- confirm against the file.
prDat <- read.table(file = "GSE4051_data.tsv")
# Design table; it supplies the gType column used in the chunks below.
prDes <- readRDS(file = "GSE4051_design.rds")
Extract data for one gene
# Fix the RNG seed so the same row is drawn on every run.
set.seed(987)
# Draw one row index. seq_len() (unlike 1:nrow()) is empty for a zero-row
# table, so sample() fails loudly instead of silently returning 1 or 0.
theGene <- sample(seq_len(nrow(prDat)), 1)
# Put that row's values in column gExp next to the design columns.
pDat <- data.frame(prDes, gExp = unlist(prDat[theGene, ]))
Explore!
What are sample means in wildtype/knockout? First using aggregate, then using plyr
# Base R: mean of gExp within each gType level.
aggregate(gExp ~ gType, data = pDat, FUN = mean)
| gType | gExp |
|---|---|
| wt | 9.76 |
| NrlKO | 9.55 |
# plyr: split pDat by gType and reduce each piece to its mean gExp.
ddply(.data = pDat, .variables = ~ gType, .fun = summarize,
      gExp = mean(gExp))
| gType | gExp |
|---|---|
| wt | 9.76 |
| NrlKO | 9.55 |
Make sure the two actually returned identical results
# Build both summaries inside local() so the temporaries do not linger in the
# workspace, then test them for exact equality.
local({
  viaAggregate <- aggregate(gExp ~ gType, pDat, FUN = mean)
  viaDdply <- ddply(pDat, ~ gType, summarize, gExp = mean(gExp))
  identical(viaAggregate, viaDdply)
})
## [1] TRUE
Plot!
Strip plot of just the one gene, knockout vs wildtype. First using lattice, then using ggplot2
# lattice: one strip of gExp values per gType level.
stripplot(gType ~ gExp, data = pDat)
# ggplot2: same layout, gExp on the x axis and gType on the y axis.
ggplot(data = pDat, mapping = aes(x = gExp, y = gType)) +
  geom_point()
Load the mini dataset (GSE4051_MINI.rds)
# Read the mini dataset; the chunks below use its eggBomb, devStage and gType
# columns.
kDat <- readRDS(file = "GSE4051_MINI.rds")
Explore!
Average expression of eggBomb over developmental stages, first using aggregate, then using plyr
# Base R: mean eggBomb at each devStage level.
aggregate(eggBomb ~ devStage, data = kDat, FUN = mean)
| devStage | eggBomb |
|---|---|
| E16 | 6.88 |
| P2 | 6.41 |
| P6 | 6.46 |
| P10 | 7.14 |
| 4_weeks | 7.06 |
# plyr: same per-stage mean, returned in a column named exp.
ddply(.data = kDat, .variables = ~ devStage, .fun = summarize,
      exp = mean(eggBomb))
| devStage | exp |
|---|---|
| E16 | 6.88 |
| P2 | 6.41 |
| P6 | 6.46 |
| P10 | 7.14 |
| 4_weeks | 7.06 |
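Same thing, but now aggregate by developmental stage AND genotype. Both calls give the same means, but the rows come back in a different order: aggregate varies gType fastest within each devStage, while ddply lists every stage for wt before those for NrlKO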
# Base R: mean eggBomb for every gType-by-devStage combination.
aggregate(eggBomb ~ gType * devStage, data = kDat, FUN = mean)
| gType | devStage | eggBomb |
|---|---|---|
| wt | E16 | 6.90 |
| NrlKO | E16 | 6.85 |
| wt | P2 | 6.61 |
| NrlKO | P2 | 6.21 |
| wt | P6 | 6.65 |
| NrlKO | P6 | 6.27 |
| wt | P10 | 7.04 |
| NrlKO | P10 | 7.24 |
| wt | 4_weeks | 7.12 |
| NrlKO | 4_weeks | 7.01 |
# plyr: mean eggBomb per (gType, devStage) group, in a column named exp.
ddply(.data = kDat, .variables = .(gType, devStage), .fun = summarize,
      exp = mean(eggBomb))
| gType | devStage | exp |
|---|---|---|
| wt | E16 | 6.90 |
| wt | P2 | 6.61 |
| wt | P6 | 6.65 |
| wt | P10 | 7.04 |
| wt | 4_weeks | 7.12 |
| NrlKO | E16 | 6.85 |
| NrlKO | P2 | 6.21 |
| NrlKO | P6 | 6.27 |
| NrlKO | P10 | 7.24 |
| NrlKO | 4_weeks | 7.01 |
Grab 6 genes: 3 interesting, 3 boring
# Row names of the six rows to keep; this order also fixes the factor levels
# of `gene` below (and hence the panel order in the plots).
keepGenes <- c(
  "1431708_a_at", "1424336_at", "1454696_at",
  "1416119_at", "1432141_x_at", "1429226_at"
)

# Wide form: keep only the matching rows of prDat (in prDat's own row order).
miniDat <- prDat[rownames(prDat) %in% keepGenes, , drop = FALSE]

# Long form: transposing first makes as.vector() emit each row's values
# contiguously, which lines up with rep(..., each = ncol(miniDat)).
miniDat <- data.frame(
  gExp = as.vector(t(as.matrix(miniDat))),
  gene = factor(
    rep(rownames(miniDat), each = ncol(miniDat)),
    levels = keepGenes
  )
)

# Attach the design columns to every row.
# NOTE(review): prDes presumably has fewer rows than the long table and is
# recycled by data.frame(); suppressWarnings() hides whatever warning that
# raises -- confirm which one.
miniDat <- suppressWarnings(data.frame(prDes, miniDat))
Plot!
Strip plot of the expression vs genotype, one plot per gene. First using lattice, then using ggplot2
# lattice: one panel per gene, each panel with its own x-axis range; points
# are grouped by gType and a legend is drawn via auto.key.
# `groups` is written out in full: the abbreviated `group` only worked through
# R's partial argument matching.
stripplot(gType ~ gExp | gene, data = miniDat,
          scales = list(x = list(relation = "free")),
          groups = gType, auto.key = TRUE)
# ggplot2: semi-transparent points coloured by gType, one facet per gene with
# a free x scale, and the major vertical grid lines removed.
ggplot(data = miniDat,
       mapping = aes(x = gExp, y = gType, color = gType)) +
  geom_point(alpha = 0.7) +
  facet_wrap(facets = ~ gene, scales = "free_x") +
  theme(panel.grid.major.x = element_blank())