Load the required libraries:
library(lattice)
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.0.2
library(gplots)
## Warning: package 'gplots' was built under R version 3.0.2
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
and import the data:
design <- read.table("GSE4051_design.tsv", header = T)
dat <- read.table("GSE4051_data.tsv", header = T)
str(design)
## 'data.frame': 39 obs. of 4 variables:
## $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
## $ sidNum : int 20 21 22 23 16 17 6 24 25 26 ...
## $ devStage: Factor w/ 5 levels "4_weeks","E16",..: 2 2 2 2 2 2 2 4 4 4 ...
## $ gType : Factor w/ 2 levels "NrlKO","wt": 2 2 2 2 1 1 1 2 2 2 ...
Draw 20 probeset at random, reshape and merge the data so we can have a workable dataset:
set.seed(1)
rp <- sample(1:nrow(dat), size = 20, replace = F)
newDat <- dat[rp, ]
# reshape
newDat20 <- reshape(newDat, varying = list(1:ncol(newDat)), v.names = "geneExp",
timevar = "sample_n", times = names(newDat), idvar = "probset", ids = row.names(newDat),
direction = "long")
newDat20$sample_n <- as.factor(newDat20$sample_n)
newDat20$probset <- as.factor(newDat20$probset)
final <- merge(newDat20, design, by.x = "sample_n", by.y = "sidChar")
We can consider all the 20 probeset and make a stripplot and a density plot:
stripplot(geneExp ~ devStage | probset, final, layout = c(5, 4), groups = gType,
auto.key = TRUE)
densityplot(~geneExp | probset, groups = gType, final, grid = TRUE)
From the stripplot it is difficult to draw any conclusion. However, looking at the density plot we can see that for the genes “1431792_a_at” and “1424164_st”, there is a difference between the mean of the wild type and the one of the knock out.
Using the package “gplot” we can also build the heatmap:
# transform dataset in matrix for the heatmap
mDat <- as.matrix(t(newDat))
rownames(mDat) <- with(design, paste(devStage, gType, sidChar, sep = "_"))
str(mDat)
## num [1:39, 1:20] 8.3 8.33 8.43 8.49 8.51 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
## ..$ : chr [1:20] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...
co <- colorRampPalette(brewer.pal(n = 9, "BuPu"))
heatmap.2(mDat, col = co(256), trace = "none")
Finally, we can also look at the scatter plot matrix. In this case 4 samples were chosen at random to make the matrix more legible:
s4 <- newDat[, sample(1:ncol(newDat), 4)]
pairs(s4, col = "blue")