Seminar 3 - Introduction to graphics

Load the required libraries:

library(lattice)
library(RColorBrewer)

## Warning: package 'RColorBrewer' was built under R version 3.0.2

library(gplots)

## Warning: package 'gplots' was built under R version 3.0.2

## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess

and import the data:

design <- read.table("GSE4051_design.tsv", header = T)
dat <- read.table("GSE4051_data.tsv", header = T)
str(design)

## 'data.frame':    39 obs. of  4 variables:
##  $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
##  $ sidNum  : int  20 21 22 23 16 17 6 24 25 26 ...
##  $ devStage: Factor w/ 5 levels "4_weeks","E16",..: 2 2 2 2 2 2 2 4 4 4 ...
##  $ gType   : Factor w/ 2 levels "NrlKO","wt": 2 2 2 2 1 1 1 2 2 2 ...

Draw 20 probeset at random, reshape and merge the data so we can have a workable dataset:

set.seed(1)
rp <- sample(1:nrow(dat), size = 20, replace = F)
newDat <- dat[rp, ]
# reshape
newDat20 <- reshape(newDat, varying = list(1:ncol(newDat)), v.names = "geneExp", 
    timevar = "sample_n", times = names(newDat), idvar = "probset", ids = row.names(newDat), 
    direction = "long")
newDat20$sample_n <- as.factor(newDat20$sample_n)
newDat20$probset <- as.factor(newDat20$probset)
final <- merge(newDat20, design, by.x = "sample_n", by.y = "sidChar")

We can consider all the 20 probeset and make a stripplot and a density plot:

stripplot(geneExp ~ devStage | probset, final, layout = c(5, 4), groups = gType, 
    auto.key = TRUE)

plot of chunk unnamed-chunk-4

densityplot(~geneExp | probset, groups = gType, final, grid = TRUE)

plot of chunk unnamed-chunk-4

From the stripplot it is difficult to draw any conclusion. However, looking at the density plot we can see that for the genes “1431792_a_at” and “1424164_st”, there is a difference between the mean of the wild type and the one of the knock out.

Using the package “gplot” we can also build the heatmap:

# transform dataset in matrix for the heatmap
mDat <- as.matrix(t(newDat))
rownames(mDat) <- with(design, paste(devStage, gType, sidChar, sep = "_"))
str(mDat)

##  num [1:39, 1:20] 8.3 8.33 8.43 8.49 8.51 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:39] "E16_wt_Sample_20" "E16_wt_Sample_21" "E16_wt_Sample_22" "E16_wt_Sample_23" ...
##   ..$ : chr [1:20] "1426822_at" "1431375_s_at" "1440076_at" "1456157_at" ...

co <- colorRampPalette(brewer.pal(n = 9, "BuPu"))

heatmap.2(mDat, col = co(256), trace = "none")

plot of chunk unnamed-chunk-6

Finally, we can also look at the scatter plot matrix. In this case 4 samples were chosen at random to make the matrix more legible:

s4 <- newDat[, sample(1:ncol(newDat), 4)]

pairs(s4, col = "blue")

plot of chunk unnamed-chunk-8