# librarys
library(lattice)
library(RColorBrewer)
# data
des <- read.table("GSE4051_design.tsv", header = T)
dat <- read.table("GSE4051_data.tsv", header = T)
# constants
n <- 20 ## gene samples
myCols <- colorRampPalette(brewer.pal(n = 9, "BuPu")) ## heatmap scale
ttResults <- data.frame(gene = NA, pval = NA) ## empty df for loop
###############################################################################
############################################################################### Data
############################################################################### Preparation
# select random subset of probesets/genes
set.seed(8)
samp <- sample(1:nrow(dat), n)
small <- dat[samp, ]
# create long dataframe with experimental design
long <- reshape(small, v.names = "geneExp", varying = list(1:ncol(small)), timevar = "sample",
times = names(small), idvar = "gene", ids = row.names(small), direction = "long")
long$gene <- as.factor(long$gene)
long$sample <- as.factor(long$sample)
myDat <- merge(long, des, by.x = "sample", by.y = "sidChar")
str(myDat)
## 'data.frame': 780 obs. of 6 variables:
## $ sample : Factor w/ 39 levels "Sample_1","Sample_10",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ geneExp : num 10.54 10.9 11.38 8.26 5.75 ...
## $ gene : Factor w/ 20 levels "1415708_at","1419053_at",..: 11 4 18 15 8 16 7 20 17 14 ...
## $ sidNum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ devStage: Factor w/ 5 levels "4_weeks","E16",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ gType : Factor w/ 2 levels "NrlKO","wt": 1 1 1 1 1 1 1 1 1 1 ...
# create matrix format for heatmap plotting
smallM <- as.matrix(t(small))
rownames(smallM) <- with(des, paste(gType, sidChar, sep = "_"))
###############################################################################
############################################################################### Exploratory
############################################################################### Analysis
# order heatmap by genotypes and look for differences
heatmap(smallM[order(rownames(smallM)), order(apply(smallM, 2, mean))], Rowv = NA,
Colv = NA, margins = c(6, 5), col = myCols(500))
# 1429021_at and 1457045_at are jumping out
# compare gene expression distributions for genotypes
densityplot(~geneExp | gene, myDat, groups = gType, auto.key = T)
# the means look different for these genes 1457045_at 1429021_at
# 1437750_at
# lets do some t tests to find out which genes differ this loop is ugly
# ... there has to be a better way
pairs <- with(myDat, tapply(geneExp, list(gene, gType), function(x) x))
for (i in 1:nrow(pairs)) {
test <- t.test(pairs[i, "NrlKO"][[1]], pairs[i, "wt"][[1]])
ttResults <- rbind(ttResults, c(gene = rownames(pairs)[i], pval = test$p.value))
}
(ttResults <- ttResults[-1, ])
## gene pval
## 2 1415708_at 0.177755258547863
## 3 1419053_at 0.815330917561272
## 4 1421392_a_at 0.337458631383954
## 5 1424407_s_at 0.011441778417152
## 6 1426778_at 0.0469922530002784
## 7 1427243_at 0.46735158482114
## 8 1427929_a_at 0.781328560426702
## 9 1429021_at 0.0015549737403702
## 10 1434310_at 0.0758980812135523
## 11 1435194_at 0.912370715352243
## 12 1435552_at 0.842958777709302
## 13 1437750_at 0.000381169546848767
## 14 1438784_at 0.696057828277474
## 15 1443950_at 0.321378325751261
## 16 1444288_at 0.935752405224795
## 17 1448113_at 0.714489960072615
## 18 1450193_at 0.350054842160445
## 19 1451485_at 0.191994323268592
## 20 1457045_at 5.56888082228149e-05
## 21 1457272_at 0.519888324436242
(diffGenes <- ttResults$gene[as.numeric(ttResults$pval) <= 0.05])
## [1] "1424407_s_at" "1426778_at" "1429021_at" "1437750_at"
## [5] "1457045_at"
# we can look at the data for only those genes with pval <= 0.05
stripplot(geneExp ~ gType | gene, myDat, subset = gene %in% diffGenes, group = gType,
jitter.data = T, scales = list(y = list(relation = "free")))