Abrar — Feb 7, 2014, 9:25 PM
library(lattice)
prDat <- read.table("../data/GSE4051_data.tsv")
str(prDat, max.level = 0)
'data.frame': 29949 obs. of 39 variables:
prDes <- readRDS("../data/GSE4051_design.rds")
str(prDes)
'data.frame': 39 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
# Two sample tests -- one gene
set.seed(987)
(theGene <- sample(1:nrow(prDat), 1))
[1] 14294
pDat <- data.frame(prDes, gExp = unlist(prDat[theGene, ]))
str(pDat)
'data.frame': 39 obs. of 5 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 9.88 10.59 10.28 10.22 8.75 ...
aggregate(gExp ~ gType, pDat, FUN = mean)
gType gExp
1 wt 9.758
2 NrlKO 9.553
stripplot(gType ~ gExp, pDat)
t.test(gExp ~ gType, pDat)
Welch Two Sample t-test
data: gExp by gType
t = 1.479, df = 36.78, p-value = 0.1475
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.07586 0.48605
sample estimates:
mean in group wt mean in group NrlKO
9.758 9.553
ttRes <- t.test(gExp ~ gType, pDat)
str(ttRes)
List of 9
$ statistic : Named num 1.48
..- attr(*, "names")= chr "t"
$ parameter : Named num 36.8
..- attr(*, "names")= chr "df"
$ p.value : num 0.148
$ conf.int : atomic [1:2] -0.0759 0.4861
..- attr(*, "conf.level")= num 0.95
$ estimate : Named num [1:2] 9.76 9.55
..- attr(*, "names")= chr [1:2] "mean in group wt" "mean in group NrlKO"
$ null.value : Named num 0
..- attr(*, "names")= chr "difference in means"
$ alternative: chr "two.sided"
$ method : chr "Welch Two Sample t-test"
$ data.name : chr "gExp by gType"
- attr(*, "class")= chr "htest"
ttRes$statistic
t
1.479
ttRes$p.value
[1] 0.1475
wtRes <- wilcox.test(gExp ~ gType, pDat)
Warning: cannot compute exact p-value with ties
wtRes$statistic
W
241
wtRes$p.value
[1] 0.1559
ktRes <- ks.test(pDat$gExp[pDat$gType == "wt"],pDat$gExp[pDat$gType == "NrlKO"])
Warning: cannot compute exact p-value with ties
ktRes$statistic
D
0.2974
ktRes$p.value
[1] 0.355
# Can you pull test statistics and/or p-values from the different approaches into an common object, like a readable table?
# Yes
cbind(c(ttRes$p.value,wtRes$p.value, ktRes$p.value), c(ttRes$statistic,wtRes$statistic, ktRes$statistic))
[,1] [,2]
t 0.1475 1.4794
W 0.1559 241.0000
D 0.3550 0.2974
# Are you getting the same message from the various approaches?
# Yes all show there is no difference in means
# Apply() for computing on rows and columns of matrices
kDat <- readRDS("../data/GSE4051_MINI.rds")
kMat <- as.matrix(kDat[c('crabHammer', 'eggBomb', 'poisonFang')])
str(kMat)
num [1:39, 1:3] 10.22 10.02 9.64 9.65 8.58 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:39] "12" "13" "14" "15" ...
..$ : chr [1:3] "crabHammer" "eggBomb" "poisonFang"
median(kMat[ , 'eggBomb'])
[1] 6.757
apply(kMat, 2, median)
crabHammer eggBomb poisonFang
9.611 6.757 7.350
apply(kMat, 2, quantile, probs = 0.5)
crabHammer eggBomb poisonFang
9.611 6.757 7.350
apply(kMat, 2, quantile, probs = c(0.25, 0.75))
crabHammer eggBomb poisonFang
25% 8.938 6.278 7.188
75% 9.830 7.095 7.476
apply(kMat, 1, min)
12 13 14 15 9 10 11 28 29 30 31 24
7.370 6.890 6.720 6.529 6.470 7.005 6.735 6.587 6.170 6.870 6.800 6.138
25 26 27 36 37 38 39 32 33 34 35 20
6.166 6.269 6.264 6.530 7.100 6.269 6.211 6.286 6.347 6.270 6.188 7.005
21 22 23 16 17 18 19 5 6 7 8 1
7.082 6.757 6.155 7.228 7.226 7.363 7.081 6.993 6.992 6.608 7.003 6.981
2 3 4
7.165 7.075 6.558
colnames(kMat)[apply(kMat, 1, which.min)]
[1] "poisonFang" "eggBomb" "eggBomb" "eggBomb" "eggBomb"
[6] "poisonFang" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[11] "eggBomb" "eggBomb" "eggBomb" "eggBomb" "eggBomb"
[16] "eggBomb" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[21] "eggBomb" "eggBomb" "eggBomb" "poisonFang" "eggBomb"
[26] "eggBomb" "eggBomb" "eggBomb" "eggBomb" "poisonFang"
[31] "eggBomb" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[36] "poisonFang" "eggBomb" "poisonFang" "eggBomb"
all.equal(rowSums(kMat), apply(kMat, 1, sum))
[1] TRUE
# Computing on groups of observations with aggregate()
aggregate(eggBomb ~ devStage, kDat, FUN = mean)
devStage eggBomb
1 E16 6.879
2 P2 6.408
3 P6 6.459
4 P10 7.143
5 4_weeks 7.063
aggregate(eggBomb ~ gType * devStage, kDat, FUN = mean)
gType devStage eggBomb
1 wt E16 6.900
2 NrlKO E16 6.851
3 wt P2 6.607
4 NrlKO P2 6.209
5 wt P6 6.646
6 NrlKO P6 6.273
7 wt P10 7.042
8 NrlKO P10 7.243
9 wt 4_weeks 7.117
10 NrlKO 4_weeks 7.008
aggregate(eggBomb ~ gType * devStage, kDat, FUN = range)
gType devStage eggBomb.1 eggBomb.2
1 wt E16 6.529 7.462
2 NrlKO E16 6.470 7.065
3 wt P2 6.170 6.870
4 NrlKO P2 6.138 6.269
5 wt P6 6.211 7.574
6 NrlKO P6 6.188 6.347
7 wt P10 6.155 8.173
8 NrlKO P10 7.081 7.438
9 wt 4_weeks 6.608 7.866
10 NrlKO 4_weeks 6.558 7.204
keepGenes <- c("1431708_a_at", "1424336_at", "1454696_at",
"1416119_at", "1432141_x_at", "1429226_at" )
miniDat <- subset(prDat, rownames(prDat) %in% keepGenes)
miniDat <- data.frame(gExp = as.vector(t(as.matrix(miniDat))),gene = factor(rep(rownames(miniDat), each = ncol(miniDat)), levels = keepGenes))
miniDat <- suppressWarnings(data.frame(prDes, miniDat))
str(miniDat)
'data.frame': 234 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 10.6 11 10.8 10.9 9.2 ...
$ gene : Factor w/ 6 levels "1431708_a_at",..: 4 4 4 4 4 4 4 4 4 4 ...
stripplot(gType ~ gExp | gene, miniDat,scales = list(x = list(relation = "free")),group = gType, auto.key = TRUE)
someDat <- droplevels(subset(miniDat, gene == keepGenes[1]))
t.test(gExp ~ gType, someDat)
Welch Two Sample t-test
data: gExp by gType
t = 9.838, df = 36.89, p-value = 7.381e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.570 2.384
sample estimates:
mean in group wt mean in group NrlKO
9.554 7.578
# The plyr package
library(plyr)
d_ply(miniDat, ~ gene, function(x) t.test(gExp ~ gType, x), .print = TRUE)
Welch Two Sample t-test
data: gExp by gType
t = 9.838, df = 36.89, p-value = 7.381e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.570 2.384
sample estimates:
mean in group wt mean in group NrlKO
9.554 7.578
Welch Two Sample t-test
data: gExp by gType
t = 9.061, df = 36.49, p-value = 7.146e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.6239 0.9835
sample estimates:
mean in group wt mean in group NrlKO
7.670 6.867
Welch Two Sample t-test
data: gExp by gType
t = 8.077, df = 33.49, p-value = 2.278e-09
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.402 2.346
sample estimates:
mean in group wt mean in group NrlKO
12.85 10.97
Welch Two Sample t-test
data: gExp by gType
t = -0.184, df = 36.53, p-value = 0.8551
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5079 0.4234
sample estimates:
mean in group wt mean in group NrlKO
9.893 9.935
Welch Two Sample t-test
data: gExp by gType
t = -0.1324, df = 36.31, p-value = 0.8954
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2413 0.2118
sample estimates:
mean in group wt mean in group NrlKO
7.092 7.107
Welch Two Sample t-test
data: gExp by gType
t = 0.0983, df = 35.58, p-value = 0.9223
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.1019 0.1123
sample estimates:
mean in group wt mean in group NrlKO
6.551 6.546
ttRes <- dlply(miniDat, ~ gene, function(x) t.test(gExp ~ gType, x))
names(ttRes)
[1] "1431708_a_at" "1424336_at" "1454696_at" "1416119_at"
[5] "1432141_x_at" "1429226_at"
ttRes[["1454696_at"]]
Welch Two Sample t-test
data: gExp by gType
t = 8.077, df = 33.49, p-value = 2.278e-09
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.402 2.346
sample estimates:
mean in group wt mean in group NrlKO
12.85 10.97
ttRes <- ddply(miniDat, ~ gene, function(z) {
zz <- t.test(gExp ~ gType, z)
round(c(tStat = zz$statistic, pVal = zz$p.value), 4)
})
ttRes
gene tStat.t pVal
1 1431708_a_at 9.8380 0.0000
2 1424336_at 9.0607 0.0000
3 1454696_at 8.0771 0.0000
4 1416119_at -0.1840 0.8551
5 1432141_x_at -0.1324 0.8954
6 1429226_at 0.0983 0.9223