omaromeir — Feb 7, 2014, 9:25 PM
library(lattice)
prDat = read.table("../data/GSE4051_data.tsv")
str(prDat, max.level=0)
'data.frame': 29949 obs. of 39 variables:
prDes = readRDS("../data/GSE4051_design.rds")
str(prDes)
'data.frame': 39 obs. of 4 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
set.seed(987)
(theGene <- sample(1:nrow(prDat), 1))
[1] 14294
pDat <- data.frame(prDes, gExp = unlist(prDat[theGene, ]))
str(pDat)
'data.frame': 39 obs. of 5 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 9.88 10.59 10.28 10.22 8.75 ...
aggregate(gExp ~ gType, pDat, FUN = mean)
gType gExp
1 wt 9.758
2 NrlKO 9.553
stripplot(gType ~ gExp, pDat)
ttRes <- t.test(gExp ~ gType, pDat)
str(ttRes)
List of 9
$ statistic : Named num 1.48
..- attr(*, "names")= chr "t"
$ parameter : Named num 36.8
..- attr(*, "names")= chr "df"
$ p.value : num 0.148
$ conf.int : atomic [1:2] -0.0759 0.4861
..- attr(*, "conf.level")= num 0.95
$ estimate : Named num [1:2] 9.76 9.55
..- attr(*, "names")= chr [1:2] "mean in group wt" "mean in group NrlKO"
$ null.value : Named num 0
..- attr(*, "names")= chr "difference in means"
$ alternative: chr "two.sided"
$ method : chr "Welch Two Sample t-test"
$ data.name : chr "gExp by gType"
- attr(*, "class")= chr "htest"
ttRes$statistic
t
1.479
ttRes$p.value
[1] 0.1475
# You try
myGene <- 14158
pDat2 <- data.frame(prDes, gExp = unlist(prDat[myGene, ]))
str(pDat2)
'data.frame': 39 obs. of 5 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 6.85 7.27 7.11 7.32 7.14 ...
aggregate(gExp ~ gType, pDat2, FUN = mean)
gType gExp
1 wt 7.366
2 NrlKO 7.240
stripplot(gType ~ gExp, pDat2)
tt <- t.test(gExp ~ gType, pDat2)
str(tt)
List of 9
$ statistic : Named num 1.28
..- attr(*, "names")= chr "t"
$ parameter : Named num 28.5
..- attr(*, "names")= chr "df"
$ p.value : num 0.213
$ conf.int : atomic [1:2] -0.0764 0.3287
..- attr(*, "conf.level")= num 0.95
$ estimate : Named num [1:2] 7.37 7.24
..- attr(*, "names")= chr [1:2] "mean in group wt" "mean in group NrlKO"
$ null.value : Named num 0
..- attr(*, "names")= chr "difference in means"
$ alternative: chr "two.sided"
$ method : chr "Welch Two Sample t-test"
$ data.name : chr "gExp by gType"
- attr(*, "class")= chr "htest"
tt$statistic
t
1.275
tt$p.value
[1] 0.2126
tt2 <- t.test(gExp ~ gType, pDat2, var.equal=TRUE)
str(tt2)
List of 9
$ statistic : Named num 1.26
..- attr(*, "names")= chr "t"
$ parameter : Named num 37
..- attr(*, "names")= chr "df"
$ p.value : num 0.217
$ conf.int : atomic [1:2] -0.0775 0.3298
..- attr(*, "conf.level")= num 0.95
$ estimate : Named num [1:2] 7.37 7.24
..- attr(*, "names")= chr [1:2] "mean in group wt" "mean in group NrlKO"
$ null.value : Named num 0
..- attr(*, "names")= chr "difference in means"
$ alternative: chr "two.sided"
$ method : chr " Two Sample t-test"
$ data.name : chr "gExp by gType"
- attr(*, "class")= chr "htest"
wt <- wilcox.test(gExp ~ gType, pDat2)
Warning: cannot compute exact p-value with ties
str(wt)
List of 7
$ statistic : Named num 236
..- attr(*, "names")= chr "W"
$ parameter : NULL
$ p.value : num 0.201
$ null.value : Named num 0
..- attr(*, "names")= chr "location shift"
$ alternative: chr "two.sided"
$ method : chr "Wilcoxon rank sum test with continuity correction"
$ data.name : chr "gExp by gType"
- attr(*, "class")= chr "htest"
wt$statistic
W
236
wt$p.value
[1] 0.201
ks <- ks.test(pDat2$gExp[pDat2$gType == "wt"], pDat2$gExp[pDat2$gType == "NrlKO"])
Warning: cannot compute exact p-value with ties
str(ks)
List of 5
$ statistic : Named num 0.337
..- attr(*, "names")= chr "D"
$ p.value : num 0.219
$ alternative: chr "two-sided"
$ method : chr "Two-sample Kolmogorov-Smirnov test"
$ data.name : chr "pDat2$gExp[pDat2$gType == \"wt\"] and pDat2$gExp[pDat2$gType == \"NrlKO\"]"
- attr(*, "class")= chr "htest"
ks$statistic
D
0.3368
ks$p.value
[1] 0.2189
# Can you pull test statistics and/or p-values from the different approaches into an common object, like a readable table? Are you getting the same message from the various approaches?
rbind(c(tt$statistic, tt$p.value, wt$statistic, wt$p.value, ks$statistic, ks$p.value))
t W D
[1,] 1.275 0.2126 236 0.201 0.3368 0.2189
#According to p values all approaches show that there is no difference in means.
#apply() stuff from here on out
kDat <- readRDS("../data/GSE4051_MINI.rds")
kMat <- as.matrix(kDat[c('crabHammer', 'eggBomb', 'poisonFang')])
str(kMat)
num [1:39, 1:3] 10.22 10.02 9.64 9.65 8.58 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:39] "12" "13" "14" "15" ...
..$ : chr [1:3] "crabHammer" "eggBomb" "poisonFang"
median(kMat[, "eggBomb"])
[1] 6.757
apply(kMat, 2, median)
crabHammer eggBomb poisonFang
9.611 6.757 7.350
apply(kMat, 1, min)
12 13 14 15 9 10 11 28 29 30 31 24
7.370 6.890 6.720 6.529 6.470 7.005 6.735 6.587 6.170 6.870 6.800 6.138
25 26 27 36 37 38 39 32 33 34 35 20
6.166 6.269 6.264 6.530 7.100 6.269 6.211 6.286 6.347 6.270 6.188 7.005
21 22 23 16 17 18 19 5 6 7 8 1
7.082 6.757 6.155 7.228 7.226 7.363 7.081 6.993 6.992 6.608 7.003 6.981
2 3 4
7.165 7.075 6.558
colnames(kMat)[apply(kMat, 1, which.min)]
[1] "poisonFang" "eggBomb" "eggBomb" "eggBomb" "eggBomb"
[6] "poisonFang" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[11] "eggBomb" "eggBomb" "eggBomb" "eggBomb" "eggBomb"
[16] "eggBomb" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[21] "eggBomb" "eggBomb" "eggBomb" "poisonFang" "eggBomb"
[26] "eggBomb" "eggBomb" "eggBomb" "eggBomb" "poisonFang"
[31] "eggBomb" "poisonFang" "eggBomb" "eggBomb" "eggBomb"
[36] "poisonFang" "eggBomb" "poisonFang" "eggBomb"
#aggregate() stuff
aggregate(eggBomb ~ devStage, kDat, FUN = mean)
devStage eggBomb
1 E16 6.879
2 P2 6.408
3 P6 6.459
4 P10 7.143
5 4_weeks 7.063
aggregate(eggBomb ~ gType * devStage, kDat, FUN = mean)
gType devStage eggBomb
1 wt E16 6.900
2 NrlKO E16 6.851
3 wt P2 6.607
4 NrlKO P2 6.209
5 wt P6 6.646
6 NrlKO P6 6.273
7 wt P10 7.042
8 NrlKO P10 7.243
9 wt 4_weeks 7.117
10 NrlKO 4_weeks 7.008
#two sample test
keepGenes <- c("1431708_a_at", "1424336_at", "1454696_at","1416119_at", "1432141_x_at", "1429226_at" )
miniDat <- subset(prDat, rownames(prDat) %in% keepGenes)
miniDat <- data.frame(gExp = as.vector(t(as.matrix(miniDat))),
gene = factor(rep(rownames(miniDat), each = ncol(miniDat)),
levels = keepGenes))
miniDat <- suppressWarnings(data.frame(prDes, miniDat))
str(miniDat)
'data.frame': 234 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 10.6 11 10.8 10.9 9.2 ...
$ gene : Factor w/ 6 levels "1431708_a_at",..: 4 4 4 4 4 4 4 4 4 4 ...
stripplot(gType ~ gExp | gene, miniDat, scales = list(x = list(relation = "free")), group = gType, auto.key = TRUE)
someDat <- droplevels(subset(miniDat, gene == keepGenes[1]))
t.test(gExp ~ gType, someDat)
Welch Two Sample t-test
data: gExp by gType
t = 9.838, df = 36.89, p-value = 7.381e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.570 2.384
sample estimates:
mean in group wt mean in group NrlKO
9.554 7.578
#plyr stuff
library(plyr)
d_ply(miniDat, ~ gene, function(x) t.test(gExp ~ gType, x), .print=TRUE)
Welch Two Sample t-test
data: gExp by gType
t = 9.838, df = 36.89, p-value = 7.381e-12
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.570 2.384
sample estimates:
mean in group wt mean in group NrlKO
9.554 7.578
Welch Two Sample t-test
data: gExp by gType
t = 9.061, df = 36.49, p-value = 7.146e-11
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.6239 0.9835
sample estimates:
mean in group wt mean in group NrlKO
7.670 6.867
Welch Two Sample t-test
data: gExp by gType
t = 8.077, df = 33.49, p-value = 2.278e-09
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.402 2.346
sample estimates:
mean in group wt mean in group NrlKO
12.85 10.97
Welch Two Sample t-test
data: gExp by gType
t = -0.184, df = 36.53, p-value = 0.8551
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5079 0.4234
sample estimates:
mean in group wt mean in group NrlKO
9.893 9.935
Welch Two Sample t-test
data: gExp by gType
t = -0.1324, df = 36.31, p-value = 0.8954
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2413 0.2118
sample estimates:
mean in group wt mean in group NrlKO
7.092 7.107
Welch Two Sample t-test
data: gExp by gType
t = 0.0983, df = 35.58, p-value = 0.9223
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.1019 0.1123
sample estimates:
mean in group wt mean in group NrlKO
6.551 6.546
ttRes <- dlply(miniDat, ~ gene, function(x) t.test(gExp ~ gType, x))
names(ttRes)
[1] "1431708_a_at" "1424336_at" "1454696_at" "1416119_at"
[5] "1432141_x_at" "1429226_at"
ttRes <- ddply(miniDat, ~ gene, function(z) {
zz <- t.test(gExp ~ gType, z)
round(c(tStat = zz$statistic, pVal = zz$p.value), 4)
})
ttRes
gene tStat.t pVal
1 1431708_a_at 9.8380 0.0000
2 1424336_at 9.0607 0.0000
3 1454696_at 8.0771 0.0000
4 1416119_at -0.1840 0.8551
5 1432141_x_at -0.1324 0.8954
6 1429226_at 0.0983 0.9223