Format RMarkdown
## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
## Namespace-qualified so the call also works when knitr is loaded but not attached.
## Chunk defaults: no output prefix, warnings and messages hidden, code echoed,
## source left unformatted, figures 8 x 7.
knitr::opts_chunk$set(comment = "", warning = FALSE, message = FALSE, echo = TRUE,
                      tidy = FALSE, fig.width = 8, fig.height = 7)
## Wide printed output; scipen = 10 biases number printing toward fixed notation.
options(width = 116, scipen = 10)
It is often necessary to perform multiple t-tests (or Wilcoxon tests) on different variables (age, height, weight, duration of disease) between the same groups (treatment arm vs placebo arm). However, to my knowledge, there is no built-in function that does this in a single call.
Ura R-jp Wiki, a “critique of the literature” site for R code (http://blog.goo.ne.jp/r-de-r), kindly provided the much simpler solutions shown below.
## Load the survival package and its kidney example data set.
library(survival)
data(kidney)
## Multiple t-tests for time, age, frail between genders.
## Each selected column is passed to the anonymous function as x, so the printed
## "data:" line reads "x by kidney$sex" instead of naming the tested variable.
lapply(kidney[,c("time", "age", "frail")], function(x) t.test(x ~ kidney$sex, var.equal = TRUE))
$time
Two Sample t-test
data: x by kidney$sex
t = -1.706, df = 74, p-value = 0.09221
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-124.551 9.651
sample estimates:
mean in group 1 mean in group 2
59.3 116.8
$age
Two Sample t-test
data: x by kidney$sex
t = -0.0693, df = 74, p-value = 0.9449
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-7.970 7.434
sample estimates:
mean in group 1 mean in group 2
43.50 43.77
$frail
Two Sample t-test
data: x by kidney$sex
t = 0.9503, df = 74, p-value = 0.345
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.1872 0.5287
sample estimates:
mean in group 1 mean in group 2
1.310 1.139
## Multiple Wilcoxon rank sum tests for time, age, frail between genders.
## One wilcox.test() per selected column, each grouped by kidney$sex; the result
## is a list named by column.
lapply(kidney[,c("time", "age", "frail")], function(x) wilcox.test(x ~ kidney$sex))
$time
Wilcoxon rank sum test with continuity correction
data: x by kidney$sex
W = 308.5, p-value = 0.003062
alternative hypothesis: true location shift is not equal to 0
$age
Wilcoxon rank sum test with continuity correction
data: x by kidney$sex
W = 563.5, p-value = 0.9717
alternative hypothesis: true location shift is not equal to 0
$frail
Wilcoxon rank sum test with continuity correction
data: x by kidney$sex
W = 618, p-value = 0.4965
alternative hypothesis: true location shift is not equal to 0
## Multiple ANOVA for time, age, frail between diseases.
## For each selected column, fit lm(x ~ kidney$disease) and print its ANOVA table.
lapply(kidney[,c("time", "age", "frail")], function(x) anova(lm(x ~ kidney$disease)))
$time
Analysis of Variance Table
Response: x
Df Sum Sq Mean Sq F value Pr(>F)
kidney$disease 3 53745 17915 1.05 0.38
Residuals 72 1231643 17106
$age
Analysis of Variance Table
Response: x
Df Sum Sq Mean Sq F value Pr(>F)
kidney$disease 3 6152 2051 14.6 0.00000017 ***
Residuals 72 10144 141
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$frail
Analysis of Variance Table
Response: x
Df Sum Sq Mean Sq F value Pr(>F)
kidney$disease 3 1.8 0.603 1.28 0.29
Residuals 72 33.8 0.470
## Multiple Kruskal-Wallis tests for time, age, frail between diseases.
## One kruskal.test() per selected column, each grouped by kidney$disease.
lapply(kidney[,c("time", "age", "frail")], function(x) kruskal.test(x ~ kidney$disease))
$time
Kruskal-Wallis rank sum test
data: x by kidney$disease
Kruskal-Wallis chi-squared = 1.186, df = 3, p-value = 0.7563
$age
Kruskal-Wallis rank sum test
data: x by kidney$disease
Kruskal-Wallis chi-squared = 28.45, df = 3, p-value = 0.000002917
$frail
Kruskal-Wallis rank sum test
data: x by kidney$disease
Kruskal-Wallis chi-squared = 1.234, df = 3, p-value = 0.7449
The function below is more complex, but its output states which comparison was made, e.g., “time by disease”.
#' Apply one formula-based test to several outcome variables
#'
#' For every name in `vars`, builds the formula `<var> ~ <group.var>` and calls
#' `fun` with that formula and `data = df`.
#'
#' @param fun A function, or the name of one as a string, that takes a formula
#'   as its first positional argument and accepts a `data` argument.
#'   Defaults to `t.test`.
#' @param df Data frame handed to `fun` as `data`.
#' @param vars Character vector of outcome variable names (formula left-hand
#'   sides).
#' @param group.var Single string naming the grouping variable (formula
#'   right-hand side).
#' @param ... Further arguments forwarded unchanged to `fun`.
#' @return A list with one element per entry of `vars`, each holding whatever
#'   `fun` returned for that outcome. Elements are named after `vars`.
multi.tests <- function(fun = t.test, df, vars, group.var, ...) {
  ## Resolve `fun` once; this also allows passing a function name such as "t.test".
  fun <- match.fun(fun)
  ## A longer group.var would make paste() emit several formula strings per
  ## outcome, so reject it up front instead of testing something unintended.
  stopifnot(
    is.character(vars),
    is.character(group.var),
    length(group.var) == 1L
  )
  ## simplify = FALSE keeps the result a list for every input; sapply() is used
  ## (rather than lapply()) because USE.NAMES labels each element with its
  ## outcome variable name.
  sapply(
    vars,
    function(var) {
      ## Formula with the outcome on the left and the grouping variable on the right.
      fml <- as.formula(paste(var, "~", group.var))
      ## The formula is the only positional argument; everything else is named.
      fun(fml, data = df, ...)
    },
    simplify = FALSE,
    USE.NAMES = TRUE
  )
}
## Load the survival package and its kidney example data, then show the first rows.
library(survival)
data(kidney)
head(kidney)
id time status age sex disease frail
1 1 8 1 28 1 Other 2.3
2 1 16 1 28 1 Other 2.3
3 2 23 1 48 2 GN 1.9
4 2 13 0 48 2 GN 1.9
5 3 22 1 32 1 Other 1.2
6 3 28 1 32 1 Other 1.2
## Show the type of every column (disease is a factor; the others print as num).
str(kidney)
'data.frame': 76 obs. of 7 variables:
$ id : num 1 1 2 2 3 3 4 4 5 5 ...
$ time : num 8 16 23 13 22 28 447 318 30 12 ...
$ status : num 1 1 1 0 1 1 1 1 1 1 ...
$ age : num 28 28 48 48 32 32 31 32 10 10 ...
$ sex : num 1 1 2 2 1 1 2 2 1 1 ...
$ disease: Factor w/ 4 levels "Other","GN","AN",..: 1 1 2 2 1 1 1 1 1 1 ...
$ frail : num 2.3 2.3 1.9 1.9 1.2 1.2 0.5 0.5 1.5 1.5 ...
## Variable name extractor
## Prints the names of DATA on a single line, each wrapped in double quotes and
## separated by commas (convenient for pasting into a c(...) call).
ListVariableNames <- function(DATA) {
  joined.names <- paste(names(DATA), collapse = '","')
  cat(paste0('"', joined.names, '"\n'))
}
## Print the quoted, comma-separated column names of kidney.
ListVariableNames(kidney)
"id","time","status","age","sex","disease","frail"
## t-tests of time, age and frail between the two sex groups.
## var.equal = TRUE is not an argument of multi.tests() itself; it travels
## through `...` to t.test().
res.multi.t.tests <-
multi.tests(fun = t.test,
df = kidney,
vars = c("time","age","frail"),
group.var = "sex",
var.equal = TRUE)
## Print the list of test results (one element per outcome variable).
res.multi.t.tests
$time
Two Sample t-test
data: time by sex
t = -1.706, df = 74, p-value = 0.09221
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-124.551 9.651
sample estimates:
mean in group 1 mean in group 2
59.3 116.8
$age
Two Sample t-test
data: age by sex
t = -0.0693, df = 74, p-value = 0.9449
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-7.970 7.434
sample estimates:
mean in group 1 mean in group 2
43.50 43.77
$frail
Two Sample t-test
data: frail by sex
t = 0.9503, df = 74, p-value = 0.345
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.1872 0.5287
sample estimates:
mean in group 1 mean in group 2
1.310 1.139
## p-values can be extracted from the result object
## vapply() requires exactly one numeric p.value per test, so a malformed result
## raises an error instead of silently changing the shape of the output.
data.frame(p.value = vapply(res.multi.t.tests, getElement, numeric(1), name = "p.value"))
p.value
time 0.09221
age 0.94494
frail 0.34504
## Wilcoxon rank sum tests of time, age and frail between the two sex groups.
res.multi.wilcox.tests <-
multi.tests(fun = wilcox.test,
df = kidney,
vars = c("time","age","frail"),
group.var = "sex")
## Print the list of test results (one element per outcome variable).
res.multi.wilcox.tests
$time
Wilcoxon rank sum test with continuity correction
data: time by sex
W = 308.5, p-value = 0.003062
alternative hypothesis: true location shift is not equal to 0
$age
Wilcoxon rank sum test with continuity correction
data: age by sex
W = 563.5, p-value = 0.9717
alternative hypothesis: true location shift is not equal to 0
$frail
Wilcoxon rank sum test with continuity correction
data: frail by sex
W = 618, p-value = 0.4965
alternative hypothesis: true location shift is not equal to 0
## One-way analysis of means of time, age and frail across the disease groups.
## var.equal = TRUE is forwarded through `...` to oneway.test().
res.multi.anova <-
multi.tests(fun = oneway.test,
df = kidney,
vars = c("time","age","frail"),
group.var = "disease",
var.equal = TRUE)
## Print the list of test results (one element per outcome variable).
res.multi.anova
$time
One-way analysis of means
data: time and disease
F = 1.047, num df = 3, denom df = 72, p-value = 0.377
$age
One-way analysis of means
data: age and disease
F = 14.55, num df = 3, denom df = 72, p-value = 0.0000001666
$frail
One-way analysis of means
data: frail and disease
F = 1.285, num df = 3, denom df = 72, p-value = 0.2862
## Kruskal-Wallis rank sum tests of time, age and frail across the disease groups.
res.multi.kruskal.tests <-
multi.tests(fun = kruskal.test,
df = kidney,
vars = c("time","age","frail"),
group.var = "disease")
## Print the list of test results (one element per outcome variable).
res.multi.kruskal.tests
$time
Kruskal-Wallis rank sum test
data: time by disease
Kruskal-Wallis chi-squared = 1.186, df = 3, p-value = 0.7563
$age
Kruskal-Wallis rank sum test
data: age by disease
Kruskal-Wallis chi-squared = 28.45, df = 3, p-value = 0.000002917
$frail
Kruskal-Wallis rank sum test
data: frail by disease
Kruskal-Wallis chi-squared = 1.234, df = 3, p-value = 0.7449
Any function that accepts a formula of the form continuous variable ~ categorical variable works, for example plot():
## plot() is called once per outcome with the formula <outcome> ~ disease.
## The returned list is assigned to `junk` so that it is not auto-printed.
junk <- multi.tests(fun = plot,
df = kidney,
vars = c("time","age","frail"),
group.var = "disease")