group summaries
References
Load packages
library(dplyr)
library(tableone)
Load data
# Build the example data set from an inline, whitespace-delimited table.
# The first row is the header. read.table() runs with its default
# check.names = TRUE, so the hyphenated headers become syntactic names
# (var1.mean, var2.mean, var3.sd, var4.sd), which the code below relies on.
dat <- read.table(
  text = "
col1 col2 var1-mean var2-mean var3-sd var4-sd
1 1 0.1 0.12 0.54 0.33
1 1 0.102 0.13 0.45 0.30
1 2 0.1 0.12 0.54 0.33
1 2 0.102 0.13 0.45 0.30
2 1 0.1 0.12 0.54 0.33
2 1 0.102 0.13 0.45 0.30
2 2 0.1 0.12 0.54 0.33
2 2 0.102 0.13 0.45 0.30
",
  header = TRUE
)
TODO
Calculate the mean of every variable by col1 and col2
mean of means = sum of means divided by n
mean of sds = square root of the mean of the squared sds (root mean square; this equals the pooled sd when all groups have the same size)?
for example:
col1 col2 var1-mean var2-mean var3-sd var4-sd
1 1 1.01 1.125 0.63 etc
1 2 .. .. .. ..
2 1
etc
dplyr solution
# Average every measurement column within each (col1, col2) group.
# Uses the magrittr pipe `%>%` (re-exported by dplyr); the original `%.%`
# chaining operator was deprecated in dplyr 0.2 and later removed, so it
# no longer runs on current dplyr releases.
# NOTE(review): the sd columns are combined with a plain arithmetic mean,
# not a root-mean-square / pooled sd -- confirm this is the intended summary.
resSummaryized <- dat %>%
  group_by(col1, col2) %>%
  summarize(
    var1.mean = mean(var1.mean),
    var2.mean = mean(var2.mean),
    var3.sd = mean(var3.sd),
    var4.sd = mean(var4.sd)
  )

# Display the per-group result.
resSummaryized
## Source: local data frame [4 x 6]
## Groups: col1
##
## col1 col2 var1.mean var2.mean var3.sd var4.sd
## 1 1 1 0.101 0.125 0.495 0.315
## 2 1 2 0.101 0.125 0.495 0.315
## 3 2 1 0.101 0.125 0.495 0.315
## 4 2 2 0.101 0.125 0.495 0.315
tableone solution
# Summarise the four measurement columns, stratified by the combination of
# col1 and col2. Arguments are named so the roles of the two character
# vectors (variables vs. strata) are explicit.
tab1 <- CreateTableOne(
  vars = c("var1.mean", "var2.mean", "var3.sd", "var4.sd"),
  strata = c("col1", "col2"),
  data = dat
)
## NOTE: no factor/logical/character variables supplied, using CreateContTable()
# Print the stratified table, requesting 3 digits.
print(tab1, digits = 3)
## Stratified by col1:col2
## 1:1 2:1 1:2 2:2 p test
## n 2 2 2 2
## var1.mean (mean (sd)) 0.101 (0.001) 0.101 (0.001) 0.101 (0.001) 0.101 (0.001) 1.000
## var2.mean (mean (sd)) 0.125 (0.007) 0.125 (0.007) 0.125 (0.007) 0.125 (0.007) 1.000
## var3.sd (mean (sd)) 0.495 (0.064) 0.495 (0.064) 0.495 (0.064) 0.495 (0.064) 1.000
## var4.sd (mean (sd)) 0.315 (0.021) 0.315 (0.021) 0.315 (0.021) 0.315 (0.021) 1.000
# Show the detailed per-stratum summary of tab1 (counts, missingness,
# moments and quantiles for each variable), followed by the p-values.
summary(tab1)
## col1: 1
## col2: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## var1.mean 2 0 0 0.10 0.0014 0.10 0.10 0.10 0.10 0.10 NaN NaN
## var2.mean 2 0 0 0.12 0.0071 0.12 0.12 0.13 0.12 0.13 NaN NaN
## var3.sd 2 0 0 0.49 0.0636 0.49 0.47 0.52 0.45 0.54 NaN NaN
## var4.sd 2 0 0 0.32 0.0212 0.32 0.31 0.32 0.30 0.33 NaN NaN
## ---------------------------------------------------------------------------------------
## col1: 2
## col2: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## var1.mean 2 0 0 0.10 0.0014 0.10 0.10 0.10 0.10 0.10 NaN NaN
## var2.mean 2 0 0 0.12 0.0071 0.12 0.12 0.13 0.12 0.13 NaN NaN
## var3.sd 2 0 0 0.49 0.0636 0.49 0.47 0.52 0.45 0.54 NaN NaN
## var4.sd 2 0 0 0.32 0.0212 0.32 0.31 0.32 0.30 0.33 NaN NaN
## ---------------------------------------------------------------------------------------
## col1: 1
## col2: 2
## n miss p.miss mean sd median p25 p75 min max skew kurt
## var1.mean 2 0 0 0.10 0.0014 0.10 0.10 0.10 0.10 0.10 NaN NaN
## var2.mean 2 0 0 0.12 0.0071 0.12 0.12 0.13 0.12 0.13 NaN NaN
## var3.sd 2 0 0 0.49 0.0636 0.49 0.47 0.52 0.45 0.54 NaN NaN
## var4.sd 2 0 0 0.32 0.0212 0.32 0.31 0.32 0.30 0.33 NaN NaN
## ---------------------------------------------------------------------------------------
## col1: 2
## col2: 2
## n miss p.miss mean sd median p25 p75 min max skew kurt
## var1.mean 2 0 0 0.10 0.0014 0.10 0.10 0.10 0.10 0.10 NaN NaN
## var2.mean 2 0 0 0.12 0.0071 0.12 0.12 0.13 0.12 0.13 NaN NaN
## var3.sd 2 0 0 0.49 0.0636 0.49 0.47 0.52 0.45 0.54 NaN NaN
## var4.sd 2 0 0 0.32 0.0212 0.32 0.31 0.32 0.30 0.33 NaN NaN
##
## p-values
## pNormal pNonNormal
## var1.mean 1 1
## var2.mean 1 1
## var3.sd 1 1
## var4.sd 1 1