Creating multiple xtabs for different row variables with the same row variable

Format RMarkdown

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, echo = TRUE, 
    tidy = FALSE, fig.width = 8, fig.height = 7)
options(width = 116, scipen = 10)

Background

It is often necessary to perform multiple cross table based tests, such as chi-squared test and Fisher exact test, on different variables (sex, smoking status,) between the same groups (treatment arm vs placebo arm).

## Load survival package
library(survival)
## Load chronic granulomatous disease dataset
data(cgd)
## Check data
head(cgd)

  id            center     random   treat    sex age height weight   inherit steroids propylac  hos.cat tstart enum
1  1 Scripps Institute 1989-06-07  rIFN-g female  12    147   62.0 autosomal        0        0 US:other      0    1
2  1 Scripps Institute 1989-06-07  rIFN-g female  12    147   62.0 autosomal        0        0 US:other    219    2
3  1 Scripps Institute 1989-06-07  rIFN-g female  12    147   62.0 autosomal        0        0 US:other    373    3
4  2 Scripps Institute 1989-06-07 placebo   male  15    159   47.5 autosomal        0        1 US:other      0    1
5  2 Scripps Institute 1989-06-07 placebo   male  15    159   47.5 autosomal        0        1 US:other      8    2
6  2 Scripps Institute 1989-06-07 placebo   male  15    159   47.5 autosomal        0        1 US:other     26    3
  tstop status
1   219      1
2   373      1
3   414      0
4     8      1
5    26      1
6   152      1

One-liner solution

Slight modification of codes kindly provided by Ura R-jp Wiki, a “critique of the literature” site for R codes (http://blog.goo.ne.jp/r-de-r). This is seemingly the quickest way.

## Create a list of cross tables.
list.of.xtabs <- lapply(cgd[,c("center","sex","inherit")], function(x) xtabs(~ x + cgd$treat))
list.of.xtabs

$center
                       cgd$treat
x                       placebo rIFN-g
  Harvard Medical Sch         3      1
  Scripps Institute          22     14
  Copenhagen                  4      1
  NIH                        20     21
  L.A. Children's Hosp        8      5
  Mott Children's Hosp       13      7
  Univ. of Utah               2      3
  Univ. of Washington         2      2
  Univ. of Minnesota          7      3
  Univ. of Zurich            13      8
  Texas Children's Hosp       7      4
  Amsterdam                  16     12
  Mt. Sinai Medical Ctr       3      2

$sex
        cgd$treat
x        placebo rIFN-g
  male       100     68
  female      20     15

$inherit
           cgd$treat
x           placebo rIFN-g
  X-linked       74     57
  autosomal      46     26


## Perform whatever you like now using another lapply()
## Adding margin sums
lapply(list.of.xtabs, addmargins)

$center
                       cgd$treat
x                       placebo rIFN-g Sum
  Harvard Medical Sch         3      1   4
  Scripps Institute          22     14  36
  Copenhagen                  4      1   5
  NIH                        20     21  41
  L.A. Children's Hosp        8      5  13
  Mott Children's Hosp       13      7  20
  Univ. of Utah               2      3   5
  Univ. of Washington         2      2   4
  Univ. of Minnesota          7      3  10
  Univ. of Zurich            13      8  21
  Texas Children's Hosp       7      4  11
  Amsterdam                  16     12  28
  Mt. Sinai Medical Ctr       3      2   5
  Sum                       120     83 203

$sex
        cgd$treat
x        placebo rIFN-g Sum
  male       100     68 168
  female      20     15  35
  Sum        120     83 203

$inherit
           cgd$treat
x           placebo rIFN-g Sum
  X-linked       74     57 131
  autosomal      46     26  72
  Sum           120     83 203


## Construct column percentage table (use margin = 2)
lapply(list.of.xtabs, prop.table, margin = 2)

$center
                       cgd$treat
x                       placebo  rIFN-g
  Harvard Medical Sch   0.02500 0.01205
  Scripps Institute     0.18333 0.16867
  Copenhagen            0.03333 0.01205
  NIH                   0.16667 0.25301
  L.A. Children's Hosp  0.06667 0.06024
  Mott Children's Hosp  0.10833 0.08434
  Univ. of Utah         0.01667 0.03614
  Univ. of Washington   0.01667 0.02410
  Univ. of Minnesota    0.05833 0.03614
  Univ. of Zurich       0.10833 0.09639
  Texas Children's Hosp 0.05833 0.04819
  Amsterdam             0.13333 0.14458
  Mt. Sinai Medical Ctr 0.02500 0.02410

$sex
        cgd$treat
x        placebo rIFN-g
  male    0.8333 0.8193
  female  0.1667 0.1807

$inherit
           cgd$treat
x           placebo rIFN-g
  X-linked   0.6167 0.6867
  autosomal  0.3833 0.3133


## Perform Fisher's exact test
res.fisher <- lapply(list.of.xtabs, fisher.test)
res.fisher

$center

    Fisher's Exact Test for Count Data

data:  X[[1L]] 
p-value = 0.9668
alternative hypothesis: two.sided 


$sex

    Fisher's Exact Test for Count Data

data:  X[[2L]] 
p-value = 0.851
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
 0.488 2.447 
sample estimates:
odds ratio 
     1.102 


$inherit

    Fisher's Exact Test for Count Data

data:  X[[3L]] 
p-value = 0.3709
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
 0.387 1.379 
sample estimates:
odds ratio 
    0.7349


## Extract p-values
sapply(res.fisher, "[[", "p.value")

 center     sex inherit 
 0.9668  0.8510  0.3709

Define a wrapper function for xtabs

This is more complex, but it gives more flexibility thanks to the power of formulae in R. It also gives meaningful row headers (center, sex, and inherit here).

multi.xtabs <- function(df, vars, group.var, ...) {
    sapply(simplify = FALSE,
           vars,                                                        # Loop for each element of vars
           function(var) {
               formula <- as.formula(paste("~", var, "+", group.var))   # Create formula
               xtabs(data = df, formula, ...)                           # Give formula to xtabs
           }
           )
}

Same exaples as the one-liners

## Create a list of cross tables
list.of.xtabs <- multi.xtabs(df = cgd, vars = c("center","sex","inherit"), group.var = "treat")
list.of.xtabs

$center
                       treat
center                  placebo rIFN-g
  Harvard Medical Sch         3      1
  Scripps Institute          22     14
  Copenhagen                  4      1
  NIH                        20     21
  L.A. Children's Hosp        8      5
  Mott Children's Hosp       13      7
  Univ. of Utah               2      3
  Univ. of Washington         2      2
  Univ. of Minnesota          7      3
  Univ. of Zurich            13      8
  Texas Children's Hosp       7      4
  Amsterdam                  16     12
  Mt. Sinai Medical Ctr       3      2

$sex
        treat
sex      placebo rIFN-g
  male       100     68
  female      20     15

$inherit
           treat
inherit     placebo rIFN-g
  X-linked       74     57
  autosomal      46     26


## Perform whatever you like now using another lapply()
## Adding margin sums
lapply(list.of.xtabs, addmargins)

$center
                       treat
center                  placebo rIFN-g Sum
  Harvard Medical Sch         3      1   4
  Scripps Institute          22     14  36
  Copenhagen                  4      1   5
  NIH                        20     21  41
  L.A. Children's Hosp        8      5  13
  Mott Children's Hosp       13      7  20
  Univ. of Utah               2      3   5
  Univ. of Washington         2      2   4
  Univ. of Minnesota          7      3  10
  Univ. of Zurich            13      8  21
  Texas Children's Hosp       7      4  11
  Amsterdam                  16     12  28
  Mt. Sinai Medical Ctr       3      2   5
  Sum                       120     83 203

$sex
        treat
sex      placebo rIFN-g Sum
  male       100     68 168
  female      20     15  35
  Sum        120     83 203

$inherit
           treat
inherit     placebo rIFN-g Sum
  X-linked       74     57 131
  autosomal      46     26  72
  Sum           120     83 203


## Construct column percentage table (use margin = 2)
lapply(list.of.xtabs, prop.table, margin = 2)

$center
                       treat
center                  placebo  rIFN-g
  Harvard Medical Sch   0.02500 0.01205
  Scripps Institute     0.18333 0.16867
  Copenhagen            0.03333 0.01205
  NIH                   0.16667 0.25301
  L.A. Children's Hosp  0.06667 0.06024
  Mott Children's Hosp  0.10833 0.08434
  Univ. of Utah         0.01667 0.03614
  Univ. of Washington   0.01667 0.02410
  Univ. of Minnesota    0.05833 0.03614
  Univ. of Zurich       0.10833 0.09639
  Texas Children's Hosp 0.05833 0.04819
  Amsterdam             0.13333 0.14458
  Mt. Sinai Medical Ctr 0.02500 0.02410

$sex
        treat
sex      placebo rIFN-g
  male    0.8333 0.8193
  female  0.1667 0.1807

$inherit
           treat
inherit     placebo rIFN-g
  X-linked   0.6167 0.6867
  autosomal  0.3833 0.3133


## Perform Fisher's exact test
res.fisher <- lapply(list.of.xtabs, fisher.test)
res.fisher

$center

    Fisher's Exact Test for Count Data

data:  X[[1L]] 
p-value = 0.9668
alternative hypothesis: two.sided 


$sex

    Fisher's Exact Test for Count Data

data:  X[[2L]] 
p-value = 0.851
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
 0.488 2.447 
sample estimates:
odds ratio 
     1.102 


$inherit

    Fisher's Exact Test for Count Data

data:  X[[3L]] 
p-value = 0.3709
alternative hypothesis: true odds ratio is not equal to 1 
95 percent confidence interval:
 0.387 1.379 
sample estimates:
odds ratio 
    0.7349


## Extract p-values
sapply(res.fisher, "[[", "p.value")

 center     sex inherit 
 0.9668  0.8510  0.3709

Flexibility in action

It is possible to create dichotomized variables on the fly.

## Dichotomize age, height, and weight at given cutoffs.
list.of.xtabs <- multi.xtabs(df = cgd, vars = c("(age > 8)","(height > 120)","(weight > 60)"), group.var = "treat")
list.of.xtabs

$`(age > 8)`
       treat
age > 8 placebo rIFN-g
  FALSE      49     28
  TRUE       71     55

$`(height > 120)`
            treat
height > 120 placebo rIFN-g
       FALSE      45     22
       TRUE       75     61

$`(weight > 60)`
           treat
weight > 60 placebo rIFN-g
      FALSE      89     66
      TRUE       31     17

To my surprise, as.formula() can interpret cut() function given as a string although the output is rather untidy.

## cut(weight, breaks = c(-Inf,60,80,90,Inf)) is given as a string.
multi.xtabs(df = cgd, vars = c("(age > 8)","cut(weight, breaks = c(-Inf,60,80,90,Inf))"), group.var = "treat")

$`(age > 8)`
       treat
age > 8 placebo rIFN-g
  FALSE      49     28
  TRUE       71     55

$`cut(weight, breaks = c(-Inf,60,80,90,Inf))`
                                              treat
cut(weight, breaks = c(-Inf, 60, 80, 90, Inf)) placebo rIFN-g
                                     (-Inf,60]      89     66
                                     (60,80]        26     16
                                     (80,90]         3      0
                                     (90, Inf]       2      1


## The proper way to do it provides more readable output
## Create categorical variable from a continuous variable
cgd$weight.cut <- cut(cgd$weight, breaks = c(-Inf,60,80,90,Inf))

## Now perform action on the newly created categorical variable
multi.xtabs(df = cgd, vars = c("(age > 8)","weight.cut"), group.var = "treat")

$`(age > 8)`
       treat
age > 8 placebo rIFN-g
  FALSE      49     28
  TRUE       71     55

$weight.cut
           treat
weight.cut  placebo rIFN-g
  (-Inf,60]      89     66
  (60,80]        26     16
  (80,90]         3      0
  (90, Inf]       2      1