Emulating SAS BY statement in R

by() in R

## Load survival package
library(survival)

## Bring the colon data frame into the workspace
data("colon")

## Preview the first six rows
head(colon, n = 6L)
  id study      rx sex age obstruct perfor adhere nodes status differ extent surg node4 time etype
1  1     1 Lev+5FU   1  43        0      0      0     5      1      2      3    0     1 1521     2
2  1     1 Lev+5FU   1  43        0      0      0     5      1      2      3    0     1  968     1
3  2     1 Lev+5FU   1  63        0      0      0     1      0      2      3    0     0 3087     2
4  2     1 Lev+5FU   1  63        0      0      0     1      0      2      3    0     0 3087     1
5  3     1     Obs   0  71        0      0      1     7      1      2      2    0     1  963     2
6  3     1     Obs   0  71        0      0      1     7      1      2      2    0     1  542     1

## Emulates: PROC MEANS DATA = COLON; BY RX;
## by() splits colon on rx and runs describe() on each piece
library(psych)
by(data = colon, INDICES = colon$rx, FUN = describe)
colon$rx: Obs
         var   n    mean     sd median trimmed     mad min  max range  skew kurtosis    se
id         1 630  463.44 268.64  459.0  463.63  348.41   3  924   921 -0.01    -1.21 10.70
study      2 630    1.00   0.00    1.0    1.00    0.00   1    1     0   NaN      NaN  0.00
rx*        3 630    1.00   0.00    1.0    1.00    0.00   1    1     0   NaN      NaN  0.00
sex        4 630    0.53   0.50    1.0    0.53    0.00   0    1     1 -0.11    -1.99  0.02
age        5 630   59.45  11.96   60.0   60.29   11.86  18   85    67 -0.64     0.25  0.48
obstruct   6 630    0.20   0.40    0.0    0.12    0.00   0    1     1  1.50     0.24  0.02
perfor     7 630    0.03   0.17    0.0    0.00    0.00   0    1     1  5.65    29.92  0.01
adhere     8 630    0.15   0.36    0.0    0.06    0.00   0    1     1  1.96     1.86  0.01
nodes      9 624    3.79   3.73    2.0    3.03    1.48   0   27    27  2.43     8.03  0.15
status    10 630    0.55   0.50    1.0    0.56    0.00   0    1     1 -0.19    -1.97  0.02
differ    11 616    2.08   0.50    2.0    2.09    0.00   1    3     2  0.16     0.83  0.02
extent    12 630    2.89   0.52    3.0    2.94    0.00   1    4     3 -1.18     3.60  0.02
surg      13 630    0.29   0.45    0.0    0.24    0.00   0    1     1  0.93    -1.14  0.02
node4     14 630    0.28   0.45    0.0    0.22    0.00   0    1     1  1.00    -1.00  0.02
time      15 630 1440.61 929.15 1506.0 1433.18 1248.35  20 3214  3194 -0.03    -1.50 37.02
etype     16 630    1.50   0.50    1.5    1.50    0.74   1    2     1  0.00    -2.00  0.02
--------------------------------------------------------------------------------------- 
colon$rx: Lev
         var   n    mean     sd median trimmed     mad min  max range  skew kurtosis    se
id         1 620  470.57 270.65  462.0  470.52  339.52   7  929   922  0.02    -1.21 10.87
study      2 620    1.00   0.00    1.0    1.00    0.00   1    1     0   NaN      NaN  0.00
rx*        3 620    2.00   0.00    2.0    2.00    0.00   2    2     0   NaN      NaN  0.00
sex        4 620    0.57   0.50    1.0    0.59    0.00   0    1     1 -0.29    -1.92  0.02
age        5 620   60.11  11.64   61.0   60.78   11.86  27   83    56 -0.48    -0.29  0.47
obstruct   6 620    0.20   0.40    0.0    0.13    0.00   0    1     1  1.47     0.17  0.02
perfor     7 620    0.03   0.18    0.0    0.00    0.00   0    1     1  5.28    25.94  0.01
adhere     8 620    0.16   0.37    0.0    0.07    0.00   0    1     1  1.87     1.50  0.01
nodes      9 608    3.69   3.56    2.0    3.05    1.48   0   33    33  2.95    15.76  0.14
status    10 620    0.54   0.50    1.0    0.55    0.00   0    1     1 -0.15    -1.98  0.02
differ    11 600    2.02   0.52    2.0    2.03    0.00   1    3     2  0.03     0.69  0.02
extent    12 620    2.90   0.43    3.0    2.97    0.00   1    4     3 -1.27     4.65  0.02
surg      13 620    0.26   0.44    0.0    0.20    0.00   0    1     1  1.10    -0.78  0.02
node4     14 620    0.29   0.45    0.0    0.23    0.00   0    1     1  0.94    -1.12  0.02
time      15 620 1465.28 961.73 1562.5 1460.57 1348.42  19 3329  3310 -0.02    -1.54 38.62
etype     16 620    1.50   0.50    1.5    1.50    0.74   1    2     1  0.00    -2.00  0.02
--------------------------------------------------------------------------------------- 
colon$rx: Lev+5FU
         var   n    mean     sd median trimmed    mad min  max range  skew kurtosis    se
id         1 608  460.93 265.73  475.5  460.97 346.19   1  928   927 -0.01    -1.20 10.78
study      2 608    1.00   0.00    1.0    1.00   0.00   1    1     0   NaN      NaN  0.00
rx*        3 608    3.00   0.00    3.0    3.00   0.00   3    3     0   NaN      NaN  0.00
sex        4 608    0.46   0.50    0.0    0.45   0.00   0    1     1  0.14    -1.98  0.02
age        5 608   59.70  12.25   62.0   60.67  11.86  26   81    55 -0.63    -0.32  0.50
obstruct   6 608    0.18   0.38    0.0    0.10   0.00   0    1     1  1.68     0.83  0.02
perfor     7 608    0.03   0.16    0.0    0.00   0.00   0    1     1  5.90    32.91  0.01
adhere     8 608    0.13   0.33    0.0    0.04   0.00   0    1     1  2.22     2.92  0.01
nodes      9 590    3.49   3.41    2.0    2.77   1.48   1   24    23  2.43     7.57  0.14
status    10 608    0.40   0.49    0.0    0.37   0.00   0    1     1  0.42    -1.83  0.02
differ    11 596    2.08   0.52    2.0    2.10   0.00   1    3     2  0.11     0.54  0.02
extent    12 608    2.87   0.51    3.0    2.95   0.00   1    4     3 -1.76     4.94  0.02
surg      13 608    0.25   0.43    0.0    0.19   0.00   0    1     1  1.15    -0.67  0.02
node4     14 608    0.26   0.44    0.0    0.20   0.00   0    1     1  1.09    -0.81  0.02
time      15 608 1711.68 926.63 2066.0 1750.36 877.70   8 3309  3301 -0.44    -1.22 37.58
etype     16 608    1.50   0.50    1.5    1.50   0.74   1    2     1  0.00    -2.00  0.02

## Emulates: PROC FREQ DATA = COLON; BY EXTENT; TABLE SURG*DIFFER;
## One surg-by-differ contingency table is built per value of extent
out.xtabs <- by(data = colon, INDICES = colon$extent, FUN = function(DF) {
  ## DF holds the rows of colon belonging to a single extent group
  xtabs(formula = ~ surg + differ, data = DF)
})
out.xtabs
colon$extent: 1
    differ
surg  1  2  3
   0 10 20  2
   1  0  4  4
--------------------------------------------------------------------------------------- 
colon$extent: 2
    differ
surg   1   2   3
   0  16 116  18
   1   8  40   6
--------------------------------------------------------------------------------------- 
colon$extent: 3
    differ
surg   1   2   3
   0 112 786 180
   1  40 298  74
--------------------------------------------------------------------------------------- 
colon$extent: 4
    differ
surg  2  3
   0 52 12
   1 10  4

## Add row, column and table-total proportions to each per-extent table
library(gmodels)
## Assigning to junk keeps lapply()'s return value from being auto-printed;
## CrossTable() prints each table itself
junk <- lapply(X = out.xtabs, FUN = CrossTable)


   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|


Total Observations in Table:  40 


             | differ 
        surg |         1 |         2 |         3 | Row Total | 
-------------|-----------|-----------|-----------|-----------|
           0 |        10 |        20 |         2 |        32 | 
             |     0.500 |     0.033 |     1.633 |           | 
             |     0.312 |     0.625 |     0.062 |     0.800 | 
             |     1.000 |     0.833 |     0.333 |           | 
             |     0.250 |     0.500 |     0.050 |           | 
-------------|-----------|-----------|-----------|-----------|
           1 |         0 |         4 |         4 |         8 | 
             |     2.000 |     0.133 |     6.533 |           | 
             |     0.000 |     0.500 |     0.500 |     0.200 | 
             |     0.000 |     0.167 |     0.667 |           | 
             |     0.000 |     0.100 |     0.100 |           | 
-------------|-----------|-----------|-----------|-----------|
Column Total |        10 |        24 |         6 |        40 | 
             |     0.250 |     0.600 |     0.150 |           | 
-------------|-----------|-----------|-----------|-----------|




   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|


Total Observations in Table:  204 


             | differ 
        surg |         1 |         2 |         3 | Row Total | 
-------------|-----------|-----------|-----------|-----------|
           0 |        16 |       116 |        18 |       150 | 
             |     0.154 |     0.015 |     0.007 |           | 
             |     0.107 |     0.773 |     0.120 |     0.735 | 
             |     0.667 |     0.744 |     0.750 |           | 
             |     0.078 |     0.569 |     0.088 |           | 
-------------|-----------|-----------|-----------|-----------|
           1 |         8 |        40 |         6 |        54 | 
             |     0.427 |     0.041 |     0.020 |           | 
             |     0.148 |     0.741 |     0.111 |     0.265 | 
             |     0.333 |     0.256 |     0.250 |           | 
             |     0.039 |     0.196 |     0.029 |           | 
-------------|-----------|-----------|-----------|-----------|
Column Total |        24 |       156 |        24 |       204 | 
             |     0.118 |     0.765 |     0.118 |           | 
-------------|-----------|-----------|-----------|-----------|




   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|


Total Observations in Table:  1490 


             | differ 
        surg |         1 |         2 |         3 | Row Total | 
-------------|-----------|-----------|-----------|-----------|
           0 |       112 |       786 |       180 |      1078 | 
             |     0.037 |     0.004 |     0.077 |           | 
             |     0.104 |     0.729 |     0.167 |     0.723 | 
             |     0.737 |     0.725 |     0.709 |           | 
             |     0.075 |     0.528 |     0.121 |           | 
-------------|-----------|-----------|-----------|-----------|
           1 |        40 |       298 |        74 |       412 | 
             |     0.098 |     0.010 |     0.202 |           | 
             |     0.097 |     0.723 |     0.180 |     0.277 | 
             |     0.263 |     0.275 |     0.291 |           | 
             |     0.027 |     0.200 |     0.050 |           | 
-------------|-----------|-----------|-----------|-----------|
Column Total |       152 |      1084 |       254 |      1490 | 
             |     0.102 |     0.728 |     0.170 |           | 
-------------|-----------|-----------|-----------|-----------|




   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|


Total Observations in Table:  78 


             | differ 
        surg |         2 |         3 | Row Total | 
-------------|-----------|-----------|-----------|
           0 |        52 |        12 |        64 | 
             |     0.025 |     0.097 |           | 
             |     0.812 |     0.188 |     0.821 | 
             |     0.839 |     0.750 |           | 
             |     0.667 |     0.154 |           | 
-------------|-----------|-----------|-----------|
           1 |        10 |         4 |        14 | 
             |     0.114 |     0.443 |           | 
             |     0.714 |     0.286 |     0.179 | 
             |     0.161 |     0.250 |           | 
             |     0.128 |     0.051 |           | 
-------------|-----------|-----------|-----------|
Column Total |        62 |        16 |        78 | 
             |     0.795 |     0.205 |           | 
-------------|-----------|-----------|-----------|