Emulating the SAS BY statement in R
by() in R
## Attach the survival package, which ships the colon cancer trial data
library(survival)
## Copy the colon dataset into the workspace. survival lazy-loads its data,
## so survival::colon is always available; data(colon) is avoided because
## recent survival releases store colon inside the combined "cancer" data
## file, where data(colon) warns "data set 'colon' not found".
colon <- survival::colon
## Show the first six rows (the head() default)
head(colon)
id study rx sex age obstruct perfor adhere nodes status differ extent surg node4 time etype
1 1 1 Lev+5FU 1 43 0 0 0 5 1 2 3 0 1 1521 2
2 1 1 Lev+5FU 1 43 0 0 0 5 1 2 3 0 1 968 1
3 2 1 Lev+5FU 1 63 0 0 0 1 0 2 3 0 0 3087 2
4 2 1 Lev+5FU 1 63 0 0 0 1 0 2 3 0 0 3087 1
5 3 1 Obs 0 71 0 0 1 7 1 2 2 0 1 963 2
6 3 1 Obs 0 71 0 0 1 7 1 2 2 0 1 542 1
## Emulate SAS "PROC MEANS DATA = COLON; BY RX;": descriptive statistics
## for the columns of colon, computed separately within each level of rx
library(psych)
by(data = colon, INDICES = colon$rx, FUN = psych::describe)
colon$rx: Obs
var n mean sd median trimmed mad min max range skew kurtosis se
id 1 630 463.44 268.64 459.0 463.63 348.41 3 924 921 -0.01 -1.21 10.70
study 2 630 1.00 0.00 1.0 1.00 0.00 1 1 0 NaN NaN 0.00
rx* 3 630 1.00 0.00 1.0 1.00 0.00 1 1 0 NaN NaN 0.00
sex 4 630 0.53 0.50 1.0 0.53 0.00 0 1 1 -0.11 -1.99 0.02
age 5 630 59.45 11.96 60.0 60.29 11.86 18 85 67 -0.64 0.25 0.48
obstruct 6 630 0.20 0.40 0.0 0.12 0.00 0 1 1 1.50 0.24 0.02
perfor 7 630 0.03 0.17 0.0 0.00 0.00 0 1 1 5.65 29.92 0.01
adhere 8 630 0.15 0.36 0.0 0.06 0.00 0 1 1 1.96 1.86 0.01
nodes 9 624 3.79 3.73 2.0 3.03 1.48 0 27 27 2.43 8.03 0.15
status 10 630 0.55 0.50 1.0 0.56 0.00 0 1 1 -0.19 -1.97 0.02
differ 11 616 2.08 0.50 2.0 2.09 0.00 1 3 2 0.16 0.83 0.02
extent 12 630 2.89 0.52 3.0 2.94 0.00 1 4 3 -1.18 3.60 0.02
surg 13 630 0.29 0.45 0.0 0.24 0.00 0 1 1 0.93 -1.14 0.02
node4 14 630 0.28 0.45 0.0 0.22 0.00 0 1 1 1.00 -1.00 0.02
time 15 630 1440.61 929.15 1506.0 1433.18 1248.35 20 3214 3194 -0.03 -1.50 37.02
etype 16 630 1.50 0.50 1.5 1.50 0.74 1 2 1 0.00 -2.00 0.02
---------------------------------------------------------------------------------------
colon$rx: Lev
var n mean sd median trimmed mad min max range skew kurtosis se
id 1 620 470.57 270.65 462.0 470.52 339.52 7 929 922 0.02 -1.21 10.87
study 2 620 1.00 0.00 1.0 1.00 0.00 1 1 0 NaN NaN 0.00
rx* 3 620 2.00 0.00 2.0 2.00 0.00 2 2 0 NaN NaN 0.00
sex 4 620 0.57 0.50 1.0 0.59 0.00 0 1 1 -0.29 -1.92 0.02
age 5 620 60.11 11.64 61.0 60.78 11.86 27 83 56 -0.48 -0.29 0.47
obstruct 6 620 0.20 0.40 0.0 0.13 0.00 0 1 1 1.47 0.17 0.02
perfor 7 620 0.03 0.18 0.0 0.00 0.00 0 1 1 5.28 25.94 0.01
adhere 8 620 0.16 0.37 0.0 0.07 0.00 0 1 1 1.87 1.50 0.01
nodes 9 608 3.69 3.56 2.0 3.05 1.48 0 33 33 2.95 15.76 0.14
status 10 620 0.54 0.50 1.0 0.55 0.00 0 1 1 -0.15 -1.98 0.02
differ 11 600 2.02 0.52 2.0 2.03 0.00 1 3 2 0.03 0.69 0.02
extent 12 620 2.90 0.43 3.0 2.97 0.00 1 4 3 -1.27 4.65 0.02
surg 13 620 0.26 0.44 0.0 0.20 0.00 0 1 1 1.10 -0.78 0.02
node4 14 620 0.29 0.45 0.0 0.23 0.00 0 1 1 0.94 -1.12 0.02
time 15 620 1465.28 961.73 1562.5 1460.57 1348.42 19 3329 3310 -0.02 -1.54 38.62
etype 16 620 1.50 0.50 1.5 1.50 0.74 1 2 1 0.00 -2.00 0.02
---------------------------------------------------------------------------------------
colon$rx: Lev+5FU
var n mean sd median trimmed mad min max range skew kurtosis se
id 1 608 460.93 265.73 475.5 460.97 346.19 1 928 927 -0.01 -1.20 10.78
study 2 608 1.00 0.00 1.0 1.00 0.00 1 1 0 NaN NaN 0.00
rx* 3 608 3.00 0.00 3.0 3.00 0.00 3 3 0 NaN NaN 0.00
sex 4 608 0.46 0.50 0.0 0.45 0.00 0 1 1 0.14 -1.98 0.02
age 5 608 59.70 12.25 62.0 60.67 11.86 26 81 55 -0.63 -0.32 0.50
obstruct 6 608 0.18 0.38 0.0 0.10 0.00 0 1 1 1.68 0.83 0.02
perfor 7 608 0.03 0.16 0.0 0.00 0.00 0 1 1 5.90 32.91 0.01
adhere 8 608 0.13 0.33 0.0 0.04 0.00 0 1 1 2.22 2.92 0.01
nodes 9 590 3.49 3.41 2.0 2.77 1.48 1 24 23 2.43 7.57 0.14
status 10 608 0.40 0.49 0.0 0.37 0.00 0 1 1 0.42 -1.83 0.02
differ 11 596 2.08 0.52 2.0 2.10 0.00 1 3 2 0.11 0.54 0.02
extent 12 608 2.87 0.51 3.0 2.95 0.00 1 4 3 -1.76 4.94 0.02
surg 13 608 0.25 0.43 0.0 0.19 0.00 0 1 1 1.15 -0.67 0.02
node4 14 608 0.26 0.44 0.0 0.20 0.00 0 1 1 1.09 -0.81 0.02
time 15 608 1711.68 926.63 2066.0 1750.36 877.70 8 3309 3301 -0.44 -1.22 37.58
etype 16 608 1.50 0.50 1.5 1.50 0.74 1 2 1 0.00 -2.00 0.02
## Emulate SAS "PROC FREQ DATA = COLON; BY EXTENT; TABLE SURG*DIFFER;":
## build one surg-by-differ contingency table per level of extent
out.xtabs <- by(
  data = colon,
  INDICES = colon$extent,
  FUN = function(stratum) xtabs(~ surg + differ, data = stratum)
)
## Auto-print the per-extent tables
out.xtabs
colon$extent: 1
differ
surg 1 2 3
0 10 20 2
1 0 4 4
---------------------------------------------------------------------------------------
colon$extent: 2
differ
surg 1 2 3
0 16 116 18
1 8 40 6
---------------------------------------------------------------------------------------
colon$extent: 3
differ
surg 1 2 3
0 112 786 180
1 40 298 74
---------------------------------------------------------------------------------------
colon$extent: 4
differ
surg 2 3
0 52 12
1 10 4
## Print each per-extent table with cell counts, chi-square contributions,
## and row / column / table proportions
library(gmodels)
## Keep the returned list in a throwaway name so it is not auto-printed
junk <- lapply(X = out.xtabs, FUN = gmodels::CrossTable)
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 40
| differ
surg | 1 | 2 | 3 | Row Total |
-------------|-----------|-----------|-----------|-----------|
0 | 10 | 20 | 2 | 32 |
| 0.500 | 0.033 | 1.633 | |
| 0.312 | 0.625 | 0.062 | 0.800 |
| 1.000 | 0.833 | 0.333 | |
| 0.250 | 0.500 | 0.050 | |
-------------|-----------|-----------|-----------|-----------|
1 | 0 | 4 | 4 | 8 |
| 2.000 | 0.133 | 6.533 | |
| 0.000 | 0.500 | 0.500 | 0.200 |
| 0.000 | 0.167 | 0.667 | |
| 0.000 | 0.100 | 0.100 | |
-------------|-----------|-----------|-----------|-----------|
Column Total | 10 | 24 | 6 | 40 |
| 0.250 | 0.600 | 0.150 | |
-------------|-----------|-----------|-----------|-----------|
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 204
| differ
surg | 1 | 2 | 3 | Row Total |
-------------|-----------|-----------|-----------|-----------|
0 | 16 | 116 | 18 | 150 |
| 0.154 | 0.015 | 0.007 | |
| 0.107 | 0.773 | 0.120 | 0.735 |
| 0.667 | 0.744 | 0.750 | |
| 0.078 | 0.569 | 0.088 | |
-------------|-----------|-----------|-----------|-----------|
1 | 8 | 40 | 6 | 54 |
| 0.427 | 0.041 | 0.020 | |
| 0.148 | 0.741 | 0.111 | 0.265 |
| 0.333 | 0.256 | 0.250 | |
| 0.039 | 0.196 | 0.029 | |
-------------|-----------|-----------|-----------|-----------|
Column Total | 24 | 156 | 24 | 204 |
| 0.118 | 0.765 | 0.118 | |
-------------|-----------|-----------|-----------|-----------|
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 1490
| differ
surg | 1 | 2 | 3 | Row Total |
-------------|-----------|-----------|-----------|-----------|
0 | 112 | 786 | 180 | 1078 |
| 0.037 | 0.004 | 0.077 | |
| 0.104 | 0.729 | 0.167 | 0.723 |
| 0.737 | 0.725 | 0.709 | |
| 0.075 | 0.528 | 0.121 | |
-------------|-----------|-----------|-----------|-----------|
1 | 40 | 298 | 74 | 412 |
| 0.098 | 0.010 | 0.202 | |
| 0.097 | 0.723 | 0.180 | 0.277 |
| 0.263 | 0.275 | 0.291 | |
| 0.027 | 0.200 | 0.050 | |
-------------|-----------|-----------|-----------|-----------|
Column Total | 152 | 1084 | 254 | 1490 |
| 0.102 | 0.728 | 0.170 | |
-------------|-----------|-----------|-----------|-----------|
Cell Contents
|-------------------------|
| N |
| Chi-square contribution |
| N / Row Total |
| N / Col Total |
| N / Table Total |
|-------------------------|
Total Observations in Table: 78
| differ
surg | 2 | 3 | Row Total |
-------------|-----------|-----------|-----------|
0 | 52 | 12 | 64 |
| 0.025 | 0.097 | |
| 0.812 | 0.188 | 0.821 |
| 0.839 | 0.750 | |
| 0.667 | 0.154 | |
-------------|-----------|-----------|-----------|
1 | 10 | 4 | 14 |
| 0.114 | 0.443 | |
| 0.714 | 0.286 | 0.179 |
| 0.161 | 0.250 | |
| 0.128 | 0.051 | |
-------------|-----------|-----------|-----------|
Column Total | 62 | 16 | 78 |
| 0.795 | 0.205 | |
-------------|-----------|-----------|-----------|