Creating tables, accessing elements of tables, and accessing elements of other objects

References

Low birthweight dataset explanation: http://www.umass.edu/statdata/statdata/data/lowbwt.txt

Prepare data

## Load dataset directly from the internet
lbw <- read.table("http://www.umass.edu/statdata/statdata/data/lowbwt.dat", head = T, skip = 4)

## Change variable names to lower cases
names(lbw) <- tolower(names(lbw))

## Recoding using within function
lbw <- within(lbw, {

    ## Relabel race: 1, 2, 3 -> White, Black, Other
    race.cat <- factor(race, levels = 1:3, labels = c("White","Black","Other"))

    ## Dichotomize ptl
    preterm  <- factor(ptl >= 1, levels = c(FALSE,TRUE), labels = c("0","1+"))
    ## You can alse use cut
    ## preterm  <- cut(ptl, breaks = c(-Inf, 0, Inf), labels = c("0","1+"))

    ## Change 0,1 binary to No,Yes binary
    smoke    <- factor(smoke, levels  = c(0,1), labels = c("No","Yes"))
    ht       <- factor(ht   , levels  = c(0,1), labels = c("No","Yes"))
    ui       <- factor(ui   , levels  = c(0,1), labels = c("No","Yes"))
    low      <- factor(low  , levels  = c(0,1), labels = c("No","Yes"))

    ## Categorize ftv (frequency of visit): 0   1   2   3   4   6 -> None(0), Normal(1-2), Many (>= 3)
    ftv.cat  <- cut(ftv, breaks = c(-Inf, 0, 2, Inf), labels = c("None","Normal","Many"))

    ## Make Normal the reference level
    ftv.cat  <- relevel(ftv.cat, ref = "Normal")
})

Exaplanation

These functions take data, split it by specified variables, and apply a specified summary function.

Tables

## One categorical variable
table(lbw$smoke)


 No Yes 
115  74


## Two categorical variables
table(lbw$smoke, lbw$ftv.cat)


      Normal None Many
  No      54   55    6
  Yes     23   45    6


## Two categorical variables (variable name included)
xtabs( ~ smoke + ftv.cat, data = lbw)

     ftv.cat
smoke Normal None Many
  No      54   55    6
  Yes     23   45    6


## tapply can be used by measuring length of an arbitrary variable
with(lbw, tapply(age, list(smoke, ftv.cat), FUN = length))

    Normal None Many
No      54   55    6
Yes     23   45    6


## Two categorical variables (variable name included)
tab.3d <- xtabs( ~ smoke + ftv.cat + low, data = lbw)

## Flat table
ftable(tab.3d)

              low No Yes
smoke ftv.cat           
No    Normal      43  11
      None        39  16
      Many         4   2
Yes   Normal      16   7
      None        25  20
      Many         3   3

Indexing to select out elements

Objects that are really single vectors (vectors, matrices, and arrays) can be indexed the following ways.

## Original
tab.3d

, , low = No

     ftv.cat
smoke Normal None Many
  No      43   39    4
  Yes     16   25    3

, , low = Yes

     ftv.cat
smoke Normal None Many
  No      11   16    2
  Yes      7   20    3

## No smokers only
tab.3d["No",,]

        low
ftv.cat  No Yes
  Normal 43  11
  None   39  16
  Many    4   2

## Smokers only
tab.3d[2,,]

        low
ftv.cat  No Yes
  Normal 16   7
  None   25  20
  Many    3   3

## Smokers who had low birth weight baby
tab.3d["Yes",, "Yes"]

Normal   None   Many 
     7     20      3

Getting elements from multi-vector objects

Objects that are multiple vectors (data frames and lists) can also be addressed by element names. Elements can be examined by str().

## Examine lbw
str(lbw)

'data.frame':   189 obs. of  14 variables:
 $ id      : int  85 86 87 88 89 91 92 93 94 95 ...
 $ low     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ age     : int  19 33 20 21 18 21 22 17 29 26 ...
 $ lwt     : int  182 155 105 108 107 124 118 103 123 113 ...
 $ race    : int  2 3 1 1 1 3 1 3 1 1 ...
 $ smoke   : Factor w/ 2 levels "No","Yes": 1 1 2 2 2 1 1 1 2 2 ...
 $ ptl     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ ht      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ ui      : Factor w/ 2 levels "No","Yes": 2 1 1 2 2 1 1 1 1 1 ...
 $ ftv     : int  0 3 1 2 0 0 1 1 1 0 ...
 $ bwt     : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
 $ ftv.cat : Factor w/ 3 levels "Normal","None",..: 2 3 1 1 2 2 1 1 1 2 ...
 $ preterm : Factor w/ 2 levels "0","1+": 1 1 1 1 1 1 1 1 1 1 ...
 $ race.cat: Factor w/ 3 levels "White","Black",..: 2 3 1 1 1 3 1 3 1 1 ...

## Extract id element
lbw$id

  [1]  85  86  87  88  89  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 111 112 113
 [28] 114 115 116 117 118 119 120 121 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
 [55] 142 143 144 145 146 147 148 149 150 151 154 155 156 159 160 161 162 163 164 166 167 168 169 170 172 173 174
 [82] 175 176 177 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 195 196 197 199 200 201 202 203 204
[109] 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226   4  10  11  13  15
[136]  16  17  18  19  20  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  40  42  43  44  45  46
[163]  47  49  50  51  52  54  56  57  59  60  61  62  63  65  67  68  69  71  75  76  77  78  79  81  82  83  84

## Model objects can work in similar ways (linear regression)
lm.full <- lm(bwt ~ age + smoke + ht, data = lbw)
## str(lm.full)
## Coefficients element in the model object
lm.full$coefficients

(Intercept)         age    smokeYes       htYes 
    2824.67       10.93     -273.62     -424.47

## Examine summary objects
lm.summary <- summary(lm.full)
str(lm.summary)

List of 11
 $ call         : language lm(formula = bwt ~ age + smoke + ht, data = lbw)
 $ terms        :Classes 'terms', 'formula' length 3 bwt ~ age + smoke + ht
  .. ..- attr(*, "variables")= language list(bwt, age, smoke, ht)
  .. ..- attr(*, "factors")= int [1:4, 1:3] 0 1 0 0 0 0 1 0 0 0 ...
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:4] "bwt" "age" "smoke" "ht"
  .. .. .. ..$ : chr [1:3] "age" "smoke" "ht"
  .. ..- attr(*, "term.labels")= chr [1:3] "age" "smoke" "ht"
  .. ..- attr(*, "order")= int [1:3] 1 1 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(bwt, age, smoke, ht)
  .. ..- attr(*, "dataClasses")= Named chr [1:4] "numeric" "numeric" "factor" "factor"
  .. .. ..- attr(*, "names")= chr [1:4] "bwt" "age" "smoke" "ht"
 $ residuals    : Named num [1:189] -509 -634 -213 -187 -148 ...
  ..- attr(*, "names")= chr [1:189] "1" "2" "3" "4" ...
 $ coefficients : num [1:4, 1:4] 2824.7 10.9 -273.6 -424.5 239.6 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:4] "(Intercept)" "age" "smokeYes" "htYes"
  .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
 $ aliased      : Named logi [1:4] FALSE FALSE FALSE FALSE
  ..- attr(*, "names")= chr [1:4] "(Intercept)" "age" "smokeYes" "htYes"
 $ sigma        : num 712
 $ df           : int [1:3] 4 185 4
 $ r.squared    : num 0.0627
 $ adj.r.squared: num 0.0475
 $ fstatistic   : Named num [1:3] 4.12 3 185
  ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"
 $ cov.unscaled : num [1:4, 1:4] 0.1134 -0.00445 -0.01079 -0.00689 -0.00445 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:4] "(Intercept)" "age" "smokeYes" "htYes"
  .. ..$ : chr [1:4] "(Intercept)" "age" "smokeYes" "htYes"
 - attr(*, "class")= chr "summary.lm"

## Extract coefficients element in the summary object
lm.summary$coefficients

            Estimate Std. Error t value  Pr(>|t|)
(Intercept)  2824.67    239.603  11.789 2.748e-24
age            10.93      9.804   1.115 2.662e-01
smokeYes     -273.62    106.147  -2.578 1.072e-02
htYes        -424.47    212.287  -1.999 4.702e-02


## Generalized linear model: binomial family, logit link
logit.full <- glm(low ~ age + smoke + ht, data = lbw, family = binomial(link = "logit"))
logit.summary <- summary(logit.full)
logit.summary$coefficients

            Estimate Std. Error  z value Pr(>|z|)
(Intercept) -0.01716    0.76890 -0.02232  0.98220
age         -0.05043    0.03249 -1.55219  0.12062
smokeYes     0.70089    0.32584  2.15104  0.03147
htYes        1.23394    0.62095  1.98718  0.04690

print.default() can be used to show everything inside the object without formatting

## Show contents of a data frame without formatting
print.default(lbw[,c("age","bwt","low")])

$age
  [1] 19 33 20 21 18 21 22 17 29 26 19 19 22 30 18 18 15 25 20 28 32 31 36 28 25 28 17 29 26 17 17 24 35 25 25 29 19
 [38] 27 31 33 21 19 23 21 18 18 32 19 24 22 22 23 22 30 19 16 21 30 20 17 17 23 24 28 26 20 24 28 20 22 22 31 23 16
 [75] 16 18 25 32 20 23 22 32 30 20 23 17 19 23 36 22 24 21 19 25 16 29 29 19 19 30 24 19 24 23 20 25 30 22 18 16 32
[112] 18 29 33 20 28 14 28 25 16 20 26 21 22 25 31 35 19 24 45 28 29 34 25 25 27 23 24 24 21 32 19 25 16 25 20 21 24
[149] 21 20 25 19 19 26 24 17 20 22 27 20 17 25 20 18 18 20 21 26 31 15 23 20 24 15 23 30 22 17 23 17 26 20 26 14 28
[186] 14 23 17 21

$bwt
  [1] 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 2722 2733 2750 2750 2769 2769 2778 2782 2807 2821 2835 2835
 [23] 2836 2863 2877 2877 2906 2920 2920 2920 2920 2948 2948 2977 2977 2977 2977 2992 3005 3033 3042 3062 3062 3062
 [45] 3076 3076 3080 3090 3090 3090 3100 3104 3132 3147 3175 3175 3203 3203 3203 3225 3225 3232 3232 3234 3260 3274
 [67] 3274 3303 3317 3317 3317 3321 3331 3374 3374 3402 3416 3430 3444 3459 3460 3473 3475 3487 3544 3572 3572 3586
 [89] 3600 3614 3614 3629 3629 3637 3643 3651 3651 3651 3651 3699 3728 3756 3770 3770 3770 3790 3799 3827 3856 3860
[111] 3860 3884 3884 3912 3940 3941 3941 3969 3983 3997 3997 4054 4054 4111 4153 4167 4174 4238 4593 4990  709 1021
[133] 1135 1330 1474 1588 1588 1701 1729 1790 1818 1885 1893 1899 1928 1928 1928 1936 1970 2055 2055 2082 2084 2084
[155] 2100 2125 2126 2187 2187 2211 2225 2240 2240 2282 2296 2296 2301 2325 2353 2353 2367 2381 2381 2381 2395 2410
[177] 2410 2414 2424 2438 2442 2450 2466 2466 2466 2495 2495 2495 2495

$low
  [1] No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No 
 [28] No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No 
 [55] No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No 
 [82] No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No 
[109] No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  No  Yes Yes Yes Yes Yes
[136] Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes
[163] Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes Yes
Levels: No Yes

attr(,"class")
[1] "data.frame"

## Show contents of a summary object without formatting
print.default(lm.summary)

$call
lm(formula = bwt ~ age + smoke + ht, data = lbw)

$terms
bwt ~ age + smoke + ht
attr(,"variables")
list(bwt, age, smoke, ht)
attr(,"factors")
      age smoke ht
bwt     0     0  0
age     1     0  0
smoke   0     1  0
ht      0     0  1
attr(,"term.labels")
[1] "age"   "smoke" "ht"   
attr(,"order")
[1] 1 1 1
attr(,"intercept")
[1] 1
attr(,"response")
[1] 1
attr(,".Environment")
<environment: R_GlobalEnv>
attr(,"predvars")
list(bwt, age, smoke, ht)
attr(,"dataClasses")
      bwt       age     smoke        ht 
"numeric" "numeric"  "factor"  "factor" 

$residuals
        1         2         3         4         5         6         7         8         9        10        11 
 -509.401  -634.469  -212.714  -186.647  -147.847  -432.268  -428.201  -373.534  -205.115  -170.315  -310.401 
       12        13        14        15        16        17        18        19        20        21        22 
 -299.401   109.264  -402.669    21.153    21.153  -210.667   -42.381  -236.335   -36.182  -339.536  -328.602 
       23        24        25        26        27        28        29        30        31        32        33 
 -382.270  -267.802  -221.002  -253.802   169.086  -221.736    84.685   -90.534   -90.534   134.552    14.284 
       34        35        36        37        38        39        40        41        42        43        44 
 -121.002  -121.002   108.885   218.219   145.752   115.018   121.151   261.353    29.599   -14.135     7.732 
       45        46        47        48        49        50        51        52        53        54        55 
  328.153   328.153   -94.536    57.599     2.932   298.419   459.264    27.865   340.419   267.952   142.599 
       56        57        58        59        60        61        62        63        64        65        66 
  175.399   422.353    50.331   159.665   214.466   214.466   155.865   144.932   103.198   424.685   230.665 
       67        68        69        70        71        72        73        74        75        76        77 
  186.932   445.818   273.665   251.799   525.419   431.018   528.486   374.399   648.020   380.532   317.998 
       78        79        80        81        82        83        84        85        86        87        88 
  529.085   674.286   382.865   394.799   298.464   322.331   443.665   467.865   835.086   539.599   509.865 
       89        90        91        92        93        94        95        96        97        98        99 
  381.730   548.799   526.932   574.732  1294.685   812.619   917.020   509.264   509.264   892.219   892.219 
      100       101       102       103       104       105       106       107       108       109       110 
  546.331   640.932  1421.685   682.932   693.865   726.665  1116.463   646.331   761.799  1108.153   860.399 
      111       112       113       114       115       116       117       118       119       120       121 
  685.464   862.532  1015.885   726.531  1170.286   810.198   963.266   838.198   884.998   997.399   953.665 
      122       123       124       125       126       127       128       129       130       131       132 
  945.065   999.732  1045.799  1054.998  1003.398   966.664  1479.219  1505.932  1673.329 -2148.182 -2120.736 
      133       134       135       136       137       138       139       140       141       142       143 
-1363.317 -1343.537 -1624.002 -1531.869 -1488.135 -1386.068  -933.603  -566.182 -1082.915  -873.781 -1205.002 
      144       145       146       147       148       149       150       151       152       153       154 
-1100.601  -896.381  -841.714 -1126.268  -877.448 -1084.268  -988.335 -1043.002  -950.401  -674.781  -751.315 
      155       156       157       158       159       160       161       162       163       164       165 
 -987.068  -611.914  -643.714  -604.581  -932.869  -558.714  -511.914  -858.002  -803.335  -739.468  -451.847 
      166       167       168       169       170       171       172       173       174       175       176 
 -473.714  -753.268  -783.935  -536.982  -635.667  -435.514  -388.714  -432.448  -607.667  -681.135  -469.048 
      177       178       179       180       181       182       183       184       185       186       187 
 -381.581  -322.914  -378.514  -572.534  -242.470  -593.335  -369.315  -238.113  -391.182  -482.734  -307.514 
      188       189 
  -91.069   138.818 

$coefficients
            Estimate Std. Error t value  Pr(>|t|)
(Intercept)  2824.67    239.603  11.789 2.748e-24
age            10.93      9.804   1.115 2.662e-01
smokeYes     -273.62    106.147  -2.578 1.072e-02
htYes        -424.47    212.287  -1.999 4.702e-02

$aliased
(Intercept)         age    smokeYes       htYes 
      FALSE       FALSE       FALSE       FALSE 

$sigma
[1] 711.5

$df
[1]   4 185   4

$r.squared
[1] 0.06267

$adj.r.squared
[1] 0.04747

$fstatistic
  value   numdf   dendf 
  4.123   3.000 185.000 

$cov.unscaled
            (Intercept)         age    smokeYes       htYes
(Intercept)    0.113403 -0.00445180 -0.01078701 -0.00688818
age           -0.004452  0.00018987  0.00009075  0.00006273
smokeYes      -0.010787  0.00009075  0.02225653 -0.00056613
htYes         -0.006888  0.00006273 -0.00056613  0.08901977

attr(,"class")
[1] "summary.lm"