Introduction

This document demonstrates several ways to build contingency tables and the matching bar charts for two categorical variables, gear and cyl, from the built-in mtcars dataset.

library(knitr)
library(ggplot2)
library(gridExtra)
# Default options applied to every code chunk in this document.
knitr::opts_chunk$set(
  tidy = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  fig.width = 8,
  fig.height = 8,
  fig.align = "left",
  warning = FALSE,
  message = FALSE,
  echo = TRUE
)
# Widen printed output so wide results are less likely to wrap.
options(width = 120)
# Load the built-in mtcars data set into the workspace.
data(mtcars)

Prepare the Data

# Inspect the structure of mtcars: gear and cyl are stored as numeric columns
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Convert gear and cyl to factors so the bar charts treat them as
# discrete categories rather than continuous numbers
mtcars$gear <- factor(mtcars$gear)
mtcars$cyl <- factor(mtcars$cyl)
# Confirm the category labels produced by the conversion
levels(mtcars$gear)
## [1] "3" "4" "5"
levels(mtcars$cyl)
## [1] "4" "6" "8"

Raw Counts

# Side-by-side (dodged) bar charts of the raw counts:
#   p1 - one group of bars per gear, filled by cylinder
#   p2 - one group of bars per cylinder, filled by gear
p1 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar(position = "dodge")
p2 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar(position = "dodge")
# Place both charts in a single row for comparison
grid.arrange(p1, p2, ncol = 2)

# Cross-tabulate gear (rows) against cylinders (columns), then append
# totals for both the rows and the columns
gear_cyl_counts <- table(mtcars$gear, mtcars$cyl)
addmargins(gear_cyl_counts)
##      
##        4  6  8 Sum
##   3    1  2 12  15
##   4    8  4  0  12
##   5    2  1  2   5
##   Sum 11  7 14  32

Cylinders By Gear

# Stacked bar charts of cylinders within each gear:
#   p1 - bar height is the raw count
#   p2 - bars are rescaled to the same height, so segments show proportions
p1 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar()
p2 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Place both charts in a single row for comparison
grid.arrange(p1, p2, ncol = 2)

# Gear-by-cylinder counts, built once and reused for both views below
gear_cyl_counts <- table(mtcars$gear, mtcars$cyl)
# Raw counts with the per-gear total appended as an extra column
addmargins(gear_cyl_counts, margin = 2)
##    
##      4  6  8 Sum
##   3  1  2 12  15
##   4  8  4  0  12
##   5  2  1  2   5
# Share of each cylinder count within a gear (each row sums to 1)
prop.table(gear_cyl_counts, margin = 1)
##    
##              4          6          8
##   3 0.06666667 0.13333333 0.80000000
##   4 0.66666667 0.33333333 0.00000000
##   5 0.40000000 0.20000000 0.40000000

Gears By Cylinder

# Stacked bar charts of gears within each cylinder count:
#   p1 - bar height is the raw count
#   p2 - bars are rescaled to the same height, so segments show proportions
p1 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar()
p2 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Place both charts in a single row for comparison
grid.arrange(p1, p2, ncol = 2)

# Gear-by-cylinder counts, built once and reused for both views below
gear_cyl_counts <- table(mtcars$gear, mtcars$cyl)
# Raw counts with the per-cylinder total appended as an extra row
addmargins(gear_cyl_counts, margin = 1)
##      
##        4  6  8
##   3    1  2 12
##   4    8  4  0
##   5    2  1  2
##   Sum 11  7 14
# Share of each gear within a cylinder count (each column sums to 1)
prop.table(gear_cyl_counts, margin = 2)
##    
##              4          6          8
##   3 0.09090909 0.28571429 0.85714286
##   4 0.72727273 0.57142857 0.00000000
##   5 0.18181818 0.14285714 0.14285714

Programming Environment

# Print the R version, platform, locale, and attached/loaded package versions
# in effect when this document was built, for reproducibility
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.2
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gridExtra_2.3 ggplot2_2.2.1 knitr_1.18   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.14     digest_0.6.13    rprojroot_1.3-1  plyr_1.8.4       grid_3.4.3       gtable_0.2.0    
##  [7] backports_1.1.2  magrittr_1.5     evaluate_0.10.1  scales_0.5.0     pillar_1.0.1     rlang_0.1.6     
## [13] stringi_1.1.6    lazyeval_0.2.1   rmarkdown_1.8    labeling_0.3     tools_3.4.3      stringr_1.2.0   
## [19] munsell_0.4.3    yaml_2.1.16      compiler_3.4.3   colorspace_1.3-2 htmltools_0.3.6  tibble_1.4.1