This document demonstrates different ways of generating contingency tables and bar charts, using the mtcars dataset.
library(knitr)
library(ggplot2)
library(gridExtra)
# Default knitr chunk options applied to every chunk in this document.
knitr::opts_chunk$set(
  tidy = FALSE,        # spelled out: `F` is an ordinary variable that can be reassigned
  fig.width = 8,
  fig.height = 8,
  fig.align = "left",
  warning = FALSE,
  message = FALSE,
  echo = TRUE
)
# Load the built-in mtcars data set into the workspace.
data("mtcars", package = "datasets")
# Allow printed output to use up to 120 characters per line.
options(width = 120)
# Inspect the structure: gear and cyl are stored as numeric columns.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# gear and cyl must be converted to factors for the bar charts to work
mtcars$gear <- factor(mtcars$gear)
mtcars$cyl <- factor(mtcars$cyl)
# Confirm the factor levels that were created
levels(mtcars$gear)
## [1] "3" "4" "5"
levels(mtcars$cyl)
## [1] "4" "6" "8"
# Create side-by-side (dodged) bar charts:
#   p1 puts gear on the x axis with one bar per cyl level,
#   p2 puts cyl on the x axis with one bar per gear level.
p1 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar(position = "dodge")
p2 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar(position = "dodge")
# Draw the two charts in a single figure, arranged in two columns
grid.arrange(p1, p2, ncol = 2)
# Gear (rows) by cyl (columns) counts, with "Sum" totals on both margins
addmargins(table(mtcars[["gear"]], mtcars[["cyl"]]), margin = c(1, 2))
##
## 4 6 8 Sum
## 3 1 2 12 15
## 4 8 4 0 12
## 5 2 1 2 5
## Sum 11 7 14 32
# Create stacked bar charts of cylinders by gear:
#   p1 stacks the raw counts,
#   p2 uses position = "fill", so the y axis is relabelled "proportion".
p1 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar()
p2 <- ggplot(mtcars, aes(x = gear, fill = cyl)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Draw the two charts in a single figure, arranged in two columns
grid.arrange(p1, p2, ncol = 2)
# Cross-tabulate once and reuse it: rows are gear, columns are cyl
gear_by_cyl <- table(mtcars$gear, mtcars$cyl)
# Counts plus a "Sum" column holding the total for each gear (row)
addmargins(gear_by_cyl, margin = 2)
##
## 4 6 8 Sum
## 3 1 2 12 15
## 4 8 4 0 12
## 5 2 1 2 5
# Row proportions: share of each cyl level within a gear (each row sums to 1)
prop.table(gear_by_cyl, margin = 1)
##
## 4 6 8
## 3 0.06666667 0.13333333 0.80000000
## 4 0.66666667 0.33333333 0.00000000
## 5 0.40000000 0.20000000 0.40000000
# Create stacked bar charts of gears by cylinders:
#   p1 stacks the raw counts,
#   p2 uses position = "fill", so the y axis is relabelled "proportion".
p1 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar()
p2 <- ggplot(mtcars, aes(x = cyl, fill = gear)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Draw the two charts in a single figure, arranged in two columns
grid.arrange(p1, p2, ncol = 2)
# Cross-tabulate once and reuse it: rows are gear, columns are cyl
gear_by_cyl <- table(mtcars$gear, mtcars$cyl)
# Counts plus a "Sum" row holding the total for each cyl (column)
addmargins(gear_by_cyl, margin = 1)
##
## 4 6 8
## 3 1 2 12
## 4 8 4 0
## 5 2 1 2
## Sum 11 7 14
# Column proportions: share of each gear level within a cyl (each column sums to 1)
prop.table(gear_by_cyl, margin = 2)
##
## 4 6 8
## 3 0.09090909 0.28571429 0.85714286
## 4 0.72727273 0.57142857 0.00000000
## 5 0.18181818 0.14285714 0.14285714
# Print the R version, platform and package versions in use for this run
utils::sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 ggplot2_2.2.1 knitr_1.18
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.14 digest_0.6.13 rprojroot_1.3-1 plyr_1.8.4 grid_3.4.3 gtable_0.2.0
## [7] backports_1.1.2 magrittr_1.5 evaluate_0.10.1 scales_0.5.0 pillar_1.0.1 rlang_0.1.6
## [13] stringi_1.1.6 lazyeval_0.2.1 rmarkdown_1.8 labeling_0.3 tools_3.4.3 stringr_1.2.0
## [19] munsell_0.4.3 yaml_2.1.16 compiler_3.4.3 colorspace_1.3-2 htmltools_0.3.6 tibble_1.4.1