- Let's test and visualize the association between race and protest behavior.
- Discuss re-ordering factor levels and changing level names.
- Use mosaic plots to visualize the association between factors.
Justin Murphy
# Work from the folder that holds GSS.csv; change this path to match your
# own machine.
# NOTE(review): setwd() alters global session state; reading the file through
# a full path built with file.path() would avoid that.
setwd("~/Dropbox/Data General/GSS")

# Turn off scientific notation when printing numbers.
options(scipen = 999)

# Read GSS.csv into the data frame `x`.
x <- read.csv(file = "GSS.csv")

# Simple contingency table: attrally responses (rows) by race (columns).
table(x$attrally, x$race)
##
## black other white
## have done it in the more distant past 29 13 251
## have done it in the past yr 22 8 153
## have not done it and would never do it 84 37 285
## have not done it but might do it 81 38 466
# Recode attrally as a factor whose levels run from "would never do it" up to
# "have done it in the past yr", relabelled "0" through "3".
# Levels and labels are given in a single factor() call so that each label is
# tied explicitly to its response text. Relabelling in a second factor() call
# re-derives the levels from the observed values, which fails with a length
# mismatch whenever one of the four responses has no observations.
# Responses not listed in `levels` become NA.
x$attrally <- factor(
  x$attrally,
  levels = c(
    "have not done it and would never do it",
    "have not done it but might do it",
    "have done it in the more distant past",
    "have done it in the past yr"
  ),
  labels = c("0", "1", "2", "3")
)

# Review the contingency table with the reordered, relabelled rows.
table(x$attrally, x$race)
##
## black other white
## 0 84 37 285
## 1 81 38 466
## 2 29 13 251
## 3 22 8 153
# Keep the attrally-by-race contingency table for the test and the plot.
tab <- table(x$attrally, x$race)

# Pearson's chi-squared test on the contingency table.
chisq.test(x = tab)
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 29.23, df = 6, p-value = 0.00005505
# Draw a colored mosaic plot of the contingency table.
mosaicplot(
  x = tab,
  main = "Association Between Race and Protest",
  color = TRUE
)