library(plyr) # Important that this one come first.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'tibble' was built under R version 3.3.3
## Conflicts with tidy packages ----------------------------------------------
## arrange(): dplyr, plyr
## compact(): purrr, plyr
## count(): dplyr, plyr
## failwith(): dplyr, plyr
## filter(): dplyr, stats
## id(): dplyr, plyr
## lag(): dplyr, stats
## mutate(): dplyr, plyr
## rename(): dplyr, plyr
## summarise(): dplyr, plyr
## summarize(): dplyr, plyr
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.3.3
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following object is masked from 'package:datasets':
##
## cars
This week we learned how to test whether two categorical variables are independent. I am going to look at the cars dataset to see whether a vehicle's city mpg and its drivetrain are independent.
# Inspect the structure of the cars data frame (column names, types, and
# the first few values of each column) before tabulating it.
utils::str(cars)
## 'data.frame': 54 obs. of 6 variables:
## $ type : Factor w/ 3 levels "large","midsize",..: 3 2 2 2 2 1 1 2 1 2 ...
## $ price : num 15.9 33.9 37.7 30 15.7 20.8 23.7 26.3 34.7 40.1 ...
## $ mpgCity : int 25 18 19 22 22 19 16 19 16 16 ...
## $ driveTrain: Factor w/ 3 levels "4WD","front",..: 2 2 2 3 2 2 3 2 2 2 ...
## $ passengers: int 5 5 6 4 6 6 6 5 6 5 ...
## $ weight : int 2705 3560 3405 3640 2880 3470 4105 3495 3620 3935 ...
# Cross-tabulate the raw counts: one row per distinct mpgCity value, one
# column per driveTrain level. Each distinct mpg value is treated as its
# own category here.
table(cars$mpgCity, cars$driveTrain)
##
## 4WD front rear
## 16 0 2 1
## 17 0 1 2
## 18 0 3 3
## 19 0 6 2
## 20 0 5 0
## 21 0 4 0
## 22 0 3 1
## 23 0 3 0
## 25 1 2 0
## 28 0 2 0
## 29 0 6 0
## 31 0 2 0
## 32 0 1 0
## 33 1 0 0
## 39 0 1 0
## 42 0 1 0
## 46 0 1 0
# Build the same mpgCity x driveTrain contingency table with per-cell
# chi-square contributions and row/column/table proportions, and run
# Pearson's chi-squared test (chisq = TRUE).
# NOTE(review): this call emits "Chi-squared approximation may be
# incorrect". The table has 17 mpgCity rows for only 54 observations, so
# most cells hold 0 or 1 cars; the reported p-value should be read with
# caution. Binning mpgCity into a few ranges would likely be more
# appropriate -- confirm before relying on the result.
CrossTable(cars$mpgCity, cars$driveTrain,
chisq = TRUE)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation
## may be incorrect
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 54
##
##
## | cars$driveTrain
## cars$mpgCity | 4WD | front | rear | Row Total |
## -------------|-----------|-----------|-----------|-----------|
## 16 | 0 | 2 | 1 | 3 |
## | 0.111 | 0.063 | 0.500 | |
## | 0.000 | 0.667 | 0.333 | 0.056 |
## | 0.000 | 0.047 | 0.111 | |
## | 0.000 | 0.037 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 17 | 0 | 1 | 2 | 3 |
## | 0.111 | 0.807 | 4.500 | |
## | 0.000 | 0.333 | 0.667 | 0.056 |
## | 0.000 | 0.023 | 0.222 | |
## | 0.000 | 0.019 | 0.037 | |
## -------------|-----------|-----------|-----------|-----------|
## 18 | 0 | 3 | 3 | 6 |
## | 0.222 | 0.661 | 4.000 | |
## | 0.000 | 0.500 | 0.500 | 0.111 |
## | 0.000 | 0.070 | 0.333 | |
## | 0.000 | 0.056 | 0.056 | |
## -------------|-----------|-----------|-----------|-----------|
## 19 | 0 | 6 | 2 | 8 |
## | 0.296 | 0.022 | 0.333 | |
## | 0.000 | 0.750 | 0.250 | 0.148 |
## | 0.000 | 0.140 | 0.222 | |
## | 0.000 | 0.111 | 0.037 | |
## -------------|-----------|-----------|-----------|-----------|
## 20 | 0 | 5 | 0 | 5 |
## | 0.185 | 0.261 | 0.833 | |
## | 0.000 | 1.000 | 0.000 | 0.093 |
## | 0.000 | 0.116 | 0.000 | |
## | 0.000 | 0.093 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 21 | 0 | 4 | 0 | 4 |
## | 0.148 | 0.208 | 0.667 | |
## | 0.000 | 1.000 | 0.000 | 0.074 |
## | 0.000 | 0.093 | 0.000 | |
## | 0.000 | 0.074 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 22 | 0 | 3 | 1 | 4 |
## | 0.148 | 0.011 | 0.167 | |
## | 0.000 | 0.750 | 0.250 | 0.074 |
## | 0.000 | 0.070 | 0.111 | |
## | 0.000 | 0.056 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 23 | 0 | 3 | 0 | 3 |
## | 0.111 | 0.156 | 0.500 | |
## | 0.000 | 1.000 | 0.000 | 0.056 |
## | 0.000 | 0.070 | 0.000 | |
## | 0.000 | 0.056 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 25 | 1 | 2 | 0 | 3 |
## | 7.111 | 0.063 | 0.500 | |
## | 0.333 | 0.667 | 0.000 | 0.056 |
## | 0.500 | 0.047 | 0.000 | |
## | 0.019 | 0.037 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 28 | 0 | 2 | 0 | 2 |
## | 0.074 | 0.104 | 0.333 | |
## | 0.000 | 1.000 | 0.000 | 0.037 |
## | 0.000 | 0.047 | 0.000 | |
## | 0.000 | 0.037 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 29 | 0 | 6 | 0 | 6 |
## | 0.222 | 0.313 | 1.000 | |
## | 0.000 | 1.000 | 0.000 | 0.111 |
## | 0.000 | 0.140 | 0.000 | |
## | 0.000 | 0.111 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 31 | 0 | 2 | 0 | 2 |
## | 0.074 | 0.104 | 0.333 | |
## | 0.000 | 1.000 | 0.000 | 0.037 |
## | 0.000 | 0.047 | 0.000 | |
## | 0.000 | 0.037 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 32 | 0 | 1 | 0 | 1 |
## | 0.037 | 0.052 | 0.167 | |
## | 0.000 | 1.000 | 0.000 | 0.019 |
## | 0.000 | 0.023 | 0.000 | |
## | 0.000 | 0.019 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 33 | 1 | 0 | 0 | 1 |
## | 25.037 | 0.796 | 0.167 | |
## | 1.000 | 0.000 | 0.000 | 0.019 |
## | 0.500 | 0.000 | 0.000 | |
## | 0.019 | 0.000 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 39 | 0 | 1 | 0 | 1 |
## | 0.037 | 0.052 | 0.167 | |
## | 0.000 | 1.000 | 0.000 | 0.019 |
## | 0.000 | 0.023 | 0.000 | |
## | 0.000 | 0.019 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 42 | 0 | 1 | 0 | 1 |
## | 0.037 | 0.052 | 0.167 | |
## | 0.000 | 1.000 | 0.000 | 0.019 |
## | 0.000 | 0.023 | 0.000 | |
## | 0.000 | 0.019 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 46 | 0 | 1 | 0 | 1 |
## | 0.037 | 0.052 | 0.167 | |
## | 0.000 | 1.000 | 0.000 | 0.019 |
## | 0.000 | 0.023 | 0.000 | |
## | 0.000 | 0.019 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## Column Total | 2 | 43 | 9 | 54 |
## | 0.037 | 0.796 | 0.167 | |
## -------------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 52.27907 d.f. = 32 p = 0.01328178
##
##
##
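As expected, the p-value reported below the table (p ≈ 0.013, which is below the usual 0.05 threshold) indicates that city mpg and the drivetrain of a vehicle are not independent. Note, however, that R warned that the chi-squared approximation may be incorrect because many cells contain very few cars, so this conclusion should be read with caution.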
# Mosaic plot of drivetrain against city mpg; shade = TRUE colors each
# tile by its residual from the independence model.
vcd::mosaic(
  ~ driveTrain + mpgCity,
  data = cars,
  shade = TRUE
)
Based on the graph, there are more 4WD cars at around 33 mpgCity than we would expect to see under independence.