library(plyr) # Important that this one come first.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'tibble' was built under R version 3.3.3
## Conflicts with tidy packages ----------------------------------------------
## arrange():   dplyr, plyr
## compact():   purrr, plyr
## count():     dplyr, plyr
## failwith():  dplyr, plyr
## filter():    dplyr, stats
## id():        dplyr, plyr
## lag():       dplyr, stats
## mutate():    dplyr, plyr
## rename():    dplyr, plyr
## summarise(): dplyr, plyr
## summarize(): dplyr, plyr
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.3.3
library(openintro)
## Please visit openintro.org for free statistics materials
## 
## Attaching package: 'openintro'
## The following object is masked from 'package:datasets':
## 
##     cars

This week we learned how to test whether two categorical variables are independent. I am going to look at the cars dataset to see if there is independence between city mpg and a vehicle's drivetrain.

# Inspect the structure of `cars`. Attaching openintro masks datasets::cars,
# so this is openintro's data set.
utils::str(cars)
## 'data.frame':    54 obs. of  6 variables:
##  $ type      : Factor w/ 3 levels "large","midsize",..: 3 2 2 2 2 1 1 2 1 2 ...
##  $ price     : num  15.9 33.9 37.7 30 15.7 20.8 23.7 26.3 34.7 40.1 ...
##  $ mpgCity   : int  25 18 19 22 22 19 16 19 16 16 ...
##  $ driveTrain: Factor w/ 3 levels "4WD","front",..: 2 2 2 3 2 2 3 2 2 2 ...
##  $ passengers: int  5 5 6 4 6 6 6 5 6 5 ...
##  $ weight    : int  2705 3560 3405 3640 2880 3470 4105 3495 3620 3935 ...
# Two-way contingency table of counts: rows are city mpg values, columns are
# drivetrain levels.
table(cars[["mpgCity"]], cars[["driveTrain"]])
##     
##      4WD front rear
##   16   0     2    1
##   17   0     1    2
##   18   0     3    3
##   19   0     6    2
##   20   0     5    0
##   21   0     4    0
##   22   0     3    1
##   23   0     3    0
##   25   1     2    0
##   28   0     2    0
##   29   0     6    0
##   31   0     2    0
##   32   0     1    0
##   33   1     0    0
##   39   0     1    0
##   42   0     1    0
##   46   0     1    0
# Cross-tabulate city mpg against drivetrain; chisq = TRUE additionally
# reports Pearson's chi-squared test of independence for the table.
CrossTable(x = cars$mpgCity, y = cars$driveTrain, chisq = TRUE)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation
## may be incorrect
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  54 
## 
##  
##              | cars$driveTrain 
## cars$mpgCity |       4WD |     front |      rear | Row Total | 
## -------------|-----------|-----------|-----------|-----------|
##           16 |         0 |         2 |         1 |         3 | 
##              |     0.111 |     0.063 |     0.500 |           | 
##              |     0.000 |     0.667 |     0.333 |     0.056 | 
##              |     0.000 |     0.047 |     0.111 |           | 
##              |     0.000 |     0.037 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           17 |         0 |         1 |         2 |         3 | 
##              |     0.111 |     0.807 |     4.500 |           | 
##              |     0.000 |     0.333 |     0.667 |     0.056 | 
##              |     0.000 |     0.023 |     0.222 |           | 
##              |     0.000 |     0.019 |     0.037 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           18 |         0 |         3 |         3 |         6 | 
##              |     0.222 |     0.661 |     4.000 |           | 
##              |     0.000 |     0.500 |     0.500 |     0.111 | 
##              |     0.000 |     0.070 |     0.333 |           | 
##              |     0.000 |     0.056 |     0.056 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           19 |         0 |         6 |         2 |         8 | 
##              |     0.296 |     0.022 |     0.333 |           | 
##              |     0.000 |     0.750 |     0.250 |     0.148 | 
##              |     0.000 |     0.140 |     0.222 |           | 
##              |     0.000 |     0.111 |     0.037 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           20 |         0 |         5 |         0 |         5 | 
##              |     0.185 |     0.261 |     0.833 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.093 | 
##              |     0.000 |     0.116 |     0.000 |           | 
##              |     0.000 |     0.093 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           21 |         0 |         4 |         0 |         4 | 
##              |     0.148 |     0.208 |     0.667 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.074 | 
##              |     0.000 |     0.093 |     0.000 |           | 
##              |     0.000 |     0.074 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           22 |         0 |         3 |         1 |         4 | 
##              |     0.148 |     0.011 |     0.167 |           | 
##              |     0.000 |     0.750 |     0.250 |     0.074 | 
##              |     0.000 |     0.070 |     0.111 |           | 
##              |     0.000 |     0.056 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           23 |         0 |         3 |         0 |         3 | 
##              |     0.111 |     0.156 |     0.500 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.056 | 
##              |     0.000 |     0.070 |     0.000 |           | 
##              |     0.000 |     0.056 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           25 |         1 |         2 |         0 |         3 | 
##              |     7.111 |     0.063 |     0.500 |           | 
##              |     0.333 |     0.667 |     0.000 |     0.056 | 
##              |     0.500 |     0.047 |     0.000 |           | 
##              |     0.019 |     0.037 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           28 |         0 |         2 |         0 |         2 | 
##              |     0.074 |     0.104 |     0.333 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.037 | 
##              |     0.000 |     0.047 |     0.000 |           | 
##              |     0.000 |     0.037 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           29 |         0 |         6 |         0 |         6 | 
##              |     0.222 |     0.313 |     1.000 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.111 | 
##              |     0.000 |     0.140 |     0.000 |           | 
##              |     0.000 |     0.111 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           31 |         0 |         2 |         0 |         2 | 
##              |     0.074 |     0.104 |     0.333 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.037 | 
##              |     0.000 |     0.047 |     0.000 |           | 
##              |     0.000 |     0.037 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           32 |         0 |         1 |         0 |         1 | 
##              |     0.037 |     0.052 |     0.167 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.019 | 
##              |     0.000 |     0.023 |     0.000 |           | 
##              |     0.000 |     0.019 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           33 |         1 |         0 |         0 |         1 | 
##              |    25.037 |     0.796 |     0.167 |           | 
##              |     1.000 |     0.000 |     0.000 |     0.019 | 
##              |     0.500 |     0.000 |     0.000 |           | 
##              |     0.019 |     0.000 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           39 |         0 |         1 |         0 |         1 | 
##              |     0.037 |     0.052 |     0.167 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.019 | 
##              |     0.000 |     0.023 |     0.000 |           | 
##              |     0.000 |     0.019 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           42 |         0 |         1 |         0 |         1 | 
##              |     0.037 |     0.052 |     0.167 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.019 | 
##              |     0.000 |     0.023 |     0.000 |           | 
##              |     0.000 |     0.019 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           46 |         0 |         1 |         0 |         1 | 
##              |     0.037 |     0.052 |     0.167 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.019 | 
##              |     0.000 |     0.023 |     0.000 |           | 
##              |     0.000 |     0.019 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
## Column Total |         2 |        43 |         9 |        54 | 
##              |     0.037 |     0.796 |     0.167 |           | 
## -------------|-----------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  52.27907     d.f. =  32     p =  0.01328178 
## 
## 
## 

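As expected, the p-value shown in the test output (p = 0.013, below the usual 0.05 threshold) leads us to reject the hypothesis that city mpg and the drivetrain of a vehicle are independent. Note, however, that R warns the chi-squared approximation may be incorrect, most likely because many cells in the table have very small counts, so this conclusion should be treated with caution.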

# Mosaic plot of drivetrain by city mpg; shade = TRUE requests residual-based
# shading of the tiles (see vcd's mosaic documentation).
mosaic(
  ~ driveTrain + mpgCity,
  data = cars,
  shade = TRUE
)

Based on the graph, we see more 4WD cars around 33 mpgCity than we would expect under independence.