library(plyr) # Important that this one come first.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'tibble' was built under R version 3.3.3
## Conflicts with tidy packages ----------------------------------------------
## arrange():   dplyr, plyr
## compact():   purrr, plyr
## count():     dplyr, plyr
## failwith():  dplyr, plyr
## filter():    dplyr, stats
## id():        dplyr, plyr
## lag():       dplyr, stats
## mutate():    dplyr, plyr
## rename():    dplyr, plyr
## summarise(): dplyr, plyr
## summarize(): dplyr, plyr
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.3.3
library(openintro)
## Please visit openintro.org for free statistics materials
## 
## Attaching package: 'openintro'
## The following object is masked from 'package:datasets':
## 
##     cars

This week we learned how to test whether categorical variables are independent by finding the probability (p-value) of the observed counts under the assumption of independence. I am going to look at the cars dataset to see whether city mpg and vehicle type are independent.

# Inspect the structure of the cars data (openintro's copy, which masks
# datasets::cars): column names, types, and the first few values.
str(object = cars)
## 'data.frame':    54 obs. of  6 variables:
##  $ type      : Factor w/ 3 levels "large","midsize",..: 3 2 2 2 2 1 1 2 1 2 ...
##  $ price     : num  15.9 33.9 37.7 30 15.7 20.8 23.7 26.3 34.7 40.1 ...
##  $ mpgCity   : int  25 18 19 22 22 19 16 19 16 16 ...
##  $ driveTrain: Factor w/ 3 levels "4WD","front",..: 2 2 2 3 2 2 3 2 2 2 ...
##  $ passengers: int  5 5 6 4 6 6 6 5 6 5 ...
##  $ weight    : int  2705 3560 3405 3640 2880 3470 4105 3495 3620 3935 ...
# Two-way frequency table: one row per distinct city-mpg value, one column per
# vehicle type.
table(cars[["mpgCity"]], cars[["type"]])
##     
##      large midsize small
##   16     2       1     0
##   17     1       2     0
##   18     2       4     0
##   19     3       5     0
##   20     3       2     0
##   21     0       4     0
##   22     0       3     1
##   23     0       1     2
##   25     0       0     3
##   28     0       0     2
##   29     0       0     6
##   31     0       0     2
##   32     0       0     1
##   33     0       0     1
##   39     0       0     1
##   42     0       0     1
##   46     0       0     1
# Cross-tabulate city mpg (rows) against vehicle type (columns) and request
# Pearson's chi-squared test of independence.
# NOTE: mpgCity is an integer column used here as a categorical variable with
# one level per distinct value, so most cells hold very small counts; this is
# what triggers the "Chi-squared approximation may be incorrect" warning.
CrossTable(
  x = cars$mpgCity,
  y = cars$type,
  chisq = TRUE
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation
## may be incorrect
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  54 
## 
##  
##              | cars$type 
## cars$mpgCity |     large |   midsize |     small | Row Total | 
## -------------|-----------|-----------|-----------|-----------|
##           16 |         2 |         1 |         0 |         3 | 
##              |     3.157 |     0.040 |     1.167 |           | 
##              |     0.667 |     0.333 |     0.000 |     0.056 | 
##              |     0.182 |     0.045 |     0.000 |           | 
##              |     0.037 |     0.019 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           17 |         1 |         2 |         0 |         3 | 
##              |     0.247 |     0.495 |     1.167 |           | 
##              |     0.333 |     0.667 |     0.000 |     0.056 | 
##              |     0.091 |     0.091 |     0.000 |           | 
##              |     0.019 |     0.037 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           18 |         2 |         4 |         0 |         6 | 
##              |     0.495 |     0.990 |     2.333 |           | 
##              |     0.333 |     0.667 |     0.000 |     0.111 | 
##              |     0.182 |     0.182 |     0.000 |           | 
##              |     0.037 |     0.074 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           19 |         3 |         5 |         0 |         8 | 
##              |     1.152 |     0.930 |     3.111 |           | 
##              |     0.375 |     0.625 |     0.000 |     0.148 | 
##              |     0.273 |     0.227 |     0.000 |           | 
##              |     0.056 |     0.093 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           20 |         3 |         2 |         0 |         5 | 
##              |     3.855 |     0.001 |     1.944 |           | 
##              |     0.600 |     0.400 |     0.000 |     0.093 | 
##              |     0.273 |     0.091 |     0.000 |           | 
##              |     0.056 |     0.037 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           21 |         0 |         4 |         0 |         4 | 
##              |     0.815 |     3.448 |     1.556 |           | 
##              |     0.000 |     1.000 |     0.000 |     0.074 | 
##              |     0.000 |     0.182 |     0.000 |           | 
##              |     0.000 |     0.074 |     0.000 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           22 |         0 |         3 |         1 |         4 | 
##              |     0.815 |     1.152 |     0.198 |           | 
##              |     0.000 |     0.750 |     0.250 |     0.074 | 
##              |     0.000 |     0.136 |     0.048 |           | 
##              |     0.000 |     0.056 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           23 |         0 |         1 |         2 |         3 | 
##              |     0.611 |     0.040 |     0.595 |           | 
##              |     0.000 |     0.333 |     0.667 |     0.056 | 
##              |     0.000 |     0.045 |     0.095 |           | 
##              |     0.000 |     0.019 |     0.037 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           25 |         0 |         0 |         3 |         3 | 
##              |     0.611 |     1.222 |     2.881 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.056 | 
##              |     0.000 |     0.000 |     0.143 |           | 
##              |     0.000 |     0.000 |     0.056 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           28 |         0 |         0 |         2 |         2 | 
##              |     0.407 |     0.815 |     1.921 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.037 | 
##              |     0.000 |     0.000 |     0.095 |           | 
##              |     0.000 |     0.000 |     0.037 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           29 |         0 |         0 |         6 |         6 | 
##              |     1.222 |     2.444 |     5.762 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.111 | 
##              |     0.000 |     0.000 |     0.286 |           | 
##              |     0.000 |     0.000 |     0.111 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           31 |         0 |         0 |         2 |         2 | 
##              |     0.407 |     0.815 |     1.921 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.037 | 
##              |     0.000 |     0.000 |     0.095 |           | 
##              |     0.000 |     0.000 |     0.037 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           32 |         0 |         0 |         1 |         1 | 
##              |     0.204 |     0.407 |     0.960 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.019 | 
##              |     0.000 |     0.000 |     0.048 |           | 
##              |     0.000 |     0.000 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           33 |         0 |         0 |         1 |         1 | 
##              |     0.204 |     0.407 |     0.960 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.019 | 
##              |     0.000 |     0.000 |     0.048 |           | 
##              |     0.000 |     0.000 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           39 |         0 |         0 |         1 |         1 | 
##              |     0.204 |     0.407 |     0.960 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.019 | 
##              |     0.000 |     0.000 |     0.048 |           | 
##              |     0.000 |     0.000 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           42 |         0 |         0 |         1 |         1 | 
##              |     0.204 |     0.407 |     0.960 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.019 | 
##              |     0.000 |     0.000 |     0.048 |           | 
##              |     0.000 |     0.000 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
##           46 |         0 |         0 |         1 |         1 | 
##              |     0.204 |     0.407 |     0.960 |           | 
##              |     0.000 |     0.000 |     1.000 |     0.019 | 
##              |     0.000 |     0.000 |     0.048 |           | 
##              |     0.000 |     0.000 |     0.019 |           | 
## -------------|-----------|-----------|-----------|-----------|
## Column Total |        11 |        22 |        21 |        54 | 
##              |     0.204 |     0.407 |     0.389 |           | 
## -------------|-----------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  58.60032     d.f. =  32     p =  0.002806551 
## 
## 
## 

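As expected, the small p-value in the output above (p ≈ 0.0028) indicates that city mpg and type of vehicle are not independent. Note, however, that R warns the chi-squared approximation may be incorrect: with 17 distinct mpg values and only 54 cars, most cells contain very few observations, so the p-value should be interpreted with caution.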

# Mosaic plot of vehicle type against city mpg; shade = TRUE asks vcd to shade
# the tiles (by residuals from the independence model, per the vcd docs).
mosaic(
  ~ type + mpgCity,
  data = cars,
  shade = TRUE
)

Based on the graph, if the two variables were independent we would expect to see more small vehicles at around 20-23 mpg city than were actually observed.