library(plyr) # Important that this one come first.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'tibble' was built under R version 3.3.3
## Conflicts with tidy packages ----------------------------------------------
## arrange(): dplyr, plyr
## compact(): purrr, plyr
## count(): dplyr, plyr
## failwith(): dplyr, plyr
## filter(): dplyr, stats
## id(): dplyr, plyr
## lag(): dplyr, stats
## mutate(): dplyr, plyr
## rename(): dplyr, plyr
## summarise(): dplyr, plyr
## summarize(): dplyr, plyr
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.3.3
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following object is masked from 'package:datasets':
##
## cars
This week we learned how to test whether two categorical variables are independent. I am going to look at the cars dataset to see whether city mpg and vehicle type are independent of each other.
# Inspect the structure of `cars`. Because openintro was attached last, this is
# openintro's `cars` (it masks the `cars` object from package:datasets).
str(cars)
## 'data.frame': 54 obs. of 6 variables:
## $ type : Factor w/ 3 levels "large","midsize",..: 3 2 2 2 2 1 1 2 1 2 ...
## $ price : num 15.9 33.9 37.7 30 15.7 20.8 23.7 26.3 34.7 40.1 ...
## $ mpgCity : int 25 18 19 22 22 19 16 19 16 16 ...
## $ driveTrain: Factor w/ 3 levels "4WD","front",..: 2 2 2 3 2 2 3 2 2 2 ...
## $ passengers: int 5 5 6 4 6 6 6 5 6 5 ...
## $ weight : int 2705 3560 3405 3640 2880 3470 4105 3495 3620 3935 ...
# Two-way frequency table: city mpg values (rows) by vehicle type (columns).
table(cars[["mpgCity"]], cars[["type"]])
##
## large midsize small
## 16 2 1 0
## 17 1 2 0
## 18 2 4 0
## 19 3 5 0
## 20 3 2 0
## 21 0 4 0
## 22 0 3 1
## 23 0 1 2
## 25 0 0 3
## 28 0 0 2
## 29 0 0 6
## 31 0 0 2
## 32 0 0 1
## 33 0 0 1
## 39 0 0 1
## 42 0 0 1
## 46 0 0 1
# Cross-tabulate city mpg against vehicle type and request Pearson's
# chi-squared test of independence. The argument expressions are kept as
# `cars$...` because CrossTable prints them as the row/column labels.
CrossTable(x = cars$mpgCity, y = cars$type, chisq = TRUE)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation
## may be incorrect
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 54
##
##
## | cars$type
## cars$mpgCity | large | midsize | small | Row Total |
## -------------|-----------|-----------|-----------|-----------|
## 16 | 2 | 1 | 0 | 3 |
## | 3.157 | 0.040 | 1.167 | |
## | 0.667 | 0.333 | 0.000 | 0.056 |
## | 0.182 | 0.045 | 0.000 | |
## | 0.037 | 0.019 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 17 | 1 | 2 | 0 | 3 |
## | 0.247 | 0.495 | 1.167 | |
## | 0.333 | 0.667 | 0.000 | 0.056 |
## | 0.091 | 0.091 | 0.000 | |
## | 0.019 | 0.037 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 18 | 2 | 4 | 0 | 6 |
## | 0.495 | 0.990 | 2.333 | |
## | 0.333 | 0.667 | 0.000 | 0.111 |
## | 0.182 | 0.182 | 0.000 | |
## | 0.037 | 0.074 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 19 | 3 | 5 | 0 | 8 |
## | 1.152 | 0.930 | 3.111 | |
## | 0.375 | 0.625 | 0.000 | 0.148 |
## | 0.273 | 0.227 | 0.000 | |
## | 0.056 | 0.093 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 20 | 3 | 2 | 0 | 5 |
## | 3.855 | 0.001 | 1.944 | |
## | 0.600 | 0.400 | 0.000 | 0.093 |
## | 0.273 | 0.091 | 0.000 | |
## | 0.056 | 0.037 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 21 | 0 | 4 | 0 | 4 |
## | 0.815 | 3.448 | 1.556 | |
## | 0.000 | 1.000 | 0.000 | 0.074 |
## | 0.000 | 0.182 | 0.000 | |
## | 0.000 | 0.074 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|
## 22 | 0 | 3 | 1 | 4 |
## | 0.815 | 1.152 | 0.198 | |
## | 0.000 | 0.750 | 0.250 | 0.074 |
## | 0.000 | 0.136 | 0.048 | |
## | 0.000 | 0.056 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 23 | 0 | 1 | 2 | 3 |
## | 0.611 | 0.040 | 0.595 | |
## | 0.000 | 0.333 | 0.667 | 0.056 |
## | 0.000 | 0.045 | 0.095 | |
## | 0.000 | 0.019 | 0.037 | |
## -------------|-----------|-----------|-----------|-----------|
## 25 | 0 | 0 | 3 | 3 |
## | 0.611 | 1.222 | 2.881 | |
## | 0.000 | 0.000 | 1.000 | 0.056 |
## | 0.000 | 0.000 | 0.143 | |
## | 0.000 | 0.000 | 0.056 | |
## -------------|-----------|-----------|-----------|-----------|
## 28 | 0 | 0 | 2 | 2 |
## | 0.407 | 0.815 | 1.921 | |
## | 0.000 | 0.000 | 1.000 | 0.037 |
## | 0.000 | 0.000 | 0.095 | |
## | 0.000 | 0.000 | 0.037 | |
## -------------|-----------|-----------|-----------|-----------|
## 29 | 0 | 0 | 6 | 6 |
## | 1.222 | 2.444 | 5.762 | |
## | 0.000 | 0.000 | 1.000 | 0.111 |
## | 0.000 | 0.000 | 0.286 | |
## | 0.000 | 0.000 | 0.111 | |
## -------------|-----------|-----------|-----------|-----------|
## 31 | 0 | 0 | 2 | 2 |
## | 0.407 | 0.815 | 1.921 | |
## | 0.000 | 0.000 | 1.000 | 0.037 |
## | 0.000 | 0.000 | 0.095 | |
## | 0.000 | 0.000 | 0.037 | |
## -------------|-----------|-----------|-----------|-----------|
## 32 | 0 | 0 | 1 | 1 |
## | 0.204 | 0.407 | 0.960 | |
## | 0.000 | 0.000 | 1.000 | 0.019 |
## | 0.000 | 0.000 | 0.048 | |
## | 0.000 | 0.000 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 33 | 0 | 0 | 1 | 1 |
## | 0.204 | 0.407 | 0.960 | |
## | 0.000 | 0.000 | 1.000 | 0.019 |
## | 0.000 | 0.000 | 0.048 | |
## | 0.000 | 0.000 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 39 | 0 | 0 | 1 | 1 |
## | 0.204 | 0.407 | 0.960 | |
## | 0.000 | 0.000 | 1.000 | 0.019 |
## | 0.000 | 0.000 | 0.048 | |
## | 0.000 | 0.000 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 42 | 0 | 0 | 1 | 1 |
## | 0.204 | 0.407 | 0.960 | |
## | 0.000 | 0.000 | 1.000 | 0.019 |
## | 0.000 | 0.000 | 0.048 | |
## | 0.000 | 0.000 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## 46 | 0 | 0 | 1 | 1 |
## | 0.204 | 0.407 | 0.960 | |
## | 0.000 | 0.000 | 1.000 | 0.019 |
## | 0.000 | 0.000 | 0.048 | |
## | 0.000 | 0.000 | 0.019 | |
## -------------|-----------|-----------|-----------|-----------|
## Column Total | 11 | 22 | 21 | 54 |
## | 0.204 | 0.407 | 0.389 | |
## -------------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 58.60032 d.f. = 32 p = 0.002806551
##
##
##
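As expected, the p-value shown in the output above (p ≈ 0.0028) indicates that city mpg and the type of vehicle are not independent variables. Note, however, that R warned the chi-squared approximation may be incorrect: many cells in the table have very small counts, so this result should be interpreted with caution.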
# Mosaic plot of vehicle type against city mpg; `shade = TRUE` turns on
# vcd's shading of the tiles (see the vcd::mosaic documentation).
mosaic(
  ~ type + mpgCity,
  data = cars,
  shade = TRUE
)
Based on the graph, if city mpg and vehicle type were independent, we would expect to see more small vehicles in the 20-23 city mpg range than we actually observe.