# Restore the objects saved in the module's .Rdata file into the workspace
# (presumably this is what provides the `cdc` data frame used below -- verify).
load(file = "~/Dropbox/RProjects/Module 8/cdc.Rdata")
library(tidyverse)
## ── Attaching packages ──────────────────
## ✔ ggplot2 2.2.1.9000     ✔ purrr   0.2.4     
## ✔ tibble  1.4.2          ✔ dplyr   0.7.4     
## ✔ tidyr   0.8.0          ✔ stringr 1.3.0     
## ✔ readr   1.1.1          ✔ forcats 0.3.0
## ── Conflicts ── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(gmodels)
# Print a compact column-by-column overview (type and first values) of cdc.
cdc %>%
  glimpse()
## Observations: 20,000
## Variables: 9
## $ genhlth  <fct> good, good, good, good, very good, very good, very go...
## $ exerany  <dbl> 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0,...
## $ hlthplan <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,...
## $ smoke100 <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,...
## $ height   <dbl> 70, 64, 60, 66, 61, 64, 71, 67, 65, 70, 69, 69, 66, 7...
## $ weight   <int> 175, 125, 105, 132, 150, 114, 194, 170, 150, 180, 186...
## $ wtdesire <int> 175, 115, 105, 124, 130, 114, 185, 160, 130, 170, 175...
## $ age      <int> 77, 33, 49, 42, 55, 55, 31, 45, 27, 44, 46, 62, 21, 6...
## $ gender   <fct> m, f, f, f, f, f, m, m, f, m, m, m, m, m, m, m, m, m,...
# Two-way contingency table: gender (rows) by exerany (columns).
ext <- table(cdc[["gender"]], cdc[["exerany"]])

# Draw the table as a mosaic plot. The object name `ext` is kept as-is:
# mosaicplot() derives its default title from the argument expression.
mosaicplot(ext)

# Cross-tabulate gender against exerany and request the chi-squared tests.
# The printed row/column headers echo the argument expressions
# (`cdc$gender`, `cdc$exerany`), so they are deliberately written with `$`.
CrossTable(
  x = cdc$gender,
  y = cdc$exerany,
  chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  20000 
## 
##  
##              | cdc$exerany 
##   cdc$gender |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##            m |      2149 |      7420 |      9569 | 
##              |    33.238 |    11.335 |           | 
##              |     0.225 |     0.775 |     0.478 | 
##              |     0.423 |     0.498 |           | 
##              |     0.107 |     0.371 |           | 
## -------------|-----------|-----------|-----------|
##            f |      2937 |      7494 |     10431 | 
##              |    30.491 |    10.398 |           | 
##              |     0.282 |     0.718 |     0.522 | 
##              |     0.577 |     0.502 |           | 
##              |     0.147 |     0.375 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |      5086 |     14914 |     20000 | 
##              |     0.254 |     0.746 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  85.4626     d.f. =  1     p =  2.361258e-20 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  85.16236     d.f. =  1     p =  2.748445e-20 
## 
## 
# Add wtdiffp: the gap between weight and wtdesire, expressed as a
# fraction of weight (positive when weight exceeds wtdesire).
cdc[["wtdiffp"]] <- with(cdc, (weight - wtdesire) / weight)

# Boxplot of wtdiffp by gender, restricted to rows where wtdiffp > -0.5
# (filter() also drops rows where the comparison is NA).
ggplot(
  data = filter(cdc, wtdiffp > -0.5),
  mapping = aes(x = gender, y = wtdiffp)
) +
  geom_boxplot()