# Restore the objects saved in cdc.Rdata into the current environment.
# The code below expects this to provide a data frame named `cdc`.
# NOTE(review): the path is specific to one user's Dropbox folder.
load(file = "~/Dropbox/RProjects/Module 8/cdc.Rdata")
library(tidyverse)
## ── Attaching packages ──────────────────
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(gmodels)
# Print a compact, column-by-column overview of the data (types and first
# few values of each variable).
cdc %>%
  glimpse()
## Observations: 20,000
## Variables: 9
## $ genhlth <fct> good, good, good, good, very good, very good, very go...
## $ exerany <dbl> 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0,...
## $ hlthplan <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,...
## $ smoke100 <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,...
## $ height <dbl> 70, 64, 60, 66, 61, 64, 71, 67, 65, 70, 69, 69, 66, 7...
## $ weight <int> 175, 125, 105, 132, 150, 114, 194, 170, 150, 180, 186...
## $ wtdesire <int> 175, 115, 105, 124, 130, 114, 185, 160, 130, 170, 175...
## $ age <int> 77, 33, 49, 42, 55, 55, 31, 45, 27, 44, 46, 62, 21, 6...
## $ gender <fct> m, f, f, f, f, f, m, m, f, m, m, m, m, m, m, m, m, m,...
# Two-way contingency table: gender in the rows, exerany in the columns,
# visualised as a mosaic plot.
ext <- table(cdc$gender, cdc$exerany)
mosaicplot(ext)

# Same cross-tabulation with per-cell counts, proportions and chi-square
# contributions, followed by Pearson's chi-squared test of independence.
CrossTable(
  x = cdc$gender,
  y = cdc$exerany,
  chisq = TRUE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 20000
##
##
## | cdc$exerany
## cdc$gender | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## m | 2149 | 7420 | 9569 |
## | 33.238 | 11.335 | |
## | 0.225 | 0.775 | 0.478 |
## | 0.423 | 0.498 | |
## | 0.107 | 0.371 | |
## -------------|-----------|-----------|-----------|
## f | 2937 | 7494 | 10431 |
## | 30.491 | 10.398 | |
## | 0.282 | 0.718 | 0.522 |
## | 0.577 | 0.502 | |
## | 0.147 | 0.375 | |
## -------------|-----------|-----------|-----------|
## Column Total | 5086 | 14914 | 20000 |
## | 0.254 | 0.746 | |
## -------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 85.4626 d.f. = 1 p = 2.361258e-20
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 85.16236 d.f. = 1 p = 2.748445e-20
##
##
# Gap between the `weight` and `wtdesire` columns expressed as a fraction of
# `weight`; positive when wtdesire is below weight.
cdc$wtdiffp <- with(cdc, (weight - wtdesire) / weight)

# Box plots of that relative gap by gender, keeping only rows whose
# wtdiffp is greater than -0.5.
cdc %>%
  filter(wtdiffp > -0.5) %>%
  ggplot(aes(x = gender, y = wtdiffp)) +
  geom_boxplot()
