library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(readr)
# Import the NHIS extract; the path is relative to the working directory.
NHIS <- read_csv(file = "Downloads/NHIS.csv")
## Parsed with column specification:
## cols(
##   health = col_double(),
##   sex = col_double(),
##   age = col_double(),
##   pooryn = col_double()
## )
# Open the spreadsheet-style viewer only in an interactive session: View()
# needs a GUI data viewer, which is not available (and may fail) when the
# script is knitted or run with Rscript.
if (interactive()) {
  View(NHIS)
}
# Print the first rows as a quick check of the imported columns.
head(NHIS)
## # A tibble: 6 x 4
##   health   sex   age pooryn
##    <dbl> <dbl> <dbl>  <dbl>
## 1      1     2    33      2
## 2      2     2    52      1
## 3      1     1    41      1
## 4      2     1    67      1
## 5      3     1    25      2
## 6      5     2    61      1
# Convert the numeric `health` codes (1-5) into an ordered-by-level factor.
# `levels` must list the values actually stored in the column, and the text
# shown to the reader goes in `labels`. Supplying the text as `levels` matches
# none of the numeric codes and silently turns the whole column into NA.
new_NHIS <- NHIS %>%
  mutate(
    health = factor(
      health,
      levels = 1:5,
      labels = c("Excellent", "Very Good", "Good", "Fair", "Poor")
    )
  )

How are people above the poverty line and people below it distributed across the health categories?

# Recode the numeric health codes to their text labels:
#   1 = Excellent, 2 = Very Good, 3 = Good, 4 = Fair, 5 = Poor.
# Any other code (or a missing value) becomes NA.
# The recode starts from the raw `NHIS` column so that it does not depend on
# any earlier transformation of `health`: comparing against the numbers 1-5
# only works while the column still holds the numeric codes. The result is a
# factor, so the categories keep their natural order in tables and plots.
new_NHIS <- NHIS %>%
  mutate(
    health = factor(
      health,
      levels = 1:5,
      labels = c("Excellent", "Very Good", "Good", "Fair", "Poor")
    )
  )

What is the average age of people in each category of health?

# Mean age within each health category, one row per category.
# Only `health` and `age` are used, so no column selection is needed first.
new_NHIS %>%
  group_by(health) %>%
  summarise(AVERAGE = mean(age))
## # A tibble: 1 x 2
##   health AVERAGE
##   <lgl>    <dbl>
## 1 NA        47.7

Compare the age distributions of people in each health category.