library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(readr)
# Load the NHIS extract; the path is relative to the current working directory.
NHIS <- read_csv(file = "Downloads/NHIS.csv")
## Parsed with column specification:
## cols(
## health = col_double(),
## sex = col_double(),
## age = col_double(),
## pooryn = col_double()
## )
# Open the spreadsheet-style viewer only in an interactive session; View() has
# no useful effect (and can fail) when the script is knitted or run in batch.
if (interactive()) {
  View(NHIS)
}
# Print the first rows to check the column names and how values are coded.
head(NHIS)
## # A tibble: 6 x 4
## health sex age pooryn
## <dbl> <dbl> <dbl> <dbl>
## 1 1 2 33 2
## 2 2 2 52 1
## 3 1 1 41 1
## 4 2 1 67 1
## 5 3 1 25 2
## 6 5 2 61 1
# Turn the numeric health codes into a labelled factor.
# `health` is stored as numbers in NHIS, so `levels` must list the codes and
# `labels` the display names. Passing the names as `levels` matches no value
# and silently converts the whole column to NA.
# Mapping: 1 = Excellent, 2 = Very Good, 3 = Good, 4 = Fair, 5 = Poor.
# After this step `health` holds labels rather than codes, so downstream code
# should compare against the labels.
new_NHIS <- NHIS %>%
  mutate(health = factor(
    health,
    levels = 1:5,
    labels = c("Excellent", "Very Good", "Good", "Fair", "Poor")
  ))
How are people above and below the poverty line distributed across the categories of health?
# Label the health codes: 1 = Excellent, 2 = Very Good, 3 = Good, 4 = Fair,
# 5 = Poor. Any other code becomes NA.
# The recode starts from the raw numeric codes in NHIS so that it does not
# depend on what an earlier step did to `health`; comparing an already
# converted column against 1..5 would yield NA for every row.
# A factor (rather than plain text) keeps the categories in their natural
# order, Excellent -> Poor, in grouped summaries and plots.
health_labels <- c("Excellent", "Very Good", "Good", "Fair", "Poor")
new_NHIS <- NHIS %>%
  mutate(health = factor(
    health,
    levels = seq_along(health_labels),
    labels = health_labels
  ))
What is the average age of people in each category of health?
# Mean age within each health category: one row per distinct `health` value,
# with the mean reported in the AVERAGE column.
new_NHIS %>%
  group_by(health) %>%
  summarise(AVERAGE = mean(age))
## # A tibble: 1 x 2
## health AVERAGE
## <lgl> <dbl>
## 1 NA 47.7
Compare the age distributions of people in each category of health.