library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Recoding Health, Sex, and BMI Variables
library(ggplot2)
# Load the NHIS extract. The path is relative, so it resolves against the
# current working directory.
SD4_NHIS_Data <- read_csv(
  file = "Downloads/SD4 NHIS Data.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## health = col_double(),
## sex = col_double(),
## bmi = col_double()
## )
# Preview the first rows of the raw data.
# The data frame is intentionally NOT attach()ed: attaching copies its columns
# onto the search path, where they can silently mask (or be masked by) other
# objects and go stale when the data frame is modified. Columns are accessed
# through the data frame itself (pipes or `$`) instead.
head(SD4_NHIS_Data)
## # A tibble: 6 x 3
## health sex bmi
## <dbl> <dbl> <dbl>
## 1 3 1 33.4
## 2 1 2 20.2
## 3 3 1 27.3
## 4 3 2 38.6
## 5 1 2 40.0
## 6 2 2 18.8
# Recode the numeric survey codes into readable labels and drop unusable rows.
#   Health: 1..5 -> "Excellent", "Very Good", "Good", "Fair", "Poor";
#           any other code (or NA) becomes NA.
#   Sex:    1 -> "Male", 2 -> "Female"; any other code (or NA) becomes NA.
#   BMI:    copy of `bmi` with 0 and values >= 9999 replaced by NA
#           (presumably placeholder codes for "not recorded" -- confirm
#           against the survey codebook).
# Both BMI rules live in a single expression on purpose: assigning `BMI` twice
# from the raw `bmi` column makes the second assignment overwrite the first,
# which would let rows with bmi == 0 slip through the filter below.
HealthSexBMI <- SD4_NHIS_Data %>%
  mutate(
    Health = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    Sex = case_when(
      sex == 1 ~ "Male",
      sex == 2 ~ "Female"
    ),
    BMI = ifelse(bmi == 0 | bmi >= 9999, NA, bmi)
  ) %>%
  # Keep only rows where all three recoded variables are present.
  filter(!is.na(Health), !is.na(Sex), !is.na(BMI))
HealthSexBMI
## # A tibble: 31,887 x 6
## health sex bmi Health Sex BMI
## <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 3 1 33.4 Good Male 33.4
## 2 1 2 20.2 Excellent Female 20.2
## 3 3 1 27.3 Good Male 27.3
## 4 3 2 38.6 Good Female 38.6
## 5 1 2 40.0 Excellent Female 40.0
## 6 2 2 18.8 Very Good Female 18.8
## 7 2 2 19.7 Very Good Female 19.7
## 8 3 2 26.2 Good Female 26.2
## 9 2 2 20.4 Very Good Female 20.4
## 10 1 2 23.0 Excellent Female 23.0
## # … with 31,877 more rows
Data Summaries
% of people in each Health category
# Share of respondents in each Health category, rounded to two decimals.
round(prop.table(table(HealthSexBMI$Health)), 2)
##
## Excellent Fair Good Poor Very Good
## 0.25 0.11 0.27 0.03 0.34
% of people of each Sex
# Share of respondents of each Sex, rounded to two decimals.
round(prop.table(table(HealthSexBMI$Sex)), 2)
##
## Female Male
## 0.54 0.46
Mean BMI
# Overall mean of the cleaned BMI column, returned as a one-row tibble with a
# single column `meanBMI`; NA values are ignored.
summarise(
  HealthSexBMI,
  meanBMI = mean(BMI, na.rm = TRUE)
)
## # A tibble: 1 x 1
## meanBMI
## <dbl>
## 1 28.0