library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the NHIS CSV file into `data` and preview its first six rows.
# NOTE(review): the absolute path below is specific to one machine --
# confirm or adjust it before running this script anywhere else.
data <- read.csv(file = "/Users/Nazija/Desktop/SD4 NHIS Data.csv")
head(data, n = 6L)
## health sex bmi
## 1 3 1 33.36
## 2 1 2 20.19
## 3 3 1 27.27
## 4 3 2 38.62
## 5 1 2 39.95
## 6 2 2 18.83
# 1.
# Recode the raw numeric codes in `data` into labelled analysis columns.
# `case_when()` evaluates its conditions in order and yields NA when none
# match, so missing inputs and codes outside the listed values stay NA.
final <- data %>%
  mutate(
    # Health codes 1-5 become text labels.
    health = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    # Sex codes 1-2 become text labels.
    sex = case_when(
      sex == 1 ~ "Male",
      sex == 2 ~ "Female"
    ),
    # Keep only bmi values strictly between 0 and 9999; the rest become NA.
    # NOTE(review): 9999 looks like a missing-value code -- confirm against
    # the data documentation.
    BMI = case_when(bmi > 0 & bmi < 9999 ~ bmi),
    # Bands are half-open, [lower, upper), and use the cleaned BMI column
    # created just above, so an NA BMI gives an NA category.
    BMI_category = case_when(
      BMI < 19 ~ "Underweight",
      BMI < 25 ~ "Normal",
      BMI < 30 ~ "Overweight",
      BMI < 40 ~ "Obese",
      BMI >= 40 ~ "Extremely Obese"
    )
  )
# 2.
# Proportion of rows in each health category, computed over rows whose
# health label is not missing. `health` is kept as a one-column data frame.
health <- final %>%
  filter(!is.na(health)) %>%
  select(health)
prop.table(table(health))
## health
## Excellent Fair Good Poor Very Good
## 0.25221105 0.10985583 0.26938454 0.03313545 0.33541313
# Proportion of rows for each sex label, computed over rows whose sex
# label is not missing. `sex` is kept as a one-column data frame.
sex <- final %>%
  filter(!is.na(sex)) %>%
  select(sex)
prop.table(table(sex))
## sex
## Female Male
## 0.5461124 0.4538876
# Mean of the cleaned BMI column, ignoring rows where BMI is NA.
summarize(final, avgBMI = mean(BMI, na.rm = TRUE))
## avgBMI
## 1 27.96422
# Proportion of rows in each BMI category, computed over rows whose
# category is not missing. Note that `BMI` here is a one-column data frame
# holding BMI_category, not the numeric BMI values.
BMI <- final %>%
  filter(!is.na(BMI_category)) %>%
  select(BMI_category)
prop.table(table(BMI))
## BMI
## Extremely Obese Normal Obese Overweight Underweight
## 0.04937768 0.32231871 0.25212402 0.34928050 0.02689908