Load the necessary packages, then import the data into R and name it Data.
library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the NHIS extract (columns health, sex, bmi) into a tibble.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
Data <- read_csv(file = "/Users/sakif/Downloads/SD4 NHIS Data.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## health = col_double(),
## sex = col_double(),
## bmi = col_double()
## )
# Print the imported tibble to inspect its columns and first rows.
Data
## # A tibble: 33,028 x 3
## health sex bmi
## <dbl> <dbl> <dbl>
## 1 3 1 33.4
## 2 1 2 20.2
## 3 3 1 27.3
## 4 3 2 38.6
## 5 1 2 40.0
## 6 2 2 18.8
## 7 2 2 19.7
## 8 3 2 26.2
## 9 2 2 20.4
## 10 1 2 23.0
## # … with 33,018 more rows
# Recode the numeric survey codes into labelled columns and keep only those.
#   Health: 1-5 -> "Excellent", "Very Good", "Good", "Fair", "Poor"
#   Gender: 1 -> "Male", 2 -> "Female"
#   BMI:    0 and values >= 9999 become NA (presumably missing-value
#           sentinel codes; confirm against the codebook); others are kept.
# Any code not listed (including NA) becomes NA.
# case_when() replaces the nested ifelse() chains: it reads top to bottom and
# always returns the declared type, even when every input is NA.
Recoding_Data <- Data %>%
  mutate(
    Health = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    Gender = case_when(
      sex == 1 ~ "Male",
      sex == 2 ~ "Female"
    ),
    BMI = case_when(
      bmi == 0 | bmi >= 9999 ~ NA_real_,
      TRUE ~ bmi
    )
  ) %>%
  select(Health, Gender, BMI)
# Print the recoded tibble to check the new labels.
Recoding_Data
## # A tibble: 33,028 x 3
## Health Gender BMI
## <chr> <chr> <dbl>
## 1 Good Male 33.4
## 2 Excellent Female 20.2
## 3 Good Male 27.3
## 4 Good Female 38.6
## 5 Excellent Female 40.0
## 6 Very Good Female 18.8
## 7 Very Good Female 19.7
## 8 Good Female 26.2
## 9 Very Good Female 20.4
## 10 Excellent Female 23.0
## # … with 33,018 more rows
# Add a BMI_Category column that bins BMI into labelled ranges:
#   < 19 "Underweight", [19, 25) "Normal", [25, 30) "Overweight",
#   [30, 40) "Obese", >= 40 "Extremely Obese"; a missing BMI stays NA.
# case_when() evaluates conditions in order, so each branch only needs its
# upper bound; the lower bound is implied by the branches above it.
Extra <- Recoding_Data %>%
  mutate(
    BMI_Category = case_when(
      BMI < 19 ~ "Underweight",
      BMI < 25 ~ "Normal",
      BMI < 30 ~ "Overweight",
      BMI < 40 ~ "Obese",
      BMI >= 40 ~ "Extremely Obese"
    )
  )
# Print the result to check the new category column.
Extra
## # A tibble: 33,028 x 4
## Health Gender BMI BMI_Category
## <chr> <chr> <dbl> <chr>
## 1 Good Male 33.4 Obese
## 2 Excellent Female 20.2 Normal
## 3 Good Male 27.3 Overweight
## 4 Good Female 38.6 Obese
## 5 Excellent Female 40.0 Obese
## 6 Very Good Female 18.8 Underweight
## 7 Very Good Female 19.7 Normal
## 8 Good Female 26.2 Overweight
## 9 Very Good Female 20.4 Normal
## 10 Excellent Female 23.0 Normal
## # … with 33,018 more rows
# Proportion of respondents in each Health category, rounded to 2 decimals.
# Rows with a missing Health value are dropped first.
Health_Data <- filter(Recoding_Data, !is.na(Health))
round(prop.table(table(Health_Data$Health)), 2)
##
## Excellent Fair Good Poor Very Good
## 0.25 0.11 0.27 0.03 0.34
# Proportion of respondents by Gender, rounded to 2 decimals.
# Rows with a missing Gender value are dropped first.
Sex_Data <- filter(Recoding_Data, !is.na(Gender))
round(prop.table(table(Sex_Data$Gender)), 2)
##
## Female Male
## 0.55 0.45
# Mean BMI across all respondents, ignoring missing BMI values.
summarise(Recoding_Data, Avg_BMI = mean(BMI, na.rm = TRUE))
## # A tibble: 1 x 1
## Avg_BMI
## <dbl>
## 1 28.0
# Proportion of respondents in each BMI category, rounded to 2 decimals.
# Rows without a BMI category (missing BMI) are dropped first.
BMI_Category_Data <- filter(Extra, !is.na(BMI_Category))
round(prop.table(table(BMI_Category_Data$BMI_Category)), 2)
##
## Extremely Obese Normal Obese Overweight Underweight
## 0.05 0.32 0.25 0.35 0.03