Import Data

Load the necessary packages, then import the data into R and name it Data.

library(readr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the NHIS extract (columns: health, sex, bmi) into a tibble.
# NOTE(review): the absolute path points at one user's Downloads folder;
# confirm or adjust it before re-running on another machine.
Data <- read_csv(file = "/Users/sakif/Downloads/SD4 NHIS Data.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   health = col_double(),
##   sex = col_double(),
##   bmi = col_double()
## )
# Print the tibble to preview the imported rows.
Data
## # A tibble: 33,028 x 3
##    health   sex   bmi
##     <dbl> <dbl> <dbl>
##  1      3     1  33.4
##  2      1     2  20.2
##  3      3     1  27.3
##  4      3     2  38.6
##  5      1     2  40.0
##  6      2     2  18.8
##  7      2     2  19.7
##  8      3     2  26.2
##  9      2     2  20.4
## 10      1     2  23.0
## # … with 33,018 more rows

Recoding Data

Health, Gender, BMI

# Translate the numeric survey codes into readable labels and blank out
# BMI placeholder values. Any code not listed below becomes NA.
Recoding_Data <- Data %>%
  mutate(
    # Self-rated health: codes 1-5 map to labels; anything else is NA.
    Health = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    # Sex: 1 = Male, 2 = Female; anything else is NA.
    Gender = case_when(
      sex == 1 ~ "Male",
      sex == 2 ~ "Female"
    ),
    # A BMI of 0, or of 9999 and above, is set to NA instead of being kept
    # as a number.
    BMI = ifelse(bmi == 0 | bmi >= 9999, NA, bmi)
  ) %>%
  select(Health, Gender, BMI)

# Show the recoded table.
Recoding_Data
## # A tibble: 33,028 x 3
##    Health    Gender   BMI
##    <chr>     <chr>  <dbl>
##  1 Good      Male    33.4
##  2 Excellent Female  20.2
##  3 Good      Male    27.3
##  4 Good      Female  38.6
##  5 Excellent Female  40.0
##  6 Very Good Female  18.8
##  7 Very Good Female  19.7
##  8 Good      Female  26.2
##  9 Very Good Female  20.4
## 10 Excellent Female  23.0
## # … with 33,018 more rows

Extra

BMI Category

# Add a BMI_Category column that bins BMI into five labelled ranges.
# case_when() takes the first matching row, so each upper bound implies the
# lower bound of the next range. A missing BMI matches nothing and stays NA.
# NOTE(review): cut-offs are kept exactly as written in this analysis
# (e.g. underweight is < 19); confirm they match the intended classification.
Extra <- Recoding_Data %>%
  mutate(
    BMI_Category = case_when(
      BMI < 19 ~ "Underweight",
      BMI < 25 ~ "Normal",
      BMI < 30 ~ "Overweight",
      BMI < 40 ~ "Obese",
      BMI >= 40 ~ "Extremely Obese"
    )
  )

# Show the table with the new category column.
Extra
## # A tibble: 33,028 x 4
##    Health    Gender   BMI BMI_Category
##    <chr>     <chr>  <dbl> <chr>       
##  1 Good      Male    33.4 Obese       
##  2 Excellent Female  20.2 Normal      
##  3 Good      Male    27.3 Overweight  
##  4 Good      Female  38.6 Obese       
##  5 Excellent Female  40.0 Obese       
##  6 Very Good Female  18.8 Underweight 
##  7 Very Good Female  19.7 Normal      
##  8 Good      Female  26.2 Overweight  
##  9 Very Good Female  20.4 Normal      
## 10 Excellent Female  23.0 Normal      
## # … with 33,018 more rows

Data Summary

Health

# Keep only the rows that have a health rating.
Health_Data <- filter(Recoding_Data, !is.na(Health))

# Share of respondents in each health rating, rounded to two decimals.
round(prop.table(table(Health_Data$Health)), digits = 2)
## 
## Excellent      Fair      Good      Poor Very Good 
##      0.25      0.11      0.27      0.03      0.34

Gender

# Keep only the rows that have a recorded gender.
Sex_Data <- filter(Recoding_Data, !is.na(Gender))

# Share of respondents per gender, rounded to two decimals.
round(prop.table(table(Sex_Data$Gender)), digits = 2)
## 
## Female   Male 
##   0.55   0.45

Mean of BMI

# Average BMI over the rows where BMI is present (missing values are ignored).
summarise(Recoding_Data, Avg_BMI = mean(BMI, na.rm = TRUE))
## # A tibble: 1 x 1
##   Avg_BMI
##     <dbl>
## 1    28.0

Extra

BMI Category

# Keep only the rows that were assigned a BMI category.
BMI_Category_Data <- filter(Extra, !is.na(BMI_Category))

# Share of respondents in each BMI category, rounded to two decimals.
round(prop.table(table(BMI_Category_Data$BMI_Category)), digits = 2)
## 
## Extremely Obese          Normal           Obese      Overweight     Underweight 
##            0.05            0.32            0.25            0.35            0.03