library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the NHIS extract from a local CSV file. read_csv() guesses the column
# types and prints the specification it chose.
# NOTE(review): this is an absolute, machine-specific path -- confirm the file
# location before running on another computer.
nhis_csv <- "C:/Users/JaminS/Downloads/SD4 NHIS Data.csv"
data <- read_csv(nhis_csv)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   health = col_double(),
##   sex = col_double(),
##   bmi = col_double()
## )
# Attach readable labels to the coded survey columns.
#   healthforall: health codes 1-5 -> "Excellent" ... "Poor"; anything else NA
#   WhichSex:     sex codes 1 / 2 -> "male" / "Female"; anything else NA
#   BMIstatus:    the text "NA" when bmi is 0 or at least 9999, "" otherwise;
#                 a missing bmi stays missing
# NOTE(review): BMIstatus stores the literal string "NA", not a real missing
# value, so is.na() will not flag those rows. Presumably 0 and 9999 are
# "not reported" codes -- confirm against the survey codebook.
data_score <- data %>%
  select(health, sex, bmi) %>%
  mutate(
    healthforall = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    WhichSex = case_when(
      sex == 1 ~ "male",
      sex == 2 ~ "Female"
    ),
    BMIstatus = case_when(
      bmi == 0 | bmi >= 9999 ~ "NA",
      bmi <= 9999 ~ ""
    )
  )
 # Bucket every BMI value into a weight category by the highest lower bound it
 # reaches: below 19 "underweight", 19 "Normal", 25 "overweight", 30 "obese",
 # 40 "Extermely obese". A missing bmi yields a missing category.
 # The label text (including its spelling) is reproduced exactly because the
 # summary tables further down print these strings.
 data_outcome <- data %>%
   select(bmi) %>%
   mutate(
     Bmicategory = c(
       "underweight", "Normal", "overweight", "obese", "Extermely obese"
     )[findInterval(bmi, c(19, 25, 30, 40)) + 1L]
   )
# Keep only the three labelled columns for the frequency tables below.
Datasummary <- select(data_score, healthforall, WhichSex, BMIstatus)
# Count respondents per self-rated health label, then add each label's share
# of all rows (its count divided by the total count).
health_counts <- summarize(group_by(Datasummary, healthforall), n = n())
mutate(health_counts, percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 3
##   healthforall     n  percent
##   <chr>        <int>    <dbl>
## 1 Excellent     8327 0.252   
## 2 Fair          3627 0.110   
## 3 Good          8894 0.269   
## 4 Poor          1094 0.0331  
## 5 Very Good    11074 0.335   
## 6 <NA>            12 0.000363
# Count respondents per sex label and add each label's share of all rows.
# The denominator must be sum(n): sum() with no arguments is 0, which made
# every share print as Inf.
Datasummary %>%
  group_by(WhichSex) %>%
  summarize(n = n()) %>%
  mutate(percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
##   WhichSex     n percent
##   <chr>    <int>   <dbl>
## 1 Female   18037   0.546
## 2 male     14991   0.454
# Average BMI over all rows of the raw data, ignoring missing values.
summarize(data, BMIstatusmean = mean(bmi, na.rm = TRUE))
## # A tibble: 1 x 1
##   BMIstatusmean
##           <dbl>
## 1          28.0
# Count respondents per BMI category and add each category's share of all
# rows. The denominator must be sum(n): sum() with no arguments is 0, which
# made every share print as Inf.
data_outcome %>%
  group_by(Bmicategory) %>%
  summarize(n = n()) %>%
  mutate(percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 3
##   Bmicategory         n percent
##   <chr>           <int>   <dbl>
## 1 Extermely obese  1575  0.0477
## 2 Normal          10281  0.311 
## 3 obese            8042  0.243 
## 4 overweight      11141  0.337 
## 5 underweight       858  0.0260
## 6 <NA>             1131  0.0342