library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the NHIS extract from a fixed local path into `data`.
data <- read_csv(file = "C:/Users/JaminS/Downloads/SD4 NHIS Data.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## health = col_double(),
## sex = col_double(),
## bmi = col_double()
## )
# Recode the numeric survey codes in `health`, `sex` and `bmi` into labels.
# case_when() returns NA when no condition matches, so missing values and
# codes outside the listed ones end up as NA in the new columns.
data_score <- data %>%
  select(health, sex, bmi) %>%
  mutate(
    healthforall = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor"
    ),
    # NOTE(review): "male" is lower-case while "Female" is capitalised; the
    # labels are kept as they are because summary output uses them verbatim.
    WhichSex = case_when(
      sex == 1 ~ "male",
      sex == 2 ~ "Female"
    ),
    # NOTE(review): this yields the literal string "NA" (not a missing value)
    # for bmi of 0 or >= 9999 and an empty string otherwise - presumably
    # meant to flag unusable BMI readings; confirm the intended encoding.
    BMIstatus = case_when(
      bmi == 0 | bmi >= 9999 ~ "NA",
      bmi < 9999 ~ ""
    )
  )
# Bucket `bmi` into weight categories. Conditions are evaluated top to
# bottom, so each row gets the first (highest) threshold it satisfies.
# Rows with a missing bmi match no condition and get NA.
# NOTE(review): "Extermely obese" is misspelled; the label is left unchanged
# here because the grouped summary output depends on its exact text.
data_outcome <- data %>%
  select(bmi) %>%
  mutate(
    Bmicategory = case_when(
      bmi >= 40 ~ "Extermely obese",
      bmi >= 30 ~ "obese",
      bmi >= 25 ~ "overweight",
      bmi >= 19 ~ "Normal",
      bmi < 19 ~ "underweight"
    )
  )
# Keep only the three recoded label columns for the summaries.
Datasummary <- select(data_score, healthforall, WhichSex, BMIstatus)
# Count rows per health label and express each count as a share of all rows.
Datasummary %>%
  group_by(healthforall) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 3
## healthforall n percent
## <chr> <int> <dbl>
## 1 Excellent 8327 0.252
## 2 Fair 3627 0.110
## 3 Good 8894 0.269
## 4 Poor 1094 0.0331
## 5 Very Good 11074 0.335
## 6 <NA> 12 0.000363
# Count rows per sex label and express each count as a share of all rows.
# The denominator must be sum(n): sum() with no arguments is 0, which made
# every percent come out as Inf.
Datasummary %>%
  group_by(WhichSex) %>%
  summarize(n = n()) %>%
  mutate(percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 3
## WhichSex n percent
## <chr> <int> <dbl>
## 1 Female 18037 0.546
## 2 male 14991 0.454
# Mean of `bmi` across all rows, skipping missing values.
summarise(data, BMIstatusmean = mean(bmi, na.rm = TRUE))
## # A tibble: 1 x 1
## BMIstatusmean
## <dbl>
## 1 28.0
# Count rows per BMI category and express each count as a share of all rows.
# The denominator must be sum(n): sum() with no arguments is 0, which made
# every percent come out as Inf.
data_outcome %>%
  group_by(Bmicategory) %>%
  summarize(n = n()) %>%
  mutate(percent = n / sum(n))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 3
## Bmicategory n percent
## <chr> <int> <dbl>
## 1 Extermely obese 1575 0.0477
## 2 Normal 10281 0.311
## 3 obese 8042 0.243
## 4 overweight 11141 0.337
## 5 underweight 858 0.0260
## 6 <NA> 1131 0.0342