# Load the raw survey extract from CSV into `final`.
# NOTE(review): the path is absolute and machine-specific.
final <- read.csv(file = "/Users/anikalewis/Downloads/SD4 NHIS Data ALT.csv")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

# Recode

# Recode the numeric codes in `final` into readable values.
#   health   : 1..5 -> "Excellent", "Very Good", "Good", "Fair", "Poor";
#              every other value (including NA) becomes NA.
#   sexorien : 1..4 -> "GayOrLesbian", "Straight", "Bisexual", "Other";
#              every other value (including NA) becomes NA.
#   K6       : values greater than 99 become NA; everything else is kept.
# case_when() replaces the nested ifelse() chains: one line per code, and
# the result is always a character column, even when no code matches.
# NOTE(review): presumably the unmatched codes are "unknown/refused"
# categories and 99 is the right K6 cutoff -- confirm against the codebook.
finalrecode <- final %>%
  mutate(
    health = case_when(
      health == 1 ~ "Excellent",
      health == 2 ~ "Very Good",
      health == 3 ~ "Good",
      health == 4 ~ "Fair",
      health == 5 ~ "Poor",
      TRUE ~ NA_character_
    ),
    sexorien = case_when(
      sexorien == 1 ~ "GayOrLesbian",
      sexorien == 2 ~ "Straight",
      sexorien == 3 ~ "Bisexual",
      sexorien == 4 ~ "Other",
      TRUE ~ NA_character_
    ),
    K6 = ifelse(K6 > 99, NA, K6)
  )

# Preview the first rows of the recoded data.
head(finalrecode)
##       sexorien    health K6
## 1     Straight Very Good  0
## 2     Straight Excellent  0
## 3     Straight Excellent  0
## 4     Straight Excellent  0
## 5 GayOrLesbian Excellent  0
## 6     Straight      Good  0

# Data Summary

# Frequency table of `health`, excluding rows where it is missing.
# `percent` is each level's share of the non-missing rows, expressed as a
# proportion between 0 and 1 (not multiplied by 100).
finalrecode %>%
  drop_na(health) %>%
  group_by(health) %>%
  tally() %>%
  mutate(percent = prop.table(n))
## # A tibble: 5 x 3
##   health        n percent
## * <chr>     <int>   <dbl>
## 1 Excellent 26630  0.258 
## 2 Fair      11602  0.112 
## 3 Good      28383  0.275 
## 4 Poor       3534  0.0342
## 5 Very Good 33202  0.321
# Frequency table of `sexorien`, excluding rows where it is missing.
# `percent` is each level's share of the non-missing rows, expressed as a
# proportion between 0 and 1 (not multiplied by 100).
finalrecode %>%
  drop_na(sexorien) %>%
  group_by(sexorien) %>%
  tally() %>%
  mutate(percent = prop.table(n))
## # A tibble: 4 x 3
##   sexorien         n percent
## * <chr>        <int>   <dbl>
## 1 Bisexual       882 0.00883
## 2 GayOrLesbian  1673 0.0168 
## 3 Other          333 0.00333
## 4 Straight     96988 0.971
# Overall mean of K6; rows where K6 is NA are left out of the average.
summarise(finalrecode, K6 = mean(K6, na.rm = TRUE))
##         K6
## 1 6.266265