library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(knitr)
# Load the two-column survey extract (MDScore, sexident); readr reports the
# column types it guessed.
NHDUH_Data <- read_csv(file = "~/Downloads/Skills Drill 3 NSDUH Data.csv")
## Parsed with column specification:
## cols(
## MDScore = col_double(),
## sexident = col_double()
## )
# Preview the first six rows to confirm the import.
head(x = NHDUH_Data, n = 6L)
## # A tibble: 6 x 2
## MDScore sexident
## <dbl> <dbl>
## 1 NA 99
## 2 NA 1
## 3 4 1
## 4 NA 99
## 5 11 1
## 6 8 1
# Recode the raw survey values into labelled categories.
#   MDCategory: MDScore < 5 -> "Low Risk"; 5 to 12 inclusive -> "Moderate Risk";
#               >= 13 -> "High Risk"; anything else (including NA) -> NA.
#   sexident:   1 -> "Straight", 2 -> "GayLesbian", 3 -> "Bisexual";
#               every other code (e.g. the 99 seen in the preview) -> NA.
# case_when() replaces the nested ifelse() ladder: one condition per line, and
# the result is always character, even when no row matches any condition.
NHDUH <- NHDUH_Data %>%
  mutate(
    MDCategory = case_when(
      MDScore < 5 ~ "Low Risk",
      MDScore >= 5 & MDScore <= 12 ~ "Moderate Risk",
      MDScore >= 13 ~ "High Risk",
      TRUE ~ NA_character_
    ),
    sexident = case_when(
      sexident == 1 ~ "Straight",
      sexident == 2 ~ "GayLesbian",
      sexident == 3 ~ "Bisexual",
      TRUE ~ NA_character_
    )
  ) %>%
  # Keep the score next to its derived category.
  select(MDScore, MDCategory, sexident)
# Column proportions: within each sexual-identity group (margin = 2), the share
# of respondents in each risk category, rounded to two decimals.
round(
  prop.table(
    table(NHDUH$MDCategory, NHDUH$sexident),
    margin = 2
  ),
  digits = 2
)
##
## Bisexual GayLesbian Straight
## High Risk 0.22 0.15 0.06
## Low Risk 0.33 0.52 0.64
## Moderate Risk 0.45 0.34 0.30
# Pearson chi-squared test of independence between risk category and sexual
# identity. The test is run once and stored, so the expected and observed
# counts below come from the same result instead of re-running the test, and
# they are selected by name rather than by list position ([7], [6]).
chisq_result <- chisq.test(NHDUH$MDCategory, NHDUH$sexident)
chisq_result
##
## Pearson's Chi-squared test
##
## data: NHDUH$MDCategory and NHDUH$sexident
## X-squared = 1057.7, df = 4, p-value < 2.2e-16
# Counts expected under independence (single brackets keep the `$expected`
# header in the printed output).
chisq_result["expected"]
## $expected
## NHDUH$sexident
## NHDUH$MDCategory Bisexual GayLesbian Straight
## High Risk 122.6741 63.07335 2817.253
## Low Risk 1079.1074 554.82702 24782.066
## Moderate Risk 529.2185 272.09964 12153.682
# Counts actually observed in the data.
chisq_result["observed"]
## $observed
## NHDUH$sexident
## NHDUH$MDCategory Bisexual GayLesbian Straight
## High Risk 383 132 2488
## Low Risk 573 459 25384
## Moderate Risk 775 299 11881