##Variable Selection and Research Question## The variable that I have chosen is heroin use in the past month (heroin_month), and I predict that those who report heroin use will, on average, have a higher mental health score (k6score) than those who do not. ##Data Prep##
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Read the SOC333 NSDUH 2016 extract from its local CSV into `dataset`, then
# print the first rows as a quick check of the columns that were loaded.
dataset <- utils::read.csv(
  file = "/Users/apple/Downloads/SOC333_NSDUH_2016.csv"
)
utils::head(x = dataset)
## sexident Nervous Hopeless Restless Effort Sad Worthless k6score k6category
## 1 <NA> NA NA NA NA NA NA NA <NA>
## 2 Straight 0 0 0 NA 0 0 NA <NA>
## 3 Straight 2 1 1 0 0 0 4 Low Risk
## 4 <NA> NA NA NA NA NA NA NA <NA>
## 5 Straight 1 3 2 2 1 2 11 MMD
## 6 Straight 2 1 1 2 1 1 8 MMD
## marij_month cocaine_month crack_month heroin_month hallucinogen_month
## 1 No No No No No
## 2 Yes No No No No
## 3 No No No No No
## 4 No No No No No
## 5 No No No No No
## 6 No No No No No
## inhalant_month meth_month painrelieve_month tranq_month stimulant_month
## 1 No No No No No
## 2 No No No No No
## 3 No No No No No
## 4 No No No No No
## 5 No No No No No
## 6 No No No No No
## sedative_month
## 1 No
## 2 No
## 3 No
## 4 No
## 5 No
## 6 No
##FilterData##
# Build the analysis subset: keep only heroin_month and k6score, restricted to
# rows whose heroin_month answer is "Yes" or "No" and whose k6score is present.
# Base c() builds the set of accepted answers in place of dplyr::combine(),
# which is deprecated as of dplyr 1.0.0 (see the warning recorded below from
# the original run).
heroinuse <- dataset %>%
  select(heroin_month, k6score) %>%
  filter(heroin_month %in% c("Yes", "No"), !is.na(k6score))
## Warning: `combine()` is deprecated as of dplyr 1.0.0.
## Please use `vctrs::vec_c()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
##Comparison of Means##
##table##
# Mean k6score within each heroin_month group, printed as a two-row table.
heroinuse %>%
  dplyr::group_by(heroin_month) %>%
  dplyr::summarise(mentalscore = mean(x = k6score))
## # A tibble: 2 x 2
## heroin_month mentalscore
## * <chr> <dbl>
## 1 No 4.42
## 2 Yes 10.2
##visualization##
# Bar chart of the group means: one bar per heroin_month answer, bar height is
# that group's mean k6score, and the fill colour also follows heroin_month.
heroinuse %>%
  dplyr::group_by(heroin_month) %>%
  dplyr::summarise(mentalscore = mean(x = k6score)) %>%
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = heroin_month,
      y = mentalscore,
      fill = heroin_month
    )
  ) +
  ggplot2::geom_col()
##Interpretation## Those who reported using heroin within the last 30 days had a higher average k6score (10.2) than those who did not (4.42). This suggests that those who use heroin are at greater risk of being very mentally ill than those who do not. ##Comparison of Distribution##
# Histogram of k6score, drawn in a separate panel for each heroin_month group
# and filled by group.
# An explicit binwidth replaces stat_bin()'s default of 30 bins (see the
# message recorded below from the original run). A width of 1 gives one bar
# per score point -- assumes k6score is whole-number valued, as in the rows
# previewed by head(); TODO confirm against the full column.
heroinuse %>%
  ggplot() +
  geom_histogram(aes(x = k6score, fill = heroin_month), binwidth = 1) +
  facet_wrap(~heroin_month)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.