Variable Selection & Research Question

Is there a relationship between methamphetamine use (Users vs. Non-Users) and risk for serious mental illness, as measured by the K6 score?

Data Prep

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the survey CSV; the path is relative to the current working directory.
SOC333_NSDUH_2016 <- read_csv(file = "Downloads/SOC333_NSDUH_2016.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Nervous = col_double(),
##   Hopeless = col_double(),
##   Restless = col_double(),
##   Effort = col_double(),
##   Sad = col_double(),
##   Worthless = col_double(),
##   k6score = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
# Analysis subset: rows with a recorded k6score and a "Yes"/"No" value for
# meth_month, reduced to just those two columns.
soc333_nsduh_2016 <- SOC333_NSDUH_2016 %>%
  filter(!is.na(k6score), meth_month %in% c("Yes", "No")) %>%
  select(meth_month, k6score)

Comparison of Means

  • The average risk for serious mental illness (mean K6 score) is higher for Methamphetamine Users than for Non-Users.
# Bar chart of the mean k6score within each meth_month group.
soc333_nsduh_2016 %>%
  group_by(meth_month) %>%
  summarise(avg_k6score = mean(k6score)) %>%
  ggplot(aes(x = meth_month, y = avg_k6score)) +
  geom_col()

Comparison of Distributions

  • Non-Users have the lowest possible risk of serious mental illness while Users have the highest possible risk of serious mental illness.
# Histogram of k6score, one panel (and one fill colour) per meth_month group.
ggplot(soc333_nsduh_2016, aes(x = k6score, fill = meth_month)) +
  geom_histogram() +
  facet_wrap(vars(meth_month))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Sampling Distribution & T-test

# Rows where meth_month is "Yes".
esoc333_nsduh_2016 <- filter(soc333_nsduh_2016, meth_month == "Yes")

# 10,000 means, each computed from a random sample of 40 k6score values drawn
# without replacement; stored as a one-column data frame named `mean`.
e_Samp_Distro <- data.frame(
  mean = replicate(
    10000,
    mean(sample(esoc333_nsduh_2016$k6score, size = 40), na.rm = TRUE)
  )
)

# Histogram of the simulated sample means.
ggplot(e_Samp_Distro, aes(x = mean)) +
  geom_histogram(fill = "midnightblue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Rows where meth_month is "No".
fsoc333_nsduh_2016 <- filter(soc333_nsduh_2016, meth_month == "No")

# 10,000 means, each computed from a random sample of 40 k6score values drawn
# without replacement; stored as a one-column data frame named `mean`.
f_Samp_Distro <- data.frame(
  mean = replicate(
    10000,
    mean(sample(fsoc333_nsduh_2016$k6score, size = 40), na.rm = TRUE)
  )
)

# Histogram of the simulated sample means.
ggplot(f_Samp_Distro, aes(x = mean)) +
  geom_histogram(fill = "darkgoldenrod1")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

EC: Sampling Distribution Together

# Overlay of the two sampling distributions of the mean k6score.
# Colours match the single-group histograms (e_Samp_Distro, built from
# meth_month == "Yes", in midnightblue; f_Samp_Distro, built from
# meth_month == "No", in darkgoldenrod1). Fill is mapped inside aes() so that
# a legend identifies which histogram belongs to which group.
ggplot(mapping = aes(x = mean)) +
  geom_histogram(data = e_Samp_Distro, aes(fill = "Yes")) +
  # Semi-transparent so the layer underneath stays visible where they overlap.
  geom_histogram(data = f_Samp_Distro, aes(fill = "No"), alpha = 0.5) +
  scale_fill_manual(
    name = "meth_month",
    values = c("Yes" = "midnightblue", "No" = "darkgoldenrod1")
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

T-Test

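  • I tried to run a T-Test but kept getting the error “object k6score not found”.
  • Here is what I attempted to use: t.test(k6score~meth_month, data=data)
  • Likely cause: the code shown above never creates an object named data, so R has no data frame in which to look up k6score. The filtered data frame is soc333_nsduh_2016, so the call would be: t.test(k6score~meth_month, data=soc333_nsduh_2016)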