library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
library(ggplot2)
# Load the NSDUH 2015 survey extract from a local CSV file.
Drugtest <- read_csv(
  "/Users/juliushunte/Desktop/NSDUH-2015-DS0001-data.csv"
)

# Reduce the survey to the two analysis columns:
#   PainRlv30days - "Yes" when PNRNM30D is 1, "No" when it is 2, NA otherwise.
#   MissWk        - wrksickmo, with any value above 30 replaced by NA
#                   (presumably non-response codes; confirm in the codebook).
drugqueries <- Drugtest %>%
  mutate(
    PainRlv30days = case_when(
      PNRNM30D == 1 ~ "Yes",
      PNRNM30D == 2 ~ "No"
    ),
    # which() skips missing comparisons, so existing NAs simply stay NA.
    MissWk = replace(wrksickmo, which(wrksickmo > 30), NA)
  ) %>%
  select(PainRlv30days, MissWk)

# Split into the two comparison groups, keeping only rows with a usable
# MissWk value.
yespk_drug <- filter(drugqueries, PainRlv30days == "Yes" & !is.na(MissWk))
nopk_drug <- filter(drugqueries, PainRlv30days == "No" & !is.na(MissWk))
# Print 40 MissWk values drawn at random (without replacement) from the
# "Yes" group.
sample(yespk_drug[["MissWk"]], size = 40)
## [1] 0 0 1 2 2 0 0 7 0 0 0 0 0 0 0 0 0 0 2 4 0 0 0
## [24] 0 0 1 0 2 0 0 2 0 0 10 0 2 0 3 0 15
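Mean number of days of work missed in one random sample of 40 respondents who used pain killers. (A new sample is drawn here, so this is not the mean of the 40 values printed above.)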
# Mean of a freshly drawn random sample of 40 MissWk values from the
# "Yes" group.
mean(sample(yespk_drug[["MissWk"]], size = 40))
## [1] 1.35
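Distribution of the sample mean of days of work missed across 10,000 random samples of 40 respondents who used pain killers. The vertical line marks the mean over all pain-killer users.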
# Approximate the sampling distribution of the mean for the "Yes" group:
# draw 10,000 random samples of 40 MissWk values and plot their means.
data.frame(
  mean = replicate(10000, mean(sample(yespk_drug$MissWk, 40)))
) %>%
  ggplot() +
  # Means of 40 whole-number values fall on a 1/40 grid, so each bin spans
  # exactly four grid points (4 / 40) and the bin edges sit between grid
  # points (0.5 / 40). This avoids the uneven bars produced by the default
  # 30 bins. Assumes MissWk holds whole numbers, as the samples printed in
  # this document do.
  geom_histogram(
    aes(x = mean),
    binwidth = 4 / 40,
    boundary = 0.5 / 40,
    fill = "red",
    alpha = .5
  ) +
  # Reference line at the mean over all rows of yespk_drug. It is passed as
  # a fixed parameter rather than inside aes(), so a single line is drawn
  # instead of one copy per simulated mean.
  geom_vline(xintercept = mean(yespk_drug$MissWk, na.rm = TRUE))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Print 40 MissWk values drawn at random (without replacement) from the
# "No" group.
sample(nopk_drug[["MissWk"]], size = 40)
## [1] 2 0 0 2 0 2 0 0 0 0 0 0 0 1 0 2 3 0 3 2 0 2 3
## [24] 2 0 0 0 0 0 0 0 1 0 10 5 0 0 0 0 0
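Mean number of days of work missed in one random sample of 40 respondents who did not use pain killers. (A new sample is drawn here, so this is not the mean of the 40 values printed above.)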
# Mean of a freshly drawn random sample of 40 MissWk values from the
# "No" group.
mean(sample(nopk_drug[["MissWk"]], size = 40))
## [1] 2.35
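Distribution of the sample mean of days of work missed across 10,000 random samples of 40 respondents who did not use pain killers. The vertical line marks the mean over all respondents who did not use pain killers.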
# Approximate the sampling distribution of the mean for the "No" group:
# draw 10,000 random samples of 40 MissWk values and plot their means.
data.frame(
  mean = replicate(10000, mean(sample(nopk_drug$MissWk, 40)))
) %>%
  ggplot() +
  # Means of 40 whole-number values fall on a 1/40 grid, so each bin spans
  # exactly four grid points (4 / 40) and the bin edges sit between grid
  # points (0.5 / 40). This avoids the uneven bars produced by the default
  # 30 bins. Assumes MissWk holds whole numbers, as the samples printed in
  # this document do.
  geom_histogram(
    aes(x = mean),
    binwidth = 4 / 40,
    boundary = 0.5 / 40,
    fill = "blue",
    alpha = .5
  ) +
  # Reference line at the mean over all rows of nopk_drug. It is passed as
  # a fixed parameter rather than inside aes(), so a single line is drawn
  # instead of one copy per simulated mean.
  geom_vline(xintercept = mean(nopk_drug$MissWk, na.rm = TRUE))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
