This analysis will investigate whether users of different types of drugs vary in how frequenlty they decide NOT to go to work, simply because they “don’t feel like it”. Respondents will be categorized as follows, based on drug use within the past 30-days:
Note calculations of Serious Mental Illness are present, but unused in this analysis so far. Will be introduced later as a control measure to evaluate differences between classes of drug users while controlling for psychological distress levels.
library(dplyr)
library(Zelig)
library(tidyr)
library(ggplot2)
library(knitr)
load("/Users/Brett/Library/Mobile Documents/com~apple~CloudDocs/All Files/Education/Data Analysis Masters/Sociology of Addiction/NSDUH Data/NSDUH-2015-DS0001-data-r.rda")
druguse2<- PUF2015_102016%>%
rename("marij_month" = mrjmon, "cocaine_month" = cocmon, "crack_month" = crkmon,
"heroin_month" = hermon,"hallucinogen_month" = hallucmon,"inhalant_month" = inhalmon,
"meth_month" = methammon,"painrelieve_month" = pnrnmmon,"tranq_month" = trqnmmon,
"stimulant_month" = stmnmmon,"sedative_month" = sednmmon,"sex" = irsex,
"education" = eduhighcat,"SelectiveLeave" = wrkskipmo,"SkipSick" =wrksickmo,
"EmploymentStatus" = IRWRKSTAT18, "PersonalIncome" = IRPINC3)%>%
mutate(pharmamonth = ifelse(painrelieve_month+stimulant_month+sedative_month+tranq_month >=1,1,0),
illicitmonth = ifelse(cocaine_month+hallucinogen_month+inhalant_month+heroin_month+meth_month >=1,1,0),
race2 = factor(NEWRACE2),
sex = factor(sex),
drugtype = factor(ifelse(pharmamonth+illicitmonth+marij_month>1,"mixed",
ifelse(illicitmonth ==1, "illicit",
ifelse(pharmamonth==1,"pharmaceutical",
ifelse(marij_month==1,"marijuana","none")))),
levels=c("none","pharmaceutical","illicit","marijuana","mixed")),
SelectiveLeave=as.numeric(SelectiveLeave),
K6Nervous30 = ifelse(DSTNRV30>5,NA,DSTNRV30),
K6Hopeless30 = ifelse(DSTHOP30>5,NA,DSTHOP30),
K6Restless30 = ifelse(DSTRST30>5,NA,DSTRST30),
K6Sad30 = ifelse(DSTCHR30>5,NA,DSTCHR30),
K6Effort30 = ifelse(DSTEFF30>5,NA,DSTEFF30),
K6Worthless30 = ifelse(DSTNGD30>5,NA,DSTNGD30),
K6Score30 = K6Nervous30+K6Hopeless30+K6Restless30+K6Sad30+K6Effort30+K6Worthless30,
K6Nervous12 = ifelse(DSTNRV12>5,NA,
ifelse(DSTNRV12==1,4,
ifelse(DSTNRV12==2,3,
ifelse(DSTNRV12==3,2,
ifelse(DSTNRV12==4,1,
ifelse(DSTNRV12==5,0,NA)))))),
K6Hopeless12 = ifelse(DSTHOP12>5,NA,
ifelse(DSTHOP12==1,4,
ifelse(DSTHOP12==2,3,
ifelse(DSTHOP12==3,2,
ifelse(DSTHOP12==4,1,
ifelse(DSTHOP12==5,0,NA)))))),
K6Restless12 = ifelse(DSTRST12>5,NA,
ifelse(DSTRST12==1,4,
ifelse(DSTRST12==2,3,
ifelse(DSTRST12==3,2,
ifelse(DSTRST12==4,1,
ifelse(DSTRST12==5,0,NA)))))),
K6Sad12 = ifelse(DSTCHR12>5,NA,
ifelse(DSTCHR12==1,4,
ifelse(DSTCHR12==2,3,
ifelse(DSTCHR12==3,2,
ifelse(DSTCHR12==4,1,
ifelse(DSTCHR12==5,0,NA)))))),
K6Effort12 = ifelse(DSTEFF12>5,NA,
ifelse(DSTEFF12==1,4,
ifelse(DSTEFF12==2,3,
ifelse(DSTEFF12==3,2,
ifelse(DSTEFF12==4,1,
ifelse(DSTEFF12==5,0,NA)))))),
K6Worthless12 = ifelse(DSTNGD12>5,NA,
ifelse(DSTNGD12==1,4,
ifelse(DSTNGD12==2,3,
ifelse(DSTNGD12==3,2,
ifelse(DSTNGD12==4,1,
ifelse(DSTNGD12==5,0,NA)))))),
K6Score12 = K6Nervous12+K6Hopeless12+K6Restless12+K6Sad12+K6Effort12+K6Worthless12,
K6ScoreGroup12 = factor(ifelse(K6Score12<5,"Low Risk",#Classify K6
ifelse(K6Score12>=5 & K6Score12<13,"MMD",
ifelse(K6Score12>=13,"SMI",NA))),
levels=c("Low Risk","MMD","SMI")),
K6SMI_12 = ifelse(K6ScoreGroup12=="SMI",1,0),
K6MMD_12 = ifelse(K6ScoreGroup12=="MMD",1,0))%>%
filter(EmploymentStatus < 3,
SelectiveLeave <85,
SelectiveLeave >=0)%>%
select(drugtype,
PersonalIncome,
race2,
education,
sex,
SelectiveLeave,
K6Score30,
K6Score12,
K6ScoreGroup12,
K6SCYR,
K6SMI_12,
K6MMD_12,
QUESTID2,
ANALWT_C,
vestr)
#K6 Variables
#DSTNRV30 During the past 30 days, how often did you feel nervous?
#DSTHOP30 During the past 30 days, how often did you feel hopeless?
#DSTRST30 During the past 30 days, how often did you feel restless or fidgety?
#DSTCHR30 During the past 30 days, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the past 30 days, how often did you feel that everything was an effort?
#DSTNGD30 During the past 30 days, how often did you feel down on yourself, no good, or worthless?
#DSTWORST - Was there a worse Month?
#DSTNRV12 During the worst month, how often did you feel nervous?
#DSTHOP12 During the worst month, how often did you feel hopeless?
#DSTRST30 During the worst month, how often did you feel restless or fidgety?
#DSTCHR30 During the worst month, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the worst month, how often did you feel that everything was an effort?
#DSTNGD30 During the worst month, how often did you feel down on yourself, no good, or worthless?
#Responses for all variables are:
#1 All of the time
#2 Most of the time
#3 Some of the time
#4 A little of the time
#5 None of the time'
#Check on accuracy of Recode by crosstab of new vs. original variables.
#druguse2%>%
# group_by(K6SCYR,K6SMI_12)%>%
# summarize(n=n())%>%
# spread(K6SMI_12,n)
#druguse2%>%
# group_by(K6SCYR,k6ScoreGroup12)%>%
# summarize(n=n())%>%
# spread(k6ScoreGroup12,n)
#druguse2%>%
# group_by(drugtype)%>%
# summarize(K6Score30=mean(K6Score30), K6Score12=mean(K6Score12))
#-----------------------#
#Survey Design Object---#
#-----------------------#
#To account for complex sample design.
library(survey)
NSDUHDesign <-
svydesign(
ids = ~QUESTID2,
strata = ~vestr ,
data = druguse2,
weights = ~ANALWT_C,
nest=TRUE)
Running a quick logistic regression evaluating odds of serious mental illness by type of drug use. This is really just a test of the survey design object to ensure that everything runs smoothly.
#------------------------------------------#
#Logistic Regression-----------------------#
#Serious Mental Illness by Drug Type-------#
#------------------------------------------#
SMILogit <- svyglm(K6SMI_12~drugtype,
family=binomial,
data=druguse2,
NSDUHDesign)
texreg::htmlreg(SMILogit)
| Model 1 | ||
|---|---|---|
| (Intercept) | -1.06*** | |
| (0.04) | ||
| drugtypepharmaceutical | 1.01*** | |
| (0.19) | ||
| drugtypeillicit | 1.18** | |
| (0.41) | ||
| drugtypemarijuana | 0.56*** | |
| (0.09) | ||
| drugtypemixed | 1.48*** | |
| (0.15) | ||
| Deviance | 9617.29 | |
| Dispersion | 0.90 | |
| Num. obs. | 9139 | |
| p < 0.001, p < 0.01, p < 0.05 | ||
The distribution of Selective Leave shows most respondents (approx 25K of 28K) reporting 0 days of selective leave. As the number of days of selective leave increases, the number of observations decreases. The dependent variables (Selective Leave) is a count variable, indicating number of days a respondent was absent from work.
A Poisson regression is determined as the appropriate model for estimation, given the type of value & distribution of the DV.
ggplot(data=druguse2)+
geom_histogram(aes(x=SelectiveLeave))
Using Poisson Regression Model to estimate Number of Days of Selective Leave from work for different kinds of drug users & non-users.
z5<- zpoisson$new()
z5$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
summary(z5)
## Model:
##
## Call:
## z5$zelig(formula = SelectiveLeave ~ drugtype + race2 + education +
## PersonalIncome, data = druguse2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0180 -0.8270 -0.7331 -0.5978 15.9499
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.683585 0.036728 -18.612 < 2e-16
## drugtypepharmaceutical 0.734704 0.063978 11.484 < 2e-16
## drugtypeillicit 1.033572 0.098965 10.444 < 2e-16
## drugtypemarijuana 0.477735 0.029478 16.207 < 2e-16
## drugtypemixed 1.055409 0.040184 26.264 < 2e-16
## race22 0.390200 0.028950 13.478 < 2e-16
## race23 0.565068 0.070399 8.027 1.00e-15
## race24 -0.004146 0.153232 -0.027 0.9784
## race25 -0.098174 0.058526 -1.677 0.0935
## race26 -0.074872 0.057665 -1.298 0.1942
## race27 -0.017688 0.029590 -0.598 0.5500
## education -0.092742 0.011870 -7.813 5.58e-15
## PersonalIncome -0.111275 0.005965 -18.655 < 2e-16
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 46060 on 28691 degrees of freedom
## Residual deviance: 44037 on 28679 degrees of freedom
## AIC: 53493
##
## Number of Fisher Scoring iterations: 6
##
## Next step: Use 'setx' method
#Illicit Drug Users vs. Non-Drug Users
z5.illicit_drugabsence <- zpoisson$new()
z5.illicit_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
z5.illicit_drugabsence$setx(drugtype = "none")
z5.illicit_drugabsence$setx1(drugtype = "illicit")
z5.illicit_drugabsence$sim()
kable(summary(z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")))
| V1 | |
|---|---|
| Min. :0.2908 | |
| 1st Qu.:0.4207 | |
| Median :0.4673 | |
| Mean :0.4732 | |
| 3rd Qu.:0.5186 | |
| Max. :0.7651 |
plot(z5.illicit_drugabsence)
#Marijuana Users vs. Non-Drug Users
z5.marij_drugabsence <- zpoisson$new()
z5.marij_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
z5.marij_drugabsence$setx(drugtype = "none")
z5.marij_drugabsence$setx1(drugtype = "marijuana")
z5.marij_drugabsence$sim()
kable(summary(z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")))
| V1 | |
|---|---|
| Min. :0.1275 | |
| 1st Qu.:0.1509 | |
| Median :0.1585 | |
| Mean :0.1589 | |
| 3rd Qu.:0.1669 | |
| Max. :0.2048 |
plot(z5.marij_drugabsence)
#Pharmaceutical Drug Users vs. Non-Drug Users
z5.pharma_drugabsence <- zpoisson$new()
z5.pharma_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
z5.pharma_drugabsence$setx(drugtype = "none")
z5.pharma_drugabsence$setx1(drugtype = "pharmaceutical")
z5.pharma_drugabsence$sim()
kable(summary(z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")))
| V1 | |
|---|---|
| Min. :0.1583 | |
| 1st Qu.:0.2580 | |
| Median :0.2806 | |
| Mean :0.2811 | |
| 3rd Qu.:0.3029 | |
| Max. :0.4036 |
plot(z5.pharma_drugabsence)
#Mixed Drug Users vs. Non-Drug Users
z5.mixed_drugabsence <- zpoisson$new()
z5.mixed_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
z5.mixed_drugabsence$setx(drugtype = "none")
z5.mixed_drugabsence$setx1(drugtype = "mixed")
z5.mixed_drugabsence$sim()
kable(summary(z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")))
| V1 | |
|---|---|
| Min. :0.3984 | |
| 1st Qu.:0.4647 | |
| Median :0.4844 | |
| Mean :0.4847 | |
| 3rd Qu.:0.5035 | |
| Max. :0.6004 |
plot(z5.mixed_drugabsence)
#Put FD in one place for all 3 models
marij.fd <- z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")
pharma.fd <- z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")
illicit.fd <- z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")
mixed.fd <- z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")
fd.table <- as.data.frame(cbind(marij.fd, pharma.fd, illicit.fd,mixed.fd))
kable(head(fd.table))
| V1 | V2 | V3 | V4 |
|---|---|---|---|
| 0.1743799 | 0.2419429 | 0.5183997 | 0.4787108 |
| 0.1352712 | 0.2464555 | 0.4153260 | 0.4875062 |
| 0.1654886 | 0.3006257 | 0.4967814 | 0.4659910 |
| 0.1366238 | 0.2736484 | 0.7207119 | 0.4772435 |
| 0.1395281 | 0.3486921 | 0.3927542 | 0.4506084 |
| 0.1460869 | 0.2506594 | 0.4878452 | 0.5218407 |
fd.table.long <- fd.table %>%
rename("Marijuana"=V1,
"Pharmaceuticals"=V2,
"Illicit"=V3,
"Mixed"=V4)%>%
gather(drugtype, simv, 1:4)
ggplot(data=fd.table.long,aes(simv)) + geom_histogram() + facet_grid(~drugtype)
#Groupwise Summary
fd.table.long %>%
group_by(drugtype) %>%
summarise(mean = mean(simv), sd = sd(simv))%>%
kable()
| drugtype | mean | sd |
|---|---|---|
| Illicit | 0.4731628 | 0.0749738 |
| Marijuana | 0.1588840 | 0.0115742 |
| Mixed | 0.4846912 | 0.0293016 |
| Pharmaceuticals | 0.2810678 | 0.0331629 |
Illicit & Mixed Drug users are at the top, showing similar effect(~ 47-48) of drug use on selective absence from work, followed by Pharmaceutical drug misusers. Marijuana use was the smallest driver of Selective Absence. Marijuana also showed the smallest standard deviation value for Selective Absence when compared to other kinds of drug users, implying that the absenteesims rates among Marijuana users were more consistent than the absenteesim rates of illicit drug users (who showed the highest sd value). This can be observed in the distributions of the simulated FD values between these groups & non-users. Marijuana users show a high peak and a narrow distribution, compared with illicit drug users who show a wider, more blunted distribution.