Report Description



This analysis will investigate whether users of different types of drugs vary in how frequenlty they decide NOT to go to work, simply because they “don’t feel like it”. Respondents will be categorized as follows, based on drug use within the past 30-days:



National Survey of Drug Use & Health (NSDUH)

Preparing Data


Note calculations of Serious Mental Illness are present, but unused in this analysis so far. Will be introduced later as a control measure to evaluate differences between classes of drug users while controlling for psychological distress levels.

library(dplyr)
library(Zelig)
library(tidyr)
library(ggplot2)
library(knitr)

load("/Users/Brett/Library/Mobile Documents/com~apple~CloudDocs/All Files/Education/Data Analysis Masters/Sociology of Addiction/NSDUH Data/NSDUH-2015-DS0001-data-r.rda")


druguse2<- PUF2015_102016%>%
          rename("marij_month" = mrjmon, "cocaine_month" = cocmon, "crack_month" = crkmon,
                 "heroin_month" = hermon,"hallucinogen_month" = hallucmon,"inhalant_month" = inhalmon,
                 "meth_month" = methammon,"painrelieve_month" = pnrnmmon,"tranq_month" = trqnmmon,
                 "stimulant_month" = stmnmmon,"sedative_month" = sednmmon,"sex" = irsex,
                  "education" = eduhighcat,"SelectiveLeave" = wrkskipmo,"SkipSick" =wrksickmo,
                 "EmploymentStatus" = IRWRKSTAT18, "PersonalIncome" = IRPINC3)%>%
           mutate(pharmamonth = ifelse(painrelieve_month+stimulant_month+sedative_month+tranq_month >=1,1,0),
                  illicitmonth = ifelse(cocaine_month+hallucinogen_month+inhalant_month+heroin_month+meth_month >=1,1,0),
                  race2 = factor(NEWRACE2), 
                  sex = factor(sex),
                  drugtype = factor(ifelse(pharmamonth+illicitmonth+marij_month>1,"mixed",
                                    ifelse(illicitmonth ==1, "illicit",
                                    ifelse(pharmamonth==1,"pharmaceutical",
                                    ifelse(marij_month==1,"marijuana","none")))),
                             levels=c("none","pharmaceutical","illicit","marijuana","mixed")),
                  SelectiveLeave=as.numeric(SelectiveLeave),
                    K6Nervous30 = ifelse(DSTNRV30>5,NA,DSTNRV30),
                    K6Hopeless30 = ifelse(DSTHOP30>5,NA,DSTHOP30),
                    K6Restless30 = ifelse(DSTRST30>5,NA,DSTRST30),
                    K6Sad30 = ifelse(DSTCHR30>5,NA,DSTCHR30),
                    K6Effort30 = ifelse(DSTEFF30>5,NA,DSTEFF30),
                    K6Worthless30 = ifelse(DSTNGD30>5,NA,DSTNGD30),
                  K6Score30 = K6Nervous30+K6Hopeless30+K6Restless30+K6Sad30+K6Effort30+K6Worthless30,
                    K6Nervous12 = ifelse(DSTNRV12>5,NA,
                                  ifelse(DSTNRV12==1,4,
                                  ifelse(DSTNRV12==2,3,
                                  ifelse(DSTNRV12==3,2,
                                  ifelse(DSTNRV12==4,1,
                                  ifelse(DSTNRV12==5,0,NA)))))),
                    K6Hopeless12 = ifelse(DSTHOP12>5,NA,
                                   ifelse(DSTHOP12==1,4,
                                   ifelse(DSTHOP12==2,3,
                                   ifelse(DSTHOP12==3,2,
                                   ifelse(DSTHOP12==4,1,
                                   ifelse(DSTHOP12==5,0,NA)))))),
                    K6Restless12 = ifelse(DSTRST12>5,NA,
                                   ifelse(DSTRST12==1,4,
                                   ifelse(DSTRST12==2,3,
                                   ifelse(DSTRST12==3,2,
                                   ifelse(DSTRST12==4,1,
                                   ifelse(DSTRST12==5,0,NA)))))),
                    K6Sad12 = ifelse(DSTCHR12>5,NA,
                                   ifelse(DSTCHR12==1,4,
                                   ifelse(DSTCHR12==2,3,
                                   ifelse(DSTCHR12==3,2,
                                   ifelse(DSTCHR12==4,1,
                                   ifelse(DSTCHR12==5,0,NA)))))),
                    K6Effort12 = ifelse(DSTEFF12>5,NA,
                                   ifelse(DSTEFF12==1,4,
                                   ifelse(DSTEFF12==2,3,
                                   ifelse(DSTEFF12==3,2,
                                   ifelse(DSTEFF12==4,1,
                                   ifelse(DSTEFF12==5,0,NA)))))),
                    K6Worthless12 = ifelse(DSTNGD12>5,NA,
                                    ifelse(DSTNGD12==1,4,
                                    ifelse(DSTNGD12==2,3,
                                    ifelse(DSTNGD12==3,2,
                                    ifelse(DSTNGD12==4,1,
                                    ifelse(DSTNGD12==5,0,NA)))))),
                  K6Score12 = K6Nervous12+K6Hopeless12+K6Restless12+K6Sad12+K6Effort12+K6Worthless12,
                  K6ScoreGroup12 = factor(ifelse(K6Score12<5,"Low Risk",#Classify K6
                       ifelse(K6Score12>=5 & K6Score12<13,"MMD",
                       ifelse(K6Score12>=13,"SMI",NA))),
                       levels=c("Low Risk","MMD","SMI")),
                  K6SMI_12 = ifelse(K6ScoreGroup12=="SMI",1,0),
                  K6MMD_12 = ifelse(K6ScoreGroup12=="MMD",1,0))%>%
            filter(EmploymentStatus < 3,
                 SelectiveLeave <85,
                 SelectiveLeave >=0)%>%
            select(drugtype,
                    PersonalIncome,
                    race2,
                    education,
                    sex,
                    SelectiveLeave,
                    K6Score30,
                    K6Score12,
                   K6ScoreGroup12,
                   K6SCYR,
                   K6SMI_12,
                   K6MMD_12,
                   QUESTID2,
                   ANALWT_C,
                   vestr)

#K6 Variables
#DSTNRV30 During the past 30 days, how often did you feel nervous?
#DSTHOP30 During the past 30 days, how often did you feel hopeless?
#DSTRST30 During the past 30 days, how often did you feel restless or fidgety?
#DSTCHR30 During the past 30 days, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the past 30 days, how often did you feel that everything was an effort?
#DSTNGD30 During the past 30 days, how often did you feel down on yourself, no good, or worthless?

#DSTWORST - Was there a worse Month?
#DSTNRV12 During the worst month, how often did you feel nervous?
#DSTHOP12 During the worst month, how often did you feel hopeless?
#DSTRST30 During the worst month, how often did you feel restless or fidgety?
#DSTCHR30 During the worst month, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the worst month, how often did you feel that everything was an effort?
#DSTNGD30 During the worst month, how often did you feel down on yourself, no good, or worthless?

#Responses for all variables are:
  #1 All of the time
  #2 Most of the time
  #3 Some of the time
  #4 A little of the time
  #5 None of the time'

#Check on accuracy of Recode by crosstab of new vs. original variables.

  #druguse2%>%
  #  group_by(K6SCYR,K6SMI_12)%>%
  #  summarize(n=n())%>%
  #  spread(K6SMI_12,n)

  #druguse2%>%
  #  group_by(K6SCYR,k6ScoreGroup12)%>%
  #  summarize(n=n())%>%
  #  spread(k6ScoreGroup12,n)

  #druguse2%>%
  #  group_by(drugtype)%>%
  #  summarize(K6Score30=mean(K6Score30), K6Score12=mean(K6Score12))
#-----------------------#
#Survey Design Object---#
#-----------------------#
#To account for complex sample design.
library(survey)
NSDUHDesign <-
  svydesign(
    ids = ~QUESTID2,
    strata = ~vestr ,
        data = druguse2,
        weights = ~ANALWT_C,
        nest=TRUE)

Running a quick logistic regression evaluating odds of serious mental illness by type of drug use. This is really just a test of the survey design object to ensure that everything runs smoothly.

#------------------------------------------#
#Logistic Regression-----------------------#
#Serious Mental Illness by Drug Type-------#
#------------------------------------------#

SMILogit <- svyglm(K6SMI_12~drugtype, 
                   family=binomial, 
                   data=druguse2, 
                   NSDUHDesign)

texreg::htmlreg(SMILogit)
Statistical models
Model 1
(Intercept) -1.06***
(0.04)
drugtypepharmaceutical 1.01***
(0.19)
drugtypeillicit 1.18**
(0.41)
drugtypemarijuana 0.56***
(0.09)
drugtypemixed 1.48***
(0.15)
Deviance 9617.29
Dispersion 0.90
Num. obs. 9139
p < 0.001, p < 0.01, p < 0.05


Distribution of Dependent Variable (Selective Leave)

The distribution of Selective Leave shows most respondents (approx 25K of 28K) reporting 0 days of selective leave. As the number of days of selective leave increases, the number of observations decreases. The dependent variables (Selective Leave) is a count variable, indicating number of days a respondent was absent from work.

A Poisson regression is determined as the appropriate model for estimation, given the type of value & distribution of the DV.

ggplot(data=druguse2)+
  geom_histogram(aes(x=SelectiveLeave))



Poisson Regression

Using Poisson Regression Model to estimate Number of Days of Selective Leave from work for different kinds of drug users & non-users.

z5<- zpoisson$new()
z5$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
summary(z5)
## Model: 
## 
## Call:
## z5$zelig(formula = SelectiveLeave ~ drugtype + race2 + education + 
##     PersonalIncome, data = druguse2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0180  -0.8270  -0.7331  -0.5978  15.9499  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -0.683585   0.036728 -18.612  < 2e-16
## drugtypepharmaceutical  0.734704   0.063978  11.484  < 2e-16
## drugtypeillicit         1.033572   0.098965  10.444  < 2e-16
## drugtypemarijuana       0.477735   0.029478  16.207  < 2e-16
## drugtypemixed           1.055409   0.040184  26.264  < 2e-16
## race22                  0.390200   0.028950  13.478  < 2e-16
## race23                  0.565068   0.070399   8.027 1.00e-15
## race24                 -0.004146   0.153232  -0.027   0.9784
## race25                 -0.098174   0.058526  -1.677   0.0935
## race26                 -0.074872   0.057665  -1.298   0.1942
## race27                 -0.017688   0.029590  -0.598   0.5500
## education              -0.092742   0.011870  -7.813 5.58e-15
## PersonalIncome         -0.111275   0.005965 -18.655  < 2e-16
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 46060  on 28691  degrees of freedom
## Residual deviance: 44037  on 28679  degrees of freedom
## AIC: 53493
## 
## Number of Fisher Scoring iterations: 6
## 
## Next step: Use 'setx' method

Illicit Drug Users vs. Non-Drug Users

  • The mean of the simulated first difference values for # of Days of Selective Leave between Illicit Drug Users & Non-Drug Users is .47.
#Illicit Drug Users vs. Non-Drug Users
  z5.illicit_drugabsence <- zpoisson$new()
  z5.illicit_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.illicit_drugabsence$setx(drugtype = "none") 
  z5.illicit_drugabsence$setx1(drugtype = "illicit")
  z5.illicit_drugabsence$sim()
  kable(summary(z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")))
V1
Min. :0.2908
1st Qu.:0.4207
Median :0.4673
Mean :0.4732
3rd Qu.:0.5186
Max. :0.7651
  plot(z5.illicit_drugabsence)



Marijuana Users vs. Non-Drug Users

  • The mean of the simulated first difference values for # of Days of Selective Leave between Marijuana Users & Non-Drug Users is .16.
#Marijuana Users vs. Non-Drug Users  
  z5.marij_drugabsence <- zpoisson$new()
  z5.marij_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.marij_drugabsence$setx(drugtype = "none") 
  z5.marij_drugabsence$setx1(drugtype = "marijuana")
  z5.marij_drugabsence$sim()
  kable(summary(z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")))
V1
Min. :0.1275
1st Qu.:0.1509
Median :0.1585
Mean :0.1589
3rd Qu.:0.1669
Max. :0.2048
  plot(z5.marij_drugabsence)



Pharmaceutical Drug Users vs. Non-Drug Users

  • The mean of the simulated first difference values for # of Days of Selective Leave between Pharmaceutical Drug Users & Non-Drug Users is .28.
#Pharmaceutical Drug Users vs. Non-Drug Users
  z5.pharma_drugabsence <- zpoisson$new()
  z5.pharma_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.pharma_drugabsence$setx(drugtype = "none") 
  z5.pharma_drugabsence$setx1(drugtype = "pharmaceutical")
  z5.pharma_drugabsence$sim()
  kable(summary(z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")))
V1
Min. :0.1583
1st Qu.:0.2580
Median :0.2806
Mean :0.2811
3rd Qu.:0.3029
Max. :0.4036
  plot(z5.pharma_drugabsence)



Mixed Drug Users vs. Non-Drug Users

  • The mean of the simulated first difference values for # of Days of Selective Leave between Mixed Drug Users & Non-Drug Users is .49.
#Mixed Drug Users vs. Non-Drug Users
  z5.mixed_drugabsence <- zpoisson$new()
  z5.mixed_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.mixed_drugabsence$setx(drugtype = "none") 
  z5.mixed_drugabsence$setx1(drugtype = "mixed")
  z5.mixed_drugabsence$sim()
  kable(summary(z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")))
V1
Min. :0.3984
1st Qu.:0.4647
Median :0.4844
Mean :0.4847
3rd Qu.:0.5035
Max. :0.6004
  plot(z5.mixed_drugabsence)  



Simulated First Differences in a Single Table

#Put FD in one place for all 3 models
marij.fd <- z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")
pharma.fd <- z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")
illicit.fd <- z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")
mixed.fd <- z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")

fd.table <- as.data.frame(cbind(marij.fd, pharma.fd, illicit.fd,mixed.fd))
kable(head(fd.table))
V1 V2 V3 V4
0.1743799 0.2419429 0.5183997 0.4787108
0.1352712 0.2464555 0.4153260 0.4875062
0.1654886 0.3006257 0.4967814 0.4659910
0.1366238 0.2736484 0.7207119 0.4772435
0.1395281 0.3486921 0.3927542 0.4506084
0.1460869 0.2506594 0.4878452 0.5218407



Distribution of Simulated First Differences Between Users & Non-Users by Kind of Drug Use

fd.table.long <- fd.table %>% 
  rename("Marijuana"=V1,
         "Pharmaceuticals"=V2,
         "Illicit"=V3,
         "Mixed"=V4)%>%
  gather(drugtype, simv, 1:4)


  ggplot(data=fd.table.long,aes(simv)) + geom_histogram() + facet_grid(~drugtype)

#Groupwise Summary
fd.table.long %>% 
  group_by(drugtype) %>% 
  summarise(mean = mean(simv), sd = sd(simv))%>%
  kable()
drugtype mean sd
Illicit 0.4731628 0.0749738
Marijuana 0.1588840 0.0115742
Mixed 0.4846912 0.0293016
Pharmaceuticals 0.2810678 0.0331629


Illicit & Mixed Drug users are at the top, showing similar effect(~ 47-48) of drug use on selective absence from work, followed by Pharmaceutical drug misusers. Marijuana use was the smallest driver of Selective Absence. Marijuana also showed the smallest standard deviation value for Selective Absence when compared to other kinds of drug users, implying that the absenteesims rates among Marijuana users were more consistent than the absenteesim rates of illicit drug users (who showed the highest sd value). This can be observed in the distributions of the simulated FD values between these groups & non-users. Marijuana users show a high peak and a narrow distribution, compared with illicit drug users who show a wider, more blunted distribution.