Report Description

This analysis will investigate whether users of different types of drugs vary in how frequenlty they decide NOT to go to work, simply because they “don’t feel like it”. Respondents will be categorized as follows, based on drug use within the past 30-days:

Non-Users: Those who report no drug use.
Illicit Drug Users: Respondents who used cocaine, hallucinogens, inhalants, heroin, or methamphetamines.
Pharmaceutical Drug Users:Respondents who used pain relievers, stimulants, sedatives, or tranqualizers other than as advised by a doctor.
Marijuana Users: Respondents who used Marijuana.
Mixed - Respondents who fit into more than 1 of the above groups

National Survey of Drug Use & Health (NSDUH)

In order to conduct this analysis, we will look at respondents to the NSDUH who are employed full or part time (28,692 observations).

Preparing Data

Note calculations of Serious Mental Illness are present, but unused in this analysis so far. Will be introduced later as a control measure to evaluate differences between classes of drug users while controlling for psychological distress levels.

library(dplyr)
library(Zelig)
library(tidyr)
library(ggplot2)
library(knitr)

load("/Users/Brett/Library/Mobile Documents/com~apple~CloudDocs/All Files/Education/Data Analysis Masters/Sociology of Addiction/NSDUH Data/NSDUH-2015-DS0001-data-r.rda")


druguse2<- PUF2015_102016%>%
          rename("marij_month" = mrjmon, "cocaine_month" = cocmon, "crack_month" = crkmon,
                 "heroin_month" = hermon,"hallucinogen_month" = hallucmon,"inhalant_month" = inhalmon,
                 "meth_month" = methammon,"painrelieve_month" = pnrnmmon,"tranq_month" = trqnmmon,
                 "stimulant_month" = stmnmmon,"sedative_month" = sednmmon,"sex" = irsex,
                  "education" = eduhighcat,"SelectiveLeave" = wrkskipmo,"SkipSick" =wrksickmo,
                 "EmploymentStatus" = IRWRKSTAT18, "PersonalIncome" = IRPINC3)%>%
           mutate(pharmamonth = ifelse(painrelieve_month+stimulant_month+sedative_month+tranq_month >=1,1,0),
                  illicitmonth = ifelse(cocaine_month+hallucinogen_month+inhalant_month+heroin_month+meth_month >=1,1,0),
                  race2 = factor(NEWRACE2), 
                  sex = factor(sex),
                  drugtype = factor(ifelse(pharmamonth+illicitmonth+marij_month>1,"mixed",
                                    ifelse(illicitmonth ==1, "illicit",
                                    ifelse(pharmamonth==1,"pharmaceutical",
                                    ifelse(marij_month==1,"marijuana","none")))),
                             levels=c("none","pharmaceutical","illicit","marijuana","mixed")),
                  SelectiveLeave=as.numeric(SelectiveLeave),
                    K6Nervous30 = ifelse(DSTNRV30>5,NA,DSTNRV30),
                    K6Hopeless30 = ifelse(DSTHOP30>5,NA,DSTHOP30),
                    K6Restless30 = ifelse(DSTRST30>5,NA,DSTRST30),
                    K6Sad30 = ifelse(DSTCHR30>5,NA,DSTCHR30),
                    K6Effort30 = ifelse(DSTEFF30>5,NA,DSTEFF30),
                    K6Worthless30 = ifelse(DSTNGD30>5,NA,DSTNGD30),
                  K6Score30 = K6Nervous30+K6Hopeless30+K6Restless30+K6Sad30+K6Effort30+K6Worthless30,
                    K6Nervous12 = ifelse(DSTNRV12>5,NA,
                                  ifelse(DSTNRV12==1,4,
                                  ifelse(DSTNRV12==2,3,
                                  ifelse(DSTNRV12==3,2,
                                  ifelse(DSTNRV12==4,1,
                                  ifelse(DSTNRV12==5,0,NA)))))),
                    K6Hopeless12 = ifelse(DSTHOP12>5,NA,
                                   ifelse(DSTHOP12==1,4,
                                   ifelse(DSTHOP12==2,3,
                                   ifelse(DSTHOP12==3,2,
                                   ifelse(DSTHOP12==4,1,
                                   ifelse(DSTHOP12==5,0,NA)))))),
                    K6Restless12 = ifelse(DSTRST12>5,NA,
                                   ifelse(DSTRST12==1,4,
                                   ifelse(DSTRST12==2,3,
                                   ifelse(DSTRST12==3,2,
                                   ifelse(DSTRST12==4,1,
                                   ifelse(DSTRST12==5,0,NA)))))),
                    K6Sad12 = ifelse(DSTCHR12>5,NA,
                                   ifelse(DSTCHR12==1,4,
                                   ifelse(DSTCHR12==2,3,
                                   ifelse(DSTCHR12==3,2,
                                   ifelse(DSTCHR12==4,1,
                                   ifelse(DSTCHR12==5,0,NA)))))),
                    K6Effort12 = ifelse(DSTEFF12>5,NA,
                                   ifelse(DSTEFF12==1,4,
                                   ifelse(DSTEFF12==2,3,
                                   ifelse(DSTEFF12==3,2,
                                   ifelse(DSTEFF12==4,1,
                                   ifelse(DSTEFF12==5,0,NA)))))),
                    K6Worthless12 = ifelse(DSTNGD12>5,NA,
                                    ifelse(DSTNGD12==1,4,
                                    ifelse(DSTNGD12==2,3,
                                    ifelse(DSTNGD12==3,2,
                                    ifelse(DSTNGD12==4,1,
                                    ifelse(DSTNGD12==5,0,NA)))))),
                  K6Score12 = K6Nervous12+K6Hopeless12+K6Restless12+K6Sad12+K6Effort12+K6Worthless12,
                  K6ScoreGroup12 = factor(ifelse(K6Score12<5,"Low Risk",#Classify K6
                       ifelse(K6Score12>=5 & K6Score12<13,"MMD",
                       ifelse(K6Score12>=13,"SMI",NA))),
                       levels=c("Low Risk","MMD","SMI")),
                  K6SMI_12 = ifelse(K6ScoreGroup12=="SMI",1,0),
                  K6MMD_12 = ifelse(K6ScoreGroup12=="MMD",1,0))%>%
            filter(EmploymentStatus < 3,
                 SelectiveLeave <85,
                 SelectiveLeave >=0)%>%
            select(drugtype,
                    PersonalIncome,
                    race2,
                    education,
                    sex,
                    SelectiveLeave,
                    K6Score30,
                    K6Score12,
                   K6ScoreGroup12,
                   K6SCYR,
                   K6SMI_12,
                   K6MMD_12,
                   QUESTID2,
                   ANALWT_C,
                   vestr)

#K6 Variables
#DSTNRV30 During the past 30 days, how often did you feel nervous?
#DSTHOP30 During the past 30 days, how often did you feel hopeless?
#DSTRST30 During the past 30 days, how often did you feel restless or fidgety?
#DSTCHR30 During the past 30 days, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the past 30 days, how often did you feel that everything was an effort?
#DSTNGD30 During the past 30 days, how often did you feel down on yourself, no good, or worthless?

#DSTWORST - Was there a worse Month?
#DSTNRV12 During the worst month, how often did you feel nervous?
#DSTHOP12 During the worst month, how often did you feel hopeless?
#DSTRST30 During the worst month, how often did you feel restless or fidgety?
#DSTCHR30 During the worst month, how often did you feel so sad or depressed that nothing could cheer you up?
#DSTEFF30 During the worst month, how often did you feel that everything was an effort?
#DSTNGD30 During the worst month, how often did you feel down on yourself, no good, or worthless?

#Responses for all variables are:
  #1 All of the time
  #2 Most of the time
  #3 Some of the time
  #4 A little of the time
  #5 None of the time'

#Check on accuracy of Recode by crosstab of new vs. original variables.

  #druguse2%>%
  #  group_by(K6SCYR,K6SMI_12)%>%
  #  summarize(n=n())%>%
  #  spread(K6SMI_12,n)

  #druguse2%>%
  #  group_by(K6SCYR,k6ScoreGroup12)%>%
  #  summarize(n=n())%>%
  #  spread(k6ScoreGroup12,n)

  #druguse2%>%
  #  group_by(drugtype)%>%
  #  summarize(K6Score30=mean(K6Score30), K6Score12=mean(K6Score12))

#-----------------------#
#Survey Design Object---#
#-----------------------#
#To account for complex sample design.
library(survey)
NSDUHDesign <-
  svydesign(
    ids = ~QUESTID2,
    strata = ~vestr ,
        data = druguse2,
        weights = ~ANALWT_C,
        nest=TRUE)

Running a quick logistic regression evaluating odds of serious mental illness by type of drug use. This is really just a test of the survey design object to ensure that everything runs smoothly.

#------------------------------------------#
#Logistic Regression-----------------------#
#Serious Mental Illness by Drug Type-------#
#------------------------------------------#

SMILogit <- svyglm(K6SMI_12~drugtype, 
                   family=binomial, 
                   data=druguse2, 
                   NSDUHDesign)

texreg::htmlreg(SMILogit)

Statistical models
	Model 1
(Intercept)	-1.06^***
	(0.04)
drugtypepharmaceutical	1.01^***
	(0.19)
drugtypeillicit	1.18^**
	(0.41)
drugtypemarijuana	0.56^***
	(0.09)
drugtypemixed	1.48^***
	(0.15)
Deviance	9617.29
Dispersion	0.90
Num. obs.	9139
p < 0.001, p < 0.01, p < 0.05

Distribution of Dependent Variable (Selective Leave)

The distribution of Selective Leave shows most respondents (approx 25K of 28K) reporting 0 days of selective leave. As the number of days of selective leave increases, the number of observations decreases. The dependent variables (Selective Leave) is a count variable, indicating number of days a respondent was absent from work.

A Poisson regression is determined as the appropriate model for estimation, given the type of value & distribution of the DV.

ggplot(data=druguse2)+
  geom_histogram(aes(x=SelectiveLeave))

Poisson Regression

Using Poisson Regression Model to estimate Number of Days of Selective Leave from work for different kinds of drug users & non-users.

z5<- zpoisson$new()
z5$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
summary(z5)

## Model: 
## 
## Call:
## z5$zelig(formula = SelectiveLeave ~ drugtype + race2 + education + 
##     PersonalIncome, data = druguse2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0180  -0.8270  -0.7331  -0.5978  15.9499  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -0.683585   0.036728 -18.612  < 2e-16
## drugtypepharmaceutical  0.734704   0.063978  11.484  < 2e-16
## drugtypeillicit         1.033572   0.098965  10.444  < 2e-16
## drugtypemarijuana       0.477735   0.029478  16.207  < 2e-16
## drugtypemixed           1.055409   0.040184  26.264  < 2e-16
## race22                  0.390200   0.028950  13.478  < 2e-16
## race23                  0.565068   0.070399   8.027 1.00e-15
## race24                 -0.004146   0.153232  -0.027   0.9784
## race25                 -0.098174   0.058526  -1.677   0.0935
## race26                 -0.074872   0.057665  -1.298   0.1942
## race27                 -0.017688   0.029590  -0.598   0.5500
## education              -0.092742   0.011870  -7.813 5.58e-15
## PersonalIncome         -0.111275   0.005965 -18.655  < 2e-16
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 46060  on 28691  degrees of freedom
## Residual deviance: 44037  on 28679  degrees of freedom
## AIC: 53493
## 
## Number of Fisher Scoring iterations: 6
## 
## Next step: Use 'setx' method

Illicit Drug Users vs. Non-Drug Users

The mean of the simulated first difference values for # of Days of Selective Leave between Illicit Drug Users & Non-Drug Users is .47.

#Illicit Drug Users vs. Non-Drug Users
  z5.illicit_drugabsence <- zpoisson$new()
  z5.illicit_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.illicit_drugabsence$setx(drugtype = "none") 
  z5.illicit_drugabsence$setx1(drugtype = "illicit")
  z5.illicit_drugabsence$sim()
  kable(summary(z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")))

	V1
	Min. :0.2908
	1st Qu.:0.4207
	Median :0.4673
	Mean :0.4732
	3rd Qu.:0.5186
	Max. :0.7651

  plot(z5.illicit_drugabsence)

Marijuana Users vs. Non-Drug Users

The mean of the simulated first difference values for # of Days of Selective Leave between Marijuana Users & Non-Drug Users is .16.

#Marijuana Users vs. Non-Drug Users  
  z5.marij_drugabsence <- zpoisson$new()
  z5.marij_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.marij_drugabsence$setx(drugtype = "none") 
  z5.marij_drugabsence$setx1(drugtype = "marijuana")
  z5.marij_drugabsence$sim()
  kable(summary(z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")))

	V1
	Min. :0.1275
	1st Qu.:0.1509
	Median :0.1585
	Mean :0.1589
	3rd Qu.:0.1669
	Max. :0.2048

  plot(z5.marij_drugabsence)

Pharmaceutical Drug Users vs. Non-Drug Users

The mean of the simulated first difference values for # of Days of Selective Leave between Pharmaceutical Drug Users & Non-Drug Users is .28.

#Pharmaceutical Drug Users vs. Non-Drug Users
  z5.pharma_drugabsence <- zpoisson$new()
  z5.pharma_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.pharma_drugabsence$setx(drugtype = "none") 
  z5.pharma_drugabsence$setx1(drugtype = "pharmaceutical")
  z5.pharma_drugabsence$sim()
  kable(summary(z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")))

	V1
	Min. :0.1583
	1st Qu.:0.2580
	Median :0.2806
	Mean :0.2811
	3rd Qu.:0.3029
	Max. :0.4036

  plot(z5.pharma_drugabsence)

Mixed Drug Users vs. Non-Drug Users

The mean of the simulated first difference values for # of Days of Selective Leave between Mixed Drug Users & Non-Drug Users is .49.

#Mixed Drug Users vs. Non-Drug Users
  z5.mixed_drugabsence <- zpoisson$new()
  z5.mixed_drugabsence$zelig(SelectiveLeave ~ drugtype+race2+education+PersonalIncome,data=druguse2)
  z5.mixed_drugabsence$setx(drugtype = "none") 
  z5.mixed_drugabsence$setx1(drugtype = "mixed")
  z5.mixed_drugabsence$sim()
  kable(summary(z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")))

	V1
	Min. :0.3984
	1st Qu.:0.4647
	Median :0.4844
	Mean :0.4847
	3rd Qu.:0.5035
	Max. :0.6004

  plot(z5.mixed_drugabsence)

Simulated First Differences in a Single Table

V1: First Differences across 1,000 simulations evaluating selective leave between Marijuana Users & Non-Users
V2: First Differences across 1,000 simulations evaluating selective leave between Pharmaceutical Drug Users & Non-Users
V3: First Differences across 1,000 simulations evaluating selective leave between Illicit Drug Users & Non-Users
V4: First Differences across 1,000 simulations evaluating selective leave between Mixed Drug Users & Non-Users

#Put FD in one place for all 3 models
marij.fd <- z5.marij_drugabsence$get_qi(xvalue="x1", qi="fd")
pharma.fd <- z5.pharma_drugabsence$get_qi(xvalue="x1", qi="fd")
illicit.fd <- z5.illicit_drugabsence$get_qi(xvalue="x1", qi="fd")
mixed.fd <- z5.mixed_drugabsence$get_qi(xvalue="x1", qi="fd")

fd.table <- as.data.frame(cbind(marij.fd, pharma.fd, illicit.fd,mixed.fd))
kable(head(fd.table))

V1	V2	V3	V4
0.1743799	0.2419429	0.5183997	0.4787108
0.1352712	0.2464555	0.4153260	0.4875062
0.1654886	0.3006257	0.4967814	0.4659910
0.1366238	0.2736484	0.7207119	0.4772435
0.1395281	0.3486921	0.3927542	0.4506084
0.1460869	0.2506594	0.4878452	0.5218407

Distribution of Simulated First Differences Between Users & Non-Users by Kind of Drug Use

fd.table.long <- fd.table %>% 
  rename("Marijuana"=V1,
         "Pharmaceuticals"=V2,
         "Illicit"=V3,
         "Mixed"=V4)%>%
  gather(drugtype, simv, 1:4)


  ggplot(data=fd.table.long,aes(simv)) + geom_histogram() + facet_grid(~drugtype)

#Groupwise Summary
fd.table.long %>% 
  group_by(drugtype) %>% 
  summarise(mean = mean(simv), sd = sd(simv))%>%
  kable()

drugtype	mean	sd
Illicit	0.4731628	0.0749738
Marijuana	0.1588840	0.0115742
Mixed	0.4846912	0.0293016
Pharmaceuticals	0.2810678	0.0331629

Illicit & Mixed Drug users are at the top, showing similar effect(~ 47-48) of drug use on selective absence from work, followed by Pharmaceutical drug misusers. Marijuana use was the smallest driver of Selective Absence. Marijuana also showed the smallest standard deviation value for Selective Absence when compared to other kinds of drug users, implying that the absenteesims rates among Marijuana users were more consistent than the absenteesim rates of illicit drug users (who showed the highest sd value). This can be observed in the distributions of the simulated FD values between these groups & non-users. Marijuana users show a high peak and a narrow distribution, compared with illicit drug users who show a wider, more blunted distribution.