Opioid Crisis in the US: A State-Level Analysis Using Panel df

Aayush

2021-09-12

1. df Overview

1.1 Missing values

The dataset has no missing values

which(is.na(df))

## integer(0)

1.2 Summary Statistics

summary(df)

##     state              stateid        year            t           cpi       
##  Length:663         Min.   : 1   Min.   :2006   Min.   : 6   Min.   :198.3  
##  Class :character   1st Qu.:13   1st Qu.:2009   1st Qu.: 9   1st Qu.:211.1  
##  Mode  :character   Median :26   Median :2012   Median :12   Median :226.7  
##                     Mean   :26   Mean   :2012   Mean   :12   Mean   :224.0  
##                     3rd Qu.:39   3rd Qu.:2015   3rd Qu.:15   3rd Qu.:233.9  
##                     Max.   :51   Max.   :2018   Max.   :18   Max.   :247.9  
##  mcare_millions  medicaid_spend_actual   overdoses        population      
##  Min.   :  418   Min.   :4.090e+08     Min.   :  10.0   Min.   :  522667  
##  1st Qu.: 2490   1st Qu.:2.075e+09     1st Qu.: 122.5   1st Qu.: 1666720  
##  Median : 6808   Median :5.120e+09     Median : 370.0   Median : 4357847  
##  Mean   :10587   Mean   :8.584e+09     Mean   : 552.9   Mean   : 6145898  
##  3rd Qu.:12426   3rd Qu.:9.636e+09     3rd Qu.: 699.5   3rd Qu.: 6889846  
##  Max.   :77603   Max.   :8.553e+10     Max.   :4293.0   Max.   :39461588  
##     hhincome       state_gdp       unemployment_pct labor_participation_pct
##  Min.   :37714   Min.   :  26802   Min.   : 2.400   Min.   :49.30          
##  1st Qu.:53071   1st Qu.:  78045   1st Qu.: 4.300   1st Qu.:59.10          
##  Median :58793   Median : 186849   Median : 5.400   Median :62.40          
##  Mean   :59593   Mean   : 321957   Mean   : 5.939   Mean   :62.35          
##  3rd Qu.:65624   3rd Qu.: 417780   3rd Qu.: 7.300   3rd Qu.:65.70          
##  Max.   :86345   Max.   :2721651   Max.   :13.700   Max.   :73.20          
##   insured_pct     grad_hs_pct    is_manufacturing_state post_recession  
##  Min.   :75.26   Min.   :78.70   Min.   :0.0000         Min.   :0.0000  
##  1st Qu.:84.75   1st Qu.:84.90   1st Qu.:0.0000         1st Qu.:1.0000  
##  Median :88.10   Median :88.10   Median :0.0000         Median :1.0000  
##  Mean   :87.91   Mean   :87.42   Mean   :0.2157         Mean   :0.8462  
##  3rd Qu.:91.20   3rd Qu.:90.20   3rd Qu.:0.0000         3rd Qu.:1.0000  
##  Max.   :96.80   Max.   :93.00   Max.   :1.0000         Max.   :1.0000  
##  per_capita_state_tax_revenue pct_change_str       medicaidml     
##  Min.   :  905.9              Min.   :-0.77900   Min.   :  408.9  
##  1st Qu.: 3546.5              1st Qu.: 0.00750   1st Qu.: 2075.2  
##  Median : 4067.2              Median : 0.02700   Median : 5120.2  
##  Mean   : 4531.7              Mean   : 0.02559   Mean   : 8584.1  
##  3rd Qu.: 4976.3              3rd Qu.: 0.04700   3rd Qu.: 9636.1  
##  Max.   :14609.0              Max.   : 1.01000   Max.   :85531.0  
##  tmcaremcaidml      tmcaremcaidmlreal Prescription.Rate    ovd_cost      
##  Min.   :   941.4   Min.   :  1177    Min.   : 25.00    Min.   :   6353  
##  1st Qu.:  4743.9   1st Qu.:  5112    1st Qu.: 60.15    1st Qu.:  89125  
##  Median : 12436.2   Median : 14190    Median : 75.20    Median : 324819  
##  Mean   : 19375.8   Mean   : 21247    Mean   : 77.48    Mean   : 421166  
##  3rd Qu.: 22596.7   3rd Qu.: 24453    3rd Qu.: 90.25    3rd Qu.: 596855  
##  Max.   :167578.6   Max.   :167579    Max.   :146.90    Max.   :2921932

1.3 Normalizing overdose deaths

The prescription rates data is dispensing rate per 100 people

# Normalizing opioid deaths per 100,000 people in the U.S
df$ovd_rate = df$overdoses / df$population * 100

2 EDA

2.1 overdoses by year

# Calculating sum of overdoses by state 
ovd_sum = tapply(df$overdoses, df$year, FUN=sum)

fig <-  plot_ly(x = year, y = ovd_sum, type = 'bar') %>%
  layout(title = 'Sum of Opioid Overdose Deaths, 2006-2018',
         plot_bgcolor='#e5ecf6')
fig

2.2 prescription rates by year

PR_sum = tapply(df$Prescription.Rate, df$year, FUN=sum)
fig2 <-  plot_ly(x = year, y = PR_sum, type = 'bar') %>%
  layout(title = 'Sum of Prescription Rates, 2006-2018')
fig2

2.3 Overdoses by State

Ovd_by_state = tapply(df$overdoses, df$state, FUN=sum)
Ovd_by_state = data.frame(Ovd_by_state)

2.4:Boxplots to find df outliers – What do do with this information?

library(ggplot2)
ggplot(data = df,
       mapping = aes(x = reorder(state, overdoses), y = overdoses, fill = state)) +
  geom_boxplot() +
  labs(x=NULL, y="overdoses") +
  coord_flip() +
  geom_hline(yintercept = 493, linetype="dashed", color = "red", size = 1) +
  theme_bw()

2.5: Scatter plots to analyze predictors.

library(gridExtra)
library(cowplot)
sc1 = ggplot(data = df, aes(x =ovd_rate, y = medicaidml)) + geom_point() + geom_smooth(method = lm, fill="blue", color="blue", se = FALSE) + xlab("overdose") + ylab("medicade in millions") + ggtitle("overdoses ~ medicade($ million)")
sc2 = ggplot(data = df, aes(x = overdoses, y = unemployment_pct)) + geom_point() + geom_smooth(method = lm, fill="blue", color="blue", se = FALSE) + xlab("overdose") + ylab("unemployed %") + ggtitle("overdoses ~ unemployment %")
sc3 = ggplot(data = df, aes(x =overdoses, y = Prescription.Rate)) + geom_point() + geom_smooth(method = lm, fill="blue", color="red", se = FALSE) + xlab("overdose") + ylab("Prescription Rate") + ggtitle("overdoses ~ Prescription rate")
sc4 = ggplot(data = df, aes(x =overdoses, y = labor_participation_pct)) + geom_point() + geom_smooth(method = lm, fill="blue", color="blue", se = FALSE) + xlab("overdose") + ylab("HH income") + ggtitle(" overdoses ~ LFPR")

plot_grid(sc1, sc2,sc3,sc4)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

3. Modeling

We will work with a p df frame – meaning panel df. With the indivisual states and time periods

We compare a single state by itself and are not concerned with variation between states.

# We will convert the df frame to a panel df frame. 
library(plm)
df.p = pdata.frame(df, index = c("state", "year"))

3.1 Fixed effects model | Within model as it only focuses on the variation within the indivisual observations.

library(stargazer)
fixeff = plm(ovd_rate~Prescription.Rate + hhincome + unemployment_pct + 
               labor_participation_pct + grad_hs_pct, data = df.p, model = "within")
stargazer(fixeff, type='text')

## 
## ===================================================
##                             Dependent variable:    
##                         ---------------------------
##                                  ovd_rate          
## ---------------------------------------------------
## Prescription.Rate               -0.0001***         
##                                  (0.00002)         
##                                                    
## hhincome                          0.00000          
##                                  (0.00000)         
##                                                    
## unemployment_pct                 -0.00000          
##                                  (0.0001)          
##                                                    
## labor_participation_pct          -0.0002*          
##                                  (0.0001)          
##                                                    
## grad_hs_pct                      0.002***          
##                                  (0.0002)          
##                                                    
## ---------------------------------------------------
## Observations                        663            
## R2                                 0.435           
## Adjusted R2                        0.384           
## F Statistic               93.525*** (df = 5; 607)  
## ===================================================
## Note:                   *p<0.1; **p<0.05; ***p<0.01

3.2 Random effects Model

raneff = plm(ovd_rate~Prescription.Rate + hhincome + unemployment_pct + labor_participation_pct + grad_hs_pct , data = df.p, model = "random")
stargazer(raneff, type='text')

## 
## ===================================================
##                             Dependent variable:    
##                         ---------------------------
##                                  ovd_rate          
## ---------------------------------------------------
## Prescription.Rate               -0.0001***         
##                                  (0.00002)         
##                                                    
## hhincome                         -0.00000          
##                                  (0.00000)         
##                                                    
## unemployment_pct                -0.0005***         
##                                  (0.0001)          
##                                                    
## labor_participation_pct          -0.001***         
##                                  (0.0001)          
##                                                    
## grad_hs_pct                      0.001***          
##                                  (0.0001)          
##                                                    
## Constant                          -0.017           
##                                   (0.015)          
##                                                    
## ---------------------------------------------------
## Observations                        663            
## R2                                 0.361           
## Adjusted R2                        0.356           
## F Statistic                     371.444***         
## ===================================================
## Note:                   *p<0.1; **p<0.05; ***p<0.01

3.3 First diffrence model

firstdiff = plm(ovd_rate~Prescription.Rate + hhincome + unemployment_pct + 
               labor_participation_pct + grad_hs_pct, data = df.p, model = "fd")
stargazer(firstdiff, type='text')

## 
## ===================================================
##                             Dependent variable:    
##                         ---------------------------
##                                  ovd_rate          
## ---------------------------------------------------
## Prescription.Rate               -0.00005***        
##                                  (0.00002)         
##                                                    
## hhincome                         -0.00000          
##                                  (0.00000)         
##                                                    
## unemployment_pct                  -0.0001          
##                                  (0.0001)          
##                                                    
## labor_participation_pct           0.00003          
##                                  (0.0001)          
##                                                    
## grad_hs_pct                       0.0003           
##                                  (0.0002)          
##                                                    
## Constant                         0.001***          
##                                  (0.0001)          
##                                                    
## ---------------------------------------------------
## Observations                        612            
## R2                                 0.034           
## Adjusted R2                        0.026           
## F Statistic               4.224*** (df = 5; 606)   
## ===================================================
## Note:                   *p<0.1; **p<0.05; ***p<0.01

stargazer(firstdiff, raneff, type='text', title="Results", align=TRUE)

## 
## Results
## =========================================================
##                                Dependent variable:       
##                         ---------------------------------
##                                     ovd_rate             
##                                  (1)              (2)    
## ---------------------------------------------------------
## Prescription.Rate            -0.00005***       -0.0001***
##                               (0.00002)        (0.00002) 
##                                                          
## hhincome                       -0.00000         -0.00000 
##                               (0.00000)        (0.00000) 
##                                                          
## unemployment_pct               -0.0001         -0.0005***
##                                (0.0001)         (0.0001) 
##                                                          
## labor_participation_pct        0.00003         -0.001*** 
##                                (0.0001)         (0.0001) 
##                                                          
## grad_hs_pct                     0.0003          0.001*** 
##                                (0.0002)         (0.0001) 
##                                                          
## Constant                       0.001***          -0.017  
##                                (0.0001)         (0.015)  
##                                                          
## ---------------------------------------------------------
## Observations                     612              663    
## R2                              0.034            0.361   
## Adjusted R2                     0.026            0.356   
## F Statistic             4.224*** (df = 5; 606) 371.444***
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01

3.4 Hausman test comparing random and fixed effects

phtest(fixeff, raneff)

## 
##  Hausman Test
## 
## data:  ovd_rate ~ Prescription.Rate + hhincome + unemployment_pct +  ...
## chisq = 47.672, df = 5, p-value = 4.145e-09
## alternative hypothesis: one model is inconsistent

3.5 fixed effects with a lag

fixeff.lag = plm(overdoses~lag(overdoses)+Prescription.Rate, data = df.p, model = "within")
stargazer(fixeff.lag, type='text')

## 
## =============================================
##                       Dependent variable:    
##                   ---------------------------
##                            overdoses         
## ---------------------------------------------
## lag(overdoses)             1.035***          
##                             (0.020)          
##                                              
## Prescription.Rate           -0.114           
##                             (0.526)          
##                                              
## ---------------------------------------------
## Observations                  612            
## R2                           0.861           
## Adjusted R2                  0.848           
## F Statistic       1,728.083*** (df = 2; 559) 
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01

3.6 Cluster standard errors

library(lmtest)
coeftest(fixeff, vcovHC(fixeff, type="HC0", cluster ="group"))

## 
## t test of coefficients:
## 
##                            Estimate  Std. Error t value  Pr(>|t|)    
## Prescription.Rate       -1.2770e-04  3.3429e-05 -3.8202 0.0001471 ***
## hhincome                 2.0629e-08  7.5518e-08  0.2732 0.7848211    
## unemployment_pct        -2.0906e-06  2.3433e-04 -0.0089 0.9928847    
## labor_participation_pct -1.9475e-04  2.5440e-04 -0.7655 0.4442470    
## grad_hs_pct              1.8095e-03  4.9099e-04  3.6854 0.0002488 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1