library(readr)
library(ggplot2)
library(MASS)
library(sandwich)
library(msm)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: survival
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CASE = col_character(),
##   SPEAKER_OR = col_character(),
##   SENATE_COM = col_character(),
##   HOUSE_COM = col_character(),
##   GROUP_TYPE = col_character(),
##   HEALTH_CJ = col_character(),
##   FED_STATE = col_character()
## )
## See spec(...) for full column specifications.

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

First must check to see if the model is 0-inflated There are a lot of zeros–will likely need a zero inflated model.

Check for overdispersion

mpd <- glm(Dissertation_Dataset$Problem_Demand~Dissertation_Dataset$HEALTH_CJ+offset(log(Dissertation_Dataset$`TOTAL WORDS`)), family=poisson)
summary(mpd)
## 
## Call:
## glm(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.551988   0.007847 -325.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03
## Dissertation_Dataset$HEALTH_CJOther            0.537455   0.015500   34.68
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7
dispersiontest(mpd)
## 
##  Overdispersion test
## 
## data:  mpd
## z = 3.0807, p-value = 0.001033
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   678.9701

OVERDISPERSION IS AN ISSUE. We can run an NB and zero-inflated NB next to see if we need a zero-inflated model.

NB

require(MASS)
require(pscl)
## Loading required package: pscl
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
require(boot)
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
## 
##     aml
## The following object is masked from 'package:car':
## 
##     logit
## The following object is masked from 'package:msm':
## 
##     cav
m1 <- glm.nb(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(m1)
## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.1482280511, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.55893  -1.41633  -0.45537  -0.00702   1.84518  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -2.4600     0.2905  -8.468
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.7417     0.4810  -1.542
## Dissertation_Dataset$HEALTH_CJOther             0.2567     0.6048   0.424
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.123    
## Dissertation_Dataset$HEALTH_CJOther              0.671    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1482) family taken to be 1)
## 
##     Null deviance: 154.15  on 149  degrees of freedom
## Residual deviance: 151.24  on 147  degrees of freedom
## AIC: 1411.2
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.1482 
##           Std. Err.:  0.0182 
## 
##  2 x log-likelihood:  -1403.1660

ZINB with REGRESSORS (HEALTH_CJ) in Zero Inflation Model

m1zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(m1zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

ZINB with REGRESSORS (TOTAL_WORDS) in Zero Inflation Model

I thought perhaps changing the regressors would help, but TOTAL_WORDS doesn’t seem to perform any better.

m1zi3 <- zeroinfl(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | log(Dissertation_Dataset$`TOTAL WORDS`), dist="negbin", EM=TRUE)
summary(m1zi3)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | log(Dissertation_Dataset$`TOTAL WORDS`), 
##     dist = "negbin", EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66209 -0.60863 -0.45309  0.09482  5.29049 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07200    0.14252 -14.538
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02164    0.30559  -0.071
## Dissertation_Dataset$HEALTH_CJOther            0.21014    0.29118   0.722
## Log(theta)                                    -0.08867    0.14700  -0.603
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.944    
## Dissertation_Dataset$HEALTH_CJOther              0.470    
## Log(theta)                                       0.546    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                         Estimate Std. Error z value
## (Intercept)                              -2.1457     2.2950  -0.935
## log(Dissertation_Dataset$`TOTAL WORDS`)   0.2390     0.2975   0.803
##                                         Pr(>|z|)
## (Intercept)                                0.350
## log(Dissertation_Dataset$`TOTAL WORDS`)    0.422
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9151 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -670.8 on 6 Df

ZINB SIMPLE INFLATION (NO REGRESSORS FOR ZERO COMPONENT)

m1zi2 <- zeroinfl(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist="negbin", EM=TRUE)
summary(m1zi2)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.61694 -0.61630 -0.46552  0.05874  5.28355 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07161    0.14239 -14.549
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02191    0.30532  -0.072
## Dissertation_Dataset$HEALTH_CJOther            0.21031    0.29091   0.723
## Log(theta)                                    -0.08677    0.14649  -0.592
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.943    
## Dissertation_Dataset$HEALTH_CJOther              0.470    
## Log(theta)                                       0.554    
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.3089     0.1666  -1.855   0.0637 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9169 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -671.1 on 5 Df

COMPARING THE SIMPLE ZERO-INFLATION MODEL with the ZINB with Health_CJ as Regressor

vuong(m1zi, m1zi2)
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A p-value
## Raw                  0.97615785 model1 > model2 0.16449
## AIC-corrected        0.57886892 model1 > model2 0.28134
## BIC-corrected       -0.01917711 model2 > model1 0.49235

(Vuong, Q.H. 1989. Likelihood ratio tests for model selection and non-nested hypotheses. Econometrica. 57:307-333.)

It appears that despite the negative intercept, the Vuong statistic thinks that the ZINB with the regressor Health_CJ performs better…

summary(m1zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

INTERPRETATION: The theta estimate is not significant suggesting that the ZIPoisson model may be more appropriate.

ZIP

mps2zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ +offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(mps2zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5351 -1.1243 -0.5930  0.1125 12.5517 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.241700   0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.208383   0.018092   11.52
## Dissertation_Dataset$HEALTH_CJOther            0.578490   0.015500   37.32
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5127     0.2455 -34.678
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5142     0.4066   3.724
## Dissertation_Dataset$HEALTH_CJOther             0.2561     0.5242   0.489
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000196 ***
## Dissertation_Dataset$HEALTH_CJOther           0.625108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 9 
## Log-likelihood: -1.552e+04 on 6 Df

The ZIP still gives us large negative intercepts …However, the coefficients are significant which would be great!

Let’s run the vuong statistic comparing the ZIP with the ZINB.

vuong(mps2zi, m1zi)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                    -6.97482 model2 > model1 1.5313e-12
## AIC-corrected          -6.97482 model2 > model1 1.5313e-12
## BIC-corrected          -6.97482 model2 > model1 1.5313e-12

INTERPRETATION: It still perfers the ZINB with Health_CJ as the regressor

Simple ZIP without regressors

mps3zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~  Dissertation_Dataset$HEALTH_CJ +offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(mps3zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1580 -1.1535 -0.8531  0.3403 12.0100 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.241700   0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.208383   0.018092   11.52
## Dissertation_Dataset$HEALTH_CJOther            0.578490   0.015500   37.32
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.2955     0.1651   -1.79   0.0735 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 9 
## Log-likelihood: -1.552e+04 on 4 Df

Just as with the ZINB without regressors, the ZIP without regressors also does not have the high negative intercept.

vuong(mps3zi, m1zi)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                   -6.982748 model2 > model1 1.4473e-12
## AIC-corrected         -6.981275 model2 > model1 1.4626e-12
## BIC-corrected         -6.979078 model2 > model1 1.4856e-12

But sadly, the Vuong statistic stil prefers the ZINB with Health_CJ as the regressor.

summary(m1zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

INTERPRETATION: There was no statistically signiicant difference between the rates at which health, cj and others defined the problem as demand.

Problem is Supply

mps <- glm(Problem_Supply~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
dispersiontest(mps)

Overdispersion. ###NB

m2 <- glm.nb(Dissertation_Dataset\(Problem_Supply ~ Dissertation_Dataset\)HEALTH_CJ + offset(log(Dissertation_Dataset$TOTAL WORDS)))

The NB model is not converging which could be because we have so many 0s and so few non-zeros. So lets just go to the zeroinflated NB.

ZINB with Regressors

m2zi <- zeroinfl(Dissertation_Dataset$Problem_Supply ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(m2zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9182 -0.6202 -0.4256  0.4798  3.1584 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.67583    0.15474 -10.830
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.28014    0.22262   1.258
## Dissertation_Dataset$HEALTH_CJOther           -0.79410    0.31007  -2.561
## Log(theta)                                    -0.07233    0.13757  -0.526
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2083    
## Dissertation_Dataset$HEALTH_CJOther             0.0104 *  
## Log(theta)                                      0.5991    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.0200     0.2351 -34.115
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -2.2631     0.5980  -3.785
## Dissertation_Dataset$HEALTH_CJOther             0.1075     0.5097   0.211
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000154 ***
## Dissertation_Dataset$HEALTH_CJOther           0.832940    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9302 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -807.6 on 7 Df
exp(-0.79410)
## [1] 0.4519878

INTERPRETATION: There was no statistically significant difference between the degree to which health and law enforcement actors defined the problem as a supply problem (p=0.2). Other actors defined the problem as a supply problem at rate 0.45 times that of health actors (p=0.01).

ZINB without Regressors

m2zi55 <- zeroinfl(Dissertation_Dataset$Problem_Supply ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist="negbin", EM=TRUE)
summary(m2zi55)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7043 -0.7031 -0.4319  0.4587  2.7011 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.67516    0.15373 -10.897
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.28245    0.22119   1.277
## Dissertation_Dataset$HEALTH_CJOther           -0.78978    0.30803  -2.564
## Log(theta)                                    -0.05896    0.13361  -0.441
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2016    
## Dissertation_Dataset$HEALTH_CJOther             0.0103 *  
## Log(theta)                                      0.6590    
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7700     0.1772  -4.345 1.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9427 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -811.4 on 5 Df

Fixes the large negative intercept.

ZIP without regressors

zifpss <-zeroinfl(Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(zifpss)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4560 -1.4455 -0.8636  0.8767  5.3759 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -1.641405   0.006612 -248.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.314710   0.008709   36.14
## Dissertation_Dataset$HEALTH_CJOther           -0.939094   0.022872  -41.06
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7538     0.1750  -4.306 1.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 8 
## Log-likelihood: -2.227e+04 on 4 Df

The median residual is close to 0 which suggests we are ok regarding overdispersion.

exp(0.314710)
## [1] 1.369862
exp(-0.939095)
## [1] 0.3909815

INTERPRETATION:Law enforcement actors defined the problem as a supply problem at a rate that was 1.37 times greater than health actors (p<0.001). Other actors defined the problem as a supply problem at rate 0.391 times that of health actors (p<0.001).

vuong(m2zi, zifpss)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 5 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                    7.884363 model1 > model2 1.5543e-15
## AIC-corrected          7.883409 model1 > model2 1.5543e-15
## BIC-corrected          7.881990 model1 > model2 1.6653e-15

However, when comparing the Vuong z-statistic the ZINB model is better than the ZIP

Demand is the Solution

mdd <- glm(Solutions_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
dispersiontest(mdd)
## 
##  Overdispersion test
## 
## data:  mdd
## z = 7.4918, p-value = 3.396e-14
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   548.6184

Overdispersion is a problem. ###NB

mdd2 <- glm.nb(Dissertation_Dataset$Solutions_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(mdd2)
## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.542540841, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0014  -0.8093  -0.2305   0.3149   1.5528  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -1.1242     0.1519  -7.403
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.5659     0.2514  -2.251
## Dissertation_Dataset$HEALTH_CJOther             0.0218     0.3162   0.069
##                                               Pr(>|z|)    
## (Intercept)                                   1.33e-13 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0244 *  
## Dissertation_Dataset$HEALTH_CJOther             0.9450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.5425) family taken to be 1)
## 
##     Null deviance: 190.42  on 149  degrees of freedom
## Residual deviance: 185.22  on 147  degrees of freedom
## AIC: 2184.7
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.5425 
##           Std. Err.:  0.0578 
## 
##  2 x log-likelihood:  -2176.6570

ZINB with regressors

mdd2zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(mdd2zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1046 -0.7501 -0.2926  0.5477  3.6396 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.01947    0.10298  -9.900
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48070    0.17532  -2.742
## Dissertation_Dataset$HEALTH_CJOther           -0.08279    0.20602  -0.402
## Log(theta)                                     0.27171    0.11249   2.415
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.00611 ** 
## Dissertation_Dataset$HEALTH_CJOther            0.68778    
## Log(theta)                                     0.01572 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                                 Estimate Std. Error
## (Intercept)                                   -1.004e+01  3.792e-01
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  6.166e-01  5.499e-01
## Dissertation_Dataset$HEALTH_CJOther           -2.390e+01  1.145e+05
##                                               z value Pr(>|z|)    
## (Intercept)                                   -26.486   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.121    0.262    
## Dissertation_Dataset$HEALTH_CJOther             0.000    1.000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.3122 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -1049 on 7 Df

ZINB without regressors

mdd3zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1, dist="negbin", EM=TRUE)
summary(mdd3zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0149 -0.7620 -0.2644  0.5248  3.7014 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.01907    0.10289  -9.905
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48118    0.17517  -2.747
## Dissertation_Dataset$HEALTH_CJOther           -0.08319    0.20585  -0.404
## Log(theta)                                     0.27339    0.11198   2.441
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.00602 ** 
## Dissertation_Dataset$HEALTH_CJOther            0.68613    
## Log(theta)                                     0.01463 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1287     0.2653  -8.023 1.03e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.3144 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -1052 on 5 Df

ZIP without regressors

mdd4zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(mdd4zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##    Min     1Q Median     3Q    Max 
## -2.889 -2.143 -0.721  1.351 13.241 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error
## (Intercept)                                   -0.943065   0.003743
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.800287   0.008406
## Dissertation_Dataset$HEALTH_CJOther           -0.035526   0.008799
##                                                z value Pr(>|z|)    
## (Intercept)                                   -251.936  < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -95.203  < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             -4.038  5.4e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1252     0.2645  -8.035 9.37e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 7 
## Log-likelihood: -2.988e+04 on 4 Df

vuong(mdd4zi, mdd2zi)

exp(-0.48070)

INTERPRETATION of : There was no significant difference between the rate at which health groups and other groups supported demand solutions (p=0.688). However, law enforcement groups supported demand solutions at a rate 0.618 times that of health groups (p=0.006).
(Theta estimate is significant indicating that ZINB is appropriate.)

Supply is the Solution

## 
##  Overdispersion test
## 
## data:  msd
## z = 8.5203, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   409.8836

Overdispersion is an issue. ###NB

msd2 <- glm.nb(Dissertation_Dataset\(Solutions_Supply ~ Dissertation_Dataset\)HEALTH_CJ + offset(log(Dissertation_Dataset$TOTAL WORDS)))

The NB model is not converging so off to the zeroinflated NB.

ZINB with Regressors

msd2zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(msd2zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2698 -0.7914 -0.1692  0.4407  2.8403 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.31314    0.09821 -13.370
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.26094    0.15139   1.724
## Dissertation_Dataset$HEALTH_CJOther           -0.52994    0.21739  -2.438
## Log(theta)                                     0.51736    0.11868   4.359
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0848 .  
## Dissertation_Dataset$HEALTH_CJOther             0.0148 *  
## Log(theta)                                     1.3e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -9.0453     0.2739 -33.030
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -2.6743     1.0542  -2.537
## Dissertation_Dataset$HEALTH_CJOther             0.9944     0.5249   1.895
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0112 *  
## Dissertation_Dataset$HEALTH_CJOther             0.0582 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.6776 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -972.7 on 7 Df
exp(0.26094)
exp(-0.52994)

INTERPRETATION: Law Enforcement actors supported supply solutions at 1.3 times that of health actors (p=0.08). While other actors supported supply solutions at 0.589 that of health actors (p=0.015).
(The estimated theta parameter is significant indicating that the ZINB is better than a ZI-Poisson.)

ZINB without Regressors

msd33zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1, dist="negbin", EM=TRUE)
summary(msd33zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0278 -0.8142 -0.2470  0.5246  2.8515 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -1.3131     0.0982 -13.372
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2610     0.1514   1.724
## Dissertation_Dataset$HEALTH_CJOther            -0.5302     0.2174  -2.439
## Log(theta)                                      0.5177     0.1185   4.369
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0847 .  
## Dissertation_Dataset$HEALTH_CJOther             0.0147 *  
## Log(theta)                                    1.25e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5171     0.2127  -7.134 9.75e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.6782 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -975.5 on 5 Df

ZIP without Regressors

msd44zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~  Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(msd44zi)
## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1309 -1.6560 -0.4207  0.9533  6.3461 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -1.368246   0.004828 -283.43
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.446757   0.006597   67.72
## Dissertation_Dataset$HEALTH_CJOther           -0.709249   0.017802  -39.84
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5163     0.2125  -7.135 9.69e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 7 
## Log-likelihood: -2.227e+04 on 4 Df