library(readr)
library(ggplot2)
library(MASS)
library(sandwich)
library(msm)
library(AER)

## Loading required package: car

## Loading required package: carData

## Loading required package: lmtest

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: survival

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CASE = col_character(),
##   SPEAKER_OR = col_character(),
##   SENATE_COM = col_character(),
##   HOUSE_COM = col_character(),
##   GROUP_TYPE = col_character(),
##   HEALTH_CJ = col_character(),
##   FED_STATE = col_character()
## )

## See spec(...) for full column specifications.

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

First, we must check whether the data are zero-inflated. There are a lot of zeros, so we will likely need a zero-inflated model.

Check for overdispersion

# Baseline Poisson rate model: Problem_Demand counts by HEALTH_CJ group, with
# log(TOTAL WORDS) entered as an offset.
mpd <- glm(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)),
  family = poisson
)
summary(mpd)

## 
## Call:
## glm(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -31.337  -15.547  -10.138    0.826   75.883  
## 
## Coefficients:
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.551988   0.007847 -325.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.959371   0.018091  -53.03
## Dissertation_Dataset$HEALTH_CJOther            0.537455   0.015500   34.68
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 61154  on 149  degrees of freedom
## Residual deviance: 55391  on 147  degrees of freedom
## AIC: 55986
## 
## Number of Fisher Scoring iterations: 7

# Overdispersion test on the Poisson fit; the alternative hypothesis is that
# the true dispersion is greater than 1.
dispersiontest(mpd)

## 
##  Overdispersion test
## 
## data:  mpd
## z = 3.0807, p-value = 0.001033
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   678.9701

OVERDISPERSION IS AN ISSUE. We can run an NB and zero-inflated NB next to see if we need a zero-inflated model.

NB

# Attach with library() rather than require(): library() stops with an error
# when a package is missing, whereas require() only returns FALSE and lets the
# script fail later at the first call into the package.
library(MASS)
library(pscl)

## Loading required package: pscl

## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

# library() instead of require() so a missing package fails immediately.
library(boot)

## Loading required package: boot

## 
## Attaching package: 'boot'

## The following object is masked from 'package:survival':
## 
##     aml

## The following object is masked from 'package:car':
## 
##     logit

## The following object is masked from 'package:msm':
## 
##     cav

# Negative binomial version of the Problem_Demand rate model (same predictor
# and offset as the Poisson fit).
m1 <- glm.nb(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`))
)
summary(m1)

## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.1482280511, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.55893  -1.41633  -0.45537  -0.00702   1.84518  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -2.4600     0.2905  -8.468
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.7417     0.4810  -1.542
## Dissertation_Dataset$HEALTH_CJOther             0.2567     0.6048   0.424
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.123    
## Dissertation_Dataset$HEALTH_CJOther              0.671    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1482) family taken to be 1)
## 
##     Null deviance: 154.15  on 149  degrees of freedom
## Residual deviance: 151.24  on 147  degrees of freedom
## AIC: 1411.2
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.1482 
##           Std. Err.:  0.0182 
## 
##  2 x log-likelihood:  -1403.1660

ZINB with REGRESSORS (HEALTH_CJ) in Zero Inflation Model

# Zero-inflated negative binomial for Problem_Demand, fitted with EM = TRUE.
# NOTE(review): per the pscl zeroinfl() documentation, a formula with no `|`
# part reuses the whole right-hand side, offset() term included, for the
# zero-inflation component. That probably explains the large negative
# zero-inflation intercept; confirm the offset is wanted in that component.
m1zi <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)),
  dist = "negbin",
  EM = TRUE
)
summary(m1zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

ZINB with REGRESSORS (TOTAL_WORDS) in Zero Inflation Model

I thought perhaps changing the regressors would help, but TOTAL_WORDS doesn’t seem to perform any better.

# ZINB for Problem_Demand where the zero-inflation component (right of `|`)
# uses log(TOTAL WORDS) as its only regressor.
m1zi3 <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) |
    log(Dissertation_Dataset$`TOTAL WORDS`),
  dist = "negbin",
  EM = TRUE
)
summary(m1zi3)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | log(Dissertation_Dataset$`TOTAL WORDS`), 
##     dist = "negbin", EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66209 -0.60863 -0.45309  0.09482  5.29049 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07200    0.14252 -14.538
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02164    0.30559  -0.071
## Dissertation_Dataset$HEALTH_CJOther            0.21014    0.29118   0.722
## Log(theta)                                    -0.08867    0.14700  -0.603
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.944    
## Dissertation_Dataset$HEALTH_CJOther              0.470    
## Log(theta)                                       0.546    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                         Estimate Std. Error z value
## (Intercept)                              -2.1457     2.2950  -0.935
## log(Dissertation_Dataset$`TOTAL WORDS`)   0.2390     0.2975   0.803
##                                         Pr(>|z|)
## (Intercept)                                0.350
## log(Dissertation_Dataset$`TOTAL WORDS`)    0.422
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9151 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -670.8 on 6 Df

ZINB SIMPLE INFLATION (NO REGRESSORS FOR ZERO COMPONENT)

# ZINB for Problem_Demand with an intercept-only zero-inflation component
# (`| 1`), i.e. one common zero-inflation probability for all observations.
m1zi2 <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1,
  dist = "negbin",
  EM = TRUE
)
summary(m1zi2)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.61694 -0.61630 -0.46552  0.05874  5.28355 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07161    0.14239 -14.549
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02191    0.30532  -0.072
## Dissertation_Dataset$HEALTH_CJOther            0.21031    0.29091   0.723
## Log(theta)                                    -0.08677    0.14649  -0.592
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.943    
## Dissertation_Dataset$HEALTH_CJOther              0.470    
## Log(theta)                                       0.554    
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.3089     0.1666  -1.855   0.0637 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9169 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -671.1 on 5 Df

COMPARING THE SIMPLE ZERO-INFLATION MODEL with the ZINB with Health_CJ as Regressor

# Vuong non-nested test; in the printed output model1 is m1zi (HEALTH_CJ in
# the zero-inflation part) and model2 is m1zi2 (intercept-only zero part).
vuong(m1zi, m1zi2)

## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A p-value
## Raw                  0.97615785 model1 > model2 0.16449
## AIC-corrected        0.57886892 model1 > model2 0.28134
## BIC-corrected       -0.01917711 model2 > model1 0.49235

(Vuong, Q.H. 1989. Likelihood ratio tests for model selection and non-nested hypotheses. Econometrica. 57:307-333.)

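It appears that, despite the large negative zero-inflation intercept, the raw and AIC-corrected Vuong statistics lean toward the ZINB with Health_CJ as the zero-inflation regressor. However, none of the differences is statistically significant (p ≥ 0.16), and the BIC-corrected statistic slightly favors the simpler model, so the test does not clearly distinguish the two models.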

# Reprint the summary of m1zi (ZINB fitted with dist = "negbin", EM = TRUE).
summary(m1zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

INTERPRETATION: The Log(theta) estimate is not significant, suggesting that the zero-inflated Poisson (ZIP) model may be more appropriate.

ZIP

# Zero-inflated model for Problem_Demand with zeroinfl()'s default count
# distribution (no `dist` argument is passed).
# NOTE(review): per the pscl zeroinfl() documentation, a formula with no `|`
# part reuses the whole right-hand side, offset() term included, for the
# zero-inflation component; confirm the offset is wanted in that component.
mps2zi <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`))
)
summary(mps2zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5351 -1.1243 -0.5930  0.1125 12.5517 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.241700   0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.208383   0.018092   11.52
## Dissertation_Dataset$HEALTH_CJOther            0.578490   0.015500   37.32
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5127     0.2455 -34.678
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5142     0.4066   3.724
## Dissertation_Dataset$HEALTH_CJOther             0.2561     0.5242   0.489
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000196 ***
## Dissertation_Dataset$HEALTH_CJOther           0.625108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 9 
## Log-likelihood: -1.552e+04 on 6 Df

The ZIP still gives us large negative zero-inflation intercepts. However, the coefficients are significant, which would be great!

Let’s run the vuong statistic comparing the ZIP with the ZINB.

# Vuong non-nested test; in the printed output model1 is mps2zi and model2 is
# m1zi.
vuong(mps2zi, m1zi)

## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                    -6.97482 model2 > model1 1.5313e-12
## AIC-corrected          -6.97482 model2 > model1 1.5313e-12
## BIC-corrected          -6.97482 model2 > model1 1.5313e-12

INTERPRETATION: It still prefers the ZINB with Health_CJ as the regressor.

Simple ZIP without regressors

# Zero-inflated model for Problem_Demand with zeroinfl()'s default count
# distribution and an intercept-only zero-inflation component (`| 1`).
mps3zi <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1
)
summary(mps3zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1580 -1.1535 -0.8531  0.3403 12.0100 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -2.241700   0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.208383   0.018092   11.52
## Dissertation_Dataset$HEALTH_CJOther            0.578490   0.015500   37.32
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.2955     0.1651   -1.79   0.0735 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 9 
## Log-likelihood: -1.552e+04 on 4 Df

Just as with the ZINB without regressors, the ZIP without regressors also does not have the high negative intercept.

# Vuong non-nested test; in the printed output model1 is mps3zi and model2 is
# m1zi.
vuong(mps3zi, m1zi)

## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                   -6.982748 model2 > model1 1.4473e-12
## AIC-corrected         -6.981275 model2 > model1 1.4626e-12
## BIC-corrected         -6.979078 model2 > model1 1.4856e-12

But sadly, the Vuong statistic still prefers the ZINB with Health_CJ as the regressor.

# Reprint the summary of m1zi, the model the Vuong comparisons favored.
summary(m1zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83938 -0.53081 -0.36468 -0.02574  6.04491 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -2.07929    0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691    0.30943  -0.022
## Dissertation_Dataset$HEALTH_CJOther            0.21152    0.29472   0.718
## Log(theta)                                    -0.11373    0.15477  -0.735
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement    0.982    
## Dissertation_Dataset$HEALTH_CJOther              0.473    
## Log(theta)                                       0.462    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.5520     0.2530  -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.5440     0.4118    3.75
## Dissertation_Dataset$HEALTH_CJOther             0.2583     0.5378    0.48
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther           0.631051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.8925 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -666.2 on 7 Df

INTERPRETATION: There was no statistically significant difference between the rates at which health, CJ, and other actors defined the problem as demand.

Problem is Supply

# Poisson rate model for Problem_Supply by HEALTH_CJ group with a
# log(TOTAL WORDS) offset. The columns are resolved through `data`, so the fit
# no longer depends on Dissertation_Dataset having been attach()ed.
mps <- glm(
  Problem_Supply ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)),
  family = poisson,
  data = Dissertation_Dataset
)
# Test the Poisson fit for overdispersion.
dispersiontest(mps)

Overdispersion.

NB

# Negative binomial rate model for Problem_Supply. The column name contains a
# space, so it must be backtick-quoted; without the backticks the line does
# not parse.
m2 <- glm.nb(
  Dissertation_Dataset$Problem_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`))
)

The NB model is not converging which could be because we have so many 0s and so few non-zeros. So lets just go to the zeroinflated NB.

ZINB with Regressors

# Zero-inflated negative binomial for Problem_Supply, fitted with EM = TRUE.
# NOTE(review): per the pscl zeroinfl() documentation, a formula with no `|`
# part reuses the whole right-hand side, offset() term included, for the
# zero-inflation component. That probably explains the large negative
# zero-inflation intercept; confirm the offset is wanted in that component.
m2zi <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)),
  dist = "negbin",
  EM = TRUE
)
summary(m2zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9182 -0.6202 -0.4256  0.4798  3.1584 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.67583    0.15474 -10.830
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.28014    0.22262   1.258
## Dissertation_Dataset$HEALTH_CJOther           -0.79410    0.31007  -2.561
## Log(theta)                                    -0.07233    0.13757  -0.526
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2083    
## Dissertation_Dataset$HEALTH_CJOther             0.0104 *  
## Log(theta)                                      0.5991    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -8.0200     0.2351 -34.115
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -2.2631     0.5980  -3.785
## Dissertation_Dataset$HEALTH_CJOther             0.1075     0.5097   0.211
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000154 ***
## Dissertation_Dataset$HEALTH_CJOther           0.832940    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9302 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -807.6 on 7 Df

# Rate ratio for the "Other" group: exponentiate the count-model coefficient
# copied by hand from summary(m2zi).
exp(-0.79410)

## [1] 0.4519878

INTERPRETATION: There was no statistically significant difference between the degree to which health and law enforcement actors defined the problem as a supply problem (p=0.2). Other actors defined the problem as a supply problem at rate 0.45 times that of health actors (p=0.01).

ZINB without Regressors

# ZINB for Problem_Supply with an intercept-only zero-inflation component
# (`| 1`).
m2zi55 <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1,
  dist = "negbin",
  EM = TRUE
)
summary(m2zi55)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7043 -0.7031 -0.4319  0.4587  2.7011 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.67516    0.15373 -10.897
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.28245    0.22119   1.277
## Dissertation_Dataset$HEALTH_CJOther           -0.78978    0.30803  -2.564
## Log(theta)                                    -0.05896    0.13361  -0.441
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2016    
## Dissertation_Dataset$HEALTH_CJOther             0.0103 *  
## Log(theta)                                      0.6590    
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7700     0.1772  -4.345 1.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 0.9427 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -811.4 on 5 Df

Fixes the large negative intercept.

ZIP without regressors

# Zero-inflated model for Problem_Supply with zeroinfl()'s default count
# distribution and an intercept-only zero-inflation component (`| 1`).
zifpss <- zeroinfl(
  formula = Dissertation_Dataset$Problem_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1
)
summary(zifpss)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4560 -1.4455 -0.8636  0.8767  5.3759 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -1.641405   0.006612 -248.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.314710   0.008709   36.14
## Dissertation_Dataset$HEALTH_CJOther           -0.939094   0.022872  -41.06
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7538     0.1750  -4.306 1.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 8 
## Log-likelihood: -2.227e+04 on 4 Df

The median residual is close to 0 which suggests we are ok regarding overdispersion.

# Rate ratio for the "Law Enforcement" group: exponentiate the count-model
# coefficient copied by hand from summary(zifpss).
exp(0.314710)

## [1] 1.369862

# Rate ratio for the "Other" group from the zifpss count model.
# NOTE(review): the value is typed by hand and differs in the last digit from
# the -0.939094 printed by summary(zifpss); the rounded result is unaffected.
exp(-0.939095)

## [1] 0.3909815

INTERPRETATION: Law enforcement actors defined the problem as a supply problem at a rate that was 1.37 times greater than health actors (p<0.001). Other actors defined the problem as a supply problem at rate 0.391 times that of health actors (p<0.001).

# Vuong non-nested test; in the printed output model1 is m2zi (ZINB) and
# model2 is zifpss.
vuong(m2zi, zifpss)

## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 5 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A    p-value
## Raw                    7.884363 model1 > model2 1.5543e-15
## AIC-corrected          7.883409 model1 > model2 1.5543e-15
## BIC-corrected          7.881990 model1 > model2 1.6653e-15

However, the Vuong z-statistic indicates that the ZINB model fits better than the ZIP.

Demand is the Solution

# Poisson rate model for Solutions_Demand by HEALTH_CJ group with a
# log(TOTAL WORDS) offset. The columns are resolved through `data`, so the fit
# no longer depends on Dissertation_Dataset having been attach()ed.
mdd <- glm(
  Solutions_Demand ~ HEALTH_CJ + offset(log(`TOTAL WORDS`)),
  family = poisson,
  data = Dissertation_Dataset
)
# Test the Poisson fit for overdispersion.
dispersiontest(mdd)

## 
##  Overdispersion test
## 
## data:  mdd
## z = 7.4918, p-value = 3.396e-14
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   548.6184

Overdispersion is a problem.

NB

# Negative binomial rate model for Solutions_Demand by HEALTH_CJ group, with
# log(TOTAL WORDS) as the offset.
mdd2 <- glm.nb(
  formula = Dissertation_Dataset$Solutions_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`))
)
summary(mdd2)

## 
## Call:
## glm.nb(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.542540841, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0014  -0.8093  -0.2305   0.3149   1.5528  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                    -1.1242     0.1519  -7.403
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -0.5659     0.2514  -2.251
## Dissertation_Dataset$HEALTH_CJOther             0.0218     0.3162   0.069
##                                               Pr(>|z|)    
## (Intercept)                                   1.33e-13 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0244 *  
## Dissertation_Dataset$HEALTH_CJOther             0.9450    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.5425) family taken to be 1)
## 
##     Null deviance: 190.42  on 149  degrees of freedom
## Residual deviance: 185.22  on 147  degrees of freedom
## AIC: 2184.7
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.5425 
##           Std. Err.:  0.0578 
## 
##  2 x log-likelihood:  -2176.6570

ZINB with regressors

# Zero-inflated negative binomial for Solutions_Demand, fitted with EM = TRUE.
# NOTE(review): per the pscl zeroinfl() documentation, a formula with no `|`
# part reuses the whole right-hand side, offset() term included, for the
# zero-inflation component; confirm the offset is wanted in that component.
mdd2zi <- zeroinfl(
  formula = Dissertation_Dataset$Solutions_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)),
  dist = "negbin",
  EM = TRUE
)
summary(mdd2zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1046 -0.7501 -0.2926  0.5477  3.6396 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.01947    0.10298  -9.900
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48070    0.17532  -2.742
## Dissertation_Dataset$HEALTH_CJOther           -0.08279    0.20602  -0.402
## Log(theta)                                     0.27171    0.11249   2.415
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.00611 ** 
## Dissertation_Dataset$HEALTH_CJOther            0.68778    
## Log(theta)                                     0.01572 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                                 Estimate Std. Error
## (Intercept)                                   -1.004e+01  3.792e-01
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  6.166e-01  5.499e-01
## Dissertation_Dataset$HEALTH_CJOther           -2.390e+01  1.145e+05
##                                               z value Pr(>|z|)    
## (Intercept)                                   -26.486   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   1.121    0.262    
## Dissertation_Dataset$HEALTH_CJOther             0.000    1.000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.3122 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -1049 on 7 Df

ZINB without regressors

# ZINB for Solutions_Demand with an intercept-only zero-inflation component
# (`| 1`).
mdd3zi <- zeroinfl(
  formula = Dissertation_Dataset$Solutions_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1,
  dist = "negbin",
  EM = TRUE
)
summary(mdd3zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0149 -0.7620 -0.2644  0.5248  3.7014 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.01907    0.10289  -9.905
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48118    0.17517  -2.747
## Dissertation_Dataset$HEALTH_CJOther           -0.08319    0.20585  -0.404
## Log(theta)                                     0.27339    0.11198   2.441
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.00602 ** 
## Dissertation_Dataset$HEALTH_CJOther            0.68613    
## Log(theta)                                     0.01463 *  
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1287     0.2653  -8.023 1.03e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.3144 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -1052 on 5 Df

ZIP without regressors

# Zero-inflated model for Solutions_Demand with `dist` left at its default;
# the printed summary reports a Poisson count model with log link.
# Count part: HEALTH_CJ plus a log(TOTAL WORDS) offset.
# Zero-inflation part: intercept only (the `| 1` term).
mdd4zi <- zeroinfl(
  Dissertation_Dataset$Solutions_Demand ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1
)
summary(mdd4zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##    Min     1Q Median     3Q    Max 
## -2.889 -2.143 -0.721  1.351 13.241 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error
## (Intercept)                                   -0.943065   0.003743
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.800287   0.008406
## Dissertation_Dataset$HEALTH_CJOther           -0.035526   0.008799
##                                                z value Pr(>|z|)    
## (Intercept)                                   -251.936  < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -95.203  < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             -4.038  5.4e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.1252     0.2645  -8.035 9.37e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 7 
## Log-likelihood: -2.988e+04 on 4 Df

# Vuong test comparing mdd4zi (the ZIP with an intercept-only zero part)
# against mdd2zi.
vuong(mdd4zi, mdd2zi)

# Rate ratio for law enforcement relative to the reference HEALTH_CJ level,
# read from the fitted count-model coefficients rather than retyped from the
# printed summary, so it stays in sync with the model.
# NOTE(review): assumes mdd2zi is the ZINB-with-regressors fit whose summary
# reports -0.48070 for this coefficient -- confirm.
exp(
  coef(mdd2zi, model = "count")[
    "Dissertation_Dataset$HEALTH_CJLaw Enforcement"
  ]
)

INTERPRETATION: There was no significant difference between the rates at which health groups and other groups supported demand solutions (p=0.688). However, law enforcement groups supported demand solutions at a rate 0.618 times that of health groups (p=0.006).
(The theta estimate is significant, indicating that the ZINB is appropriate; the far higher log-likelihood of the ZINB without regressors (-1052) compared with the ZIP without regressors (-2.988e+04) points the same way.)

Supply is the Solution

## 
##  Overdispersion test
## 
## data:  msd
## z = 8.5203, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   409.8836

Overdispersion is an issue.

NB

# Negative binomial GLM for Solutions_Supply on HEALTH_CJ with a
# log(TOTAL WORDS) offset. The column name contains a space, so it must be
# quoted with backticks; without them the line does not parse.
msd2 <- glm.nb(
  Dissertation_Dataset$Solutions_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`))
)

The NB model does not converge, so we move on to the zero-inflated NB.

ZINB with Regressors

# Zero-inflated negative binomial for Solutions_Supply on HEALTH_CJ with a
# log(TOTAL WORDS) offset. The formula has no `|` part, and the printed
# summary shows HEALTH_CJ regressors in the zero-inflation component as well.
# NOTE(review): the offset presumably enters the zero-inflation component
# too when no `|` part is given -- confirm this is intended.
msd2zi <- zeroinfl(
  Dissertation_Dataset$Solutions_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)),
  dist = "negbin",
  EM = TRUE
)
summary(msd2zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2698 -0.7914 -0.1692  0.4407  2.8403 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                   -1.31314    0.09821 -13.370
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.26094    0.15139   1.724
## Dissertation_Dataset$HEALTH_CJOther           -0.52994    0.21739  -2.438
## Log(theta)                                     0.51736    0.11868   4.359
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0848 .  
## Dissertation_Dataset$HEALTH_CJOther             0.0148 *  
## Log(theta)                                     1.3e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -9.0453     0.2739 -33.030
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  -2.6743     1.0542  -2.537
## Dissertation_Dataset$HEALTH_CJOther             0.9944     0.5249   1.895
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0112 *  
## Dissertation_Dataset$HEALTH_CJOther             0.0582 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.6776 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -972.7 on 7 Df

# Rate ratios for law enforcement and other actors relative to the reference
# HEALTH_CJ level, taken from msd2zi's count-model coefficients instead of
# estimates retyped from the printed summary.
exp(
  coef(msd2zi, model = "count")[c(
    "Dissertation_Dataset$HEALTH_CJLaw Enforcement",
    "Dissertation_Dataset$HEALTH_CJOther"
  )]
)

INTERPRETATION: Law enforcement actors supported supply solutions at a rate 1.3 times that of health actors (p=0.08), while other actors supported supply solutions at a rate 0.589 times that of health actors (p=0.015).
(The estimated theta parameter is significant, indicating that the ZINB is better than a ZI-Poisson; the log-likelihoods of the ZINB (-975.5) and the ZIP (-2.227e+04) without regressors point the same way.)

ZINB without Regressors

# Zero-inflated negative binomial for Solutions_Supply.
# Count part: HEALTH_CJ plus a log(TOTAL WORDS) offset.
# Zero-inflation part: intercept only (the `| 1` term).
msd33zi <- zeroinfl(
  Dissertation_Dataset$Solutions_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1,
  dist = "negbin",
  EM = TRUE
)
summary(msd33zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin", 
##     EM = TRUE)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0278 -0.8142 -0.2470  0.5246  2.8515 
## 
## Count model coefficients (negbin with log link):
##                                               Estimate Std. Error z value
## (Intercept)                                    -1.3131     0.0982 -13.372
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.2610     0.1514   1.724
## Dissertation_Dataset$HEALTH_CJOther            -0.5302     0.2174  -2.439
## Log(theta)                                      0.5177     0.1185   4.369
##                                               Pr(>|z|)    
## (Intercept)                                    < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   0.0847 .  
## Dissertation_Dataset$HEALTH_CJOther             0.0147 *  
## Log(theta)                                    1.25e-05 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5171     0.2127  -7.134 9.75e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Theta = 1.6782 
## Number of iterations in BFGS optimization: 1 
## Log-likelihood: -975.5 on 5 Df

ZIP without Regressors

# Zero-inflated model for Solutions_Supply with `dist` left at its default;
# the printed summary reports a Poisson count model with log link.
# Count part: HEALTH_CJ plus a log(TOTAL WORDS) offset.
# Zero-inflation part: intercept only (the `| 1` term).
msd44zi <- zeroinfl(
  Dissertation_Dataset$Solutions_Supply ~
    Dissertation_Dataset$HEALTH_CJ +
    offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1
)
summary(msd44zi)

## 
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + 
##     offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1309 -1.6560 -0.4207  0.9533  6.3461 
## 
## Count model coefficients (poisson with log link):
##                                                Estimate Std. Error z value
## (Intercept)                                   -1.368246   0.004828 -283.43
## Dissertation_Dataset$HEALTH_CJLaw Enforcement  0.446757   0.006597   67.72
## Dissertation_Dataset$HEALTH_CJOther           -0.709249   0.017802  -39.84
##                                               Pr(>|z|)    
## (Intercept)                                     <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement   <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther             <2e-16 ***
## 
## Zero-inflation model coefficients (binomial with logit link):
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5163     0.2125  -7.135 9.69e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 7 
## Log-likelihood: -2.227e+04 on 4 Df

3_8_19_Revised_Dissertation

Taleed El-Sabawi

3/8/2019

Is there a statistically significant difference between Health_CJ actors in Problem_Demand? (MPD)

Check for overdispersion

NB

ZINB with REGRESSORS (HEALTH_CJ) in Zero Inflation Model

ZINB with REGRESSORS (TOTAL_WORDS) in Zero Inflation Model

ZINB SIMPLE INFLATION (NO REGRESSORS FOR ZERO COMPONENT)

COMPARING THE SIMPLE ZERO-INFLATION MODEL with the ZINB with Health_CJ as Regressor

ZIP

Simple ZIP without regressors

Problem is Supply

ZINB with Regressors

ZINB without Regressors

ZIP without regressors

Demand is the Solution

ZINB with regressors

ZINB without regressors

ZIP without regressors

Supply is the Solution

ZINB with Regressors

ZINB without Regressors

ZIP without Regressors