library(readr)
library(ggplot2)
library(MASS)
library(sandwich)
library(msm)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: survival
## Parsed with column specification:
## cols(
## .default = col_double(),
## CASE = col_character(),
## SPEAKER_OR = col_character(),
## SENATE_COM = col_character(),
## HOUSE_COM = col_character(),
## GROUP_TYPE = col_character(),
## HEALTH_CJ = col_character(),
## FED_STATE = col_character()
## )
## See spec(...) for full column specifications.
First must check to see if the model is 0-inflated There are a lot of zeros–will likely need a zero inflated model.
mpd <- glm(Dissertation_Dataset$Problem_Demand~Dissertation_Dataset$HEALTH_CJ+offset(log(Dissertation_Dataset$`TOTAL WORDS`)), family=poisson)
summary(mpd)
##
## Call:
## glm(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), family = poisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -31.337 -15.547 -10.138 0.826 75.883
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -2.551988 0.007847 -325.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.959371 0.018091 -53.03
## Dissertation_Dataset$HEALTH_CJOther 0.537455 0.015500 34.68
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 61154 on 149 degrees of freedom
## Residual deviance: 55391 on 147 degrees of freedom
## AIC: 55986
##
## Number of Fisher Scoring iterations: 7
dispersiontest(mpd)
##
## Overdispersion test
##
## data: mpd
## z = 3.0807, p-value = 0.001033
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 678.9701
OVERDISPERSION IS AN ISSUE. We can run an NB and zero-inflated NB next to see if we need a zero-inflated model.
require(MASS)
require(pscl)
## Loading required package: pscl
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
require(boot)
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:msm':
##
## cav
m1 <- glm.nb(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(m1)
##
## Call:
## glm.nb(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.1482280511,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.55893 -1.41633 -0.45537 -0.00702 1.84518
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -2.4600 0.2905 -8.468
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.7417 0.4810 -1.542
## Dissertation_Dataset$HEALTH_CJOther 0.2567 0.6048 0.424
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.123
## Dissertation_Dataset$HEALTH_CJOther 0.671
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1482) family taken to be 1)
##
## Null deviance: 154.15 on 149 degrees of freedom
## Residual deviance: 151.24 on 147 degrees of freedom
## AIC: 1411.2
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1482
## Std. Err.: 0.0182
##
## 2 x log-likelihood: -1403.1660
m1zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(m1zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.83938 -0.53081 -0.36468 -0.02574 6.04491
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -2.07929 0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691 0.30943 -0.022
## Dissertation_Dataset$HEALTH_CJOther 0.21152 0.29472 0.718
## Log(theta) -0.11373 0.15477 -0.735
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.982
## Dissertation_Dataset$HEALTH_CJOther 0.473
## Log(theta) 0.462
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -8.5520 0.2530 -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 1.5440 0.4118 3.75
## Dissertation_Dataset$HEALTH_CJOther 0.2583 0.5378 0.48
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther 0.631051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.8925
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -666.2 on 7 Df
I thought perhaps changing the regressors would help, but TOTAL_WORDS doesn’t seem to perform any better.
m1zi3 <- zeroinfl(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | log(Dissertation_Dataset$`TOTAL WORDS`), dist="negbin", EM=TRUE)
summary(m1zi3)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | log(Dissertation_Dataset$`TOTAL WORDS`),
## dist = "negbin", EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.66209 -0.60863 -0.45309 0.09482 5.29049
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -2.07200 0.14252 -14.538
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02164 0.30559 -0.071
## Dissertation_Dataset$HEALTH_CJOther 0.21014 0.29118 0.722
## Log(theta) -0.08867 0.14700 -0.603
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.944
## Dissertation_Dataset$HEALTH_CJOther 0.470
## Log(theta) 0.546
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -2.1457 2.2950 -0.935
## log(Dissertation_Dataset$`TOTAL WORDS`) 0.2390 0.2975 0.803
## Pr(>|z|)
## (Intercept) 0.350
## log(Dissertation_Dataset$`TOTAL WORDS`) 0.422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.9151
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -670.8 on 6 Df
m1zi2 <- zeroinfl(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist="negbin", EM=TRUE)
summary(m1zi2)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.61694 -0.61630 -0.46552 0.05874 5.28355
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -2.07161 0.14239 -14.549
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.02191 0.30532 -0.072
## Dissertation_Dataset$HEALTH_CJOther 0.21031 0.29091 0.723
## Log(theta) -0.08677 0.14649 -0.592
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.943
## Dissertation_Dataset$HEALTH_CJOther 0.470
## Log(theta) 0.554
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3089 0.1666 -1.855 0.0637 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.9169
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -671.1 on 5 Df
vuong(m1zi, m1zi2)
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw 0.97615785 model1 > model2 0.16449
## AIC-corrected 0.57886892 model1 > model2 0.28134
## BIC-corrected -0.01917711 model2 > model1 0.49235
(Vuong, Q.H. 1989. Likelihood ratio tests for model selection and non-nested hypotheses. Econometrica. 57:307-333.)
It appears that despite the negative intercept, the Vuong statistic thinks that the ZINB with the regressor Health_CJ performs better…
summary(m1zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.83938 -0.53081 -0.36468 -0.02574 6.04491
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -2.07929 0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691 0.30943 -0.022
## Dissertation_Dataset$HEALTH_CJOther 0.21152 0.29472 0.718
## Log(theta) -0.11373 0.15477 -0.735
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.982
## Dissertation_Dataset$HEALTH_CJOther 0.473
## Log(theta) 0.462
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -8.5520 0.2530 -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 1.5440 0.4118 3.75
## Dissertation_Dataset$HEALTH_CJOther 0.2583 0.5378 0.48
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther 0.631051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.8925
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -666.2 on 7 Df
INTERPRETATION: The theta estimate is not significant suggesting that the ZIPoisson model may be more appropriate.
mps2zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(mps2zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.5351 -1.1243 -0.5930 0.1125 12.5517
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value
## (Intercept) -2.241700 0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.208383 0.018092 11.52
## Dissertation_Dataset$HEALTH_CJOther 0.578490 0.015500 37.32
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther <2e-16 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -8.5127 0.2455 -34.678
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 1.5142 0.4066 3.724
## Dissertation_Dataset$HEALTH_CJOther 0.2561 0.5242 0.489
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000196 ***
## Dissertation_Dataset$HEALTH_CJOther 0.625108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 9
## Log-likelihood: -1.552e+04 on 6 Df
The ZIP still gives us large negative intercepts …However, the coefficients are significant which would be great!
Let’s run the vuong statistic comparing the ZIP with the ZINB.
vuong(mps2zi, m1zi)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw -6.97482 model2 > model1 1.5313e-12
## AIC-corrected -6.97482 model2 > model1 1.5313e-12
## BIC-corrected -6.97482 model2 > model1 1.5313e-12
INTERPRETATION: It still perfers the ZINB with Health_CJ as the regressor
mps3zi <- zeroinfl(Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(mps3zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.1580 -1.1535 -0.8531 0.3403 12.0100
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value
## (Intercept) -2.241700 0.007847 -285.69
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.208383 0.018092 11.52
## Dissertation_Dataset$HEALTH_CJOther 0.578490 0.015500 37.32
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther <2e-16 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2955 0.1651 -1.79 0.0735 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 9
## Log-likelihood: -1.552e+04 on 4 Df
Just as with the ZINB without regressors, the ZIP without regressors also does not have the high negative intercept.
vuong(mps3zi, m1zi)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 4 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw -6.982748 model2 > model1 1.4473e-12
## AIC-corrected -6.981275 model2 > model1 1.4626e-12
## BIC-corrected -6.979078 model2 > model1 1.4856e-12
But sadly, the Vuong statistic stil prefers the ZINB with Health_CJ as the regressor.
summary(m1zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.83938 -0.53081 -0.36468 -0.02574 6.04491
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -2.07929 0.14438 -14.401
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.00691 0.30943 -0.022
## Dissertation_Dataset$HEALTH_CJOther 0.21152 0.29472 0.718
## Log(theta) -0.11373 0.15477 -0.735
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.982
## Dissertation_Dataset$HEALTH_CJOther 0.473
## Log(theta) 0.462
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -8.5520 0.2530 -33.81
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 1.5440 0.4118 3.75
## Dissertation_Dataset$HEALTH_CJOther 0.2583 0.5378 0.48
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000177 ***
## Dissertation_Dataset$HEALTH_CJOther 0.631051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.8925
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -666.2 on 7 Df
INTERPRETATION: There was no statistically signiicant difference between the rates at which health, cj and others defined the problem as demand.
mps <- glm(Problem_Supply~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
dispersiontest(mps)
Overdispersion. ###NB
m2 <- glm.nb(Dissertation_Dataset\(Problem_Supply ~ Dissertation_Dataset\)HEALTH_CJ + offset(log(Dissertation_Dataset$TOTAL WORDS
)))
The NB model is not converging which could be because we have so many 0s and so few non-zeros. So lets just go to the zeroinflated NB.
m2zi <- zeroinfl(Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(m2zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.9182 -0.6202 -0.4256 0.4798 3.1584
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.67583 0.15474 -10.830
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.28014 0.22262 1.258
## Dissertation_Dataset$HEALTH_CJOther -0.79410 0.31007 -2.561
## Log(theta) -0.07233 0.13757 -0.526
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.2083
## Dissertation_Dataset$HEALTH_CJOther 0.0104 *
## Log(theta) 0.5991
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -8.0200 0.2351 -34.115
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -2.2631 0.5980 -3.785
## Dissertation_Dataset$HEALTH_CJOther 0.1075 0.5097 0.211
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.000154 ***
## Dissertation_Dataset$HEALTH_CJOther 0.832940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.9302
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -807.6 on 7 Df
exp(-0.79410)
## [1] 0.4519878
INTERPRETATION: There was no statistically significant difference between the degree to which health and law enforcement actors defined the problem as a supply problem (p=0.2). Other actors defined the problem as a supply problem at rate 0.45 times that of health actors (p=0.01).
m2zi55 <- zeroinfl(Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist="negbin", EM=TRUE)
summary(m2zi55)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.7043 -0.7031 -0.4319 0.4587 2.7011
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.67516 0.15373 -10.897
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.28245 0.22119 1.277
## Dissertation_Dataset$HEALTH_CJOther -0.78978 0.30803 -2.564
## Log(theta) -0.05896 0.13361 -0.441
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.2016
## Dissertation_Dataset$HEALTH_CJOther 0.0103 *
## Log(theta) 0.6590
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7700 0.1772 -4.345 1.39e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 0.9427
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -811.4 on 5 Df
Fixes the large negative intercept.
zifpss <-zeroinfl(Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(zifpss)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Problem_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.4560 -1.4455 -0.8636 0.8767 5.3759
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value
## (Intercept) -1.641405 0.006612 -248.24
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.314710 0.008709 36.14
## Dissertation_Dataset$HEALTH_CJOther -0.939094 0.022872 -41.06
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther <2e-16 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7538 0.1750 -4.306 1.66e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 8
## Log-likelihood: -2.227e+04 on 4 Df
The median residual is close to 0 which suggests we are ok regarding overdispersion.
exp(0.314710)
## [1] 1.369862
exp(-0.939095)
## [1] 0.3909815
INTERPRETATION:Law enforcement actors defined the problem as a supply problem at a rate that was 1.37 times greater than health actors (p<0.001). Other actors defined the problem as a supply problem at rate 0.391 times that of health actors (p<0.001).
vuong(m2zi, zifpss)
## NA or numerical zeros or ones encountered in fitted probabilities
## dropping these 5 cases, but proceed with caution
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw 7.884363 model1 > model2 1.5543e-15
## AIC-corrected 7.883409 model1 > model2 1.5543e-15
## BIC-corrected 7.881990 model1 > model2 1.6653e-15
However, when comparing the Vuong z-statistic the ZINB model is better than the ZIP
mdd <- glm(Solutions_Demand~HEALTH_CJ+offset(log(`TOTAL WORDS`)), family=poisson)
dispersiontest(mdd)
##
## Overdispersion test
##
## data: mdd
## z = 7.4918, p-value = 3.396e-14
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 548.6184
Overdispersion is a problem. ###NB
mdd2 <- glm.nb(Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)))
summary(mdd2)
##
## Call:
## glm.nb(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), init.theta = 0.542540841,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0014 -0.8093 -0.2305 0.3149 1.5528
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -1.1242 0.1519 -7.403
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.5659 0.2514 -2.251
## Dissertation_Dataset$HEALTH_CJOther 0.0218 0.3162 0.069
## Pr(>|z|)
## (Intercept) 1.33e-13 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.0244 *
## Dissertation_Dataset$HEALTH_CJOther 0.9450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.5425) family taken to be 1)
##
## Null deviance: 190.42 on 149 degrees of freedom
## Residual deviance: 185.22 on 147 degrees of freedom
## AIC: 2184.7
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.5425
## Std. Err.: 0.0578
##
## 2 x log-likelihood: -2176.6570
mdd2zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(mdd2zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.1046 -0.7501 -0.2926 0.5477 3.6396
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.01947 0.10298 -9.900
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48070 0.17532 -2.742
## Dissertation_Dataset$HEALTH_CJOther -0.08279 0.20602 -0.402
## Log(theta) 0.27171 0.11249 2.415
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.00611 **
## Dissertation_Dataset$HEALTH_CJOther 0.68778
## Log(theta) 0.01572 *
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error
## (Intercept) -1.004e+01 3.792e-01
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 6.166e-01 5.499e-01
## Dissertation_Dataset$HEALTH_CJOther -2.390e+01 1.145e+05
## z value Pr(>|z|)
## (Intercept) -26.486 <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 1.121 0.262
## Dissertation_Dataset$HEALTH_CJOther 0.000 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.3122
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -1049 on 7 Df
mdd3zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1, dist="negbin", EM=TRUE)
summary(mdd3zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.0149 -0.7620 -0.2644 0.5248 3.7014
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.01907 0.10289 -9.905
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.48118 0.17517 -2.747
## Dissertation_Dataset$HEALTH_CJOther -0.08319 0.20585 -0.404
## Log(theta) 0.27339 0.11198 2.441
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.00602 **
## Dissertation_Dataset$HEALTH_CJOther 0.68613
## Log(theta) 0.01463 *
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1287 0.2653 -8.023 1.03e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.3144
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -1052 on 5 Df
mdd4zi <- zeroinfl(Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(mdd4zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Demand ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.889 -2.143 -0.721 1.351 13.241
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error
## (Intercept) -0.943065 0.003743
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -0.800287 0.008406
## Dissertation_Dataset$HEALTH_CJOther -0.035526 0.008799
## z value Pr(>|z|)
## (Intercept) -251.936 < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -95.203 < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther -4.038 5.4e-05 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1252 0.2645 -8.035 9.37e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 7
## Log-likelihood: -2.988e+04 on 4 Df
vuong(mdd4zi, mdd2zi)
exp(-0.48070)
INTERPRETATION of : There was no significant difference between the rate at which health groups and other groups supported demand solutions (p=0.688). However, law enforcement groups supported demand solutions at a rate 0.618 times that of health groups (p=0.006).
(Theta estimate is significant indicating that ZINB is appropriate.)
##
## Overdispersion test
##
## data: msd
## z = 8.5203, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 409.8836
Overdispersion is an issue. ###NB
msd2 <- glm.nb(Dissertation_Dataset\(Solutions_Supply ~ Dissertation_Dataset\)HEALTH_CJ + offset(log(Dissertation_Dataset$TOTAL WORDS
)))
The NB model is not converging so off to the zeroinflated NB.
msd2zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist="negbin", EM=TRUE)
summary(msd2zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)), dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.2698 -0.7914 -0.1692 0.4407 2.8403
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.31314 0.09821 -13.370
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.26094 0.15139 1.724
## Dissertation_Dataset$HEALTH_CJOther -0.52994 0.21739 -2.438
## Log(theta) 0.51736 0.11868 4.359
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.0848 .
## Dissertation_Dataset$HEALTH_CJOther 0.0148 *
## Log(theta) 1.3e-05 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value
## (Intercept) -9.0453 0.2739 -33.030
## Dissertation_Dataset$HEALTH_CJLaw Enforcement -2.6743 1.0542 -2.537
## Dissertation_Dataset$HEALTH_CJOther 0.9944 0.5249 1.895
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.0112 *
## Dissertation_Dataset$HEALTH_CJOther 0.0582 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.6776
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -972.7 on 7 Df
exp(0.26094)
exp(-0.52994)
INTERPRETATION: Law Enforcement actors supported supply solutions at 1.3 times that of health actors (p=0.08). While other actors supported supply solutions at 0.589 that of health actors (p=0.015).
(The estimated theta parameter is significant indicating that the ZINB is better than a ZI-Poisson.)
msd33zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1, dist="negbin", EM=TRUE)
summary(msd33zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1, dist = "negbin",
## EM = TRUE)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -1.0278 -0.8142 -0.2470 0.5246 2.8515
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value
## (Intercept) -1.3131 0.0982 -13.372
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.2610 0.1514 1.724
## Dissertation_Dataset$HEALTH_CJOther -0.5302 0.2174 -2.439
## Log(theta) 0.5177 0.1185 4.369
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.0847 .
## Dissertation_Dataset$HEALTH_CJOther 0.0147 *
## Log(theta) 1.25e-05 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5171 0.2127 -7.134 9.75e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.6782
## Number of iterations in BFGS optimization: 1
## Log-likelihood: -975.5 on 5 Df
msd44zi <- zeroinfl(Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ + offset(log(Dissertation_Dataset$`TOTAL WORDS`))|1)
summary(msd44zi)
##
## Call:
## zeroinfl(formula = Dissertation_Dataset$Solutions_Supply ~ Dissertation_Dataset$HEALTH_CJ +
## offset(log(Dissertation_Dataset$`TOTAL WORDS`)) | 1)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.1309 -1.6560 -0.4207 0.9533 6.3461
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value
## (Intercept) -1.368246 0.004828 -283.43
## Dissertation_Dataset$HEALTH_CJLaw Enforcement 0.446757 0.006597 67.72
## Dissertation_Dataset$HEALTH_CJOther -0.709249 0.017802 -39.84
## Pr(>|z|)
## (Intercept) <2e-16 ***
## Dissertation_Dataset$HEALTH_CJLaw Enforcement <2e-16 ***
## Dissertation_Dataset$HEALTH_CJOther <2e-16 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5163 0.2125 -7.135 9.69e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 7
## Log-likelihood: -2.227e+04 on 4 Df