library(car)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(haven)
dat<-read_xpt("C:/Users/Monica/Documents/CSparks_StatsII/CNTY05.xpt")

a) My count outcome variable will be the self-reporting poor physical health days in the past month. This is the wording in the survey, “During the past 30 days, for about how many days did poor physical or mental health keep you from doing your usual activities, such as self-care, work, or recreation?

b) My research question is as follows: How do threats of violence and acts of violence from one’s intimate partner, marital status and education level affect the number of self-reported poor physical health days?

c) My predictor variables are 1) the threat of violence; 2) the act of violence; 3) marital status; and 4)education level

d) An offset term is necessary because the observations do not have the same level of exposure or risk.

Rename variables

nams<-names(dat)
head(nams, n=10)
##  [1] "_CNTYNAM" "_STATE"   "FMONTH"   "IDATE"    "IMONTH"   "IDAY"    
##  [7] "IYEAR"    "INTVID"   "DISPCODE" "SEQNO"
myvariables<-tolower(names(dat))
names(dat)<-myvariables

Recode for poor mental health (outcome variable), with 1= low number of self-reported poor mental health days, 2= moderate number of self-reported poor mental health days, and 3= high number of self-reported poor mental health days

dat$poorhealth<-recode (dat$poorhlth,recodes= "88=0; 77=NA; 99=NA")
hist(dat$poorhealth)

summary(dat$poorhealth)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    4.32    3.00   30.00   81341

Recode for intimate partner threat (predictor variable)

dat$threat<-recode (dat$ipvthrat, recodes = "7:9=NA; 1='YESthreat'; 2='NOthreat'")

Recode for intimate partner hurt (predictor variable)

dat$hurt<-recode(dat$ipvphhrt, recodes = "7:9=NA; 1='YEShurt'; 2='NOhurt'")

Recode for marital status (predictor variable)

dat$marst<-recode(dat$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
dat$marst<-relevel(dat$marst, ref='married')

Recode for education level

dat$educ<-recode(dat$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor.result=T)
dat$educ<-relevel(dat$educ, ref='2hsgrad')

Create survey design with subset

options (survey.lonely.psu= "adjust")
library (dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dat$strata<-dat$`_ststr`
dat$weight<-dat$`_cntywt`
mysub<-dat%>%
  select(threat, hurt, marst, educ, poorhealth, weight, strata)%>%
  filter(complete.cases(.))
options (survey.lonely.psu= "adjust")
mydesign<-svydesign(ids = ~1, strata=~strata, weights=~weight, data = mysub)
options(survey.lonely.psu="adjust")
mydesign<-svydesign(ids=~1, strata=~`_ststr`, weights=~`_cntywt`, data= dat[is.na(dat$`_cntywt`)==F,])

Poisson regression model

svyhist(~poorhealth, mydesign)

svyby(~poorhealth, ~educ+marst, mydesign, svymean, na.rm=T)
##                        educ     marst poorhealth         se
## 2hsgrad.married     2hsgrad   married   4.069918 0.15006838
## 0Prim.married         0Prim   married   5.616256 0.77534379
## 1somehs.married     1somehs   married   4.753525 0.34769098
## 3somecol.married   3somecol   married   3.527889 0.13921007
## 4colgrad.married   4colgrad   married   2.398252 0.08182773
## 2hsgrad.cohab       2hsgrad     cohab   3.215482 0.41226458
## 0Prim.cohab           0Prim     cohab   2.815098 0.71515707
## 1somehs.cohab       1somehs     cohab   5.174254 1.50698845
## 3somecol.cohab     3somecol     cohab   2.980839 0.33898022
## 4colgrad.cohab     4colgrad     cohab   2.416472 0.26800120
## 2hsgrad.divorced    2hsgrad  divorced   5.893915 0.34953037
## 0Prim.divorced        0Prim  divorced   7.775936 1.67000048
## 1somehs.divorced    1somehs  divorced   7.728887 0.75608184
## 3somecol.divorced  3somecol  divorced   5.240588 0.25411632
## 4colgrad.divorced  4colgrad  divorced   4.292953 0.24932354
## 2hsgrad.nm          2hsgrad        nm   3.416436 0.20398341
## 0Prim.nm              0Prim        nm   3.810604 0.70835439
## 1somehs.nm          1somehs        nm   4.170520 0.44238739
## 3somecol.nm        3somecol        nm   3.389757 0.19225051
## 4colgrad.nm        4colgrad        nm   2.385224 0.11651013
## 2hsgrad.separated   2hsgrad separated   6.824594 0.75061220
## 0Prim.separated       0Prim separated   4.571044 1.09218044
## 1somehs.separated   1somehs separated   7.380729 1.20354147
## 3somecol.separated 3somecol separated   6.818606 1.05091741
## 4colgrad.separated 4colgrad separated   4.797955 0.71387902
## 2hsgrad.widowed     2hsgrad   widowed   5.826150 0.29683858
## 0Prim.widowed         0Prim   widowed   9.166265 1.38831079
## 1somehs.widowed     1somehs   widowed   6.939729 0.57991900
## 3somecol.widowed   3somecol   widowed   5.386401 0.37225798
## 4colgrad.widowed   4colgrad   widowed   4.012188 0.33553341
svyby(~poorhealth, ~hurt+threat, mydesign, svymean, na.rm=T)
##                      hurt    threat poorhealth        se
## NOhurt.NOthreat    NOhurt  NOthreat   3.606539 0.1441362
## YEShurt.NOthreat  YEShurt  NOthreat   4.261640 0.5620292
## NOhurt.YESthreat   NOhurt YESthreat   3.921826 0.7701679
## YEShurt.YESthreat YEShurt YESthreat   5.414269 0.3215802

Poisson glm fit to survey data

fit1<-svyglm(poorhealth~educ + marst + hurt, design=mydesign, family=poisson)
summary(fit1)
## 
## Call:
## svyglm(formula = poorhealth ~ educ + marst + hurt, design = mydesign, 
##     family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~`_ststr`, weights = ~`_cntywt`, 
##     data = dat[is.na(dat$`_cntywt`) == F, ])
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.38910    0.06570  21.143  < 2e-16 ***
## educ0Prim      -0.03814    0.16200  -0.235 0.813864    
## educ1somehs     0.38791    0.12958   2.994 0.002762 ** 
## educ3somecol   -0.12978    0.08042  -1.614 0.106597    
## educ4colgrad   -0.55029    0.07541  -7.297 3.08e-13 ***
## marstcohab      0.07368    0.17360   0.424 0.671281    
## marstdivorced   0.49101    0.08081   6.076 1.26e-09 ***
## marstnm        -0.10347    0.09311  -1.111 0.266468    
## marstseparated  0.52671    0.17562   2.999 0.002711 ** 
## marstwidowed    0.31809    0.09393   3.387 0.000709 ***
## hurtYEShurt     0.24008    0.06767   3.548 0.000390 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 9.964496)
## 
## Number of Fisher Scoring iterations: 6

Poisson model “risk ratios”

round(exp(summary(fit1)$coef[-1,1]), 3)
##      educ0Prim    educ1somehs   educ3somecol   educ4colgrad     marstcohab 
##          0.963          1.474          0.878          0.577          1.076 
##  marstdivorced        marstnm marstseparated   marstwidowed    hurtYEShurt 
##          1.634          0.902          1.693          1.375          1.271

Based on the results above, people with less than a HS degree had 47% higher mean counts of poor health days than those with a HS degree. People who were divorced or separated had 60% higher mean counts of poor health days than those who are married. People who were hurt by their intimate partner, had 27% higher mean counts of poor health days than those who were not hurt by their intimate partner.

Check for overdispersion

fit2<-glm(poorhealth~ educ +marst +hurt, data=dat, family=poisson)
summary(fit2)
## 
## Call:
## glm(formula = poorhealth ~ educ + marst + hurt, family = poisson, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.8021  -2.8600  -2.3040  -0.3231   9.9941  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     1.408518   0.008270 170.308  < 2e-16 ***
## educ0Prim       0.356645   0.020164  17.687  < 2e-16 ***
## educ1somehs     0.323785   0.013445  24.082  < 2e-16 ***
## educ3somecol   -0.108043   0.009681 -11.160  < 2e-16 ***
## educ4colgrad   -0.432376   0.010242 -42.217  < 2e-16 ***
## marstcohab     -0.164591   0.024037  -6.847 7.52e-12 ***
## marstdivorced   0.376403   0.010075  37.361  < 2e-16 ***
## marstnm        -0.007879   0.011169  -0.705    0.481    
## marstseparated  0.369696   0.020802  17.772  < 2e-16 ***
## marstwidowed    0.280114   0.012436  22.524  < 2e-16 ***
## hurtYEShurt     0.303407   0.008590  35.321  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 190006  on 16128  degrees of freedom
## Residual deviance: 181422  on 16118  degrees of freedom
##   (155554 observations deleted due to missingness)
## AIC: 205853
## 
## Number of Fisher Scoring iterations: 6
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 3.354979
1-pchisq(fit2$deviance, df= fit2$df.residual)
## [1] 0
fit3<-glm(poorhealth~educ + marst + hurt, data=dat, family=quasipoisson)
summary(fit3)
## 
## Call:
## glm(formula = poorhealth ~ educ + marst + hurt, family = quasipoisson, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.8021  -2.8600  -2.3040  -0.3231   9.9941  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.408518   0.033094  42.561  < 2e-16 ***
## educ0Prim       0.356645   0.080689   4.420 9.93e-06 ***
## educ1somehs     0.323785   0.053802   6.018 1.80e-09 ***
## educ3somecol   -0.108043   0.038739  -2.789  0.00529 ** 
## educ4colgrad   -0.432376   0.040983 -10.550  < 2e-16 ***
## marstcohab     -0.164591   0.096185  -1.711  0.08707 .  
## marstdivorced   0.376403   0.040314   9.337  < 2e-16 ***
## marstnm        -0.007879   0.044694  -0.176  0.86007    
## marstseparated  0.369696   0.083239   4.441 9.00e-06 ***
## marstwidowed    0.280114   0.049764   5.629 1.84e-08 ***
## hurtYEShurt     0.303407   0.034373   8.827  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 16.01235)
## 
##     Null deviance: 190006  on 16128  degrees of freedom
## Residual deviance: 181422  on 16118  degrees of freedom
##   (155554 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

Negative Binomial Model

clx2 <-   function(fm, dfcw,  cluster){
    require(sandwich);require(lmtest)
    if(class(fm)=="zeroinfl"|class(fm)=="hurdle") {
    M <- length(unique(cluster))   
    N <- length(cluster)           
    K <- dim(fm$vcov)[1]                    
    dfc <- (M/(M-1))*((N-1)/(N-K))  
    uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum, na.rm=T));
    vcovCL <- dfc[1]*sandwich(fm, meat=crossprod(uj)/N)*dfcw 
    list(summary=coeftest(fm, vcovCL))}
    else if(class(fm)!="zeroinfl"){
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- fm$rank
    dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
    uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum, na.rm=T));
    rcse.cov <- dfc * sandwich(fm, meat = crossprod(uj)/N)
    rcse.se <- coeftest(fm, rcse.cov)
    return(list( rcse.se))}
}
mysub$wts<-mysub$weight/mean(mysub$weight, na.rm=T)
fit.pois<-glm(poorhealth~ educ + marst + hurt, data=mysub, weights=wts, family=poisson)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library (sandwich)
clx2(fit.pois,cluster=mysub$strata)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
## 
## z test of coefficients:
## 
##                 Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)     1.390246   0.054497 25.5105 < 2.2e-16 ***
## educ0Prim      -0.038996   0.197125 -0.1978 0.8431840    
## educ1somehs     0.385069   0.143362  2.6860 0.0072314 ** 
## educ3somecol   -0.131394   0.079033 -1.6625 0.0964091 .  
## educ4colgrad   -0.550142   0.084526 -6.5086 7.588e-11 ***
## marstcohab      0.073360   0.147647  0.4969 0.6192876    
## marstdivorced   0.489678   0.059747  8.1959 2.487e-16 ***
## marstnm        -0.105209   0.070562 -1.4910 0.1359574    
## marstseparated  0.526804   0.178992  2.9432 0.0032488 ** 
## marstwidowed    0.319234   0.084079  3.7969 0.0001465 ***
## hurtYEShurt     0.240300   0.051700  4.6480 3.352e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#coeftest(fit.pois, vcov=vcovHC(fit.pois, type="HC1",cluster=dat$strata))
summary(fit1)
## 
## Call:
## svyglm(formula = poorhealth ~ educ + marst + hurt, design = mydesign, 
##     family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~`_ststr`, weights = ~`_cntywt`, 
##     data = dat[is.na(dat$`_cntywt`) == F, ])
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.38910    0.06570  21.143  < 2e-16 ***
## educ0Prim      -0.03814    0.16200  -0.235 0.813864    
## educ1somehs     0.38791    0.12958   2.994 0.002762 ** 
## educ3somecol   -0.12978    0.08042  -1.614 0.106597    
## educ4colgrad   -0.55029    0.07541  -7.297 3.08e-13 ***
## marstcohab      0.07368    0.17360   0.424 0.671281    
## marstdivorced   0.49101    0.08081   6.076 1.26e-09 ***
## marstnm        -0.10347    0.09311  -1.111 0.266468    
## marstseparated  0.52671    0.17562   2.999 0.002711 ** 
## marstwidowed    0.31809    0.09393   3.387 0.000709 ***
## hurtYEShurt     0.24008    0.06767   3.548 0.000390 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 9.964496)
## 
## Number of Fisher Scoring iterations: 6

Fit the Negative Binomial GLM

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
fit.nb2<-glm.nb(poorhealth~educ + marst + hurt,data=mysub, weights=wts)
clx2(fit.nb2,cluster =mysub$strata)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
## 
## z test of coefficients:
## 
##                 Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)     1.382655   0.055104 25.0918 < 2.2e-16 ***
## educ0Prim       0.027460   0.199320  0.1378 0.8904249    
## educ1somehs     0.424025   0.164326  2.5804 0.0098688 ** 
## educ3somecol   -0.110328   0.087055 -1.2673 0.2050388    
## educ4colgrad   -0.547741   0.086053 -6.3652 1.951e-10 ***
## marstcohab     -0.018621   0.127597 -0.1459 0.8839740    
## marstdivorced   0.470488   0.059609  7.8929 2.953e-15 ***
## marstnm        -0.134161   0.067946 -1.9745 0.0483207 *  
## marstseparated  0.522112   0.185305  2.8176 0.0048387 ** 
## marstwidowed    0.304322   0.083030  3.6652 0.0002471 ***
## hurtYEShurt     0.279230   0.056954  4.9027 9.454e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="strata"))
## 
## z test of coefficients:
## 
##                 Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)     1.382655   0.066962 20.6484 < 2.2e-16 ***
## educ0Prim       0.027460   0.171767  0.1599  0.872986    
## educ1somehs     0.424025   0.150068  2.8255  0.004720 ** 
## educ3somecol   -0.110328   0.084630 -1.3037  0.192352    
## educ4colgrad   -0.547741   0.077348 -7.0815 1.426e-12 ***
## marstcohab     -0.018621   0.156907 -0.1187  0.905534    
## marstdivorced   0.470488   0.080806  5.8224 5.800e-09 ***
## marstnm        -0.134161   0.084987 -1.5786  0.114424    
## marstseparated  0.522112   0.187808  2.7800  0.005435 ** 
## marstwidowed    0.304322   0.096142  3.1653  0.001549 ** 
## hurtYEShurt     0.279230   0.068783  4.0596 4.917e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Compare fits of the alternative models using AIC

AIC(fit1)
##       eff.p         AIC    deltabar 
##    409.0190 107891.1454     40.9019
AIC(fit.nb2)
## [1] 63811.99

Based on the AIC, the negative binomial is the best fitting model.