library(car)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(haven)
dat<-read_xpt("C:/Users/Monica/Documents/CSparks_StatsII/CNTY05.xpt")
a) My count outcome variable will be the self-reporting poor physical health days in the past month. This is the wording in the survey, “During the past 30 days, for about how many days did poor physical or mental health keep you from doing your usual activities, such as self-care, work, or recreation?
b) My research question is as follows: How do threats of violence and acts of violence from one’s intimate partner, marital status and education level affect the number of self-reported poor physical health days?
c) My predictor variables are 1) the threat of violence; 2) the act of violence; 3) marital status; and 4)education level
d) An offset term is necessary because the observations do not have the same level of exposure or risk.
Rename variables
nams<-names(dat)
head(nams, n=10)
## [1] "_CNTYNAM" "_STATE" "FMONTH" "IDATE" "IMONTH" "IDAY"
## [7] "IYEAR" "INTVID" "DISPCODE" "SEQNO"
myvariables<-tolower(names(dat))
names(dat)<-myvariables
Recode for poor mental health (outcome variable), with 1= low number of self-reported poor mental health days, 2= moderate number of self-reported poor mental health days, and 3= high number of self-reported poor mental health days
dat$poorhealth<-recode (dat$poorhlth,recodes= "88=0; 77=NA; 99=NA")
hist(dat$poorhealth)

summary(dat$poorhealth)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 4.32 3.00 30.00 81341
Recode for intimate partner threat (predictor variable)
dat$threat<-recode (dat$ipvthrat, recodes = "7:9=NA; 1='YESthreat'; 2='NOthreat'")
Recode for intimate partner hurt (predictor variable)
dat$hurt<-recode(dat$ipvphhrt, recodes = "7:9=NA; 1='YEShurt'; 2='NOhurt'")
Recode for marital status (predictor variable)
dat$marst<-recode(dat$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
dat$marst<-relevel(dat$marst, ref='married')
Recode for education level
dat$educ<-recode(dat$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor.result=T)
dat$educ<-relevel(dat$educ, ref='2hsgrad')
Create survey design with subset
options (survey.lonely.psu= "adjust")
library (dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dat$strata<-dat$`_ststr`
dat$weight<-dat$`_cntywt`
mysub<-dat%>%
select(threat, hurt, marst, educ, poorhealth, weight, strata)%>%
filter(complete.cases(.))
options (survey.lonely.psu= "adjust")
mydesign<-svydesign(ids = ~1, strata=~strata, weights=~weight, data = mysub)
options(survey.lonely.psu="adjust")
mydesign<-svydesign(ids=~1, strata=~`_ststr`, weights=~`_cntywt`, data= dat[is.na(dat$`_cntywt`)==F,])
Poisson regression model
svyhist(~poorhealth, mydesign)

svyby(~poorhealth, ~educ+marst, mydesign, svymean, na.rm=T)
## educ marst poorhealth se
## 2hsgrad.married 2hsgrad married 4.069918 0.15006838
## 0Prim.married 0Prim married 5.616256 0.77534379
## 1somehs.married 1somehs married 4.753525 0.34769098
## 3somecol.married 3somecol married 3.527889 0.13921007
## 4colgrad.married 4colgrad married 2.398252 0.08182773
## 2hsgrad.cohab 2hsgrad cohab 3.215482 0.41226458
## 0Prim.cohab 0Prim cohab 2.815098 0.71515707
## 1somehs.cohab 1somehs cohab 5.174254 1.50698845
## 3somecol.cohab 3somecol cohab 2.980839 0.33898022
## 4colgrad.cohab 4colgrad cohab 2.416472 0.26800120
## 2hsgrad.divorced 2hsgrad divorced 5.893915 0.34953037
## 0Prim.divorced 0Prim divorced 7.775936 1.67000048
## 1somehs.divorced 1somehs divorced 7.728887 0.75608184
## 3somecol.divorced 3somecol divorced 5.240588 0.25411632
## 4colgrad.divorced 4colgrad divorced 4.292953 0.24932354
## 2hsgrad.nm 2hsgrad nm 3.416436 0.20398341
## 0Prim.nm 0Prim nm 3.810604 0.70835439
## 1somehs.nm 1somehs nm 4.170520 0.44238739
## 3somecol.nm 3somecol nm 3.389757 0.19225051
## 4colgrad.nm 4colgrad nm 2.385224 0.11651013
## 2hsgrad.separated 2hsgrad separated 6.824594 0.75061220
## 0Prim.separated 0Prim separated 4.571044 1.09218044
## 1somehs.separated 1somehs separated 7.380729 1.20354147
## 3somecol.separated 3somecol separated 6.818606 1.05091741
## 4colgrad.separated 4colgrad separated 4.797955 0.71387902
## 2hsgrad.widowed 2hsgrad widowed 5.826150 0.29683858
## 0Prim.widowed 0Prim widowed 9.166265 1.38831079
## 1somehs.widowed 1somehs widowed 6.939729 0.57991900
## 3somecol.widowed 3somecol widowed 5.386401 0.37225798
## 4colgrad.widowed 4colgrad widowed 4.012188 0.33553341
svyby(~poorhealth, ~hurt+threat, mydesign, svymean, na.rm=T)
## hurt threat poorhealth se
## NOhurt.NOthreat NOhurt NOthreat 3.606539 0.1441362
## YEShurt.NOthreat YEShurt NOthreat 4.261640 0.5620292
## NOhurt.YESthreat NOhurt YESthreat 3.921826 0.7701679
## YEShurt.YESthreat YEShurt YESthreat 5.414269 0.3215802
Poisson glm fit to survey data
fit1<-svyglm(poorhealth~educ + marst + hurt, design=mydesign, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = poorhealth ~ educ + marst + hurt, design = mydesign,
## family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~`_ststr`, weights = ~`_cntywt`,
## data = dat[is.na(dat$`_cntywt`) == F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.38910 0.06570 21.143 < 2e-16 ***
## educ0Prim -0.03814 0.16200 -0.235 0.813864
## educ1somehs 0.38791 0.12958 2.994 0.002762 **
## educ3somecol -0.12978 0.08042 -1.614 0.106597
## educ4colgrad -0.55029 0.07541 -7.297 3.08e-13 ***
## marstcohab 0.07368 0.17360 0.424 0.671281
## marstdivorced 0.49101 0.08081 6.076 1.26e-09 ***
## marstnm -0.10347 0.09311 -1.111 0.266468
## marstseparated 0.52671 0.17562 2.999 0.002711 **
## marstwidowed 0.31809 0.09393 3.387 0.000709 ***
## hurtYEShurt 0.24008 0.06767 3.548 0.000390 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 9.964496)
##
## Number of Fisher Scoring iterations: 6
Poisson model “risk ratios”
round(exp(summary(fit1)$coef[-1,1]), 3)
## educ0Prim educ1somehs educ3somecol educ4colgrad marstcohab
## 0.963 1.474 0.878 0.577 1.076
## marstdivorced marstnm marstseparated marstwidowed hurtYEShurt
## 1.634 0.902 1.693 1.375 1.271
Based on the results above, people with less than a HS degree had 47% higher mean counts of poor health days than those with a HS degree. People who were divorced or separated had 60% higher mean counts of poor health days than those who are married. People who were hurt by their intimate partner, had 27% higher mean counts of poor health days than those who were not hurt by their intimate partner.
Check for overdispersion
fit2<-glm(poorhealth~ educ +marst +hurt, data=dat, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = poorhealth ~ educ + marst + hurt, family = poisson,
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.8021 -2.8600 -2.3040 -0.3231 9.9941
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.408518 0.008270 170.308 < 2e-16 ***
## educ0Prim 0.356645 0.020164 17.687 < 2e-16 ***
## educ1somehs 0.323785 0.013445 24.082 < 2e-16 ***
## educ3somecol -0.108043 0.009681 -11.160 < 2e-16 ***
## educ4colgrad -0.432376 0.010242 -42.217 < 2e-16 ***
## marstcohab -0.164591 0.024037 -6.847 7.52e-12 ***
## marstdivorced 0.376403 0.010075 37.361 < 2e-16 ***
## marstnm -0.007879 0.011169 -0.705 0.481
## marstseparated 0.369696 0.020802 17.772 < 2e-16 ***
## marstwidowed 0.280114 0.012436 22.524 < 2e-16 ***
## hurtYEShurt 0.303407 0.008590 35.321 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 190006 on 16128 degrees of freedom
## Residual deviance: 181422 on 16118 degrees of freedom
## (155554 observations deleted due to missingness)
## AIC: 205853
##
## Number of Fisher Scoring iterations: 6
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 3.354979
1-pchisq(fit2$deviance, df= fit2$df.residual)
## [1] 0
fit3<-glm(poorhealth~educ + marst + hurt, data=dat, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = poorhealth ~ educ + marst + hurt, family = quasipoisson,
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.8021 -2.8600 -2.3040 -0.3231 9.9941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.408518 0.033094 42.561 < 2e-16 ***
## educ0Prim 0.356645 0.080689 4.420 9.93e-06 ***
## educ1somehs 0.323785 0.053802 6.018 1.80e-09 ***
## educ3somecol -0.108043 0.038739 -2.789 0.00529 **
## educ4colgrad -0.432376 0.040983 -10.550 < 2e-16 ***
## marstcohab -0.164591 0.096185 -1.711 0.08707 .
## marstdivorced 0.376403 0.040314 9.337 < 2e-16 ***
## marstnm -0.007879 0.044694 -0.176 0.86007
## marstseparated 0.369696 0.083239 4.441 9.00e-06 ***
## marstwidowed 0.280114 0.049764 5.629 1.84e-08 ***
## hurtYEShurt 0.303407 0.034373 8.827 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 16.01235)
##
## Null deviance: 190006 on 16128 degrees of freedom
## Residual deviance: 181422 on 16118 degrees of freedom
## (155554 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
Negative Binomial Model
clx2 <- function(fm, dfcw, cluster){
require(sandwich);require(lmtest)
if(class(fm)=="zeroinfl"|class(fm)=="hurdle") {
M <- length(unique(cluster))
N <- length(cluster)
K <- dim(fm$vcov)[1]
dfc <- (M/(M-1))*((N-1)/(N-K))
uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum, na.rm=T));
vcovCL <- dfc[1]*sandwich(fm, meat=crossprod(uj)/N)*dfcw
list(summary=coeftest(fm, vcovCL))}
else if(class(fm)!="zeroinfl"){
M <- length(unique(cluster))
N <- length(cluster)
K <- fm$rank
dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
uj <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum, na.rm=T));
rcse.cov <- dfc * sandwich(fm, meat = crossprod(uj)/N)
rcse.se <- coeftest(fm, rcse.cov)
return(list( rcse.se))}
}
mysub$wts<-mysub$weight/mean(mysub$weight, na.rm=T)
fit.pois<-glm(poorhealth~ educ + marst + hurt, data=mysub, weights=wts, family=poisson)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library (sandwich)
clx2(fit.pois,cluster=mysub$strata)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.390246 0.054497 25.5105 < 2.2e-16 ***
## educ0Prim -0.038996 0.197125 -0.1978 0.8431840
## educ1somehs 0.385069 0.143362 2.6860 0.0072314 **
## educ3somecol -0.131394 0.079033 -1.6625 0.0964091 .
## educ4colgrad -0.550142 0.084526 -6.5086 7.588e-11 ***
## marstcohab 0.073360 0.147647 0.4969 0.6192876
## marstdivorced 0.489678 0.059747 8.1959 2.487e-16 ***
## marstnm -0.105209 0.070562 -1.4910 0.1359574
## marstseparated 0.526804 0.178992 2.9432 0.0032488 **
## marstwidowed 0.319234 0.084079 3.7969 0.0001465 ***
## hurtYEShurt 0.240300 0.051700 4.6480 3.352e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#coeftest(fit.pois, vcov=vcovHC(fit.pois, type="HC1",cluster=dat$strata))
summary(fit1)
##
## Call:
## svyglm(formula = poorhealth ~ educ + marst + hurt, design = mydesign,
## family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~`_ststr`, weights = ~`_cntywt`,
## data = dat[is.na(dat$`_cntywt`) == F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.38910 0.06570 21.143 < 2e-16 ***
## educ0Prim -0.03814 0.16200 -0.235 0.813864
## educ1somehs 0.38791 0.12958 2.994 0.002762 **
## educ3somecol -0.12978 0.08042 -1.614 0.106597
## educ4colgrad -0.55029 0.07541 -7.297 3.08e-13 ***
## marstcohab 0.07368 0.17360 0.424 0.671281
## marstdivorced 0.49101 0.08081 6.076 1.26e-09 ***
## marstnm -0.10347 0.09311 -1.111 0.266468
## marstseparated 0.52671 0.17562 2.999 0.002711 **
## marstwidowed 0.31809 0.09393 3.387 0.000709 ***
## hurtYEShurt 0.24008 0.06767 3.548 0.000390 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 9.964496)
##
## Number of Fisher Scoring iterations: 6
Fit the Negative Binomial GLM
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.nb2<-glm.nb(poorhealth~educ + marst + hurt,data=mysub, weights=wts)
clx2(fit.nb2,cluster =mysub$strata)
## Warning in if (class(fm) == "zeroinfl" | class(fm) == "hurdle") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (class(fm) != "zeroinfl") {: the condition has length > 1 and
## only the first element will be used
## [[1]]
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.382655 0.055104 25.0918 < 2.2e-16 ***
## educ0Prim 0.027460 0.199320 0.1378 0.8904249
## educ1somehs 0.424025 0.164326 2.5804 0.0098688 **
## educ3somecol -0.110328 0.087055 -1.2673 0.2050388
## educ4colgrad -0.547741 0.086053 -6.3652 1.951e-10 ***
## marstcohab -0.018621 0.127597 -0.1459 0.8839740
## marstdivorced 0.470488 0.059609 7.8929 2.953e-15 ***
## marstnm -0.134161 0.067946 -1.9745 0.0483207 *
## marstseparated 0.522112 0.185305 2.8176 0.0048387 **
## marstwidowed 0.304322 0.083030 3.6652 0.0002471 ***
## hurtYEShurt 0.279230 0.056954 4.9027 9.454e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="strata"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.382655 0.066962 20.6484 < 2.2e-16 ***
## educ0Prim 0.027460 0.171767 0.1599 0.872986
## educ1somehs 0.424025 0.150068 2.8255 0.004720 **
## educ3somecol -0.110328 0.084630 -1.3037 0.192352
## educ4colgrad -0.547741 0.077348 -7.0815 1.426e-12 ***
## marstcohab -0.018621 0.156907 -0.1187 0.905534
## marstdivorced 0.470488 0.080806 5.8224 5.800e-09 ***
## marstnm -0.134161 0.084987 -1.5786 0.114424
## marstseparated 0.522112 0.187808 2.7800 0.005435 **
## marstwidowed 0.304322 0.096142 3.1653 0.001549 **
## hurtYEShurt 0.279230 0.068783 4.0596 4.917e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compare fits of the alternative models using AIC
AIC(fit1)
## eff.p AIC deltabar
## 409.0190 107891.1454 40.9019
AIC(fit.nb2)
## [1] 63811.99
Based on the AIC, the negative binomial is the best fitting model.