DEM 7283 Homework 4

Using ECLS-K data, I will assess the relationship between number of spanks in the last week and poverty status in children in the United States, using parent age, parent education, parent employment status, parent race/ethnicity, and child sex as control variables.

I will not use an offset term because all respondents had equal exposure to the outcome variable.

#load required packages
library(car)

## Loading required package: carData

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(sjPlot)

## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car

## Learn more about sjPlot with 'browseVignettes("sjPlot")'.

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(sandwich)

#load eclsk data
load("~/OneDrive/UTSA/7283 Stats II/eclsk_k5.Rdata")

#extract variables for assignment
mydat<-c("childid", "x_chsex_r", "p2spank", "x2povty", "x2par1age", "x12par1ed_i", "x2par1rac", "x1par1emp", "w2p0", "w2p0str", "w2p0psu")

eclsk<-data.frame(eclskk5[,mydat])

rm(eclskk5)

Recode Variables

#recode number of spanks in the last week
eclsk$spanks<-Recode(eclsk$p2spank, recodes = "-9:-7=NA; 95=NA; else=eclsk$p2spank")

#recode poverty status
eclsk$pov<-Recode(eclsk$x2povty, recodes ="1=1; 2:3=0; -9=NA", as.factor=TRUE)

#recode age
eclsk$p_age<-cut(eclsk$x2par1age, breaks=c(0,29,39,49,59,77))

#parent 1 education
eclsk$educ<-Recode(eclsk$x12par1ed_i, recodes = "1='0prim'; 2='1somehs'; 3='2hsgrad'; 4:5='3somecol'; 6:9='4colgrad'; -9=NA", as.factor=TRUE)
eclsk$educ<-relevel(eclsk$educ, ref='2hsgrad')

#parent employment
eclsk$no_work<-Recode(eclsk$x1par1emp, recodes= "1:2='working'; 3:4='not working'; -9=NA", as.factor=TRUE)
eclsk$no_work<-relevel(eclsk$no_work, ref='working')

#race
eclsk$p_raceth<-Recode(eclsk$x2par1rac, recodes="1='nh white'; 2='nh black'; 3:4='hispanic'; 5:7='nh other'; 8='nh multirace'; -9=NA", as.factor=TRUE)
eclsk$p_raceth<-relevel(eclsk$p_raceth, ref='nh white')

#recode sex
eclsk$male<-Recode(eclsk$x_chsex_r, recodes = "1=1; 2=0; -9=NA", as.factor=TRUE)

Survey Design

hist(eclsk$spanks)

summary(eclsk$spanks)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.32    0.00   30.00    5921

sub<-eclsk%>%
  select(spanks, pov, p_age, educ, no_work, p_raceth, male, w2p0, w2p0str, w2p0psu) %>%
  filter( complete.cases(.))

#survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~w2p0str, weights=~w2p0, data =sub )

Poisson Regression Descriptives

svyhist(~spanks, des)

#svyby(~spanks, ~pov+p_age+educ+no_work+p_raceth+male, des, svymean, na.rm=T)
svyby(~spanks, ~p_raceth, des, svymean, na.rm=T)

##                  p_raceth    spanks         se
## nh white         nh white 0.2687673 0.01347558
## hispanic         hispanic 0.3348648 0.02516819
## nh black         nh black 0.4949787 0.03571524
## nh multirace nh multirace 0.5834319 0.13184670
## nh other         nh other 0.3377578 0.04587759

svyby(~spanks, ~educ+p_raceth, des, svymean, na.rm=T)

##                           educ     p_raceth    spanks         se
## 2hsgrad.nh white       2hsgrad     nh white 0.3537165 0.03880669
## 0prim.nh white           0prim     nh white 0.2963972 0.17356116
## 1somehs.nh white       1somehs     nh white 0.5024347 0.08429973
## 3somecol.nh white     3somecol     nh white 0.2895105 0.02162280
## 4colgrad.nh white     4colgrad     nh white 0.1989401 0.01925597
## 2hsgrad.hispanic       2hsgrad     hispanic 0.3611144 0.04113573
## 0prim.hispanic           0prim     hispanic 0.2606573 0.03330845
## 1somehs.hispanic       1somehs     hispanic 0.4032762 0.04950619
## 3somecol.hispanic     3somecol     hispanic 0.3916752 0.07935732
## 4colgrad.hispanic     4colgrad     hispanic 0.1830080 0.03674134
## 2hsgrad.nh black       2hsgrad     nh black 0.5248289 0.07241001
## 0prim.nh black           0prim     nh black 0.5624828 0.41355533
## 1somehs.nh black       1somehs     nh black 0.8720341 0.17595411
## 3somecol.nh black     3somecol     nh black 0.4418735 0.04704044
## 4colgrad.nh black     4colgrad     nh black 0.4159876 0.07430782
## 2hsgrad.nh multirace   2hsgrad nh multirace 0.4190412 0.16940925
## 1somehs.nh multirace   1somehs nh multirace 1.9407365 0.74039547
## 3somecol.nh multirace 3somecol nh multirace 0.6024193 0.22343482
## 4colgrad.nh multirace 4colgrad nh multirace 0.2017352 0.07043891
## 2hsgrad.nh other       2hsgrad     nh other 0.2826851 0.08444640
## 0prim.nh other           0prim     nh other 0.4666520 0.25816092
## 1somehs.nh other       1somehs     nh other 0.4718945 0.35763991
## 3somecol.nh other     3somecol     nh other 0.3055967 0.08496077
## 4colgrad.nh other     4colgrad     nh other 0.3476994 0.05976437

svyby(~spanks, ~p_age, des, svymean, na.rm=T)

##           p_age    spanks         se
## (0,29]   (0,29] 0.4556227 0.02500889
## (29,39] (29,39] 0.2824246 0.01429291
## (39,49] (39,49] 0.2340990 0.02387768
## (49,59] (49,59] 0.2924590 0.10057166
## (59,77] (59,77] 0.2125407 0.08306490

svyby(~spanks, ~male, des, svymean, na.rm=T)

##   male    spanks         se
## 0    0 0.2945032 0.01608986
## 1    1 0.3392501 0.01528557

svyby(~spanks, ~no_work, des, svymean, na.rm=T)

##                 no_work    spanks         se
## working         working 0.3167839 0.01478560
## not working not working 0.3193031 0.01630169

svyby(~spanks, ~pov, des, svymean, na.rm=T)

##   pov    spanks         se
## 0   0 0.2756922 0.01168046
## 1   1 0.4478861 0.02717166

#I was curious to see each individual variable

Poisson GLM

#Poisson glm fit to survey data
fit1<-svyglm(spanks~factor(pov)+factor(p_age)+factor(educ)+factor(no_work)+factor(p_raceth)+factor(male), design=des, family=poisson)
summary(fit1)

## 
## Call:
## svyglm(formula = spanks ~ factor(pov) + factor(p_age) + factor(educ) + 
##     factor(no_work) + factor(p_raceth) + factor(male), design = des, 
##     family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~w2p0str, weights = ~w2p0, data = sub)
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.026318   0.123866  -8.286  < 2e-16 ***
## factor(pov)1                  0.238328   0.092206   2.585 0.009760 ** 
## factor(p_age)(29,39]         -0.275927   0.082941  -3.327 0.000882 ***
## factor(p_age)(39,49]         -0.414735   0.119107  -3.482 0.000500 ***
## factor(p_age)(49,59]         -0.348998   0.350653  -0.995 0.319626    
## factor(p_age)(59,77]         -0.713187   0.412026  -1.731 0.083497 .  
## factor(educ)0prim            -0.241114   0.165720  -1.455 0.145715    
## factor(educ)1somehs           0.263534   0.119471   2.206 0.027418 *  
## factor(educ)3somecol         -0.066678   0.097833  -0.682 0.495542    
## factor(educ)4colgrad         -0.301120   0.109104  -2.760 0.005792 ** 
## factor(no_work)not working   -0.119219   0.072923  -1.635 0.102111    
## factor(p_raceth)hispanic      0.005121   0.106544   0.048 0.961669    
## factor(p_raceth)nh black      0.398764   0.093485   4.266 2.01e-05 ***
## factor(p_raceth)nh multirace  0.611519   0.226157   2.704 0.006864 ** 
## factor(p_raceth)nh other      0.258660   0.146547   1.765 0.077591 .  
## factor(male)1                 0.145737   0.071240   2.046 0.040811 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 3.326954)
## 
## Number of Fisher Scoring iterations: 6

scale1<-sqrt(fit1$deviance/fit1$df.residual)
scale1

## [1] 1.112089

1-pchisq(fit1$deviance, df = fit1$df.residual)

## [1] 0

#poisson model "risk ratios"
round(exp(summary(fit1)$coef[-1,1]), 3)

##                 factor(pov)1         factor(p_age)(29,39] 
##                        1.269                        0.759 
##         factor(p_age)(39,49]         factor(p_age)(49,59] 
##                        0.661                        0.705 
##         factor(p_age)(59,77]            factor(educ)0prim 
##                        0.490                        0.786 
##          factor(educ)1somehs         factor(educ)3somecol 
##                        1.302                        0.935 
##         factor(educ)4colgrad   factor(no_work)not working 
##                        0.740                        0.888 
##     factor(p_raceth)hispanic     factor(p_raceth)nh black 
##                        1.005                        1.490 
## factor(p_raceth)nh multirace     factor(p_raceth)nh other 
##                        1.843                        1.295 
##                factor(male)1 
##                        1.157

#check for dispersion
fit2<-glm(spanks~factor(pov)+factor(p_age)+factor(educ)+factor(no_work)+factor(p_raceth)+factor(male), data=sub, family=poisson)
summary(fit2)

## 
## Call:
## glm(formula = spanks ~ factor(pov) + factor(p_age) + factor(educ) + 
##     factor(no_work) + factor(p_raceth) + factor(male), family = poisson, 
##     data = sub)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4532  -0.8296  -0.7126  -0.5991  14.0132  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -1.02733    0.05735 -17.914  < 2e-16 ***
## factor(pov)1                  0.27009    0.04540   5.949 2.70e-09 ***
## factor(p_age)(29,39]         -0.30955    0.04248  -7.288 3.15e-13 ***
## factor(p_age)(39,49]         -0.49146    0.05969  -8.233  < 2e-16 ***
## factor(p_age)(49,59]         -0.39161    0.16316  -2.400  0.01639 *  
## factor(p_age)(59,77]         -0.73040    0.33522  -2.179  0.02934 *  
## factor(educ)0prim            -0.15522    0.10450  -1.485  0.13744    
## factor(educ)1somehs           0.25566    0.06682   3.826  0.00013 ***
## factor(educ)3somecol         -0.01786    0.04916  -0.363  0.71632    
## factor(educ)4colgrad         -0.24810    0.05672  -4.374 1.22e-05 ***
## factor(no_work)not working   -0.18103    0.03896  -4.647 3.37e-06 ***
## factor(p_raceth)hispanic      0.03236    0.05262   0.615  0.53862    
## factor(p_raceth)nh black      0.36113    0.05241   6.891 5.55e-12 ***
## factor(p_raceth)nh multirace  0.52114    0.11114   4.689 2.74e-06 ***
## factor(p_raceth)nh other      0.26450    0.06744   3.922 8.78e-05 ***
## factor(male)1                 0.19482    0.03614   5.391 7.02e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 12671  on 9746  degrees of freedom
## Residual deviance: 12190  on 9731  degrees of freedom
## AIC: 16211
## 
## Number of Fisher Scoring iterations: 6

scale<-sqrt(fit2$deviance/fit2$df.residual)
scale

## [1] 1.119256

1-pchisq(fit2$deviance, df = fit2$df.residual)

## [1] 0

#Oh no! My model does not fit the data. Poisson model is not a good chioce :'(

Modeling Overdistribution via a Quasi distribution

fit3<-glm(spanks~factor(pov)+factor(p_age)+factor(educ)+factor(no_work)+factor(p_raceth)+factor(male), data=sub, family=quasipoisson)
summary(fit3)

## 
## Call:
## glm(formula = spanks ~ factor(pov) + factor(p_age) + factor(educ) + 
##     factor(no_work) + factor(p_raceth) + factor(male), family = quasipoisson, 
##     data = sub)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4532  -0.8296  -0.7126  -0.5991  14.0132  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.02733    0.10673  -9.625  < 2e-16 ***
## factor(pov)1                  0.27009    0.08449   3.197 0.001395 ** 
## factor(p_age)(29,39]         -0.30955    0.07905  -3.916 9.07e-05 ***
## factor(p_age)(39,49]         -0.49146    0.11109  -4.424 9.79e-06 ***
## factor(p_age)(49,59]         -0.39161    0.30365  -1.290 0.197192    
## factor(p_age)(59,77]         -0.73040    0.62387  -1.171 0.241724    
## factor(educ)0prim            -0.15522    0.19448  -0.798 0.424818    
## factor(educ)1somehs           0.25566    0.12436   2.056 0.039832 *  
## factor(educ)3somecol         -0.01786    0.09149  -0.195 0.845197    
## factor(educ)4colgrad         -0.24810    0.10556  -2.350 0.018780 *  
## factor(no_work)not working   -0.18103    0.07250  -2.497 0.012545 *  
## factor(p_raceth)hispanic      0.03236    0.09794   0.330 0.741105    
## factor(p_raceth)nh black      0.36113    0.09753   3.703 0.000215 ***
## factor(p_raceth)nh multirace  0.52114    0.20684   2.520 0.011767 *  
## factor(p_raceth)nh other      0.26450    0.12551   2.107 0.035110 *  
## factor(male)1                 0.19482    0.06726   2.897 0.003781 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 3.463639)
## 
##     Null deviance: 12671  on 9746  degrees of freedom
## Residual deviance: 12190  on 9731  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

#check for dispersion
scale<-sqrt(fit3$deviance/fit3$df.residual)
scale

## [1] 1.119256

1-pchisq(fit3$deviance, df = fit3$df.residual)

## [1] 0

##Still not a good fit :<

Negative Binomial

coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="w2p0str"))

## 
## z test of coefficients:
## 
##                               Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                  -1.027334   0.110232 -9.3197 < 2.2e-16 ***
## factor(pov)1                  0.270091   0.085857  3.1458  0.001656 ** 
## factor(p_age)(29,39]         -0.309554   0.076837 -4.0287 5.608e-05 ***
## factor(p_age)(39,49]         -0.491463   0.111964 -4.3895 1.136e-05 ***
## factor(p_age)(49,59]         -0.391612   0.302594 -1.2942  0.195602    
## factor(p_age)(59,77]         -0.730402   0.384232 -1.9009  0.057310 .  
## factor(educ)0prim            -0.155221   0.156657 -0.9908  0.321766    
## factor(educ)1somehs           0.255665   0.111875  2.2853  0.022297 *  
## factor(educ)3somecol         -0.017864   0.089071 -0.2006  0.841044    
## factor(educ)4colgrad         -0.248096   0.096977 -2.5583  0.010519 *  
## factor(no_work)not working   -0.181029   0.066418 -2.7256  0.006418 ** 
## factor(p_raceth)hispanic      0.032359   0.102445  0.3159  0.752107    
## factor(p_raceth)nh black      0.361129   0.085681  4.2148 2.500e-05 ***
## factor(p_raceth)nh multirace  0.521145   0.196482  2.6524  0.007993 ** 
## factor(p_raceth)nh other      0.264497   0.143458  1.8437  0.065223 .  
## factor(male)1                 0.194823   0.066828  2.9153  0.003554 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit Negative Binomial GLM

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

fit.nb1<-glm.nb(spanks~factor(pov),
              data=sub,
              weights=w2p0/mean(w2p0, na.rm=T))
fit.nb2<-glm.nb(spanks~factor(pov)+factor(p_age)+factor(educ)+factor(no_work)+factor(p_raceth)+factor(male),
              data=sub,
              weights=w2p0/mean(w2p0, na.rm=T))

#clx2(fit.nb2,cluster =sub$ststr)
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="w2p0str")) 
tests2<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="w2p0str")) 

library(stargazer)
stargazer(fit.nb1, fit.nb2,style="demography", type ="text", t.auto=F,p.auto=F,coef=list(tests1[,1],tests2[,1]),  se =list(tests1[, 2], tests2[, 2]), p=list(tests1[,4],tests2[,4]))

## 
## --------------------------------------------------------------
##                                           spanks              
##                                  Model 1          Model 2     
## --------------------------------------------------------------
## factor(pov)1                     0.485***          0.234*     
##                                  (0.095)          (0.095)     
## factor(p_age)(29,39]                             -0.301***    
##                                                   (0.084)     
## factor(p_age)(39,49]                             -0.447***    
##                                                   (0.124)     
## factor(p_age)(49,59]                               -0.311     
##                                                   (0.382)     
## factor(p_age)(59,77]                               -0.642     
##                                                   (0.404)     
## factor(educ)0prim                                  -0.294     
##                                                   (0.164)     
## factor(educ)1somehs                                0.206      
##                                                   (0.119)     
## factor(educ)3somecol                               -0.073     
##                                                   (0.100)     
## factor(educ)4colgrad                              -0.325**    
##                                                   (0.112)     
## factor(no_work)not working                        -0.150*     
##                                                   (0.075)     
## factor(p_raceth)hispanic                           0.038      
##                                                   (0.105)     
## factor(p_raceth)nh black                          0.431***    
##                                                   (0.097)     
## factor(p_raceth)nh multirace                       0.558*     
##                                                   (0.249)     
## factor(p_raceth)nh other                           0.311*     
##                                                   (0.143)     
## factor(male)1                                      0.146*     
##                                                   (0.074)     
## Constant                        -1.288***        -0.993***    
##                                  (0.126)          (0.126)     
## N                                 9,747            9,747      
## Log Likelihood                  -6,597.576       -6,533.293   
## theta                        0.216*** (0.010) 0.232*** (0.011)
## AIC                             13,199.150       13,098.590   
## --------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001

Zero Inflated for fun

library(pscl)

## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

fit4<-zeroinfl(spanks~factor(pov)+factor(p_age)+factor(educ)+factor(no_work)+factor(p_raceth)+factor(male), dist='poisson', data=sub)

summary(fit4)

## 
## Call:
## zeroinfl(formula = spanks ~ factor(pov) + factor(p_age) + factor(educ) + 
##     factor(no_work) + factor(p_raceth) + factor(male), data = sub, 
##     dist = "poisson")
## 
## Pearson residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7140 -0.4163 -0.3507 -0.2999 32.4725 
## 
## Count model coefficients (poisson with log link):
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   0.379253   0.077071   4.921 8.62e-07 ***
## factor(pov)1                  0.184389   0.062313   2.959 0.003086 ** 
## factor(p_age)(29,39]         -0.220749   0.057442  -3.843 0.000122 ***
## factor(p_age)(39,49]         -0.105643   0.079714  -1.325 0.185081    
## factor(p_age)(49,59]         -0.025266   0.217339  -0.116 0.907454    
## factor(p_age)(59,77]         -1.123434   0.768448  -1.462 0.143754    
## factor(educ)0prim            -0.173027   0.151185  -1.144 0.252427    
## factor(educ)1somehs           0.196497   0.088148   2.229 0.025803 *  
## factor(educ)3somecol          0.068036   0.067423   1.009 0.312932    
## factor(educ)4colgrad          0.105932   0.078972   1.341 0.179794    
## factor(no_work)not working   -0.208989   0.054290  -3.849 0.000118 ***
## factor(p_raceth)hispanic     -0.035710   0.071954  -0.496 0.619685    
## factor(p_raceth)nh black     -0.111896   0.072078  -1.552 0.120559    
## factor(p_raceth)nh multirace  0.285578   0.136984   2.085 0.037091 *  
## factor(p_raceth)nh other      0.299159   0.088689   3.373 0.000743 ***
## factor(male)1                 0.001744   0.049423   0.035 0.971846    
## 
## Zero-inflation model coefficients (binomial with logit link):
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   1.103521   0.108742  10.148  < 2e-16 ***
## factor(pov)1                 -0.124968   0.088463  -1.413 0.157757    
## factor(p_age)(29,39]          0.124089   0.081900   1.515 0.129741    
## factor(p_age)(39,49]          0.491769   0.107073   4.593 4.37e-06 ***
## factor(p_age)(49,59]          0.463923   0.288772   1.607 0.108156    
## factor(p_age)(59,77]         -0.691963   1.373320  -0.504 0.614359    
## factor(educ)0prim            -0.004847   0.208477  -0.023 0.981451    
## factor(educ)1somehs          -0.082317   0.132795  -0.620 0.535336    
## factor(educ)3somecol          0.128398   0.095052   1.351 0.176752    
## factor(educ)4colgrad          0.484764   0.105383   4.600 4.22e-06 ***
## factor(no_work)not working   -0.034221   0.074014  -0.462 0.643822    
## factor(p_raceth)hispanic     -0.092286   0.098564  -0.936 0.349112    
## factor(p_raceth)nh black     -0.696672   0.109207  -6.379 1.78e-10 ***
## factor(p_raceth)nh multirace -0.265920   0.218449  -1.217 0.223486    
## factor(p_raceth)nh other      0.018925   0.120417   0.157 0.875119    
## factor(male)1                -0.255319   0.067482  -3.783 0.000155 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Number of iterations in BFGS optimization: 39 
## Log-likelihood: -6840 on 32 Df

stargazer(fit1, fit.nb2, fit4, style="io", type="text", ci=T)

## 
## -------------------------------------------------------------------------------
##                                                    spanks                      
##                              survey-weighted      negative      zero-inflated  
##                                  Poisson          binomial        count data   
##                                    (1)              (2)              (3)       
## -------------------------------------------------------------------------------
## FACTOR(POV)1                     0.238***         0.234***         0.184***    
##                               (0.058, 0.419)   (0.089, 0.379)   (0.062, 0.307) 
## FACTOR(P_AGE)(29,39]            -0.276***        -0.301***        -0.221***    
##                              (-0.438, -0.113) (-0.436, -0.166) (-0.333, -0.108)
## FACTOR(P_AGE)(39,49]            -0.415***        -0.447***          -0.106     
##                              (-0.648, -0.181) (-0.625, -0.269) (-0.262, 0.051) 
## FACTOR(P_AGE)(49,59]              -0.349           -0.311           -0.025     
##                              (-1.036, 0.338)  (-0.790, 0.169)  (-0.451, 0.401) 
## FACTOR(P_AGE)(59,77]             -0.713*           -0.642           -1.123     
##                              (-1.521, 0.094)  (-1.554, 0.269)  (-2.630, 0.383) 
## FACTOR(EDUC)0PRIM                 -0.241          -0.294*           -0.173     
##                              (-0.566, 0.084)  (-0.613, 0.025)  (-0.469, 0.123) 
## FACTOR(EDUC)1SOMEHS              0.264**           0.206*          0.196**     
##                               (0.029, 0.498)  (-0.021, 0.434)   (0.024, 0.369) 
## FACTOR(EDUC)3SOMECOL              -0.067           -0.073           0.068      
##                              (-0.258, 0.125)  (-0.226, 0.080)  (-0.064, 0.200) 
## FACTOR(EDUC)4COLGRAD            -0.301***        -0.325***          0.106      
##                              (-0.515, -0.087) (-0.496, -0.154) (-0.049, 0.261) 
## FACTOR(NO_WORK)NOT WORKING        -0.119          -0.150**        -0.209***    
##                              (-0.262, 0.024)  (-0.269, -0.030) (-0.315, -0.103)
## FACTOR(P_RACETH)HISPANIC          0.005            0.038            -0.036     
##                              (-0.204, 0.214)  (-0.124, 0.200)  (-0.177, 0.105) 
## FACTOR(P_RACETH)NH BLACK         0.399***         0.431***          -0.112     
##                               (0.216, 0.582)   (0.264, 0.598)  (-0.253, 0.029) 
## FACTOR(P_RACETH)NH MULTIRACE     0.612***         0.558***         0.286**     
##                               (0.168, 1.055)   (0.156, 0.961)   (0.017, 0.554) 
## FACTOR(P_RACETH)NH OTHER          0.259*          0.311**          0.299***    
##                              (-0.029, 0.546)   (0.059, 0.562)   (0.125, 0.473) 
## FACTOR(MALE)1                    0.146**          0.146***          0.002      
##                               (0.006, 0.285)   (0.036, 0.257)  (-0.095, 0.099) 
## CONSTANT                        -1.026***        -0.993***         0.379***    
##                              (-1.269, -0.784) (-1.171, -0.814)  (0.228, 0.530) 
## Observations                      9,747            9,747            9,747      
## Log likelihood                  -7,990.895       -6,533.293       -6,839.602   
## theta                                         0.232*** (0.011)                 
## Akaike information criterion    16,013.790       13,098.590                    
## -------------------------------------------------------------------------------
## Notes:                       ***p < .01; **p < .05; *p < .1

Based on the AIC values, the best fitted model is fit1, but all models returned a chisquare value of 0 meaning the models do not fit the data. If looking at negative binomial models only, the best fit is fit.nb2

In this model, we see that those in poverty had higher mean counts of spanks given to their children in the past week compared to respondents who are not in poverty. We also see that those aged 29-49 had lower mean counts than parents aged 19-28. College graduates also had lower mean counts of spanks compared to their High Schoo counterparts. Non-Hispanic blacks had higher spank counts compared to Non-Hispanic Whites. Last, male children were more likely to have higher spank counts than female children.

Check fit using AIC

AIC(fit1)

##        eff.p          AIC     deltabar 
##    53.636166 12054.198402     3.575744

AIC(fit2)

## [1] 16211.33

fit3$deviance+2*length(fit3$coefficients)

## [1] 12222.36

AIC(fit.nb1)

## [1] 13199.15

AIC(fit.nb2)

## [1] 13098.59

AIC(fit4)

## [1] 13743.2

summary(fit.nb2)

## 
## Call:
## glm.nb(formula = spanks ~ factor(pov) + factor(p_age) + factor(educ) + 
##     factor(no_work) + factor(p_raceth) + factor(male), data = sub, 
##     weights = w2p0/mean(w2p0, na.rm = T), init.theta = 0.2321516258, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5281  -0.6518  -0.5425  -0.4020   6.6761  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -0.99273    0.09099 -10.910  < 2e-16 ***
## factor(pov)1                  0.23400    0.07419   3.154 0.001610 ** 
## factor(p_age)(29,39]         -0.30075    0.06895  -4.362 1.29e-05 ***
## factor(p_age)(39,49]         -0.44719    0.09094  -4.918 8.76e-07 ***
## factor(p_age)(49,59]         -0.31074    0.24457  -1.271 0.203883    
## factor(p_age)(59,77]         -0.64209    0.46504  -1.381 0.167358    
## factor(educ)0prim            -0.29386    0.16262  -1.807 0.070753 .  
## factor(educ)1somehs           0.20609    0.11604   1.776 0.075736 .  
## factor(educ)3somecol         -0.07326    0.07801  -0.939 0.347662    
## factor(educ)4colgrad         -0.32509    0.08709  -3.733 0.000189 ***
## factor(no_work)not working   -0.14964    0.06081  -2.461 0.013855 *  
## factor(p_raceth)hispanic      0.03778    0.08262   0.457 0.647481    
## factor(p_raceth)nh black      0.43055    0.08519   5.054 4.33e-07 ***
## factor(p_raceth)nh multirace  0.55834    0.20531   2.720 0.006538 ** 
## factor(p_raceth)nh other      0.31056    0.12838   2.419 0.015561 *  
## factor(male)1                 0.14638    0.05648   2.592 0.009553 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.2322) family taken to be 1)
## 
##     Null deviance: 4962.2  on 9746  degrees of freedom
## Residual deviance: 4768.0  on 9731  degrees of freedom
## AIC: 13099
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.2322 
##           Std. Err.:  0.0105 
## 
##  2 x log-likelihood:  -13064.5860