1 input data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dta3 <- read.csv("victims.csv")

dta3 <- mutate(dta3, 
               Race = factor(Race))
str(dta3)
## 'data.frame':    1308 obs. of  2 variables:
##  $ nV  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Race: Factor w/ 2 levels "Black","White": 2 2 2 2 2 2 2 2 2 2 ...

2 descriptive analysis

t1 <- table(dta3$nV, dta3$Race)

# ftable(mytable)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kbl(t1)%>%
  kable_styling(bootstrap_options = c("striped", "hover")
              , full_width = F)%>%
  add_header_above(c("Number of Victims", "Race" = 2))
Number of Victims
Race
Black White
0 119 1070
1 16 60
2 12 14
3 7 4
4 3 0
5 2 0
6 0 1

3 mean and variance

knitr::kable(aggregate(nV ~ Race, data=dta3, FUN=mean))
Race nV
Black 0.5220126
White 0.0922541
knitr::kable(aggregate(nV ~ Race, data=dta3, FUN=var))
Race nV
Black 1.1498288
White 0.1552448

4 Parameter estimates

sjPlot::tab_model(m0 <- glm(nV~ Race, data=dta3, 
                            family=poisson(link=log)), show.se=T, show.r2=F, show.obs=F)
  nV
Predictors Incidence Rate Ratios std. Error CI p
(Intercept) 0.52 0.11 0.42 – 0.64 <0.001
Race [White] 0.18 0.15 0.13 – 0.24 <0.001

5 Goodness of fit and overdispersion

1 - pchisq(deviance(m0), df.residual(m0))
## [1] 1
performance::check_overdispersion(m0)
## # Overdispersion test
## 
##        dispersion ratio =    1.746
##   Pearson's Chi-Squared = 2279.873
##                 p-value =  < 0.001
## Overdispersion detected.
plot(log(fitted(m0)), log((dta3$nV-fitted(m0))^2), 
     xlab=expression(hat(mu)),
     ylab=expression((y-hat(mu))^2))
grid()
abline(0, 1, col=2)

6 Dispersion parameter

knitr::kable(as.data.frame(summary(m0, dispersion=1.7462)$coef))
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.6500636 0.1450466 -4.481757 7.4e-06
RaceWhite -1.7331446 0.1936804 -8.948479 0.0e+00

7 Testing for terms in the model with dispersion

drop1(m0, test="F")
## Warning in drop1.glm(m0, test = "F"): F test assumes 'quasipoisson' family
## Single term deletions
## 
## Model:
## nV ~ Race
##        Df Deviance    AIC F value    Pr(>F)    
## <none>      844.71 1122.0                      
## Race    1   962.80 1238.1  182.58 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8 Stepwise by AIC

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
MASS::stepAIC(glm.nb(nV ~ Race, data=dta3))
## Start:  AIC=999.8
## nV ~ Race
## 
##        Df    AIC
## <none>     999.8
## - Race  1 1049.4
## 
## Call:  glm.nb(formula = nV ~ Race, data = dta3, init.theta = 0.2023119205, 
##     link = log)
## 
## Coefficients:
## (Intercept)    RaceWhite  
##     -0.6501      -1.7331  
## 
## Degrees of Freedom: 1307 Total (i.e. Null);  1306 Residual
## Null Deviance:       471.6 
## Residual Deviance: 412.6     AIC: 1002

9

sjPlot::tab_model(m3 <- glm.nb(nV ~ Race, data=dta3), show.se=T, show.r2=F, show.obs=F, transform=NULL)
  nV
Predictors Log-Mean std. Error CI p
(Intercept) -0.65 0.21 -1.05 – -0.23 0.002
Race [White] -1.73 0.24 -2.21 – -1.27 <0.001