library(dplyr)
library(DAAG)
library(ggplot2)
library(texreg)
library(visreg)
library(DAAG)
data(nassCDS)
head(nassCDS)
## dvcat weight dead airbag seatbelt frontal sex ageOFocc yearacc yearVeh
## 1 25-39 25.069 alive none belted 1 f 26 1997 1990
## 2 10-24 25.069 alive airbag belted 1 f 72 1997 1995
## 3 10-24 32.379 alive none none 1 f 69 1997 1988
## 4 25-39 495.444 alive airbag belted 1 f 53 1997 1995
## 5 25-39 25.069 alive none belted 1 f 32 1997 1988
## 6 40-54 25.069 alive none belted 1 f 22 1997 1985
## abcat occRole deploy injSeverity caseid
## 1 unavail driver 0 3 2:3:1
## 2 deploy driver 1 1 2:3:2
## 3 unavail driver 0 4 2:5:1
## 4 deploy driver 1 1 2:10:1
## 5 unavail driver 0 3 2:11:1
## 6 unavail driver 0 3 2:11:2
summary(nassCDS)
## dvcat weight dead airbag
## 1-9km/h: 686 Min. : 0.00 alive:25037 none :11798
## 10-24 :12848 1st Qu.: 32.47 dead : 1180 airbag:14419
## 25-39 : 8214 Median : 86.99
## 40-54 : 2977 Mean : 462.81
## 55+ : 1492 3rd Qu.: 364.72
## Max. :57871.59
##
## seatbelt frontal sex ageOFocc yearacc
## none : 7644 Min. :0.0000 f:12248 Min. :16.00 Min. :1997
## belted:18573 1st Qu.:0.0000 m:13969 1st Qu.:22.00 1st Qu.:1998
## Median :1.0000 Median :33.00 Median :2000
## Mean :0.6433 Mean :37.21 Mean :2000
## 3rd Qu.:1.0000 3rd Qu.:48.00 3rd Qu.:2001
## Max. :1.0000 Max. :97.00 Max. :2002
##
## yearVeh abcat occRole deploy
## Min. :1953 Length:26217 Length:26217 Min. :0.000
## 1st Qu.:1989 Class :character Class :character 1st Qu.:0.000
## Median :1994 Mode :character Mode :character Median :0.000
## Mean :1993 Mean :0.337
## 3rd Qu.:1997 3rd Qu.:1.000
## Max. :2003 Max. :1.000
## NA's :1
## injSeverity caseid
## Min. :0.000 Length:26217
## 1st Qu.:1.000 Class :character
## Median :2.000 Mode :character
## Mean :1.716
## 3rd Qu.:3.000
## Max. :6.000
## NA's :153
dim(nassCDS)
## [1] 26217 15
nassCDS <- nassCDS %>%
mutate(dead1 = as.integer(dead))
nassCDS <- nassCDS %>%
select(seatbelt, dead, dead1, dvcat, sex, ageOFocc, everything())
head(nassCDS)
## seatbelt dead dead1 dvcat sex ageOFocc weight airbag frontal yearacc
## 1 belted alive 1 25-39 f 26 25.069 none 1 1997
## 2 belted alive 1 10-24 f 72 25.069 airbag 1 1997
## 3 none alive 1 10-24 f 69 32.379 none 1 1997
## 4 belted alive 1 25-39 f 53 495.444 airbag 1 1997
## 5 belted alive 1 25-39 f 32 25.069 none 1 1997
## 6 belted alive 1 40-54 f 22 25.069 none 1 1997
## yearVeh abcat occRole deploy injSeverity caseid
## 1 1990 unavail driver 0 3 2:3:1
## 2 1995 deploy driver 1 1 2:3:2
## 3 1988 unavail driver 0 4 2:5:1
## 4 1995 deploy driver 1 1 2:10:1
## 5 1988 unavail driver 0 3 2:11:1
## 6 1985 unavail driver 0 3 2:11:2
nassCDS <- nassCDS %>%
mutate(alive = sjmisc::rec(dead1, rec = "2=0; 1=1")) %>%
select(seatbelt, dead, alive, dvcat, sex, ageOFocc, everything()) %>%
select(-dead1)
head(nassCDS)
## seatbelt dead alive dvcat sex ageOFocc weight airbag frontal yearacc
## 1 belted alive 1 25-39 f 26 25.069 none 1 1997
## 2 belted alive 1 10-24 f 72 25.069 airbag 1 1997
## 3 none alive 1 10-24 f 69 32.379 none 1 1997
## 4 belted alive 1 25-39 f 53 495.444 airbag 1 1997
## 5 belted alive 1 25-39 f 32 25.069 none 1 1997
## 6 belted alive 1 40-54 f 22 25.069 none 1 1997
## yearVeh abcat occRole deploy injSeverity caseid
## 1 1990 unavail driver 0 3 2:3:1
## 2 1995 deploy driver 1 1 2:3:2
## 3 1988 unavail driver 0 4 2:5:1
## 4 1995 deploy driver 1 1 2:10:1
## 5 1988 unavail driver 0 3 2:11:1
## 6 1985 unavail driver 0 3 2:11:2
m1 <- glm(alive ~ seatbelt, family = binomial, data = nassCDS)
summary(m1)
##
## Call:
## glm(formula = alive ~ seatbelt, family = binomial, data = nassCDS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6888 0.2336 0.2336 0.4317 0.4317
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.32642 0.04018 57.90 <2e-16 ***
## seatbeltbelted 1.26115 0.06058 20.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9624.2 on 26216 degrees of freedom
## Residual deviance: 9189.5 on 26215 degrees of freedom
## AIC: 9193.5
##
## Number of Fisher Scoring iterations: 6
m2 <- glm(alive ~ seatbelt + dvcat + sex + ageOFocc, family = binomial, data = nassCDS)
summary(m2)
##
## Call:
## glm(formula = alive ~ seatbelt + dvcat + sex + ageOFocc, family = binomial,
## data = nassCDS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3929 0.0988 0.1570 0.2695 1.8442
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.062647 0.150677 26.963 <2e-16 ***
## seatbeltbelted 0.958208 0.066530 14.403 <2e-16 ***
## dvcat.L -3.577705 0.370022 -9.669 <2e-16 ***
## dvcat.Q -0.294910 0.314068 -0.939 0.348
## dvcat.C 0.305418 0.196955 1.551 0.121
## dvcat^4 -0.124516 0.097356 -1.279 0.201
## sexm -0.036597 0.067080 -0.546 0.585
## ageOFocc -0.033226 0.001694 -19.618 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9624.2 on 26216 degrees of freedom
## Residual deviance: 7290.4 on 26209 degrees of freedom
## AIC: 7306.4
##
## Number of Fisher Scoring iterations: 7
m3<- glm(alive ~ seatbelt*dvcat + sex + ageOFocc + airbag, family = binomial, data = nassCDS)
summary(m3)
##
## Call:
## glm(formula = alive ~ seatbelt * dvcat + sex + ageOFocc + airbag,
## family = binomial, data = nassCDS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.5772 0.0715 0.1570 0.2815 1.7809
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.708246 0.152510 24.315 < 2e-16 ***
## seatbeltbelted 3.733637 32.114165 0.116 0.9074
## dvcat.L -2.423593 0.378794 -6.398 1.57e-10 ***
## dvcat.Q -0.758326 0.322960 -2.348 0.0189 *
## dvcat.C 0.343084 0.208395 1.646 0.0997 .
## dvcat^4 -0.104156 0.114099 -0.913 0.3613
## sexm -0.037756 0.066939 -0.564 0.5727
## ageOFocc -0.033224 0.001686 -19.711 < 2e-16 ***
## airbagairbag 0.059794 0.066099 0.905 0.3657
## seatbeltbelted:dvcat.L -9.073584 101.553787 -0.089 0.9288
## seatbeltbelted:dvcat.Q 6.556057 85.828638 0.076 0.9391
## seatbeltbelted:dvcat.C -3.394311 50.777095 -0.067 0.9467
## seatbeltbelted:dvcat^4 1.205024 19.192373 0.063 0.9499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9624.2 on 26216 degrees of freedom
## Residual deviance: 7228.7 on 26204 degrees of freedom
## AIC: 7254.7
##
## Number of Fisher Scoring iterations: 16
Model 3 has the lowest deviance and perhaps it is the best fit to data. Lower values of BIC and AIC indicate better fit.
library(texreg)
htmlreg(list(m1, m2, m3))
| Model 1 | Model 2 | Model 3 | ||
|---|---|---|---|---|
| (Intercept) | 2.33*** | 4.06*** | 3.71*** | |
| (0.04) | (0.15) | (0.15) | ||
| seatbeltbelted | 1.26*** | 0.96*** | 3.73 | |
| (0.06) | (0.07) | (32.11) | ||
| dvcat.L | -3.58*** | -2.42*** | ||
| (0.37) | (0.38) | |||
| dvcat.Q | -0.29 | -0.76* | ||
| (0.31) | (0.32) | |||
| dvcat.C | 0.31 | 0.34 | ||
| (0.20) | (0.21) | |||
| dvcat^4 | -0.12 | -0.10 | ||
| (0.10) | (0.11) | |||
| sexm | -0.04 | -0.04 | ||
| (0.07) | (0.07) | |||
| ageOFocc | -0.03*** | -0.03*** | ||
| (0.00) | (0.00) | |||
| airbagairbag | 0.06 | |||
| (0.07) | ||||
| seatbeltbelted:dvcat.L | -9.07 | |||
| (101.55) | ||||
| seatbeltbelted:dvcat.Q | 6.56 | |||
| (85.83) | ||||
| seatbeltbelted:dvcat.C | -3.39 | |||
| (50.78) | ||||
| seatbeltbelted:dvcat^4 | 1.21 | |||
| (19.19) | ||||
| AIC | 9193.54 | 7306.43 | 7254.70 | |
| BIC | 9209.89 | 7371.82 | 7360.97 | |
| Log Likelihood | -4594.77 | -3645.21 | -3614.35 | |
| Deviance | 9189.54 | 7290.43 | 7228.70 | |
| Num. obs. | 26217 | 26217 | 26217 | |
| p < 0.001, p < 0.01, p < 0.05 | ||||
anova(m1, m2, m3, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: alive ~ seatbelt
## Model 2: alive ~ seatbelt + dvcat + sex + ageOFocc
## Model 3: alive ~ seatbelt * dvcat + sex + ageOFocc + airbag
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 26215 9189.5
## 2 26209 7290.4 6 1899.11 < 2.2e-16 ***
## 3 26204 7228.7 5 61.72 5.347e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(visreg)
visreg(m3, "seatbelt", scale = "response")
## Conditions used in construction of plot
## dvcat: 10-24
## sex: m
## ageOFocc: 33
## airbag: airbag
library(visreg)
visreg(m3, "dvcat", scale = "response")
## Conditions used in construction of plot
## seatbelt: belted
## sex: m
## ageOFocc: 33
## airbag: airbag
library(visreg)
visreg(m3, "ageOFocc", scale = "response")
## Conditions used in construction of plot
## seatbelt: belted
## dvcat: 10-24
## sex: m
## airbag: airbag
Having seat belts fasten increases your chances to survive a crash:
visreg(m3, "dvcat", by = "seatbelt", scale = "response")
Seat belts significantly increase the chances to survive, especially with the increase of age of the victim:
visreg(m3, "seatbelt", by = "ageOFocc", scale = "response")
With the increase of speed of driving and age, the chances of death in a car crash increases:
visreg(m3, "ageOFocc", by = "dvcat", scale = "response")