Logistic Regression
Model 0: Logitstic Regression without Bootstrap (Normal)
fit0 <- glm(deginj ~ data$natinj_1 +
data$natinj_2 +
data$natinj_3 +
data$natinj_4 +
data$natinj_5 +
data$natinj_6 +
data$natinj_7 +
data$partbody_1 +
data$partbody_2 +
data$humfact_1 +
data$humfact_2 +
data$task_1 +
data$event_1 +
data$event_2 +
data$event_3 +
data$event_4 +
data$event_5,
data = data, family = "binomial")
summary(fit0)
##
## Call:
## glm(formula = deginj ~ data$natinj_1 + data$natinj_2 + data$natinj_3 +
## data$natinj_4 + data$natinj_5 + data$natinj_6 + data$natinj_7 +
## data$partbody_1 + data$partbody_2 + data$humfact_1 + data$humfact_2 +
## data$task_1 + data$event_1 + data$event_2 + data$event_3 +
## data$event_4 + data$event_5, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.048 -0.316 0.319 0.567 2.971
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0534 0.1579 0.34 0.73529
## data$natinj_1 -0.7336 0.2148 -3.41 0.00064 ***
## data$natinj_2 -1.5962 0.3152 -5.06 4.1e-07 ***
## data$natinj_3 1.8715 0.1210 15.47 < 2e-16 ***
## data$natinj_4 2.4381 0.4988 4.89 1.0e-06 ***
## data$natinj_5 -0.4762 0.1116 -4.27 2.0e-05 ***
## data$natinj_6 0.2633 0.1580 1.67 0.09568 .
## data$natinj_7 2.9815 0.3628 8.22 < 2e-16 ***
## data$partbody_1 1.1658 0.0997 11.69 < 2e-16 ***
## data$partbody_2 -2.9970 0.2252 -13.31 < 2e-16 ***
## data$humfact_1 -0.2607 0.1487 -1.75 0.07964 .
## data$humfact_2 -0.0906 0.0892 -1.02 0.30938
## data$task_1 0.2967 0.0848 3.50 0.00047 ***
## data$event_1 -0.0681 0.1715 -0.40 0.69135
## data$event_2 -0.4351 0.1573 -2.77 0.00566 **
## data$event_3 0.2323 0.1565 1.48 0.13773
## data$event_4 0.1877 0.2480 0.76 0.44899
## data$event_5 0.6826 0.4715 1.45 0.14773
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 3881.7 on 4827 degrees of freedom
## AIC: 3918
##
## Number of Fisher Scoring iterations: 6
confint(fit0)
## 2.5 % 97.5 %
## (Intercept) -0.254 0.365
## data$natinj_1 -1.158 -0.315
## data$natinj_2 -2.249 -1.005
## data$natinj_3 1.636 2.110
## data$natinj_4 1.490 3.448
## data$natinj_5 -0.695 -0.258
## data$natinj_6 -0.045 0.575
## data$natinj_7 2.325 3.763
## data$partbody_1 0.972 1.363
## data$partbody_2 -3.457 -2.572
## data$humfact_1 -0.551 0.032
## data$humfact_2 -0.265 0.084
## data$task_1 0.131 0.463
## data$event_1 -0.405 0.267
## data$event_2 -0.746 -0.129
## data$event_3 -0.077 0.537
## data$event_4 -0.287 0.688
## data$event_5 -0.257 1.601
## odds ratios and 95% CI
exp(cbind(OR = coef(fit0), confint(fit0)))
## OR 2.5 % 97.5 %
## (Intercept) 1.05 0.775 1.441
## data$natinj_1 0.48 0.314 0.730
## data$natinj_2 0.20 0.105 0.366
## data$natinj_3 6.50 5.134 8.251
## data$natinj_4 11.45 4.436 31.434
## data$natinj_5 0.62 0.499 0.773
## data$natinj_6 1.30 0.956 1.777
## data$natinj_7 19.72 10.226 43.058
## data$partbody_1 3.21 2.643 3.907
## data$partbody_2 0.05 0.032 0.076
## data$humfact_1 0.77 0.576 1.033
## data$humfact_2 0.91 0.767 1.088
## data$task_1 1.35 1.140 1.590
## data$event_1 0.93 0.667 1.306
## data$event_2 0.65 0.474 0.879
## data$event_3 1.26 0.926 1.712
## data$event_4 1.21 0.751 1.990
## data$event_5 1.98 0.773 4.957
# Coef * PN
pro0<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit0$coefficients*pro0
## (Intercept) data$natinj_1 data$natinj_2 data$natinj_3
## 0.013 -0.174 -0.379 0.445
## data$natinj_4 data$natinj_5 data$natinj_6 data$natinj_7
## 0.579 -0.113 0.063 0.708
## data$partbody_1 data$partbody_2 data$humfact_1 data$humfact_2
## 0.277 -0.712 -0.062 -0.022
## data$task_1 data$event_1 data$event_2 data$event_3
## 0.070 -0.016 -0.103 0.055
## data$event_4 data$event_5
## 0.045 0.162
fit0_conf<-confint(fit0)
fit0_conf*pro0
## 2.5 % 97.5 %
## (Intercept) -0.060 0.0867
## data$natinj_1 -0.275 -0.0748
## data$natinj_2 -0.534 -0.2387
## data$natinj_3 0.389 0.5012
## data$natinj_4 0.354 0.8189
## data$natinj_5 -0.165 -0.0612
## data$natinj_6 -0.011 0.1366
## data$natinj_7 0.552 0.8936
## data$partbody_1 0.231 0.3237
## data$partbody_2 -0.821 -0.6108
## data$humfact_1 -0.131 0.0076
## data$humfact_2 -0.063 0.0200
## data$task_1 0.031 0.1101
## data$event_1 -0.096 0.0635
## data$event_2 -0.177 -0.0306
## data$event_3 -0.018 0.1276
## data$event_4 -0.068 0.1634
## data$event_5 -0.061 0.3802
# fitted values
fitted.values0<-predict(fit0,type = "response", na.action = na.omit)
plot(roc(fitted.values0, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values0, factor(data$deginj)))
## [1] 0.88
Model 1: Deginj ~ Nature of Injury
fit1 <- glm(deginj ~ data$natinj_1+
data$natinj_2+
data$natinj_3+
data$natinj_4+
data$natinj_5+
data$natinj_6+
data$natinj_7,
data = data, family = "binomial")
summary(fit1)
##
## Call:
## glm(formula = deginj ~ data$natinj_1 + data$natinj_2 + data$natinj_3 +
## data$natinj_4 + data$natinj_5 + data$natinj_6 + data$natinj_7,
## family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.516 -1.034 0.480 0.529 2.704
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0340 0.0723 -0.47 0.6384
## data$natinj_1 -0.5414 0.1733 -3.12 0.0018 **
## data$natinj_2 -3.5960 0.2803 -12.83 < 2e-16 ***
## data$natinj_3 2.1354 0.1061 20.12 < 2e-16 ***
## data$natinj_4 1.9292 0.2224 8.67 < 2e-16 ***
## data$natinj_5 -0.3145 0.0986 -3.19 0.0014 **
## data$natinj_6 0.9460 0.1419 6.67 2.6e-11 ***
## data$natinj_7 3.1549 0.3482 9.06 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 4458.9 on 4837 degrees of freedom
## AIC: 4475
##
## Number of Fisher Scoring iterations: 6
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) -0.18 0.11
## data$natinj_1 -0.89 -0.20
## data$natinj_2 -4.19 -3.08
## data$natinj_3 1.93 2.35
## data$natinj_4 1.51 2.39
## data$natinj_5 -0.51 -0.12
## data$natinj_6 0.67 1.23
## data$natinj_7 2.53 3.91
## odds ratios and 95% CI
exp(cbind(OR = coef(fit1), confint(fit1)))
## OR 2.5 % 97.5 %
## (Intercept) 0.967 0.839 1.114
## data$natinj_1 0.582 0.413 0.815
## data$natinj_2 0.027 0.015 0.046
## data$natinj_3 8.460 6.882 10.434
## data$natinj_4 6.884 4.529 10.866
## data$natinj_5 0.730 0.602 0.886
## data$natinj_6 2.575 1.956 3.413
## data$natinj_7 23.450 12.557 49.959
# Coef * PN
pro1<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit1$coefficients*pro1
## (Intercept) data$natinj_1 data$natinj_2 data$natinj_3 data$natinj_4
## -0.0081 -0.1286 -0.8541 0.5072 0.4582
## data$natinj_5 data$natinj_6 data$natinj_7
## -0.0747 0.2247 0.7493
fit1_conf<-confint(fit1)
fit1_conf*pro1
## 2.5 % 97.5 %
## (Intercept) -0.042 0.026
## data$natinj_1 -0.210 -0.049
## data$natinj_2 -0.995 -0.733
## data$natinj_3 0.458 0.557
## data$natinj_4 0.359 0.567
## data$natinj_5 -0.121 -0.029
## data$natinj_6 0.159 0.292
## data$natinj_7 0.601 0.929
# fitted values
fitted.values1<-predict(fit1,type = "response", na.action = na.omit)
plot(roc(fitted.values1, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values1, factor(data$deginj)))
## [1] 0.84
Model 2: Deginj ~ Part of Body
fit2 <- glm(deginj ~ data$partbody_1+ data$partbody_2,
data = data, family = "binomial")
summary(fit2)
##
## Call:
## glm(formula = deginj ~ data$partbody_1 + data$partbody_2, family = "binomial",
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.844 -0.338 0.635 0.878 2.404
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7550 0.0407 18.53 <2e-16 ***
## data$partbody_1 0.7438 0.0834 8.92 <2e-16 ***
## data$partbody_2 -3.5869 0.1587 -22.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 5023.1 on 4842 degrees of freedom
## AIC: 5029
##
## Number of Fisher Scoring iterations: 5
confint(fit2)
## 2.5 % 97.5 %
## (Intercept) 0.68 0.84
## data$partbody_1 0.58 0.91
## data$partbody_2 -3.91 -3.29
## odds ratios and 95% CI
exp(cbind(OR = coef(fit2), confint(fit2)))
## OR 2.5 % 97.5 %
## (Intercept) 2.128 1.97 2.305
## data$partbody_1 2.104 1.79 2.482
## data$partbody_2 0.028 0.02 0.037
# Coef * PN
pro2<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit2$coefficients*pro2
## (Intercept) data$partbody_1 data$partbody_2
## 0.18 0.18 -0.85
fit2_conf<-confint(fit2)
fit2_conf*pro2
## 2.5 % 97.5 %
## (Intercept) 0.16 0.20
## data$partbody_1 0.14 0.22
## data$partbody_2 -0.93 -0.78
# fitted values
fitted.values2<-predict(fit2,type = "response", na.action = na.omit)
plot(roc(fitted.values2, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values2, factor(data$deginj)))
## [1] 0.74
Model 3: Deginj ~ Human Factor
fit3 <- glm(deginj ~ data$humfact_1+ data$humfact_2,
data = data, family = "binomial")
summary(fit3)
##
## Call:
## glm(formula = deginj ~ data$humfact_1 + data$humfact_2, family = "binomial",
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.53 -1.29 0.86 1.07 1.07
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.8043 0.0502 16.02 < 2e-16 ***
## data$humfact_1 -0.5542 0.1090 -5.08 3.7e-07 ***
## data$humfact_2 -0.5524 0.0641 -8.61 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 6392.0 on 4842 degrees of freedom
## AIC: 6398
##
## Number of Fisher Scoring iterations: 4
confint(fit3)
## 2.5 % 97.5 %
## (Intercept) 0.71 0.90
## data$humfact_1 -0.77 -0.34
## data$humfact_2 -0.68 -0.43
## odds ratios and 95% CI
exp(cbind(OR = coef(fit3), confint(fit3)))
## OR 2.5 % 97.5 %
## (Intercept) 2.24 2.03 2.47
## data$humfact_1 0.57 0.46 0.71
## data$humfact_2 0.58 0.51 0.65
# Coef * PN
pro3<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit3$coefficients*pro3
## (Intercept) data$humfact_1 data$humfact_2
## 0.19 -0.13 -0.13
fit3_conf<-confint(fit3)
fit3_conf*pro3
## 2.5 % 97.5 %
## (Intercept) 0.17 0.215
## data$humfact_1 -0.18 -0.081
## data$humfact_2 -0.16 -0.101
# fitted values
fitted.values3<-predict(fit3,type = "response", na.action = na.omit)
plot(roc(fitted.values3, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values3, factor(data$deginj)))
## [1] 0.56
Model 4: Deginj ~ Task Assigned
fit4 <- glm(deginj ~ data$task_1,
data = data, family = "binomial")
summary(fit4)
##
## Call:
## glm(formula = deginj ~ data$task_1, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.47 -1.32 0.91 1.04 1.04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3338 0.0367 9.09 < 2e-16 ***
## data$task_1 0.3344 0.0619 5.40 6.6e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 6442.9 on 4843 degrees of freedom
## AIC: 6447
##
## Number of Fisher Scoring iterations: 4
confint(fit4)
## 2.5 % 97.5 %
## (Intercept) 0.26 0.41
## data$task_1 0.21 0.46
## odds ratios and 95% CI
exp(cbind(OR = coef(fit4), confint(fit4)))
## OR 2.5 % 97.5 %
## (Intercept) 1.4 1.3 1.5
## data$task_1 1.4 1.2 1.6
# Coef * PN
pro4<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit4$coefficients*pro4
## (Intercept) data$task_1
## 0.079 0.079
fit4_conf<-confint(fit4)
fit4_conf*pro4
## 2.5 % 97.5 %
## (Intercept) 0.062 0.096
## data$task_1 0.051 0.108
# fitted values
fitted.values4<-predict(fit4,type = "response", na.action = na.omit)
plot(roc(fitted.values4, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values4, factor(data$deginj)))
## [1] 0.54
Model 5: Deginj ~ Event Type
fit5 <- glm(deginj ~ data$event_1+ data$event_2+ data$event_3+ data$event_4+ data$event_5,
data = data, family = "binomial")
summary(fit5)
##
## Call:
## glm(formula = deginj ~ data$event_1 + data$event_2 + data$event_3 +
## data$event_4 + data$event_5, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.130 -0.971 0.840 1.007 1.399
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7502 0.0831 9.02 < 2e-16 ***
## data$event_1 -1.2568 0.1033 -12.16 < 2e-16 ***
## data$event_2 -0.3352 0.1005 -3.34 0.00085 ***
## data$event_3 0.1116 0.1032 1.08 0.27924
## data$event_4 1.4093 0.2196 6.42 1.4e-10 ***
## data$event_5 0.8717 0.2106 4.14 3.5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6472.5 on 4844 degrees of freedom
## Residual deviance: 6002.5 on 4839 degrees of freedom
## AIC: 6015
##
## Number of Fisher Scoring iterations: 4
confint(fit5)
## 2.5 % 97.5 %
## (Intercept) 0.589 0.91
## data$event_1 -1.461 -1.06
## data$event_2 -0.533 -0.14
## data$event_3 -0.091 0.31
## data$event_4 0.995 1.86
## data$event_5 0.471 1.30
## odds ratios and 95% CI
exp(cbind(OR = coef(fit5), confint(fit5)))
## OR 2.5 % 97.5 %
## (Intercept) 2.12 1.80 2.50
## data$event_1 0.28 0.23 0.35
## data$event_2 0.72 0.59 0.87
## data$event_3 1.12 0.91 1.37
## data$event_4 4.09 2.71 6.42
## data$event_5 2.39 1.60 3.67
# Coef * PN
pro5<-(nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))*(1-nrow(filter(data, deginj==0))/(nrow(filter(data, deginj==0))+nrow(filter(data, deginj==1))))
fit5$coefficients*pro5
## (Intercept) data$event_1 data$event_2 data$event_3 data$event_4
## 0.178 -0.298 -0.080 0.027 0.335
## data$event_5
## 0.207
fit5_conf<-confint(fit5)
fit5_conf*pro5
## 2.5 % 97.5 %
## (Intercept) 0.140 0.217
## data$event_1 -0.347 -0.251
## data$event_2 -0.127 -0.033
## data$event_3 -0.022 0.074
## data$event_4 0.236 0.442
## data$event_5 0.112 0.309
# fitted values
fitted.values5<-predict(fit5,type = "response", na.action = na.omit)
plot(roc(fitted.values5, factor(data$deginj)), col = "red", lwd = 2, main= 'ROC Curve of Predicting Performance')

auc(roc(fitted.values5, factor(data$deginj)))
## [1] 0.67