First, I had to clean the data as best I could. The dataset below has 181K observations and 135 variables.
# Load the raw GTD CSV export, keep a block of rows and a handful of columns,
# and cut three row samples (gtd1, gtd2, gtd3) for the model fits that follow.
#
# NOTE(review): header = FALSE makes read.csv() treat the CSV header line as a
# data row, and row.names = 1 turns the first file column into row names, so
# data-frame column k is file column k + 1. Header tokens ("region",
# "nkillus", "attacktype1_txt", "propextent") show up as factor levels of
# country, NWounded, AType and RansomPaid in the recorded model output, which
# suggests the positional indices below do not select the columns that the
# assigned names describe. TODO: re-read with header = TRUE, select columns by
# name, and recode the model response before refitting. This is not changed
# here because the binomial fits further down would stop working with a
# numeric NKilled column.
#
# stringsAsFactors = TRUE is spelled out because the binomial fits presumably
# rely on NKilled being a factor; since R 4.0 the default is FALSE.
gtd <- read.csv(
  "C:/Users/Christian/Desktop/R/gtd/gtd.csv",
  header = FALSE,
  row.names = 1,
  stringsAsFactors = TRUE
)

# Rows 166692 through 181692 (15,001 rows).
gtd15k <- gtd[166692:181692, ]

# Fix: the column selection used to index `gtd` again, which silently threw
# away the row subset above and left the header row inside the first sample.
# It now narrows the already row-filtered table.
gtd15k <- gtd15k[
  c(1, 9, 20, 22, 29, 30, 35, 36, 38, 59, 72, 99, 102, 105, 108, 119)
]

# Only the first 14 of the 16 selected columns are kept and named.
gtd.small <- gtd15k[1:14]
names(gtd.small) <- c(
  "date", "country", "LowCrit", "HighCrit", "AType", "Target", "T2",
  "TGroup", "Claimed", "NKilled", "PropertyIn", "NWounded", "PropValue",
  "RansomPaid"
)

# Three row samples taken from the start, middle and end of gtd.small.
gtd1 <- gtd.small[1:500, ]
gtd2 <- gtd.small[3500:4000, ]
gtd3 <- gtd.small[14500:15000, ]

# NOTE(review): all model output recorded further down in this file was
# produced before the row-subset fix; re-run the script to refresh it.
# Binomial (logit link) GLM of NKilled on country, NWounded, AType and
# RansomPaid, fitted to the gtd1 sample.
# NOTE(review): the warning recorded below, together with the very large
# standard errors in the recorded summary, points to (quasi-)complete
# separation; treat the estimates with caution.
gtd.fit1 <- glm(
  NKilled ~ country + NWounded + AType + RansomPaid,
  family = binomial("logit"),
  data = gtd1
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(gtd.fit1)
##
## Call:
## glm(formula = NKilled ~ country + NWounded + AType + RansomPaid,
## family = binomial("logit"), data = gtd1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.35927 -0.00001 -0.00001 0.00000 2.35482
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value
## (Intercept) -3.992e+01 2.920e+04 -0.001
## country10 -1.388e+00 3.451e+04 0.000
## country11 -3.078e+00 7.353e+04 0.000
## country2 -3.153e+00 4.967e+04 0.000
## country3 1.827e+01 1.542e+04 0.001
## country4 1.390e+01 9.299e+04 0.000
## country5 1.180e+01 1.018e+05 0.000
## country8 1.830e-01 2.513e+04 0.000
## country9 1.262e+01 5.190e+04 0.000
## countryregion 6.448e+01 1.342e+05 0.000
## NWounded0 1.311e+01 2.682e+04 0.000
## NWounded1 1.380e+01 5.196e+04 0.000
## NWounded2 1.422e+01 7.671e+04 0.000
## NWounded4 -2.809e+00 1.328e+05 0.000
## NWoundednkillus NA NA NA
## ATypeAssassination -1.475e+00 3.658e+04 0.000
## ATypeattacktype1_txt NA NA NA
## ATypeBombing/Explosion 3.027e+00 2.888e+04 0.000
## ATypeFacility/Infrastructure Attack 2.175e+00 2.410e+04 0.000
## ATypeHijacking -1.422e+01 6.241e+04 0.000
## ATypeHostage Taking (Barricade Incident) -8.380e-01 7.080e+04 0.000
## ATypeHostage Taking (Kidnapping) 1.893e+01 2.476e+04 0.001
## ATypeUnarmed Assault 2.238e+00 1.344e+05 0.000
## ATypeUnknown -8.091e+00 1.252e+05 0.000
## RansomPaid2 2.436e-01 4.568e+04 0.000
## RansomPaid3 -7.756e-01 1.550e+04 0.000
## RansomPaid4 4.743e-02 2.434e+04 0.000
## RansomPaidpropextent NA NA NA
## Pr(>|z|)
## (Intercept) 0.999
## country10 1.000
## country11 1.000
## country2 1.000
## country3 0.999
## country4 1.000
## country5 1.000
## country8 1.000
## country9 1.000
## countryregion 1.000
## NWounded0 1.000
## NWounded1 1.000
## NWounded2 1.000
## NWounded4 1.000
## NWoundednkillus NA
## ATypeAssassination 1.000
## ATypeattacktype1_txt NA
## ATypeBombing/Explosion 1.000
## ATypeFacility/Infrastructure Attack 1.000
## ATypeHijacking 1.000
## ATypeHostage Taking (Barricade Incident) 1.000
## ATypeHostage Taking (Kidnapping) 0.999
## ATypeUnarmed Assault 1.000
## ATypeUnknown 1.000
## RansomPaid2 1.000
## RansomPaid3 1.000
## RansomPaid4 1.000
## RansomPaidpropextent NA
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 26.0778 on 499 degrees of freedom
## Residual deviance: 7.4813 on 475 degrees of freedom
## AIC: 57.481
##
## Number of Fisher Scoring iterations: 23
# Same binomial (logit link) specification as gtd.fit1, fitted to the gtd2
# sample instead.
# NOTE(review): the warning recorded below, together with the very large
# standard errors in the recorded summary, points to (quasi-)complete
# separation; treat the estimates with caution.
gtd.fit2 <- glm(
  NKilled ~ country + NWounded + AType + RansomPaid,
  family = binomial("logit"),
  data = gtd2
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(gtd.fit2)
##
## Call:
## glm(formula = NKilled ~ country + NWounded + AType + RansomPaid,
## family = binomial("logit"), data = gtd2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.44270 -0.12861 -0.00001 0.00000 3.09814
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -5.845e+01 1.773e+04 -0.003
## country10 1.877e+01 8.555e+03 0.002
## country11 -1.294e+00 4.263e+04 0.000
## country2 -2.202e+00 2.713e+04 0.000
## country3 -2.286e+00 1.923e+04 0.000
## country4 1.631e+01 1.316e+05 0.000
## country5 -1.010e+00 3.903e+04 0.000
## country6 1.631e+01 1.316e+05 0.000
## country8 1.629e+01 8.555e+03 0.002
## NWounded0 -2.848e+00 2.447e+04 0.000
## NWounded2 -2.229e+01 1.310e+05 0.000
## ATypeAssassination 3.740e+01 1.553e+04 0.002
## ATypeBombing/Explosion 1.757e+01 1.254e+04 0.001
## ATypeFacility/Infrastructure Attack -3.835e+01 2.295e+04 -0.002
## ATypeHijacking 1.893e+01 6.864e+04 0.000
## ATypeHostage Taking (Barricade Incident) 1.759e+01 1.319e+05 0.000
## ATypeHostage Taking (Kidnapping) 1.879e+01 2.751e+04 0.001
## ATypeUnarmed Assault 1.759e+01 1.319e+05 0.000
## ATypeUnknown -1.337e+00 8.572e+04 0.000
## RansomPaid2 6.069e+01 1.747e+04 0.003
## RansomPaid3 1.979e+01 9.153e+03 0.002
## RansomPaid4 5.431e-01 1.536e+04 0.000
## Pr(>|z|)
## (Intercept) 0.997
## country10 0.998
## country11 1.000
## country2 1.000
## country3 1.000
## country4 1.000
## country5 1.000
## country6 1.000
## country8 0.998
## NWounded0 1.000
## NWounded2 1.000
## ATypeAssassination 0.998
## ATypeBombing/Explosion 0.999
## ATypeFacility/Infrastructure Attack 0.999
## ATypeHijacking 1.000
## ATypeHostage Taking (Barricade Incident) 1.000
## ATypeHostage Taking (Kidnapping) 0.999
## ATypeUnarmed Assault 1.000
## ATypeUnknown 1.000
## RansomPaid2 0.997
## RansomPaid3 0.998
## RansomPaid4 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 36.690 on 500 degrees of freedom
## Residual deviance: 18.234 on 479 degrees of freedom
## AIC: 62.234
##
## Number of Fisher Scoring iterations: 23
# Full binomial (logit link) model of NKilled on country, NWounded, AType and
# RansomPaid, fitted to the gtd3 sample.
# NOTE(review): the warning recorded below indicates fitted probabilities at
# the 0/1 boundary for part of the sample; several recorded standard errors
# are correspondingly very large.
gtd.fit3 <- glm(
  NKilled ~ country + NWounded + AType + RansomPaid,
  family = binomial("logit"),
  data = gtd3
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(gtd.fit3)
##
## Call:
## glm(formula = NKilled ~ country + NWounded + AType + RansomPaid,
## family = binomial("logit"), data = gtd3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9631 -0.3827 -0.3319 -0.1689 2.7224
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -1.977e+01 2.958e+03 -0.007
## country10 1.520e+01 2.958e+03 0.005
## country11 -3.995e-01 4.011e+03 0.000
## country12 -2.696e-02 1.115e+04 0.000
## country2 1.690e+01 2.958e+03 0.006
## country3 1.530e+01 2.958e+03 0.005
## country5 -4.700e-01 5.614e+03 0.000
## country6 -3.721e-01 6.801e+03 0.000
## country8 1.677e+01 2.958e+03 0.006
## NWounded0 -1.620e+01 2.208e+03 -0.007
## ATypeAssassination 2.318e-01 8.080e-01 0.287
## ATypeBombing/Explosion 7.942e-01 5.374e-01 1.478
## ATypeFacility/Infrastructure Attack 4.374e-01 1.132e+00 0.386
## ATypeHijacking -1.538e+01 3.278e+03 -0.005
## ATypeHostage Taking (Barricade Incident) -1.645e+01 4.156e+03 -0.004
## ATypeHostage Taking (Kidnapping) 8.731e-01 7.371e-01 1.184
## ATypeUnarmed Assault -3.651e-01 7.918e+03 0.000
## ATypeUnknown 2.050e+00 8.265e-01 2.480
## RansomPaid2 -1.691e+01 7.329e+03 -0.002
## RansomPaid3 -5.804e-01 6.567e-01 -0.884
## RansomPaid4 2.940e-01 4.847e-01 0.606
## Pr(>|z|)
## (Intercept) 0.9947
## country10 0.9959
## country11 0.9999
## country12 1.0000
## country2 0.9954
## country3 0.9959
## country5 0.9999
## country6 1.0000
## country8 0.9955
## NWounded0 0.9941
## ATypeAssassination 0.7742
## ATypeBombing/Explosion 0.1394
## ATypeFacility/Infrastructure Attack 0.6993
## ATypeHijacking 0.9963
## ATypeHostage Taking (Barricade Incident) 0.9968
## ATypeHostage Taking (Kidnapping) 0.2362
## ATypeUnarmed Assault 1.0000
## ATypeUnknown 0.0131 *
## RansomPaid2 0.9982
## RansomPaid3 0.3768
## RansomPaid4 0.5442
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.09 on 500 degrees of freedom
## Residual deviance: 204.18 on 480 degrees of freedom
## AIC: 246.18
##
## Number of Fisher Scoring iterations: 18
# Reduced binomial (logit link) model on the gtd3 sample: only country and
# NWounded are kept as predictors, i.e. gtd.fit3 without AType and RansomPaid.
gtd.fit4 <- glm(
  NKilled ~ country + NWounded,
  family = binomial("logit"),
  data = gtd3
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(gtd.fit4)
##
## Call:
## glm(formula = NKilled ~ country + NWounded, family = binomial("logit"),
## data = gtd3)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4241 -0.4125 -0.4125 -0.2074 2.7749
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -18.3211 1824.8841 -0.010 0.992
## country10 14.4925 1824.8843 0.008 0.994
## country11 -0.1514 2469.3360 0.000 1.000
## country12 -0.2449 6773.1098 0.000 1.000
## country2 15.8996 1824.8841 0.009 0.993
## country3 14.7516 1824.8842 0.008 0.994
## country5 -0.2449 3440.8087 0.000 1.000
## country6 -0.2449 4184.7111 0.000 1.000
## country8 15.9579 1824.8841 0.009 0.993
## NWounded0 -15.7861 1341.9390 -0.012 0.991
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 227.09 on 500 degrees of freedom
## Residual deviance: 214.21 on 491 degrees of freedom
## AIC: 234.21
##
## Number of Fisher Scoring iterations: 17
# Tabulate the gtd.fit1 coefficients on three scales: the raw coefficient
# (log-odds scale), exp(coefficient), and the inverse logit of the coefficient.
# NOTE(review): for every row except the intercept, exp(coefficient) is an
# odds ratio against the reference level, so its inverse logit is not a
# predicted probability; only the intercept row can be read that way.
coef.gtd1 <- coef(gtd.fit1)
gtd1.odds <- exp(coef.gtd1)
# plogis() is the inverse logit. It matches odds / (1 + odds) for ordinary
# values but does not return NaN when exp() overflows to Inf on a very large
# coefficient.
gtd1.probs <- plogis(coef.gtd1)
# cbind() takes its column headers from the variable names.
coef.all1 <- cbind(coef.gtd1, gtd1.odds, gtd1.probs)
coef.all1
## coef.gtd1 gtd1.odds
## (Intercept) -39.91554602 4.622731e-18
## country10 -1.38766297 2.496581e-01
## country11 -3.07805881 4.604856e-02
## country2 -3.15349100 4.270279e-02
## country3 18.27306126 8.627593e+07
## country4 13.90400237 1.092525e+06
## country5 11.79647228 1.327831e+05
## country8 0.18297478 1.200784e+00
## country9 12.62039757 3.026698e+05
## countryregion 64.48161440 1.009275e+28
## NWounded0 13.11140235 4.945494e+05
## NWounded1 13.80251877 9.870922e+05
## NWounded2 14.22040466 1.499144e+06
## NWounded4 -2.80934712 6.024431e-02
## NWoundednkillus NA NA
## ATypeAssassination -1.47509953 2.287560e-01
## ATypeattacktype1_txt NA NA
## ATypeBombing/Explosion 3.02690699 2.063331e+01
## ATypeFacility/Infrastructure Attack 2.17525047 8.804390e+00
## ATypeHijacking -14.21659988 6.695902e-07
## ATypeHostage Taking (Barricade Incident) -0.83796566 4.325897e-01
## ATypeHostage Taking (Kidnapping) 18.93443455 1.671554e+08
## ATypeUnarmed Assault 2.23807515 9.375268e+00
## ATypeUnknown -8.09131883 3.061857e-04
## RansomPaid2 0.24363130 1.275874e+00
## RansomPaid3 -0.77560994 4.604229e-01
## RansomPaid4 0.04743091 1.048574e+00
## RansomPaidpropextent NA NA
## gtd1.probs
## (Intercept) 4.622731e-18
## country10 1.997811e-01
## country11 4.402144e-02
## country2 4.095394e-02
## country3 1.000000e+00
## country4 9.999991e-01
## country5 9.999925e-01
## country8 5.456165e-01
## country9 9.999967e-01
## countryregion 1.000000e+00
## NWounded0 9.999980e-01
## NWounded1 9.999990e-01
## NWounded2 9.999993e-01
## NWounded4 5.682116e-02
## NWoundednkillus NA
## ATypeAssassination 1.861687e-01
## ATypeattacktype1_txt NA
## ATypeBombing/Explosion 9.537750e-01
## ATypeFacility/Infrastructure Attack 8.980049e-01
## ATypeHijacking 6.695898e-07
## ATypeHostage Taking (Barricade Incident) 3.019634e-01
## ATypeHostage Taking (Kidnapping) 1.000000e+00
## ATypeUnarmed Assault 9.036169e-01
## ATypeUnknown 3.060920e-04
## RansomPaid2 5.606083e-01
## RansomPaid3 3.152668e-01
## RansomPaid4 5.118555e-01
## RansomPaidpropextent NA
# Tabulate the gtd.fit2 coefficients on three scales: the raw coefficient
# (log-odds scale), exp(coefficient), and the inverse logit of the coefficient.
# NOTE(review): for every row except the intercept, exp(coefficient) is an
# odds ratio against the reference level, so its inverse logit is not a
# predicted probability; only the intercept row can be read that way.
coef.gtd2 <- coef(gtd.fit2)
gtd2.odds <- exp(coef.gtd2)
# plogis() is the inverse logit. It matches odds / (1 + odds) for ordinary
# values but does not return NaN when exp() overflows to Inf on a very large
# coefficient.
gtd2.probs <- plogis(coef.gtd2)
# cbind() takes its column headers from the variable names.
coef.all2 <- cbind(coef.gtd2, gtd2.odds, gtd2.probs)
coef.all2
## coef.gtd2 gtd2.odds
## (Intercept) -58.4468010 4.138823e-26
## country10 18.7718294 1.420698e+08
## country11 -1.2940117 2.741687e-01
## country2 -2.2016820 1.106169e-01
## country3 -2.2861165 1.016605e-01
## country4 16.3131054 1.215323e+07
## country5 -1.0095380 3.643873e-01
## country6 16.3131054 1.215323e+07
## country8 16.2945578 1.192989e+07
## NWounded0 -2.8481011 5.795426e-02
## NWounded2 -22.2925865 2.081866e-10
## ATypeAssassination 37.4014896 1.750897e+16
## ATypeBombing/Explosion 17.5676271 4.261118e+07
## ATypeFacility/Infrastructure Attack -38.3535989 2.204163e-17
## ATypeHijacking 18.9313586 1.666420e+08
## ATypeHostage Taking (Barricade Incident) 17.5861746 4.340888e+07
## ATypeHostage Taking (Kidnapping) 18.7889411 1.445218e+08
## ATypeUnarmed Assault 17.5861746 4.340888e+07
## ATypeUnknown -1.3375016 2.625007e-01
## RansomPaid2 60.6925079 2.282555e+26
## RansomPaid3 19.7936648 3.947112e+08
## RansomPaid4 0.5431458 1.721414e+00
## gtd2.probs
## (Intercept) 4.138823e-26
## country10 1.000000e+00
## country11 2.151746e-01
## country2 9.959955e-02
## country3 9.227933e-02
## country4 9.999999e-01
## country5 2.670703e-01
## country6 9.999999e-01
## country8 9.999999e-01
## NWounded0 5.477955e-02
## NWounded2 2.081866e-10
## ATypeAssassination 1.000000e+00
## ATypeBombing/Explosion 1.000000e+00
## ATypeFacility/Infrastructure Attack 2.204163e-17
## ATypeHijacking 1.000000e+00
## ATypeHostage Taking (Barricade Incident) 1.000000e+00
## ATypeHostage Taking (Kidnapping) 1.000000e+00
## ATypeUnarmed Assault 1.000000e+00
## ATypeUnknown 2.079212e-01
## RansomPaid2 1.000000e+00
## RansomPaid3 1.000000e+00
## RansomPaid4 6.325439e-01
# Tabulate the gtd.fit3 coefficients on three scales: the raw coefficient
# (log-odds scale), exp(coefficient), and the inverse logit of the coefficient.
# NOTE(review): for every row except the intercept, exp(coefficient) is an
# odds ratio against the reference level, so its inverse logit is not a
# predicted probability; only the intercept row can be read that way.
coef.gtd3 <- coef(gtd.fit3)
gtd3.odds <- exp(coef.gtd3)
# plogis() is the inverse logit. It matches odds / (1 + odds) for ordinary
# values but does not return NaN when exp() overflows to Inf on a very large
# coefficient.
gtd3.probs <- plogis(coef.gtd3)
# cbind() takes its column headers from the variable names.
coef.all3 <- cbind(coef.gtd3, gtd3.odds, gtd3.probs)
coef.all3
## coef.gtd3 gtd3.odds
## (Intercept) -19.77088607 2.591870e-09
## country10 15.19877339 3.987892e+06
## country11 -0.39953612 6.706311e-01
## country12 -0.02695575 9.734043e-01
## country2 16.89971911 2.185017e+07
## country3 15.29584520 4.394416e+06
## country5 -0.46999566 6.250050e-01
## country6 -0.37210029 6.892851e-01
## country8 16.77413140 1.927138e+07
## NWounded0 -16.20418880 9.175088e-08
## ATypeAssassination 0.23177330 1.260834e+00
## ATypeBombing/Explosion 0.79423604 2.212750e+00
## ATypeFacility/Infrastructure Attack 0.43743169 1.548724e+00
## ATypeHijacking -15.37746616 2.097255e-07
## ATypeHostage Taking (Barricade Incident) -16.44677345 7.198749e-08
## ATypeHostage Taking (Kidnapping) 0.87308527 2.394286e+00
## ATypeUnarmed Assault -0.36512506 6.941098e-01
## ATypeUnknown 2.04974543 7.765924e+00
## RansomPaid2 -16.90550578 4.550217e-08
## RansomPaid3 -0.58037716 5.596872e-01
## RansomPaid4 0.29395511 1.341724e+00
## gtd3.probs
## (Intercept) 2.591870e-09
## country10 9.999997e-01
## country11 4.014238e-01
## country12 4.932615e-01
## country2 1.000000e+00
## country3 9.999998e-01
## country5 3.846173e-01
## country6 4.080336e-01
## country8 9.999999e-01
## NWounded0 9.175087e-08
## ATypeAssassination 5.576853e-01
## ATypeBombing/Explosion 6.887402e-01
## ATypeFacility/Infrastructure Attack 6.076469e-01
## ATypeHijacking 2.097255e-07
## ATypeHostage Taking (Barricade Incident) 7.198749e-08
## ATypeHostage Taking (Kidnapping) 7.053873e-01
## ATypeUnarmed Assault 4.097195e-01
## ATypeUnknown 8.859219e-01
## RansomPaid2 4.550217e-08
## RansomPaid3 3.588458e-01
## RansomPaid4 5.729641e-01
# Analysis of deviance for the nested pair fitted to the same sample (gtd3):
# the reduced model gtd.fit4 against the full model gtd.fit3.
# Fix: gtd.fit2 was previously included, but it was fitted to gtd2, and
# deviances of models fitted to different data sets are not comparable (the
# recorded table shows a negative Df and a negative deviance change for it).
# test = "Chisq" adds the likelihood-ratio p-value for the added terms.
# NOTE(review): the table printed below predates this fix; re-run to refresh.
anova(gtd.fit4, gtd.fit3, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: NKilled ~ country + NWounded
## Model 2: NKilled ~ country + NWounded + AType + RansomPaid
## Model 3: NKilled ~ country + NWounded + AType + RansomPaid
## Resid. Df Resid. Dev Df Deviance
## 1 491 214.212
## 2 479 18.234 12 195.98
## 3 480 204.180 -1 -185.95