require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
require(sandwich)
## Loading required package: sandwich
require(msm)
## Loading required package: msm
## Warning: package 'msm' was built under R version 3.2.5
require(foreign)
## Loading required package: foreign
## Warning: package 'foreign' was built under R version 3.2.5
require(MASS)
## Loading required package: MASS
require(gdata)
## Loading required package: gdata
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'gdata'
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#getwd()
HD<-read.csv("HD.csv")
#head(HD)
#attributes(HD)
dim(HD)
## [1] 6200   17
#subsetting for our time frame 1981-2006

HD1<- subset(HD, YEAR >= 1981 & YEAR <2006)
#head(HD1)

dim(HD1)
## [1] 5000   17
#summary(HD1)
length(unique(HD1$CTRY))
## [1] 200
summary(HD1$incidentsusvictims_L1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.0000  0.2469  0.0000 90.0000     488
mean(HD1$incidentsusvictims_L1,na.rm=TRUE)
## [1] 0.2468972
var(HD1$incidentsusvictims_L1,na.rm=TRUE)
## [1] 3.085116
#MODEL4
summary(m4 <- glm.nb(incidentsusvictims_L1 ~ lnpopterr + logGDPCAP+logUSMilitaryAid+logArea+Polity2+CINC+WECON+WOPOL+WOSOC, data = HD1))
## 
## Call:
## glm.nb(formula = incidentsusvictims_L1 ~ lnpopterr + logGDPCAP + 
##     logUSMilitaryAid + logArea + Polity2 + CINC + WECON + WOPOL + 
##     WOSOC, data = HD1, init.theta = 0.1297320665, link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9587  -0.5477  -0.4164  -0.2902   4.6226  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -12.75346    1.17673 -10.838  < 2e-16 ***
## lnpopterr          0.79117    0.07572  10.449  < 2e-16 ***
## logGDPCAP          0.19277    0.05992   3.217 0.001295 ** 
## logUSMilitaryAid   0.03879    0.01038   3.737 0.000186 ***
## logArea           -0.15086    0.05347  -2.822 0.004779 ** 
## Polity2            0.05099    0.01284   3.970 7.18e-05 ***
## CINC             -13.92649    4.51083  -3.087 0.002020 ** 
## WECON             -0.57168    0.17225  -3.319 0.000904 ***
## WOPOL             -0.45718    0.13357  -3.423 0.000620 ***
## WOSOC             -0.11282    0.13720  -0.822 0.410884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.1297) family taken to be 1)
## 
##     Null deviance: 1233.60  on 2925  degrees of freedom
## Residual deviance:  952.59  on 2916  degrees of freedom
##   (2074 observations deleted due to missingness)
## AIC: 2986.3
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.1297 
##           Std. Err.:  0.0111 
## 
##  2 x log-likelihood:  -2964.2570
cov.m4 <- vcovHC(m4, type="HC0")
std.err4 <- sqrt(diag(cov.m4))
r.est4 <- cbind(Estimate= coef(m4), "Robust SE" = std.err4,
                "Pr(>|z|)" = 2 * pnorm(abs(coef(m4)/std.err4), lower.tail=FALSE),
                LL = coef(m4) - 1.96 * std.err4,
                UL = coef(m4) + 1.96 * std.err4)


round(r.est4,3)
##                  Estimate Robust SE Pr(>|z|)      LL     UL
## (Intercept)       -12.753     1.451    0.000 -15.597 -9.910
## lnpopterr           0.791     0.071    0.000   0.652  0.930
## logGDPCAP           0.193     0.066    0.003   0.063  0.322
## logUSMilitaryAid    0.039     0.016    0.015   0.007  0.070
## logArea            -0.151     0.049    0.002  -0.246 -0.056
## Polity2             0.051     0.013    0.000   0.025  0.077
## CINC              -13.926     4.347    0.001 -22.447 -5.406
## WECON              -0.572     0.180    0.001  -0.924 -0.219
## WOPOL              -0.457     0.159    0.004  -0.769 -0.146
## WOSOC              -0.113     0.158    0.476  -0.423  0.197
with(m4, cbind(res.deviance = deviance, df = df.residual,
               p = pchisq(deviance, df.residual, lower.tail=FALSE)))
##      res.deviance   df p
## [1,]     952.5916 2916 1
#To present the regression results as incident rate ratios and 
#their standard errors, together with the confidence interval. 
#To compute the standard error for the incident rate ratios, 
#use the Delta method. To this end, we make use the function deltamethod implemented in R package msm.

s4 <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3), ~ exp(x4),~ exp(x5),~ exp(x6),~ exp(x7),~ exp(x8), ~exp(x9), ~exp(x10)), coef(m4), cov.m4)
## exponentiate old estimates dropping the p values
rexp.est4 <- exp(r.est4[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est4[, "Robust SE"] <- s4

rexp.est4
##                      Estimate    Robust SE           LL           UL
## (Intercept)      2.892305e-06 4.196351e-06 1.683608e-07 0.0000496875
## lnpopterr        2.205980e+00 1.562336e-01 1.920065e+00 2.5344693209
## logGDPCAP        1.212606e+00 8.001887e-02 1.065488e+00 1.3800371599
## logUSMilitaryAid 1.039550e+00 1.659913e-02 1.007519e+00 1.0725986069
## logArea          8.599641e-01 4.172757e-02 7.819467e-01 0.9457654888
## Polity2          1.052308e+00 1.389434e-02 1.025425e+00 1.0798964219
## CINC             8.949615e-07 3.890415e-06 1.784623e-10 0.0044880968
## WECON            5.645760e-01 1.014864e-01 3.969266e-01 0.8030351818
## WOPOL            6.330673e-01 1.006663e-01 4.635482e-01 0.8645795256
## WOSOC            8.933092e-01 1.413145e-01 6.551579e-01 1.2180292190
round(rexp.est4,3)
##                  Estimate Robust SE    LL    UL
## (Intercept)         0.000     0.000 0.000 0.000
## lnpopterr           2.206     0.156 1.920 2.534
## logGDPCAP           1.213     0.080 1.065 1.380
## logUSMilitaryAid    1.040     0.017 1.008 1.073
## logArea             0.860     0.042 0.782 0.946
## Polity2             1.052     0.014 1.025 1.080
## CINC                0.000     0.000 0.000 0.004
## WECON               0.565     0.101 0.397 0.803
## WOPOL               0.633     0.101 0.464 0.865
## WOSOC               0.893     0.141 0.655 1.218
########################################
summary(HD1$WECON)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   1.000   1.000   1.323   2.000   3.000    1290
summary(HD1$WOPOL)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    1.00    2.00    1.72    2.00    3.00    1235
summary(HD1$WOSOC)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   1.000   1.000   1.242   2.000   3.000    1536
library(psych)
## Warning: package 'psych' was built under R version 3.2.5
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
#describe(HD1)

# diagnostics for multicolinarity and relative importance of predictors
x<-as.matrix(cbind(HD1$WECON,HD1$WOPOL,HD1$WOSOC))
head(x)
##      [,1] [,2] [,3]
## [1,]    2    1    2
## [2,]    2    1    2
## [3,]    2    1    2
## [4,]    2    2    2
## [5,]    2    2    2
## [6,]    2    2    2
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.2.5
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
rcorr(x,type="pearson") # type can be pearson or spearman
##      [,1] [,2] [,3]
## [1,] 1.00 0.39 0.72
## [2,] 0.39 1.00 0.45
## [3,] 0.72 0.45 1.00
## 
## n
##      [,1] [,2] [,3]
## [1,] 3710 3700 3431
## [2,] 3700 3765 3453
## [3,] 3431 3453 3464
## 
## P
##      [,1] [,2] [,3]
## [1,]       0    0  
## [2,]  0         0  
## [3,]  0    0
library(Hmisc)
cor(x,use = "complete.obs",method = "spearman")
##           [,1]      [,2]      [,3]
## [1,] 1.0000000 0.3736538 0.7298554
## [2,] 0.3736538 1.0000000 0.4349073
## [3,] 0.7298554 0.4349073 1.0000000
#plot(HD1$WECON,HD1$WOPOL)
oldw <- getOption("warn")
options(warn = -1)
cor.test(HD1$WECON,HD1$WOPOL,method = "spearman", alternative = "greater")
## 
##  Spearman's rank correlation rho
## 
## data:  HD1$WECON and HD1$WOPOL
## S = 5304200000, p-value < 2.2e-16
## alternative hypothesis: true rho is greater than 0
## sample estimates:
##      rho 
## 0.371704
options(warn = oldw)
# Assume that we are fitting a multiple linear regression

fit <- lm(incidentsusvictims_L1 ~ lnpopterr+logGDPCAP+logUSMilitaryAid+logArea+Polity2+CINC+WECON+WOPOL+WOSOC, data = HD1)
# Evaluate Collinearity
# note the model coefficients are not right as our response is count 
#but VIF or tolerance is right as it's calculation doesnt involve outcome variable
# it doesnt effect how predictors are related to each other
library(MASS)
library(car)
## Warning: package 'car' was built under R version 3.2.5
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
vif(fit) # variance inflation factors 
##        lnpopterr        logGDPCAP logUSMilitaryAid          logArea 
##         2.469264         1.761012         1.180366         1.867496 
##          Polity2             CINC            WECON            WOPOL 
##         1.696577         1.696834         2.329693         1.429806 
##            WOSOC 
##         2.680347
sqrt(vif(fit)) > 2 # problem?
##        lnpopterr        logGDPCAP logUSMilitaryAid          logArea 
##            FALSE            FALSE            FALSE            FALSE 
##          Polity2             CINC            WECON            WOPOL 
##            FALSE            FALSE            FALSE            FALSE 
##            WOSOC 
##            FALSE