require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
require(sandwich)
## Loading required package: sandwich
require(msm)
## Loading required package: msm
## Warning: package 'msm' was built under R version 3.2.5
require(foreign)
## Loading required package: foreign
## Warning: package 'foreign' was built under R version 3.2.5
require(MASS)
## Loading required package: MASS
require(gdata)
## Loading required package: gdata
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'gdata'
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#getwd()
HD<-read.csv("HD.csv")
#head(HD)
#attributes(HD)
dim(HD)
## [1] 6200 17
#subsetting for our time frame 1981-2006
HD1<- subset(HD, YEAR >= 1981 & YEAR <2006)
#head(HD1)
dim(HD1)
## [1] 5000 17
#summary(HD1)
length(unique(HD1$CTRY))
## [1] 200
summary(HD1$incidentsusvictims_L1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.2469 0.0000 90.0000 488
mean(HD1$incidentsusvictims_L1,na.rm=TRUE)
## [1] 0.2468972
var(HD1$incidentsusvictims_L1,na.rm=TRUE)
## [1] 3.085116
#MODEL4
summary(m4 <- glm.nb(incidentsusvictims_L1 ~ lnpopterr + logGDPCAP+logUSMilitaryAid+logArea+Polity2+CINC+WECON+WOPOL+WOSOC, data = HD1))
##
## Call:
## glm.nb(formula = incidentsusvictims_L1 ~ lnpopterr + logGDPCAP +
## logUSMilitaryAid + logArea + Polity2 + CINC + WECON + WOPOL +
## WOSOC, data = HD1, init.theta = 0.1297320665, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9587 -0.5477 -0.4164 -0.2902 4.6226
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.75346 1.17673 -10.838 < 2e-16 ***
## lnpopterr 0.79117 0.07572 10.449 < 2e-16 ***
## logGDPCAP 0.19277 0.05992 3.217 0.001295 **
## logUSMilitaryAid 0.03879 0.01038 3.737 0.000186 ***
## logArea -0.15086 0.05347 -2.822 0.004779 **
## Polity2 0.05099 0.01284 3.970 7.18e-05 ***
## CINC -13.92649 4.51083 -3.087 0.002020 **
## WECON -0.57168 0.17225 -3.319 0.000904 ***
## WOPOL -0.45718 0.13357 -3.423 0.000620 ***
## WOSOC -0.11282 0.13720 -0.822 0.410884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1297) family taken to be 1)
##
## Null deviance: 1233.60 on 2925 degrees of freedom
## Residual deviance: 952.59 on 2916 degrees of freedom
## (2074 observations deleted due to missingness)
## AIC: 2986.3
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1297
## Std. Err.: 0.0111
##
## 2 x log-likelihood: -2964.2570
cov.m4 <- vcovHC(m4, type="HC0")
std.err4 <- sqrt(diag(cov.m4))
r.est4 <- cbind(Estimate= coef(m4), "Robust SE" = std.err4,
"Pr(>|z|)" = 2 * pnorm(abs(coef(m4)/std.err4), lower.tail=FALSE),
LL = coef(m4) - 1.96 * std.err4,
UL = coef(m4) + 1.96 * std.err4)
round(r.est4,3)
## Estimate Robust SE Pr(>|z|) LL UL
## (Intercept) -12.753 1.451 0.000 -15.597 -9.910
## lnpopterr 0.791 0.071 0.000 0.652 0.930
## logGDPCAP 0.193 0.066 0.003 0.063 0.322
## logUSMilitaryAid 0.039 0.016 0.015 0.007 0.070
## logArea -0.151 0.049 0.002 -0.246 -0.056
## Polity2 0.051 0.013 0.000 0.025 0.077
## CINC -13.926 4.347 0.001 -22.447 -5.406
## WECON -0.572 0.180 0.001 -0.924 -0.219
## WOPOL -0.457 0.159 0.004 -0.769 -0.146
## WOSOC -0.113 0.158 0.476 -0.423 0.197
with(m4, cbind(res.deviance = deviance, df = df.residual,
p = pchisq(deviance, df.residual, lower.tail=FALSE)))
## res.deviance df p
## [1,] 952.5916 2916 1
#To present the regression results as incident rate ratios and
#their standard errors, together with the confidence interval.
#To compute the standard error for the incident rate ratios,
#use the Delta method. To this end, we make use the function deltamethod implemented in R package msm.
s4 <- deltamethod(list(~ exp(x1), ~ exp(x2), ~ exp(x3), ~ exp(x4),~ exp(x5),~ exp(x6),~ exp(x7),~ exp(x8), ~exp(x9), ~exp(x10)), coef(m4), cov.m4)
## exponentiate old estimates dropping the p values
rexp.est4 <- exp(r.est4[, -3])
## replace SEs with estimates for exponentiated coefficients
rexp.est4[, "Robust SE"] <- s4
rexp.est4
## Estimate Robust SE LL UL
## (Intercept) 2.892305e-06 4.196351e-06 1.683608e-07 0.0000496875
## lnpopterr 2.205980e+00 1.562336e-01 1.920065e+00 2.5344693209
## logGDPCAP 1.212606e+00 8.001887e-02 1.065488e+00 1.3800371599
## logUSMilitaryAid 1.039550e+00 1.659913e-02 1.007519e+00 1.0725986069
## logArea 8.599641e-01 4.172757e-02 7.819467e-01 0.9457654888
## Polity2 1.052308e+00 1.389434e-02 1.025425e+00 1.0798964219
## CINC 8.949615e-07 3.890415e-06 1.784623e-10 0.0044880968
## WECON 5.645760e-01 1.014864e-01 3.969266e-01 0.8030351818
## WOPOL 6.330673e-01 1.006663e-01 4.635482e-01 0.8645795256
## WOSOC 8.933092e-01 1.413145e-01 6.551579e-01 1.2180292190
round(rexp.est4,3)
## Estimate Robust SE LL UL
## (Intercept) 0.000 0.000 0.000 0.000
## lnpopterr 2.206 0.156 1.920 2.534
## logGDPCAP 1.213 0.080 1.065 1.380
## logUSMilitaryAid 1.040 0.017 1.008 1.073
## logArea 0.860 0.042 0.782 0.946
## Polity2 1.052 0.014 1.025 1.080
## CINC 0.000 0.000 0.000 0.004
## WECON 0.565 0.101 0.397 0.803
## WOPOL 0.633 0.101 0.464 0.865
## WOSOC 0.893 0.141 0.655 1.218
########################################
summary(HD1$WECON)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 1.000 1.000 1.323 2.000 3.000 1290
summary(HD1$WOPOL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 1.00 2.00 1.72 2.00 3.00 1235
summary(HD1$WOSOC)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 1.000 1.000 1.242 2.000 3.000 1536
library(psych)
## Warning: package 'psych' was built under R version 3.2.5
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
#describe(HD1)
# diagnostics for multicolinarity and relative importance of predictors
x<-as.matrix(cbind(HD1$WECON,HD1$WOPOL,HD1$WOSOC))
head(x)
## [,1] [,2] [,3]
## [1,] 2 1 2
## [2,] 2 1 2
## [3,] 2 1 2
## [4,] 2 2 2
## [5,] 2 2 2
## [6,] 2 2 2
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.2.5
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.2.5
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.2.5
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
rcorr(x,type="pearson") # type can be pearson or spearman
## [,1] [,2] [,3]
## [1,] 1.00 0.39 0.72
## [2,] 0.39 1.00 0.45
## [3,] 0.72 0.45 1.00
##
## n
## [,1] [,2] [,3]
## [1,] 3710 3700 3431
## [2,] 3700 3765 3453
## [3,] 3431 3453 3464
##
## P
## [,1] [,2] [,3]
## [1,] 0 0
## [2,] 0 0
## [3,] 0 0
library(Hmisc)
cor(x,use = "complete.obs",method = "spearman")
## [,1] [,2] [,3]
## [1,] 1.0000000 0.3736538 0.7298554
## [2,] 0.3736538 1.0000000 0.4349073
## [3,] 0.7298554 0.4349073 1.0000000
#plot(HD1$WECON,HD1$WOPOL)
oldw <- getOption("warn")
options(warn = -1)
cor.test(HD1$WECON,HD1$WOPOL,method = "spearman", alternative = "greater")
##
## Spearman's rank correlation rho
##
## data: HD1$WECON and HD1$WOPOL
## S = 5304200000, p-value < 2.2e-16
## alternative hypothesis: true rho is greater than 0
## sample estimates:
## rho
## 0.371704
options(warn = oldw)
# Assume that we are fitting a multiple linear regression
fit <- lm(incidentsusvictims_L1 ~ lnpopterr+logGDPCAP+logUSMilitaryAid+logArea+Polity2+CINC+WECON+WOPOL+WOSOC, data = HD1)
# Evaluate Collinearity
# note the model coefficients are not right as our response is count
#but VIF or tolerance is right as it's calculation doesnt involve outcome variable
# it doesnt effect how predictors are related to each other
library(MASS)
library(car)
## Warning: package 'car' was built under R version 3.2.5
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
vif(fit) # variance inflation factors
## lnpopterr logGDPCAP logUSMilitaryAid logArea
## 2.469264 1.761012 1.180366 1.867496
## Polity2 CINC WECON WOPOL
## 1.696577 1.696834 2.329693 1.429806
## WOSOC
## 2.680347
sqrt(vif(fit)) > 2 # problem?
## lnpopterr logGDPCAP logUSMilitaryAid logArea
## FALSE FALSE FALSE FALSE
## Polity2 CINC WECON WOPOL
## FALSE FALSE FALSE FALSE
## WOSOC
## FALSE