http://www.bethanylacina.com/docs/Lacina_civilwar_severity.pdf http://hdl.handle.net/1902.1/22535
library(tidyverse)
library(haven)
library(DT)
library(stargazer)
library(GGally)
library(car)
data2006 <- read_dta("G:/My Drive/homework/Jenna S/Lacina_JCR_2006_postedJan07.dta")
# https://stackoverflow.com/a/54282131
data.frame(
"name" = names(data2006[ , -c(1:3)]),
"label" = sapply(data2006[ , -c(1:3)], function(x) attr(x, "label")) %>% as.character()
) %>%
datatable(options = list(pageLength = 5))
data2006 %>%
as.data.frame() %>%
stargazer(type = "text")
##
## ===============================================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## -----------------------------------------------------------------------------------------------
## ID 114 2,086.675 615.437 1,010 1,622 2,555 3,211
## ccode 114 554.316 227.977 40 380.8 750 850
## region 114 3.246 1.743 1 2 4.8 6
## secession 114 0.377 0.487 0 0 1 1
## battledeadbest 114 62,677.140 230,855.800 900 3,196.8 40,475 2,097,705
## lnbdb 114 9.362 1.711 6.802 8.070 10.608 14.556
## battledeadlow 114 59,497.240 230,424.900 418 2,594.5 34,386 2,097,705
## lnbdl 114 9.196 1.781 6.035 7.860 10.445 14.556
## battledeadhigh 114 67,705.100 232,758.200 900 3,561.5 49,075 2,097,705
## lnbdh 114 9.450 1.748 6.802 8.178 10.801 14.556
## pop 114 90,287,581.000 193,274,165.000 611,000 5,561,250 52,170,750 929,358,016
## lnpop 114 16.760 1.730 13.323 15.531 17.769 20.650
## dper100k 114 459.743 1,104.403 0.161 14.080 314.112 8,870.539
## begin 114 1,975.500 16.570 1,946 1,963 1,991 2,001
## end 114 1,983.096 18.173 1,946 1,971 1,998 2,002
## duration 114 8.596 9.605 1 2 11.8 55
## lnduration 114 1.557 1.137 0 0.7 2.5 4
## rate 114 9,023.805 30,645.620 65 1,000 5,375 300,000
## lnrate 114 7.805 1.446 4.168 6.908 8.589 12.612
## cw 114 0.667 0.473 0 0 1 1
## intervention 114 0.553 0.499 0 0 1 1
## democ 114 0.175 0.382 0 0 0 1
## lngdp 107 7.029 0.973 3.912 6.341 7.755 9.052
## gdpsq 107 12.960 2.419 3.892 12.088 14.108 17.808
## milper 114 372.070 805.239 1 19.2 250 4,158
## milperpop 114 0.00001 0.00001 0.00000 0.00000 0.00001 0.0001
## lnmilperpop 114 -12.525 1.045 -15.614 -13.313 -11.794 -9.433
## milex 112 1,903,079.000 6,345,521.000 63.000 41,257.250 850,000.000 57,107,000.000
## milqual 112 6,399.334 10,761.100 2.520 1,141.071 6,256.646 67,600.000
## lnmilqual 112 7.950 1.375 0.924 7.040 8.741 11.121
## milq2 112 15.236 3.656 -15.478 14.239 16.146 22.150
## mountain 114 23.271 22.296 0.000 7.300 37.325 81.000
## lnmountain 114 2.688 1.115 0.000 2.116 3.646 4.407
## ethfrac 114 0.495 0.291 0.004 0.227 0.755 0.902
## lnethfrac 114 -1.049 1.070 -5.493 -1.485 -0.281 -0.104
## relfrac 114 0.398 0.197 0.000 0.222 0.551 0.753
## lnrelfrac 113 -1.092 0.705 -3.922 -1.507 -0.596 -0.283
## relpolar 114 0.781 0.416 0 1 1 1
## ethnicpolar 114 0.781 0.416 0 1 1 1
## -----------------------------------------------------------------------------------------------
# add summarize here
model1 <-
data2006 %>%
lm(lnbdb ~ lnduration + lnmilperpop + lnmilqual + lngdp + cw +
lnmountain + democ + ethnicpolar + relpolar, data = .)
model1 %>% summary()
##
## Call:
## lm(formula = lnbdb ~ lnduration + lnmilperpop + lnmilqual + lngdp +
## cw + lnmountain + democ + ethnicpolar + relpolar, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7396 -0.9133 0.0152 0.6963 3.1723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.40574 2.61835 5.502 3.17e-07 ***
## lnduration 0.85772 0.11729 7.313 8.20e-11 ***
## lnmilperpop 0.38708 0.15777 2.453 0.0160 *
## lnmilqual 0.23131 0.12819 1.804 0.0743 .
## lngdp -0.46214 0.20365 -2.269 0.0255 *
## cw 0.72658 0.30468 2.385 0.0191 *
## lnmountain 0.05914 0.11617 0.509 0.6119
## democ -0.76046 0.34052 -2.233 0.0279 *
## ethnicpolar -0.87988 0.32993 -2.667 0.0090 **
## relpolar 0.15785 0.31183 0.506 0.6139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.286 on 95 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.4798, Adjusted R-squared: 0.4306
## F-statistic: 9.737 on 9 and 95 DF, p-value: 2.032e-10
data2006 %>%
select(lnbdb, lnduration, lnmilperpop, lnmilqual, lngdp, cw, lnmountain, democ,
ethnicpolar, relpolar) %>%
ggpairs(axisLabels = "none")
model2 <-
data2006 %>%
lm(lnbdb ~ lnduration + cw + democ + ethnicpolar, data = .)
model2 %>% summary()
##
## Call:
## lm(formula = lnbdb ~ lnduration + cw + democ + ethnicpolar, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6584 -0.8932 -0.0507 0.6921 3.6222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.5965 0.3472 24.763 < 2e-16 ***
## lnduration 0.8572 0.1106 7.751 5.12e-12 ***
## cw 0.5908 0.2686 2.199 0.029976 *
## democ -0.9122 0.3264 -2.795 0.006136 **
## ethnicpolar -1.0283 0.3034 -3.389 0.000978 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.296 on 109 degrees of freedom
## Multiple R-squared: 0.447, Adjusted R-squared: 0.4267
## F-statistic: 22.03 on 4 and 109 DF, p-value: 2.417e-13
data2006 %>%
ggplot(aes(lnduration, lnbdb)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, col = "red", show.legend = TRUE) +
labs(x = "Log of Duration",
y = "Log of Battle Data Best Estimate")
## `geom_smooth()` using formula 'y ~ x'
data2006 %>% lm(lnbdb ~ lnduration, data = .) %>% summary()
##
## Call:
## lm(formula = lnbdb ~ lnduration, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3934 -0.8756 -0.1360 0.7390 4.7763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.0790 0.2291 35.263 < 2e-16 ***
## lnduration 0.8242 0.1190 6.925 2.89e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.438 on 112 degrees of freedom
## Multiple R-squared: 0.2998, Adjusted R-squared: 0.2936
## F-statistic: 47.96 on 1 and 112 DF, p-value: 2.892e-10
crPlots(model2, terms = ~ lnduration)
model2$residuals %>%
as.data.frame() %>%
ggplot(aes(.)) +
geom_histogram() +
ggtitle("Model 2 Redisuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
model2$residuals %>%
as.data.frame() %>%
ggplot(aes(.)) +
geom_density() +
ggtitle("Model 2 Redisuals")
model2$residuals %>%
as.data.frame() %>%
ggplot(aes(1:114, .)) +
geom_hline(yintercept = mean(model2$residuals), col = "red") +
geom_path() +
ggtitle("Model 2 Redisuals") +
labs(x = "Order", y = "Residual")
model2$residuals %>%
as.data.frame() %>%
ggplot(aes(sample = .)) +
geom_qq() +
geom_qq_line() +
ggtitle("QQ Plot of Model 2 Residuals")
data2006 %>%
ggplot(aes(sample = lnbdb)) +
geom_qq() +
geom_qq_line() +
ggtitle("Response = Log of Battle Data Best Estimate") +
labs(subtitle = "QQ Plot")
data2006 %>%
ggplot(aes(lnduration, model2$residuals)) +
geom_point() +
geom_hline(yintercept = mean(model2$residuals), col = "red") +
labs(x = "Log Duration", y = "Model 2 Residuals")
model2 %>%
ggplot(aes(model2$fitted.values, model2$residuals)) +
geom_point() +
geom_hline(yintercept = mean(model2$residuals), col = "red") +
labs(title = "Model 2",
x = "Fitted Values",
y = "Residuals")
https://www.wikiwand.com/en/Variance_inflation_factor#/Interpretation
model1 %>% vif()
## lnduration lnmilperpop lnmilqual lngdp cw lnmountain
## 1.113910 1.693288 1.980714 2.244088 1.248881 1.041766
## democ ethnicpolar relpolar
## 1.135256 1.144773 1.022562