R Notebook

0.1 Model-1 Post-Estimation:
0.2 Model-2: This model adds the salience, or the Territorial Salience, variable
0.3 Model-2: Post-estimation
0.4 Model-3: This model includes economic and border strength variable of the Gibler’s model (Gibler 2007)
0.5 Model-3 Comparison: LOO method
0.6 Model-4: Including all other major border strength control variables in the Gibler’s model (Refer: Gibler 2007)
0.7 Model-4: Post-Estimation
0.8 Model-4: Model Comparison: LOO method

##. Loading dataset into the r environment

library(knitr); library(rmarkdown);
library(rstanarm); library(rstan);
library(tidybayes); 
library(tidyverse); 
library(easystats);

## # Attaching packages
## ✔ insight     0.8.2   ✔ bayestestR  0.5.2
## ✔ performance 0.4.4   ✔ parameters  0.6.0
## ✔ see         0.4.1   ✔ effectsize  0.2.0
## ✔ correlation 0.1.0   ✔ estimate    0.1.0
## ✔ report      0.1.0

library(rmarkdown); 
library(bayesplot); library(projpred); library(loo)
library(haven); library(shinystan)
library(broom); library(ggmcmc)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
library(haven)
#salience = salience
#salience <- read_dta("/Users/Karthy/Downloads/salience.dta")
#head(salience[, 1:10])
#glimpse(salience[, 1:10])

##.Subsetting the data to replicate Gibler’s 2007 model

salient = salience %>% filter(cont == 1, year >= 1945 & year <= 2000)

##. Descriptive Statistics of major variables in the model

library(skimr)

vars = c("jointdem", "icowsal", "settle", "smallgdp", "samecol", "civwar", "lastmid", "logmntpct","capratio")
skim(salience[, vars])

Data summary
Name	salience[, vars]
Number of rows	41223
Number of columns	9
_______________________
Column type frequency:
character	1
numeric	8
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
settle	0	1	1	1	0	2	0

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
jointdem	8322	0.80	0.22	0.41	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▂
icowsal	8260	0.80	1.63	3.32	0.00	0.00	0.00	0.00	12.00	▇▁▁▁▁
smallgdp	24839	0.40	7.35	1.01	3.87	6.57	7.29	8.07	9.83	▁▂▇▆▂
samecol	0	1.00	0.25	0.43	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▂
civwar	0	1.00	0.01	0.12	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▁
lastmid	8260	0.80	24.17	29.74	0.00	5.00	14.00	32.00	185.00	▇▁▁▁▁
logmntpct	24654	0.40	0.33	0.30	0.00	0.07	0.27	0.57	1.00	▇▃▃▂▂
capratio	2222	0.95	0.29	0.27	0.00	0.06	0.20	0.47	1.00	▇▃▂▂▁

glancedata::glance_data(salience[, vars])

## # A tibble: 9 x 11
##   name  type  distinct_values minimum median maximum    mean     sd
##   <chr> <chr>           <int>   <dbl>  <dbl>   <dbl>   <dbl>  <dbl>
## 1 join… nume…               2 0.       0        1     0.220   0.414
## 2 icow… nume…              12 0.       0       12     1.63    3.32 
## 3 sett… nume…               2 0.       0        1     0.329   0.470
## 4 smal… nume…            3578 3.87e+0  7.29     9.83  7.35    1.01 
## 5 same… nume…               2 0.       0        1     0.246   0.430
## 6 civw… nume…               2 0.       0        1     0.0135  0.115
## 7 last… nume…             186 0.      14      185    24.2    29.7  
## 8 logm… nume…             355 0.       0.267    1     0.331   0.299
## 9 capr… nume…           38981 3.98e-5  0.203    1.00  0.291   0.271
## # … with 3 more variables: na_proportion <dbl>, count <chr>,
## #   sample_values <chr>

glancedata::plot_discrete_vars(salience[, vars])

## TableGrob (3 x 3) "arrange": 6 grobs
##          z     cells    name                grob
## jointdem 1 (1-1,1-1) arrange      gtable[layout]
## icowsal  2 (1-1,2-2) arrange      gtable[layout]
## settle   3 (1-1,3-3) arrange      gtable[layout]
## samecol  4 (2-2,1-1) arrange      gtable[layout]
## civwar   5 (2-2,2-2) arrange      gtable[layout]
##          6 (3-3,1-3) arrange text[GRID.text.203]

glancedata::plot_numerical_vars(salience[, vars], plot_type = "histogram")

##. Visualizing correlations among the major variables. The correlation plot visualizes the possible correlation between major variables used in the study.

GGally::ggpairs(salience[, 1:10])

##.Bayesian regression analysis- Model-1 (logistic regression) I test whether settled border increases the chances of bordering countries becoming democracies. For that purpose, I use the rstanarm package, which utilizes Stan program to fit the bayesian model. It uses the Hamiltonian Monte Carlo Sampling to fit the model. It is a single variable model.

mod1 = stan_glm(formula = jointdem ~ settle, chains =4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salience, refresh = 0)


summary(mod1)

## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 32901
##  predictors:   2
## 
## Estimates:
##               mean   sd   10%   50%   90%
## (Intercept) -1.2    0.0 -1.2  -1.2  -1.1 
## settle      -0.3    0.0 -0.4  -0.3  -0.3 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  4649 
## settle        0.0  1.0  4498 
## mean_PPD      0.0  1.0  4965 
## log-posterior 0.0  1.0  3511 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

monitor(mod1$stanfit)

## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                     Q5      Q50      Q95     Mean SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)       -1.2     -1.2     -1.1     -1.2  0     1     4665     4514
## settle            -0.4     -0.3     -0.3     -0.3  0     1     4528     4432
## mean_PPD           0.2      0.2      0.2      0.2  0     1     4970     5641
## log-posterior -17271.6 -17269.4 -17268.7 -17269.7  1     1     3596     4142
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).

##Hamiltonian montecarlo sampling diagnostics suggests the model has not many glaring issues during fitting
check_hmc_diagnostics(mod1$stanfit)

## 
## Divergences:
## 
## Tree depth:
## 
## Energy:

#.Posterior interval of the model
posterior_interval(mod1)

##                     5%        95%
## (Intercept) -1.1812979 -1.1288610
## settle      -0.3709075 -0.2772931

posterior_vs_prior(mod1)

bayestestR::hdi(mod1, ci = c(0.9))

## # Highest Density Interval
## 
## Parameter   |        90% HDI
## ----------------------------
## (Intercept) | [-1.18, -1.13]
## settle      | [-0.37, -0.28]

0.1 Model-1 Post-Estimation:

This section describes the fit of the bayesian model by examining the MCMC (Monte Carlo Marko Chains) convergence in the model. As one can see from the model, the convergence fits better with not much divergences of the multiple chains. First, I visualize the plotting of the intercept and the single variabl in the model: settled borders variable. Followed by that, I ran an auto-correlation plot to check whether there is much correlation between samples. Since HMC uses dependent sampling, the fitted model should not supposed to have a lot of independence. An imperfect autocorrelation plot will have a scattered appearance of various samples.Thus, the autocorrelation plot suggests the model has no autocorrelation issues. Also, the trace plot for the model indicates the model has good matching with various samples. As one can observe from the traceplot graph(3rd graph), there is good matching of various chains in the model. As before suggested by the HMC (Hamiltonian Monte Carlo) diagnostics and Rhat score of one, the model suffers from no divergences of chains during model fitting.

##Bayesplot diagnostic plots
mcmc_acf(mod1)

mcmc_trace(mod1)

mcmc_combo(mod1)

##ggmcmc plots 
gmod1 = ggs(mod1)
ggs_autocorrelation(gmod1)

ggs_traceplot(gmod1)

ggs_density(gmod1)

0.2 Model-2: This model adds the salience, or the Territorial Salience, variable

In the model, I included the salience, the Territorial Salience, variable in the model. The variable has a negative relationship with the presence of joint democracies. The presence of salience decreases the likelihood of joint democracies. The model, like the previous one, has no issues with divergence. The HMS diagnostics tests suggest there are no inherent issues with the model fitting or during the sampling process.

mod2 = stan_glm(formula = jointdem ~ settle + icowsal, chains =4, iter = 1000, warmup = 250, family = binomial(link = "logit"), data = salience, refresh = 0)

summary(mod2)

## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal
##  algorithm:    sampling
##  sample:       3000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 26832
##  predictors:   3
## 
## Estimates:
##               mean   sd   10%   50%   90%
## (Intercept) -1.5    0.0 -1.5  -1.5  -1.4 
## settle       0.1    0.0  0.0   0.1   0.1 
## icowsal     -0.1    0.0 -0.1  -0.1  -0.1 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  2512 
## settle        0.0  1.0  2672 
## icowsal       0.0  1.0  2588 
## mean_PPD      0.0  1.0  2262 
## log-posterior 0.0  1.0  1406 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

print(monitor(mod2$stanfit))

## Inference for the input samples (4 chains: each with iter = 1000; warmup = 0):
## 
##                     Q5      Q50      Q95     Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)       -1.5     -1.5     -1.4     -1.5 0.0     1     2542     1892
## settle             0.0      0.1      0.1      0.1 0.0     1     2637     2061
## icowsal           -0.1     -0.1     -0.1     -0.1 0.0     1     2628     1963
## mean_PPD           0.2      0.2      0.2      0.2 0.0     1     2282     2322
## log-posterior -12432.9 -12430.2 -12429.2 -12430.5 1.2     1     1430     1925
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
##                        mean      se_mean          sd          2.5%
## (Intercept)   -1.451821e+00 4.607438e-04 0.023210818 -1.496372e+00
## settle         6.117146e-02 6.232294e-04 0.031992404 -3.323830e-03
## icowsal       -7.615104e-02 1.086225e-04 0.005549431 -8.724073e-02
## mean_PPD       1.771975e-01 6.790552e-05 0.003241624  1.708399e-01
## log-posterior -1.243053e+04 3.234220e-02 1.214428085 -1.243370e+04
##                         25%           50%           75%         97.5% n_eff
## (Intercept)   -1.467701e+00 -1.451065e+00 -1.436375e+00 -1.405884e+00  2513
## settle         4.086702e-02  6.175227e-02  8.312635e-02  1.229505e-01  2672
## icowsal       -7.994660e-02 -7.622923e-02 -7.244241e-02 -6.533743e-02  2589
## mean_PPD       1.750149e-01  1.771765e-01  1.793754e-01  1.835514e-01  2263
## log-posterior -1.243104e+04 -1.243023e+04 -1.242964e+04 -1.242918e+04  1407
##                    Rhat valid            Q5           Q50           Q95
## (Intercept)   1.0007175     1 -1.490237e+00 -1.451065e+00 -1.414646e+00
## settle        1.0023656     1  7.737261e-03  6.175227e-02  1.133695e-01
## icowsal       0.9998046     1 -8.506677e-02 -7.622923e-02 -6.703335e-02
## mean_PPD      1.0019445     1  1.717725e-01  1.771765e-01  1.826196e-01
## log-posterior 1.0023500     1 -1.243287e+04 -1.243023e+04 -1.242923e+04
##                  MCSE_Q2.5     MCSE_Q25     MCSE_Q50     MCSE_Q75   MCSE_Q97.5
## (Intercept)   0.0007631618 6.367441e-04 5.152920e-04 6.632879e-04 0.0012164650
## settle        0.0016379871 9.135333e-04 6.262725e-04 7.199390e-04 0.0016053683
## icowsal       0.0002336172 1.196279e-04 1.297373e-04 1.747803e-04 0.0004125261
## mean_PPD      0.0001304413 9.317233e-05 7.453787e-05 7.453787e-05 0.0001304413
## log-posterior 0.1884294098 5.668871e-02 3.306831e-02 1.873379e-02 0.0105975363
##                    MCSE_SD Bulk_ESS Tail_ESS
## (Intercept)   3.258325e-04     2542     1892
## settle        4.530872e-04     2637     2061
## icowsal       7.681632e-05     2628     1963
## mean_PPD      4.807250e-05     2282     2322
## log-posterior 2.287412e-02     1430     1925

check_hmc_diagnostics(mod2$stanfit)

## 
## Divergences:

## 0 of 3000 iterations ended with a divergence.

## 
## Tree depth:

## 0 of 3000 iterations saturated the maximum tree depth of 15.

## 
## Energy:

## E-BFMI indicated no pathological behavior.

posterior_vs_prior(mod2)

## 
## Drawing from prior...

0.3 Model-2: Post-estimation

##Bayesplot diagnostic plots
mcmc_acf(mod2)

mcmc_trace(mod2)

mcmc_combo(mod2)

##ggmcmc plots 
gmod2 = ggs(mod2)
ggs_autocorrelation(gmod2)

ggs_traceplot(gmod2)

ggs_density(gmod2)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod2)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod2, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod2)

## Computation of Bayes factors: sampling priors, please wait...

## Loading required namespace: logspline

## Warning in logspline::logspline(posterior): too much data close together

## Warning in logspline::logspline(posterior): re-ran with oldlogspline

## Warning in logspline::logspline(x): too much data close together

## Warning in logspline::logspline(x): re-ran with oldlogspline

plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod2, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod2)

plot(res5)

## Picking joint bandwidth of 0.00269

## Warning: Removed 658 rows containing non-finite values (stat_density_ridges).

#launch_shinystan(mod2)

0.4 Model-3: This model includes economic and border strength variable of the Gibler’s model (Gibler 2007)

This model includes a pertinent economic variable, gdp per capita, that could explain the evolution of joint democracies. This model also includes border strength variables such as capability ratio, years since the last MID outbreak, duration of the dyad and so on. (Refer: Gibler 2007).

mod3 = stan_glm(formula = jointdem ~ settle + icowsal + smallgdp + lastmid + capratio + durdyad, chains =4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salient, refresh = 0)

summary(mod3)

## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal + smallgdp + lastmid + capratio + 
##     durdyad
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 9450
##  predictors:   7
## 
## Estimates:
##               mean   sd    10%   50%   90%
## (Intercept) -17.0    0.4 -17.6 -17.0 -16.5
## settle        0.5    0.1   0.4   0.5   0.7
## icowsal       0.0    0.0   0.0   0.0   0.0
## smallgdp      1.9    0.1   1.8   1.9   1.9
## lastmid       0.0    0.0   0.0   0.0   0.0
## capratio     -0.4    0.1  -0.6  -0.4  -0.2
## durdyad       0.0    0.0   0.0   0.0   0.0
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  3675 
## settle        0.0  1.0  5649 
## icowsal       0.0  1.0  6047 
## smallgdp      0.0  1.0  3904 
## lastmid       0.0  1.0  5989 
## capratio      0.0  1.0  6015 
## durdyad       0.0  1.0  5727 
## mean_PPD      0.0  1.0  7382 
## log-posterior 0.0  1.0  3069 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

print(monitor(mod3$stanfit))

## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                    Q5     Q50     Q95    Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)     -17.8   -17.0   -16.3   -17.0 0.4     1     3697     4825
## settle            0.3     0.5     0.7     0.5 0.1     1     5661     4819
## icowsal           0.0     0.0     0.0     0.0 0.0     1     6073     5544
## smallgdp          1.8     1.9     1.9     1.9 0.1     1     3929     5129
## lastmid           0.0     0.0     0.0     0.0 0.0     1     6013     5186
## capratio         -0.6    -0.4    -0.2    -0.4 0.1     1     6061     4520
## durdyad           0.0     0.0     0.0     0.0 0.0     1     5600     5117
## mean_PPD          0.2     0.2     0.2     0.2 0.0     1     7437     6509
## log-posterior -2528.3 -2524.4 -2522.3 -2524.8 1.9     1     3205     4456
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
##                        mean      se_mean           sd          2.5%
## (Intercept)   -1.704534e+01 7.231475e-03 0.4399890621 -1.792243e+01
## settle         5.289383e-01 1.612268e-03 0.1213965804  2.852344e-01
## icowsal        2.890979e-02 1.507604e-04 0.0117481697  6.307100e-03
## smallgdp       1.863280e+00 8.224589e-04 0.0515425385  1.764021e+00
## lastmid        1.269685e-02 1.752941e-05 0.0013589728  1.002315e-02
## capratio      -3.924435e-01 1.668183e-03 0.1297988148 -6.504382e-01
## durdyad        3.039272e-03 1.041010e-05 0.0007787774  1.506502e-03
## mean_PPD       1.614187e-01 4.802559e-05 0.0041334938  1.534392e-01
## log-posterior -2.524754e+03 3.381714e-02 1.8759881592 -2.529218e+03
##                         25%           50%           75%         97.5% n_eff
## (Intercept)   -1.733891e+01 -1.704129e+01 -1.675017e+01 -1.619753e+01  3672
## settle         4.484956e-01  5.287606e-01  6.107025e-01  7.637310e-01  5651
## icowsal        2.084717e-02  2.880917e-02  3.707350e-02  5.197817e-02  6048
## smallgdp       1.828362e+00  1.862972e+00  1.898137e+00  1.966238e+00  3902
## lastmid        1.178420e-02  1.271333e-02  1.361789e-02  1.532951e-02  5990
## capratio      -4.788604e-01 -3.904964e-01 -3.043793e-01 -1.397622e-01  6018
## durdyad        2.517808e-03  3.042298e-03  3.577833e-03  4.549197e-03  5588
## mean_PPD       1.586243e-01  1.613757e-01  1.641270e-01  1.697354e-01  7384
## log-posterior -2.525768e+03 -2.524418e+03 -2.523402e+03 -2.522122e+03  3063
##                   Rhat valid            Q5           Q50           Q95
## (Intercept)   1.001351     1 -1.777368e+01 -1.704129e+01 -1.632918e+01
## settle        1.000639     1  3.268329e-01  5.287606e-01  7.277428e-01
## icowsal       1.000242     1  9.795861e-03  2.880917e-02  4.807407e-02
## smallgdp      1.001135     1  1.778852e+00  1.862972e+00  1.949518e+00
## lastmid       1.000109     1  1.043836e-02  1.271333e-02  1.489488e-02
## capratio      1.000822     1 -6.072416e-01 -3.904964e-01 -1.804681e-01
## durdyad       1.000472     1  1.753799e-03  3.042298e-03  4.315895e-03
## mean_PPD      1.000271     1  1.547090e-01  1.613757e-01  1.682540e-01
## log-posterior 1.001339     1 -2.528254e+03 -2.524418e+03 -2.522339e+03
##                  MCSE_Q2.5     MCSE_Q25     MCSE_Q50     MCSE_Q75   MCSE_Q97.5
## (Intercept)   1.993227e-02 1.002723e-02 7.285251e-03 8.268266e-03 1.441800e-02
## settle        4.482032e-03 1.972999e-03 2.117089e-03 2.242780e-03 3.604682e-03
## icowsal       5.236992e-04 1.812256e-04 1.864408e-04 2.371631e-04 4.245076e-04
## smallgdp      2.024980e-03 1.054862e-03 8.474534e-04 1.104072e-03 1.930038e-03
## lastmid       5.132472e-05 2.221922e-05 2.050555e-05 2.765149e-05 4.580265e-05
## capratio      4.362591e-03 2.150402e-03 2.204191e-03 1.905483e-03 4.216249e-03
## durdyad       2.036545e-05 1.264303e-05 1.182988e-05 1.355986e-05 2.021594e-05
## mean_PPD      1.587302e-04 1.058201e-04 5.291005e-05 1.058201e-04 1.587302e-04
## log-posterior 1.307794e-01 5.456179e-02 2.935567e-02 3.419047e-02 2.610160e-02
##                    MCSE_SD Bulk_ESS Tail_ESS
## (Intercept)   5.113828e-03     3697     4825
## settle        1.144924e-03     5661     4819
## icowsal       1.093636e-04     6073     5544
## smallgdp      5.816095e-04     3929     5129
## lastmid       1.239577e-05     6013     5186
## capratio      1.242405e-03     6061     4520
## durdyad       7.384539e-06     5600     5117
## mean_PPD      3.401674e-05     7437     6509
## log-posterior 2.391535e-02     3205     4456

##.Model-3: Post-Estimation of the model

##Bayesplot diagnostic plots
mcmc_acf(mod3)

mcmc_trace(mod3)

mcmc_combo(mod3)

##ggmcmc post-diagnostic plots
gmod3 = ggs(mod3)
ggs_autocorrelation(gmod3)

ggs_traceplot(gmod3)

ggs_density(gmod3)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod3)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod3, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod3)
plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod3, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod3)

plot(res5)

#launch_shinystan(mod3)

0.5 Model-3 Comparison: LOO method

lfit3 = loo(mod3)
print(lfit3)

## 
## Computed from 7000 by 9450 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo  -2519.4  57.4
## p_loo         7.3   0.2
## looic      5038.7 114.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.

plot(lfit3)

0.6 Model-4: Including all other major border strength control variables in the Gibler’s model (Refer: Gibler 2007)

mod4 = stan_glm(formula = jointdem ~ settle + icowsal + durdyad + smallgdp +  lastmid + capratio + samecol, chains = 4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salient, refresh = 0)

summary(mod4)

## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal + durdyad + smallgdp + lastmid + 
##     capratio + samecol
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 9450
##  predictors:   8
## 
## Estimates:
##               mean   sd    10%   50%   90%
## (Intercept) -15.8    0.5 -16.4 -15.8 -15.2
## settle        0.5    0.1   0.3   0.5   0.6
## icowsal       0.1    0.0   0.1   0.1   0.1
## durdyad       0.0    0.0   0.0   0.0   0.0
## smallgdp      1.8    0.1   1.7   1.8   1.9
## lastmid       0.0    0.0   0.0   0.0   0.0
## capratio     -0.6    0.1  -0.8  -0.6  -0.4
## samecol      -1.2    0.1  -1.3  -1.2  -1.1
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  5144 
## settle        0.0  1.0  6425 
## icowsal       0.0  1.0  5782 
## durdyad       0.0  1.0  6794 
## smallgdp      0.0  1.0  5231 
## lastmid       0.0  1.0  7197 
## capratio      0.0  1.0  7554 
## samecol       0.0  1.0  5990 
## mean_PPD      0.0  1.0  8076 
## log-posterior 0.0  1.0  3315 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).

monitor(mod4$stanfit)

## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                    Q5     Q50     Q95    Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)     -16.6   -15.8   -15.0   -15.8 0.5     1     5189     4413
## settle            0.3     0.5     0.7     0.5 0.1     1     6462     4875
## icowsal           0.0     0.1     0.1     0.1 0.0     1     5829     5305
## durdyad           0.0     0.0     0.0     0.0 0.0     1     6832     5826
## smallgdp          1.7     1.8     1.9     1.8 0.1     1     5276     4855
## lastmid           0.0     0.0     0.0     0.0 0.0     1     7235     5517
## capratio         -0.8    -0.6    -0.4    -0.6 0.1     1     7637     4589
## samecol          -1.4    -1.2    -1.1    -1.2 0.1     1     5997     5325
## mean_PPD          0.2     0.2     0.2     0.2 0.0     1     8120     6429
## log-posterior -2446.9 -2442.7 -2440.4 -2443.1 2.0     1     3369     4608
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).

tidy(mod4)

## # A tibble: 8 x 3
##   term          estimate std.error
##   <chr>            <dbl>     <dbl>
## 1 (Intercept) -15.8       0.462   
## 2 settle        0.478     0.113   
## 3 icowsal       0.0655    0.0118  
## 4 durdyad       0.000174  0.000790
## 5 smallgdp      1.80      0.0539  
## 6 lastmid       0.0112    0.00132 
## 7 capratio     -0.590     0.138   
## 8 samecol      -1.21      0.0933

augment(mod4)

## # A tibble: 9,450 x 12
##    .rownames jointdem  settle icowsal durdyad smallgdp lastmid capratio samecol
##    <chr>        <dbl> <dbl+l>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>   <dbl>
##  1 1                1 1 [Set…       5      97     9.38      54   0.0768       1
##  2 2                1 1 [Set…       5      97     9.53       3   0.0842       1
##  3 3                1 1 [Set…       0      97     9.13      48   0.0587       1
##  4 4                1 1 [Set…       0      97     8.62      28   0.0427       1
##  5 5                1 1 [Set…       0      97     8.88      39   0.0560       1
##  6 6                1 1 [Set…       0      97     9.12      47   0.0576       1
##  7 7                1 1 [Set…       5      97     9.52       3   0.0859       1
##  8 8                1 1 [Set…       5      97     9.27      52   0.0707       1
##  9 9                1 1 [Set…       0      97     9.17      49   0.0561       1
## 10 10               1 1 [Set…       5      97     9.32      53   0.0729       1
## # … with 9,440 more rows, and 3 more variables: .fitted <dbl>, .se.fit <dbl>,
## #   .resid <dbl>

0.7 Model-4: Post-Estimation

mcmc_acf(mod4)

mcmc_trace(mod4)

mcmc_combo(mod4)

##ggmcmc pack
gmod4 = ggs(mod4)
ggs_autocorrelation(gmod4)

ggs_traceplot(gmod4)

ggs_density(gmod4)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod4)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod4, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod4)

## Computation of Bayes factors: sampling priors, please wait...

## Warning in logspline::logspline(posterior): Not all models could be fitted

## Warning in logspline::logspline(x): Not all models could be fitted

plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod4, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod4)

plot(res5)

## Picking joint bandwidth of 0.00719

## Warning: Removed 5383 rows containing non-finite values (stat_density_ridges).

#launch_shinystan(mod4)

0.8 Model-4: Model Comparison: LOO method

lfit4 = loo(mod4)
print(lfit4)

## 
## Computed from 7000 by 9450 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo  -2436.9  58.9
## p_loo         7.9   0.2
## looic      4873.8 117.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.

plot(lfit4)