##. Loading dataset into the r environment

library(knitr); library(rmarkdown);
library(rstanarm); library(rstan);
library(tidybayes); 
library(tidyverse); 
library(easystats); 
## # Attaching packages
## ✔ insight     0.8.2   ✔ bayestestR  0.5.2
## ✔ performance 0.4.4   ✔ parameters  0.6.0
## ✔ see         0.4.1   ✔ effectsize  0.2.0
## ✔ correlation 0.1.0   ✔ estimate    0.1.0
## ✔ report      0.1.0
library(rmarkdown); 
library(bayesplot); library(projpred); library(loo)
library(haven); library(shinystan)
library(broom); library(ggmcmc)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
library(haven)
#salience = salience
#salience <- read_dta("/Users/Karthy/Downloads/salience.dta")
#head(salience[, 1:10])
#glimpse(salience[, 1:10])

##.Subsetting the data to replicate Gibler’s 2007 model

salient = salience %>% filter(cont == 1, year >= 1945 & year <= 2000)

##. Descriptive Statistics of major variables in the model

library(skimr)

vars = c("jointdem", "icowsal", "settle", "smallgdp", "samecol", "civwar", "lastmid", "logmntpct","capratio")
skim(salience[, vars])
Data summary
Name salience[, vars]
Number of rows 41223
Number of columns 9
_______________________
Column type frequency:
character 1
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
settle 0 1 1 1 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
jointdem 8322 0.80 0.22 0.41 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
icowsal 8260 0.80 1.63 3.32 0.00 0.00 0.00 0.00 12.00 ▇▁▁▁▁
smallgdp 24839 0.40 7.35 1.01 3.87 6.57 7.29 8.07 9.83 ▁▂▇▆▂
samecol 0 1.00 0.25 0.43 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
civwar 0 1.00 0.01 0.12 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
lastmid 8260 0.80 24.17 29.74 0.00 5.00 14.00 32.00 185.00 ▇▁▁▁▁
logmntpct 24654 0.40 0.33 0.30 0.00 0.07 0.27 0.57 1.00 ▇▃▃▂▂
capratio 2222 0.95 0.29 0.27 0.00 0.06 0.20 0.47 1.00 ▇▃▂▂▁
glancedata::glance_data(salience[, vars])
## # A tibble: 9 x 11
##   name  type  distinct_values minimum median maximum    mean     sd
##   <chr> <chr>           <int>   <dbl>  <dbl>   <dbl>   <dbl>  <dbl>
## 1 join… nume…               2 0.       0        1     0.220   0.414
## 2 icow… nume…              12 0.       0       12     1.63    3.32 
## 3 sett… nume…               2 0.       0        1     0.329   0.470
## 4 smal… nume…            3578 3.87e+0  7.29     9.83  7.35    1.01 
## 5 same… nume…               2 0.       0        1     0.246   0.430
## 6 civw… nume…               2 0.       0        1     0.0135  0.115
## 7 last… nume…             186 0.      14      185    24.2    29.7  
## 8 logm… nume…             355 0.       0.267    1     0.331   0.299
## 9 capr… nume…           38981 3.98e-5  0.203    1.00  0.291   0.271
## # … with 3 more variables: na_proportion <dbl>, count <chr>,
## #   sample_values <chr>
glancedata::plot_discrete_vars(salience[, vars])

## TableGrob (3 x 3) "arrange": 6 grobs
##          z     cells    name                grob
## jointdem 1 (1-1,1-1) arrange      gtable[layout]
## icowsal  2 (1-1,2-2) arrange      gtable[layout]
## settle   3 (1-1,3-3) arrange      gtable[layout]
## samecol  4 (2-2,1-1) arrange      gtable[layout]
## civwar   5 (2-2,2-2) arrange      gtable[layout]
##          6 (3-3,1-3) arrange text[GRID.text.203]
glancedata::plot_numerical_vars(salience[, vars], plot_type = "histogram")

##. Visualizing correlations among the major variables. The correlation plot visualizes the possible correlation between major variables used in the study.

GGally::ggpairs(salience[, 1:10])

##.Bayesian regression analysis- Model-1 (logistic regression) I test whether settled border increases the chances of bordering countries becoming democracies. For that purpose, I use the rstanarm package, which utilizes Stan program to fit the bayesian model. It uses the Hamiltonian Monte Carlo Sampling to fit the model. It is a single variable model.

mod1 = stan_glm(formula = jointdem ~ settle, chains =4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salience, refresh = 0)


summary(mod1)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 32901
##  predictors:   2
## 
## Estimates:
##               mean   sd   10%   50%   90%
## (Intercept) -1.2    0.0 -1.2  -1.2  -1.1 
## settle      -0.3    0.0 -0.4  -0.3  -0.3 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  4649 
## settle        0.0  1.0  4498 
## mean_PPD      0.0  1.0  4965 
## log-posterior 0.0  1.0  3511 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
monitor(mod1$stanfit)
## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                     Q5      Q50      Q95     Mean SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)       -1.2     -1.2     -1.1     -1.2  0     1     4665     4514
## settle            -0.4     -0.3     -0.3     -0.3  0     1     4528     4432
## mean_PPD           0.2      0.2      0.2      0.2  0     1     4970     5641
## log-posterior -17271.6 -17269.4 -17268.7 -17269.7  1     1     3596     4142
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
##Hamiltonian montecarlo sampling diagnostics suggests the model has not many glaring issues during fitting
check_hmc_diagnostics(mod1$stanfit)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
#.Posterior interval of the model
posterior_interval(mod1)
##                     5%        95%
## (Intercept) -1.1812979 -1.1288610
## settle      -0.3709075 -0.2772931
posterior_vs_prior(mod1)

bayestestR::hdi(mod1, ci = c(0.9))
## # Highest Density Interval
## 
## Parameter   |        90% HDI
## ----------------------------
## (Intercept) | [-1.18, -1.13]
## settle      | [-0.37, -0.28]

0.1 Model-1 Post-Estimation:

This section describes the fit of the bayesian model by examining the MCMC (Monte Carlo Marko Chains) convergence in the model. As one can see from the model, the convergence fits better with not much divergences of the multiple chains. First, I visualize the plotting of the intercept and the single variabl in the model: settled borders variable. Followed by that, I ran an auto-correlation plot to check whether there is much correlation between samples. Since HMC uses dependent sampling, the fitted model should not supposed to have a lot of independence. An imperfect autocorrelation plot will have a scattered appearance of various samples.Thus, the autocorrelation plot suggests the model has no autocorrelation issues. Also, the trace plot for the model indicates the model has good matching with various samples. As one can observe from the traceplot graph(3rd graph), there is good matching of various chains in the model. As before suggested by the HMC (Hamiltonian Monte Carlo) diagnostics and Rhat score of one, the model suffers from no divergences of chains during model fitting.

##Bayesplot diagnostic plots
mcmc_acf(mod1)

mcmc_trace(mod1)

mcmc_combo(mod1)

##ggmcmc plots 
gmod1 = ggs(mod1)
ggs_autocorrelation(gmod1)

ggs_traceplot(gmod1)

ggs_density(gmod1)

0.2 Model-2: This model adds the salience, or the Territorial Salience, variable

In the model, I included the salience, the Territorial Salience, variable in the model. The variable has a negative relationship with the presence of joint democracies. The presence of salience decreases the likelihood of joint democracies. The model, like the previous one, has no issues with divergence. The HMS diagnostics tests suggest there are no inherent issues with the model fitting or during the sampling process.

mod2 = stan_glm(formula = jointdem ~ settle + icowsal, chains =4, iter = 1000, warmup = 250, family = binomial(link = "logit"), data = salience, refresh = 0)

summary(mod2)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal
##  algorithm:    sampling
##  sample:       3000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 26832
##  predictors:   3
## 
## Estimates:
##               mean   sd   10%   50%   90%
## (Intercept) -1.5    0.0 -1.5  -1.5  -1.4 
## settle       0.1    0.0  0.0   0.1   0.1 
## icowsal     -0.1    0.0 -0.1  -0.1  -0.1 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  2512 
## settle        0.0  1.0  2672 
## icowsal       0.0  1.0  2588 
## mean_PPD      0.0  1.0  2262 
## log-posterior 0.0  1.0  1406 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
print(monitor(mod2$stanfit))
## Inference for the input samples (4 chains: each with iter = 1000; warmup = 0):
## 
##                     Q5      Q50      Q95     Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)       -1.5     -1.5     -1.4     -1.5 0.0     1     2542     1892
## settle             0.0      0.1      0.1      0.1 0.0     1     2637     2061
## icowsal           -0.1     -0.1     -0.1     -0.1 0.0     1     2628     1963
## mean_PPD           0.2      0.2      0.2      0.2 0.0     1     2282     2322
## log-posterior -12432.9 -12430.2 -12429.2 -12430.5 1.2     1     1430     1925
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
##                        mean      se_mean          sd          2.5%
## (Intercept)   -1.451821e+00 4.607438e-04 0.023210818 -1.496372e+00
## settle         6.117146e-02 6.232294e-04 0.031992404 -3.323830e-03
## icowsal       -7.615104e-02 1.086225e-04 0.005549431 -8.724073e-02
## mean_PPD       1.771975e-01 6.790552e-05 0.003241624  1.708399e-01
## log-posterior -1.243053e+04 3.234220e-02 1.214428085 -1.243370e+04
##                         25%           50%           75%         97.5% n_eff
## (Intercept)   -1.467701e+00 -1.451065e+00 -1.436375e+00 -1.405884e+00  2513
## settle         4.086702e-02  6.175227e-02  8.312635e-02  1.229505e-01  2672
## icowsal       -7.994660e-02 -7.622923e-02 -7.244241e-02 -6.533743e-02  2589
## mean_PPD       1.750149e-01  1.771765e-01  1.793754e-01  1.835514e-01  2263
## log-posterior -1.243104e+04 -1.243023e+04 -1.242964e+04 -1.242918e+04  1407
##                    Rhat valid            Q5           Q50           Q95
## (Intercept)   1.0007175     1 -1.490237e+00 -1.451065e+00 -1.414646e+00
## settle        1.0023656     1  7.737261e-03  6.175227e-02  1.133695e-01
## icowsal       0.9998046     1 -8.506677e-02 -7.622923e-02 -6.703335e-02
## mean_PPD      1.0019445     1  1.717725e-01  1.771765e-01  1.826196e-01
## log-posterior 1.0023500     1 -1.243287e+04 -1.243023e+04 -1.242923e+04
##                  MCSE_Q2.5     MCSE_Q25     MCSE_Q50     MCSE_Q75   MCSE_Q97.5
## (Intercept)   0.0007631618 6.367441e-04 5.152920e-04 6.632879e-04 0.0012164650
## settle        0.0016379871 9.135333e-04 6.262725e-04 7.199390e-04 0.0016053683
## icowsal       0.0002336172 1.196279e-04 1.297373e-04 1.747803e-04 0.0004125261
## mean_PPD      0.0001304413 9.317233e-05 7.453787e-05 7.453787e-05 0.0001304413
## log-posterior 0.1884294098 5.668871e-02 3.306831e-02 1.873379e-02 0.0105975363
##                    MCSE_SD Bulk_ESS Tail_ESS
## (Intercept)   3.258325e-04     2542     1892
## settle        4.530872e-04     2637     2061
## icowsal       7.681632e-05     2628     1963
## mean_PPD      4.807250e-05     2282     2322
## log-posterior 2.287412e-02     1430     1925
check_hmc_diagnostics(mod2$stanfit)
## 
## Divergences:
## 0 of 3000 iterations ended with a divergence.
## 
## Tree depth:
## 0 of 3000 iterations saturated the maximum tree depth of 15.
## 
## Energy:
## E-BFMI indicated no pathological behavior.
posterior_vs_prior(mod2)
## 
## Drawing from prior...

0.3 Model-2: Post-estimation

##Bayesplot diagnostic plots
mcmc_acf(mod2)

mcmc_trace(mod2)

mcmc_combo(mod2)

##ggmcmc plots 
gmod2 = ggs(mod2)
ggs_autocorrelation(gmod2)

ggs_traceplot(gmod2)

ggs_density(gmod2)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod2)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod2, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod2)
## Computation of Bayes factors: sampling priors, please wait...
## Loading required namespace: logspline
## Warning in logspline::logspline(posterior): too much data close together
## Warning in logspline::logspline(posterior): re-ran with oldlogspline
## Warning in logspline::logspline(x): too much data close together
## Warning in logspline::logspline(x): re-ran with oldlogspline
plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod2, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod2)

plot(res5)
## Picking joint bandwidth of 0.00269
## Warning: Removed 658 rows containing non-finite values (stat_density_ridges).

#launch_shinystan(mod2)

0.4 Model-3: This model includes economic and border strength variable of the Gibler’s model (Gibler 2007)

This model includes a pertinent economic variable, gdp per capita, that could explain the evolution of joint democracies. This model also includes border strength variables such as capability ratio, years since the last MID outbreak, duration of the dyad and so on. (Refer: Gibler 2007).

mod3 = stan_glm(formula = jointdem ~ settle + icowsal + smallgdp + lastmid + capratio + durdyad, chains =4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salient, refresh = 0)

summary(mod3)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal + smallgdp + lastmid + capratio + 
##     durdyad
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 9450
##  predictors:   7
## 
## Estimates:
##               mean   sd    10%   50%   90%
## (Intercept) -17.0    0.4 -17.6 -17.0 -16.5
## settle        0.5    0.1   0.4   0.5   0.7
## icowsal       0.0    0.0   0.0   0.0   0.0
## smallgdp      1.9    0.1   1.8   1.9   1.9
## lastmid       0.0    0.0   0.0   0.0   0.0
## capratio     -0.4    0.1  -0.6  -0.4  -0.2
## durdyad       0.0    0.0   0.0   0.0   0.0
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  3675 
## settle        0.0  1.0  5649 
## icowsal       0.0  1.0  6047 
## smallgdp      0.0  1.0  3904 
## lastmid       0.0  1.0  5989 
## capratio      0.0  1.0  6015 
## durdyad       0.0  1.0  5727 
## mean_PPD      0.0  1.0  7382 
## log-posterior 0.0  1.0  3069 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
print(monitor(mod3$stanfit))
## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                    Q5     Q50     Q95    Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)     -17.8   -17.0   -16.3   -17.0 0.4     1     3697     4825
## settle            0.3     0.5     0.7     0.5 0.1     1     5661     4819
## icowsal           0.0     0.0     0.0     0.0 0.0     1     6073     5544
## smallgdp          1.8     1.9     1.9     1.9 0.1     1     3929     5129
## lastmid           0.0     0.0     0.0     0.0 0.0     1     6013     5186
## capratio         -0.6    -0.4    -0.2    -0.4 0.1     1     6061     4520
## durdyad           0.0     0.0     0.0     0.0 0.0     1     5600     5117
## mean_PPD          0.2     0.2     0.2     0.2 0.0     1     7437     6509
## log-posterior -2528.3 -2524.4 -2522.3 -2524.8 1.9     1     3205     4456
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
##                        mean      se_mean           sd          2.5%
## (Intercept)   -1.704534e+01 7.231475e-03 0.4399890621 -1.792243e+01
## settle         5.289383e-01 1.612268e-03 0.1213965804  2.852344e-01
## icowsal        2.890979e-02 1.507604e-04 0.0117481697  6.307100e-03
## smallgdp       1.863280e+00 8.224589e-04 0.0515425385  1.764021e+00
## lastmid        1.269685e-02 1.752941e-05 0.0013589728  1.002315e-02
## capratio      -3.924435e-01 1.668183e-03 0.1297988148 -6.504382e-01
## durdyad        3.039272e-03 1.041010e-05 0.0007787774  1.506502e-03
## mean_PPD       1.614187e-01 4.802559e-05 0.0041334938  1.534392e-01
## log-posterior -2.524754e+03 3.381714e-02 1.8759881592 -2.529218e+03
##                         25%           50%           75%         97.5% n_eff
## (Intercept)   -1.733891e+01 -1.704129e+01 -1.675017e+01 -1.619753e+01  3672
## settle         4.484956e-01  5.287606e-01  6.107025e-01  7.637310e-01  5651
## icowsal        2.084717e-02  2.880917e-02  3.707350e-02  5.197817e-02  6048
## smallgdp       1.828362e+00  1.862972e+00  1.898137e+00  1.966238e+00  3902
## lastmid        1.178420e-02  1.271333e-02  1.361789e-02  1.532951e-02  5990
## capratio      -4.788604e-01 -3.904964e-01 -3.043793e-01 -1.397622e-01  6018
## durdyad        2.517808e-03  3.042298e-03  3.577833e-03  4.549197e-03  5588
## mean_PPD       1.586243e-01  1.613757e-01  1.641270e-01  1.697354e-01  7384
## log-posterior -2.525768e+03 -2.524418e+03 -2.523402e+03 -2.522122e+03  3063
##                   Rhat valid            Q5           Q50           Q95
## (Intercept)   1.001351     1 -1.777368e+01 -1.704129e+01 -1.632918e+01
## settle        1.000639     1  3.268329e-01  5.287606e-01  7.277428e-01
## icowsal       1.000242     1  9.795861e-03  2.880917e-02  4.807407e-02
## smallgdp      1.001135     1  1.778852e+00  1.862972e+00  1.949518e+00
## lastmid       1.000109     1  1.043836e-02  1.271333e-02  1.489488e-02
## capratio      1.000822     1 -6.072416e-01 -3.904964e-01 -1.804681e-01
## durdyad       1.000472     1  1.753799e-03  3.042298e-03  4.315895e-03
## mean_PPD      1.000271     1  1.547090e-01  1.613757e-01  1.682540e-01
## log-posterior 1.001339     1 -2.528254e+03 -2.524418e+03 -2.522339e+03
##                  MCSE_Q2.5     MCSE_Q25     MCSE_Q50     MCSE_Q75   MCSE_Q97.5
## (Intercept)   1.993227e-02 1.002723e-02 7.285251e-03 8.268266e-03 1.441800e-02
## settle        4.482032e-03 1.972999e-03 2.117089e-03 2.242780e-03 3.604682e-03
## icowsal       5.236992e-04 1.812256e-04 1.864408e-04 2.371631e-04 4.245076e-04
## smallgdp      2.024980e-03 1.054862e-03 8.474534e-04 1.104072e-03 1.930038e-03
## lastmid       5.132472e-05 2.221922e-05 2.050555e-05 2.765149e-05 4.580265e-05
## capratio      4.362591e-03 2.150402e-03 2.204191e-03 1.905483e-03 4.216249e-03
## durdyad       2.036545e-05 1.264303e-05 1.182988e-05 1.355986e-05 2.021594e-05
## mean_PPD      1.587302e-04 1.058201e-04 5.291005e-05 1.058201e-04 1.587302e-04
## log-posterior 1.307794e-01 5.456179e-02 2.935567e-02 3.419047e-02 2.610160e-02
##                    MCSE_SD Bulk_ESS Tail_ESS
## (Intercept)   5.113828e-03     3697     4825
## settle        1.144924e-03     5661     4819
## icowsal       1.093636e-04     6073     5544
## smallgdp      5.816095e-04     3929     5129
## lastmid       1.239577e-05     6013     5186
## capratio      1.242405e-03     6061     4520
## durdyad       7.384539e-06     5600     5117
## mean_PPD      3.401674e-05     7437     6509
## log-posterior 2.391535e-02     3205     4456

##.Model-3: Post-Estimation of the model

##Bayesplot diagnostic plots
mcmc_acf(mod3)

mcmc_trace(mod3)

mcmc_combo(mod3)

##ggmcmc post-diagnostic plots
gmod3 = ggs(mod3)
ggs_autocorrelation(gmod3)

ggs_traceplot(gmod3)

ggs_density(gmod3)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod3)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod3, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod3)
plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod3, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod3)

plot(res5)

#launch_shinystan(mod3)

0.5 Model-3 Comparison: LOO method

lfit3 = loo(mod3)
print(lfit3)
## 
## Computed from 7000 by 9450 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo  -2519.4  57.4
## p_loo         7.3   0.2
## looic      5038.7 114.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
plot(lfit3)

0.6 Model-4: Including all other major border strength control variables in the Gibler’s model (Refer: Gibler 2007)

mod4 = stan_glm(formula = jointdem ~ settle + icowsal + durdyad + smallgdp +  lastmid + capratio + samecol, chains = 4, iter = 2000, warmup = 250, family = binomial(link = "logit"), data = salient, refresh = 0)

summary(mod4)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      jointdem ~ settle + icowsal + durdyad + smallgdp + lastmid + 
##     capratio + samecol
##  algorithm:    sampling
##  sample:       7000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 9450
##  predictors:   8
## 
## Estimates:
##               mean   sd    10%   50%   90%
## (Intercept) -15.8    0.5 -16.4 -15.8 -15.2
## settle        0.5    0.1   0.3   0.5   0.6
## icowsal       0.1    0.0   0.1   0.1   0.1
## durdyad       0.0    0.0   0.0   0.0   0.0
## smallgdp      1.8    0.1   1.7   1.8   1.9
## lastmid       0.0    0.0   0.0   0.0   0.0
## capratio     -0.6    0.1  -0.8  -0.6  -0.4
## samecol      -1.2    0.1  -1.3  -1.2  -1.1
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.2    0.0  0.2   0.2   0.2  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##               mcse Rhat n_eff
## (Intercept)   0.0  1.0  5144 
## settle        0.0  1.0  6425 
## icowsal       0.0  1.0  5782 
## durdyad       0.0  1.0  6794 
## smallgdp      0.0  1.0  5231 
## lastmid       0.0  1.0  7197 
## capratio      0.0  1.0  7554 
## samecol       0.0  1.0  5990 
## mean_PPD      0.0  1.0  8076 
## log-posterior 0.0  1.0  3315 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
monitor(mod4$stanfit)
## Inference for the input samples (4 chains: each with iter = 2000; warmup = 0):
## 
##                    Q5     Q50     Q95    Mean  SD  Rhat Bulk_ESS Tail_ESS
## (Intercept)     -16.6   -15.8   -15.0   -15.8 0.5     1     5189     4413
## settle            0.3     0.5     0.7     0.5 0.1     1     6462     4875
## icowsal           0.0     0.1     0.1     0.1 0.0     1     5829     5305
## durdyad           0.0     0.0     0.0     0.0 0.0     1     6832     5826
## smallgdp          1.7     1.8     1.9     1.8 0.1     1     5276     4855
## lastmid           0.0     0.0     0.0     0.0 0.0     1     7235     5517
## capratio         -0.8    -0.6    -0.4    -0.6 0.1     1     7637     4589
## samecol          -1.4    -1.2    -1.1    -1.2 0.1     1     5997     5325
## mean_PPD          0.2     0.2     0.2     0.2 0.0     1     8120     6429
## log-posterior -2446.9 -2442.7 -2440.4 -2443.1 2.0     1     3369     4608
## 
## For each parameter, Bulk_ESS and Tail_ESS are crude measures of 
## effective sample size for bulk and tail quantities respectively (an ESS > 100 
## per chain is considered good), and Rhat is the potential scale reduction 
## factor on rank normalized split chains (at convergence, Rhat <= 1.05).
tidy(mod4)
## # A tibble: 8 x 3
##   term          estimate std.error
##   <chr>            <dbl>     <dbl>
## 1 (Intercept) -15.8       0.462   
## 2 settle        0.478     0.113   
## 3 icowsal       0.0655    0.0118  
## 4 durdyad       0.000174  0.000790
## 5 smallgdp      1.80      0.0539  
## 6 lastmid       0.0112    0.00132 
## 7 capratio     -0.590     0.138   
## 8 samecol      -1.21      0.0933
augment(mod4)
## # A tibble: 9,450 x 12
##    .rownames jointdem  settle icowsal durdyad smallgdp lastmid capratio samecol
##    <chr>        <dbl> <dbl+l>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>   <dbl>
##  1 1                1 1 [Set…       5      97     9.38      54   0.0768       1
##  2 2                1 1 [Set…       5      97     9.53       3   0.0842       1
##  3 3                1 1 [Set…       0      97     9.13      48   0.0587       1
##  4 4                1 1 [Set…       0      97     8.62      28   0.0427       1
##  5 5                1 1 [Set…       0      97     8.88      39   0.0560       1
##  6 6                1 1 [Set…       0      97     9.12      47   0.0576       1
##  7 7                1 1 [Set…       5      97     9.52       3   0.0859       1
##  8 8                1 1 [Set…       5      97     9.27      52   0.0707       1
##  9 9                1 1 [Set…       0      97     9.17      49   0.0561       1
## 10 10               1 1 [Set…       5      97     9.32      53   0.0729       1
## # … with 9,440 more rows, and 3 more variables: .fitted <dbl>, .se.fit <dbl>,
## #   .resid <dbl>

0.7 Model-4: Post-Estimation

mcmc_acf(mod4)

mcmc_trace(mod4)

mcmc_combo(mod4)

##ggmcmc pack
gmod4 = ggs(mod4)
ggs_autocorrelation(gmod4)

ggs_traceplot(gmod4)

ggs_density(gmod4)

##Point-estimates of the model: it includes the mean, median and the MAP(Maximum A'Posterior) of the model. 
res1 <- point_estimate(mod4)
plot(res1)

##Visualizing the posterior density interval (credibility interval)
res2 <- hdi(mod4, ci = c(0.5, 0.75, 0.89, 0.95))
plot(res2)

## Visualizing the support interval of the model with priors
res3 <- si(mod4)
## Computation of Bayes factors: sampling priors, please wait...
## Warning in logspline::logspline(posterior): Not all models could be fitted
## Warning in logspline::logspline(x): Not all models could be fitted
plot(res3) +scale_color_metro(palette = "ice") +
  scale_fill_metro(palette = "ice")

##Region of Practical Equaivalence or the ROPE region 
res4 <- rope(mod4, ci = c(0.9, 0.95))

plot(res4)

## Test for practical equivalence
res5 <- equivalence_test(mod4)

plot(res5)
## Picking joint bandwidth of 0.00719
## Warning: Removed 5383 rows containing non-finite values (stat_density_ridges).

#launch_shinystan(mod4)

0.8 Model-4: Model Comparison: LOO method

lfit4 = loo(mod4)
print(lfit4)
## 
## Computed from 7000 by 9450 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo  -2436.9  58.9
## p_loo         7.9   0.2
## looic      4873.8 117.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
plot(lfit4)