In previous topic we discussed Z-test for space launches outcomes (success and failure) on a real data: https://rpubs.com/alex-lev/111354. Now we want to estimate these proportions applying Bayesian statistics with MCMC.
For this purpose we use real data: https://knoema.com/ocuodcf/space-launch-history, http://vz.ru/infographics/2015/6/22/752202.html, http://www.spacelaunchreport.com/log2015.html.
library(binom)
library(pwr)
library(ggplot2)
#From Rasmus Baath
#Bayesian First Aid Alternative to the Binomial Test
#http://www.sumsar.net/blog/2014/06/bayesian-first-aid-prop-test/
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.1.0
## Loaded modules: basemod,bugs
library(mcmc)
library(stringr)
source(file = "bayes_prp_tst.R")
source(file = "binom_test.R")
source(file = "r_jags.R")
source(file = "generic.R")
For more information on Bayesian statistics with MCMC see: https://en.wikipedia.org/wiki/Bayesian_statistics, https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo.
# USSR, 1970-1984
ussr<-bayes.binom.test(1348,1407,0.95) # success, all, H0: P=0.95
summary(ussr) # credible interval and quantiles for posterior probability density
## Data
## number of successes = 1348, number of trials = 1407
##
## Model parameters and generated quantities
## theta: the relative frequency of success
## x_pred: predicted number of successes in a replication
##
## Measures
## mean sd HDIlo HDIup %<comp %>comp
## theta 0.958 0.005 0.947 0.968 0.084 0.916
## x_pred 1347.230 10.717 1325.000 1366.000 0.000 1.000
##
## 'HDIlo' and 'HDIup' are the limits of a 95% HDI credible interval.
## '%<comp' and '%>comp' are the probabilities of the respective parameter being
## smaller or larger than 0.95.
##
## Quantiles
## q2.5% q25% median q75% q97.5%
## theta 0.946 0.954 0.958 0.961 0.967
## x_pred 1325.000 1340.000 1348.000 1355.000 1367.000
ussr.all<-as.data.frame(ussr$mcmc_samples[1:10000,]) #10000 iterations for MCMC
summary(ussr.all$theta) #posterior quantiles
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9349 0.9540 0.9577 0.9575 0.9613 0.9733
# USA, 2000-2014
usa<-bayes.binom.test(273,282,0.95) # success, all, H0: P=0.95
summary(usa) # credible interval and quantiles for posterior probability density
## Data
## number of successes = 273, number of trials = 282
##
## Model parameters and generated quantities
## theta: the relative frequency of success
## x_pred: predicted number of successes in a replication
##
## Measures
## mean sd HDIlo HDIup %<comp %>comp
## theta 0.965 0.011 0.944 0.985 0.093 0.907
## x_pred 272.137 4.299 263.000 279.000 0.000 1.000
##
## 'HDIlo' and 'HDIup' are the limits of a 95% HDI credible interval.
## '%<comp' and '%>comp' are the probabilities of the respective parameter being
## smaller or larger than 0.95.
##
## Quantiles
## q2.5% q25% median q75% q97.5%
## theta 0.941 0.958 0.966 0.973 0.983
## x_pred 263.000 269.000 273.000 275.000 279.000
usa.all<-as.data.frame(usa$mcmc_samples[1:10000,]) #10000 iterations for MCMC
summary(usa.all$theta) #posterior quantiles
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9075 0.9584 0.9658 0.9649 0.9726 0.9941
# Russia, 2000-2014
ru<-bayes.binom.test(392,413,0.95) # success, all, H0: P=0.95
summary(ru) # credible interval and quantiles for posterior probability density
## Data
## number of successes = 392, number of trials = 413
##
## Model parameters and generated quantities
## theta: the relative frequency of success
## x_pred: predicted number of successes in a replication
##
## Measures
## mean sd HDIlo HDIup %<comp %>comp
## theta 0.947 0.011 0.925 0.967 0.584 0.416
## x_pred 391.169 6.354 378.000 402.000 0.000 1.000
##
## 'HDIlo' and 'HDIup' are the limits of a 95% HDI credible interval.
## '%<comp' and '%>comp' are the probabilities of the respective parameter being
## smaller or larger than 0.95.
##
## Quantiles
## q2.5% q25% median q75% q97.5%
## theta 0.924 0.94 0.948 0.955 0.967
## x_pred 378.000 387.00 392.000 396.000 402.000
ru.all<-as.data.frame(ru$mcmc_samples[1:10000,]) #10000 iterations for MCMC
summary(ru.all$theta) #posterior quantiles
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9018 0.9401 0.9477 0.9471 0.9547 0.9837
# China, 2000-2014
ch<-bayes.binom.test(143,147,0.95) # success, all, H0: P=0.95
summary(ch) # credible interval and quantiles for posterior probability density
## Data
## number of successes = 143, number of trials = 147
##
## Model parameters and generated quantities
## theta: the relative frequency of success
## x_pred: predicted number of successes in a replication
##
## Measures
## mean sd HDIlo HDIup %<comp %>comp
## theta 0.966 0.015 0.938 0.992 0.127 0.873
## x_pred 142.100 3.022 136.000 147.000 0.000 1.000
##
## 'HDIlo' and 'HDIup' are the limits of a 95% HDI credible interval.
## '%<comp' and '%>comp' are the probabilities of the respective parameter being
## smaller or larger than 0.95.
##
## Quantiles
## q2.5% q25% median q75% q97.5%
## theta 0.933 0.958 0.968 0.977 0.989
## x_pred 135.000 140.000 143.000 144.000 147.000
ch.all<-as.data.frame(ch$mcmc_samples[1:10000,]) #10000 iterations for MCMC
summary(ch.all$theta) #posterior quantiles
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8740 0.9581 0.9683 0.9665 0.9770 0.9975
# Histograms for posterior probability density: USSR, USA, Russia, China
par(mfrow=c(2,2))
hist(ussr.all$theta,freq = F,col="lightblue",main = "USSR",xlab = "Probability")
curve(dnorm(x,mean = mean(ussr.all$theta),sd = sd(ussr.all$theta)),add = T)
abline(v = mean(ussr.all$theta),col="red")
hist(usa.all$theta,freq = F,col="lightblue",main = "USA",xlab = "Probability")
curve(dnorm(x,mean = mean(usa.all$theta),sd = sd(usa.all$theta)),add = T)
abline(v = mean(usa.all$theta),col="red")
hist(ru.all$theta,freq = F,col="lightblue",main = "Russia",xlab = "Probability")
curve(dnorm(x,mean = mean(ru.all$theta),sd = sd(ru.all$theta)),add = T)
abline(v = mean(ru.all$theta),col="red")
hist(ch.all$theta,freq = F,col="lightblue",main = "China",xlab = "Probability")
curve(dnorm(x,mean = mean(ch.all$theta),sd = sd(ch.all$theta)),add = T)
abline(v = mean(ch.all$theta),col="red")
par(mfrow=c(1,1))
all<-bayes.prop.test(c(1348,273,392,143),c(1407,282,413,147)) # USSR, USA, Russia, China
summary(all)
## Data
## number of successes: 1348, 273, 392, 143
## number of trials: 1407, 282, 413, 147
##
## Model parameters and generated quantities
## theta[i]: the relative frequency of success for Group i
## x_pred[i]: predicted number of successes in a replication for Group i
## theta_diff[i,j]: the difference between two groups (theta[i] - theta[j])
##
## Measures
## mean sd HDIlo HDIup %<comp %>comp
## theta[1] 0.957 0.005 0.946 0.967 0.000 1.000
## theta[2] 0.965 0.011 0.942 0.984 0.000 1.000
## theta[3] 0.947 0.011 0.925 0.968 0.000 1.000
## theta[4] 0.966 0.015 0.937 0.991 0.000 1.000
## x_pred[1] 1347.211 10.834 1325.000 1367.000 0.000 1.000
## x_pred[2] 272.067 4.332 263.000 279.000 0.000 1.000
## x_pred[3] 391.145 6.408 378.000 402.000 0.000 1.000
## x_pred[4] 142.001 3.104 136.000 147.000 0.000 1.000
## theta_diff[1,2] -0.007 0.012 -0.030 0.017 0.738 0.262
## theta_diff[1,3] 0.010 0.012 -0.013 0.035 0.202 0.798
## theta_diff[1,4] -0.009 0.016 -0.037 0.023 0.748 0.252
## theta_diff[2,3] 0.018 0.015 -0.013 0.048 0.119 0.881
## theta_diff[2,4] -0.002 0.019 -0.035 0.039 0.560 0.440
## theta_diff[3,4] -0.019 0.018 -0.053 0.018 0.857 0.143
##
## 'HDIlo' and 'HDIup' are the limits of a 95% HDI credible interval.
## '%<comp' and '%>comp' are the probabilities of the respective parameter being
## smaller or larger than 0.5 (except for the theta_diff parameters where
## the comparison value comp is 0.0).
##
## Quantiles
## q2.5% q25% median q75% q97.5%
## theta[1] 0.946 0.954 0.958 0.961 0.967
## theta[2] 0.940 0.958 0.966 0.973 0.983
## theta[3] 0.924 0.940 0.948 0.955 0.967
## theta[4] 0.931 0.958 0.969 0.977 0.989
## x_pred[1] 1324.000 1340.000 1348.000 1355.000 1367.000
## x_pred[2] 262.000 269.000 273.000 275.000 279.000
## x_pred[3] 378.000 387.000 392.000 396.000 402.025
## x_pred[4] 135.000 140.000 142.000 144.000 147.000
## theta_diff[1,2] -0.029 -0.016 -0.008 0.001 0.019
## theta_diff[1,3] -0.012 0.002 0.010 0.018 0.036
## theta_diff[1,4] -0.035 -0.020 -0.011 0.000 0.027
## theta_diff[2,3] -0.014 0.008 0.018 0.028 0.048
## theta_diff[2,4] -0.036 -0.014 -0.003 0.010 0.039
## theta_diff[3,4] -0.052 -0.032 -0.020 -0.008 0.019
plot(all)
diagnostics(all)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 10000
##
## Diagnostic measures
## mean sd mcmc_se n_eff Rhat
## theta[1] 0.957 0.005 0.000 29078 NaN
## theta[2] 0.965 0.011 0.000 29999 NaN
## theta[3] 0.947 0.011 0.000 30000 NaN
## theta[4] 0.966 0.015 0.000 30000 NaN
## x_pred[1] 1347.211 10.834 0.063 30000 NaN
## x_pred[2] 272.067 4.332 0.025 28998 NaN
## x_pred[3] 391.145 6.408 0.038 29119 NaN
## x_pred[4] 142.001 3.104 0.018 30000 NaN
## theta_diff[1,2] -0.007 0.012 NA NA NA
## theta_diff[1,3] 0.010 0.012 NA NA NA
## theta_diff[1,4] -0.009 0.016 NA NA NA
## theta_diff[2,3] 0.018 0.015 NA NA NA
## theta_diff[2,4] -0.002 0.019 NA NA NA
## theta_diff[3,4] -0.019 0.018 NA NA NA
##
## mcmc_se: the estimated standard error of the MCMC approximation of the mean.
## n_eff: a crude measure of effective MCMC sample size.
## Rhat: the potential scale reduction factor (at convergence, Rhat=1).
##
## Model parameters and generated quantities
## theta: The relative frequency of success
## x_pred: Predicted number of successes in a replication
## theta_diff[i,j]: the difference between two groups (theta[i] - theta[j])
usch<-bayes.prop.test(c(273,143),c(282,147)) # USA versus China
plot(usch)
So what’s it all about? Does it mean something at all? Let’s see!