German Dora

German Dora

Data

All data for long range guns (LRG) is here: https://en.wikipedia.org/wiki/List_of_railway_artillery

Some interesting statistics is here:http://rpubs.com/alex-lev/357849

Classical (frequentest) linear regression model and prediction are here: http://rpubs.com/alex-lev/360385

Bayesian linear regression model

library(readxl)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rstanarm)
## Loading required package: Rcpp
## rstanarm (Version 2.17.2, packaged: 2017-12-20 23:59:28 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - Plotting theme set to bayesplot::theme_default().
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

LRG <- read_excel("LRG.xlsx")
LRG
fit.LRG_B <- rstanarm::stan_glm(Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation,data = na.omit(LRG),iter=5000,chain=4,seed=12345)
plot(fit.LRG_B)

stan_hist(fit.LRG_B)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_trace(fit.LRG_B)

LRG_B_R <- bayes_R2(fit.LRG_B)
hist(LRG_B_R,col="blue",main = "R2 distribution - full model",xlab = "R2",xlim = c(0.85,0.96),breaks = 30)

summary(LRG_B_R)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7579  0.9323  0.9411  0.9377  0.9470  0.9559

Reducing the model

fit.LRG_B1 <- rstanarm::stan_glm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG),iter=5000,chain=4,seed=12345)
plot(fit.LRG_B1)

stan_hist(fit.LRG_B1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_trace(fit.LRG_B1)

draws <- as.data.frame(fit.LRG_B1)
bayesplot::mcmc_pairs(draws)

LRG_B1_R <- bayes_R2(fit.LRG_B1)
hist(LRG_B1_R,col="green",main = "R2 distribution - reduced model",xlab = "R2",xlim = c(0.85,0.96),breaks = 30)

summary(LRG_B1_R)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8133  0.9332  0.9400  0.9370  0.9441  0.9482

Predicting Range with the model

Here we fit the reduced model for data with Range_M < 70000. Then we make prediction for K12VE gun applying new model.

LRG2 <- LRG%>%filter(Range_M<70000)
fit.LRG_B2 <- rstanarm::stan_glm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG2),iter=5000,chain=4,seed=12345)

fit.LRG_B2
## stan_glm
##  family:       gaussian [identity]
##  formula:      Range_M ~ Velocity_MPS + Barrel_M + Elevation
##  observations: 24
##  predictors:   4
## ------
##              Median   MAD_SD  
## (Intercept)  -44302.5   6988.4
## Velocity_MPS     59.2      7.5
## Barrel_M        738.5    186.2
## Elevation       410.4    118.0
## sigma          5233.4    841.7
## 
## Sample avg. posterior predictive distribution of y:
##          Median  MAD_SD 
## mean_PPD 31644.4  1520.4
## 
## ------
## For info on the priors used see help('prior_summary.stanreg').
plot(fit.LRG_B2)

stan_hist(fit.LRG_B2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_trace(fit.LRG_B2)

df <- data.frame(Velocity_MPS=1650,Barrel_M=33.3,Elevation=55)

pp <- posterior_predict(fit.LRG_B2,newdata = df,seed = 12345)

qq <- quantile(pp,probs = c(0.025,0.5,0.975))
qq
##      2.5%       50%     97.5% 
##  83542.57 100430.75 117616.87
hist(pp,breaks = 50,col = "red",main = "Posterior destribution for K12VE Range",xlab = "Range")
abline(v = 115000)
qqq <- as.vector(qq)
lines(c(qqq[1],qqq[3]),c(-10,-10),col="blue",lwd=5)

pi <- posterior_interval(fit.LRG_B2,newdata =df,seed = 12345,prob = 0.99)

pi
##                      0.5%        99.5%
## (Intercept)  -63900.53836 -23949.14560
## Velocity_MPS     37.77990     80.12443
## Barrel_M        191.52451   1284.80824
## Elevation        66.17955    750.62824
## sigma          3629.49239   8516.91711

Not bad too! \(P(83542.57<Range<1117616.87)=0.95\).

LRG%>%filter(Name=="K12VE")

Conclusions

  1. Bayesian linear regression model Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation with normal priors is relevant with all variables in the data set.
  2. The reduced model Range_M~Velocity_MPS+Barrel_M+Elevation with normal priors is relevant too.
  3. The full and reduced models R-squared values have very close distributions with 80% credible interval \(0.92\leq R^2 \leq 0.95\).
  4. Applying the reduced model for K12VE German gun on data excluding 70 km range produced relevant results in terms of 95% credible interval for posterior sample \(P(83542.57<Range<1117616.87)=0.95\).