German Dora
All data for long range guns (LRG) is here: https://en.wikipedia.org/wiki/List_of_railway_artillery
Some interesting statistics is here:http://rpubs.com/alex-lev/357849
Classical (frequentest) linear regression model and prediction are here: http://rpubs.com/alex-lev/360385
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rstanarm)
## Loading required package: Rcpp
## rstanarm (Version 2.17.2, packaged: 2017-12-20 23:59:28 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - Plotting theme set to bayesplot::theme_default().
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
LRG <- read_excel("LRG.xlsx")
LRG
fit.LRG_B <- rstanarm::stan_glm(Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation,data = na.omit(LRG),iter=5000,chain=4,seed=12345)
plot(fit.LRG_B)
stan_hist(fit.LRG_B)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_trace(fit.LRG_B)
LRG_B_R <- bayes_R2(fit.LRG_B)
hist(LRG_B_R,col="blue",main = "R2 distribution - full model",xlab = "R2",xlim = c(0.85,0.96),breaks = 30)
summary(LRG_B_R)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7579 0.9323 0.9411 0.9377 0.9470 0.9559
fit.LRG_B1 <- rstanarm::stan_glm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG),iter=5000,chain=4,seed=12345)
plot(fit.LRG_B1)
stan_hist(fit.LRG_B1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_trace(fit.LRG_B1)
draws <- as.data.frame(fit.LRG_B1)
bayesplot::mcmc_pairs(draws)
LRG_B1_R <- bayes_R2(fit.LRG_B1)
hist(LRG_B1_R,col="green",main = "R2 distribution - reduced model",xlab = "R2",xlim = c(0.85,0.96),breaks = 30)
summary(LRG_B1_R)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8133 0.9332 0.9400 0.9370 0.9441 0.9482
Here we fit the reduced model for data with Range_M < 70000. Then we make prediction for K12VE gun applying new model.
LRG2 <- LRG%>%filter(Range_M<70000)
fit.LRG_B2 <- rstanarm::stan_glm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG2),iter=5000,chain=4,seed=12345)
fit.LRG_B2
## stan_glm
## family: gaussian [identity]
## formula: Range_M ~ Velocity_MPS + Barrel_M + Elevation
## observations: 24
## predictors: 4
## ------
## Median MAD_SD
## (Intercept) -44302.5 6988.4
## Velocity_MPS 59.2 7.5
## Barrel_M 738.5 186.2
## Elevation 410.4 118.0
## sigma 5233.4 841.7
##
## Sample avg. posterior predictive distribution of y:
## Median MAD_SD
## mean_PPD 31644.4 1520.4
##
## ------
## For info on the priors used see help('prior_summary.stanreg').
plot(fit.LRG_B2)
stan_hist(fit.LRG_B2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stan_trace(fit.LRG_B2)
df <- data.frame(Velocity_MPS=1650,Barrel_M=33.3,Elevation=55)
pp <- posterior_predict(fit.LRG_B2,newdata = df,seed = 12345)
qq <- quantile(pp,probs = c(0.025,0.5,0.975))
qq
## 2.5% 50% 97.5%
## 83542.57 100430.75 117616.87
hist(pp,breaks = 50,col = "red",main = "Posterior destribution for K12VE Range",xlab = "Range")
abline(v = 115000)
qqq <- as.vector(qq)
lines(c(qqq[1],qqq[3]),c(-10,-10),col="blue",lwd=5)
pi <- posterior_interval(fit.LRG_B2,newdata =df,seed = 12345,prob = 0.99)
pi
## 0.5% 99.5%
## (Intercept) -63900.53836 -23949.14560
## Velocity_MPS 37.77990 80.12443
## Barrel_M 191.52451 1284.80824
## Elevation 66.17955 750.62824
## sigma 3629.49239 8516.91711
Not bad too! \(P(83542.57<Range<1117616.87)=0.95\).
LRG%>%filter(Name=="K12VE")
Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation with normal priors is relevant with all variables in the data set.Range_M~Velocity_MPS+Barrel_M+Elevation with normal priors is relevant too.