German Dora

German Dora

Data

All data for long range guns (LRG) is here: https://en.wikipedia.org/wiki/List_of_railway_artillery Some interesting statistics is here:http://rpubs.com/alex-lev/357849

library(readxl)
library(ggplot2)
library(GGally)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:GGally':
## 
##     nasa
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(broom)
library(tidyr)
library(rgl)
library(rglwidget)
## The functions in the rglwidget package have been moved to rgl.
library(pca3d)
library(scatterplot3d)
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
LRG <- read_excel("LRG.xlsx")
LRG

Correlations

LRG2 <- LRG%>%select(WW,Range_M,Velocity_MPS,Weight_t,Caliber_mm,Barrel_M,Elevation)%>%mutate(WW_=ifelse(WW==1,"WW1","WW2"))
ggpairs(LRG2[,-1],title = "Correlations for guns data by WW",mapping=ggplot2::aes(colour = WW_))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

cor(na.omit(LRG2[,2:7]))
##                 Range_M Velocity_MPS  Weight_t  Caliber_mm  Barrel_M
## Range_M      1.00000000    0.9234430 0.3306489  0.07395537 0.7431743
## Velocity_MPS 0.92344302    1.0000000 0.1472641 -0.19241415 0.5803611
## Weight_t     0.33064892    0.1472641 1.0000000  0.82209028 0.7026894
## Caliber_mm   0.07395537   -0.1924142 0.8220903  1.00000000 0.5431284
## Barrel_M     0.74317428    0.5803611 0.7026894  0.54312844 1.0000000
## Elevation    0.49380319    0.3230183 0.2787904  0.18491938 0.3140588
##              Elevation
## Range_M      0.4938032
## Velocity_MPS 0.3230183
## Weight_t     0.2787904
## Caliber_mm   0.1849194
## Barrel_M     0.3140588
## Elevation    1.0000000

Principal component analysis

LRG3 <- LRG%>%select(Country,Range_M,Velocity_MPS,Weight_t,Caliber_mm,Barrel_M,Elevation)%>%
  arrange(Country)
LRG3 <- tibble::as_tibble(LRG3)
fit.pca <- prcomp(na.omit(LRG3[,-1]),scale. = T)
summary(fit.pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7870 1.3217 0.8765 0.40627 0.31972 0.15569
## Proportion of Variance 0.5322 0.2911 0.1281 0.02751 0.01704 0.00404
## Cumulative Proportion  0.5322 0.8234 0.9514 0.97892 0.99596 1.00000
plot(fit.pca)

pca <- prcomp(na.omit(LRG3[,2:7]), scale.=TRUE)
gr <- factor(LRG3$Country)
summary(gr)
##  France      GB Germany   Italy   Japan  Russia     USA 
##       4       6      18       1       1       2       5
pca2d( pca, group= gr,legend = "topleft", biplot=TRUE, biplot.vars=5)

Linear regression model

fit.LRG <- lm(Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation,data = na.omit(LRG))
summary(fit.LRG)
## 
## Call:
## lm(formula = Range_M ~ WW + Velocity_MPS + Weight_t + Caliber_mm + 
##     Barrel_M + Elevation, data = na.omit(LRG))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10227.2  -1634.6     89.5   3105.2  10202.1 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -62912.053   9292.879  -6.770 2.42e-06 ***
## WW             -726.407   2567.009  -0.283  0.78042    
## Velocity_MPS     76.864      9.463   8.122 1.97e-07 ***
## Weight_t        -13.845      8.797  -1.574  0.13292    
## Caliber_mm       31.056     18.587   1.671  0.11204    
## Barrel_M        698.205    338.019   2.066  0.05357 .  
## Elevation       390.454    127.327   3.067  0.00665 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5289 on 18 degrees of freedom
## Multiple R-squared:  0.9541, Adjusted R-squared:  0.9388 
## F-statistic: 62.38 on 6 and 18 DF,  p-value: 4.556e-11
avPlots(fit.LRG)

crPlots(fit.LRG)

durbinWatsonTest(fit.LRG)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1066984      1.733899   0.436
##  Alternative hypothesis: rho != 0
shapiro.test(fit.LRG$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit.LRG$residuals
## W = 0.95008, p-value = 0.2518
par(mfrow=c(2,2))
plot(fit.LRG)

fit.LRG.2 <- lm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG))
summary(fit.LRG.2)
## 
## Call:
## lm(formula = Range_M ~ Velocity_MPS + Barrel_M + Elevation, data = na.omit(LRG))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14849.4  -2322.2    387.2   3071.6   9218.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -51415.65    5924.33  -8.679 2.18e-08 ***
## Velocity_MPS     67.57       6.11  11.059 3.23e-10 ***
## Barrel_M        815.78     186.01   4.386 0.000258 ***
## Elevation       399.48     121.91   3.277 0.003599 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5341 on 21 degrees of freedom
## Multiple R-squared:  0.9454, Adjusted R-squared:  0.9376 
## F-statistic: 121.2 on 3 and 21 DF,  p-value: 2.023e-13
shapiro.test(fit.LRG.2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  fit.LRG.2$residuals
## W = 0.95459, p-value = 0.3173
plot(fit.LRG.2)

Comparison of models with ANOVA

anova(fit.LRG,fit.LRG.2)
AIC(fit.LRG,fit.LRG.2)

So fit.LRG.2 is optimal model out of two.

3D regression planes

LRG3D <- LRG%>%select(Velocity_MPS,Barrel_M,Range_M)
fit.lm.3d <- lm(log(LRG3D$Range_M)~LRG3D$Velocity_MPS+LRG3D$Barrel_M)
s3d <- scatterplot3d(x = LRG3D$Velocity_MPS,y = LRG3D$Barrel_M,
                     z = log(LRG3D$Range_M),color = "blue",
                     pch = 16,angle = 55,type = "p",
                     main = "Long range guns linear regression plane",
                     xlab = "Velocity,M/S",ylab = "Barrel,M",zlab = "log(Range,M)")


s3d$plane3d(fit.lm.3d,draw_polygon = T,lty.box = "solid")

LRG32D <- LRG%>%select(Elevation,Barrel_M,Range_M)
fit.lm.32d <- lm(log(LRG32D$Range_M)~LRG32D$Elevation+LRG32D$Barrel_M)
s32d <- scatterplot3d(x = LRG32D$Elevation,y = LRG32D$Barrel_M,
                     z = log(LRG32D$Range_M),color = "magenta",
                     pch = 16,angle = 55,type = "p",
                     main = "Long range guns linear regression plane",
                     xlab = "Elevation,grad",ylab = "Barrel,M",zlab = "log(Range,M)")

s32d$plane3d(fit.lm.32d,draw_polygon = T,lty.box = "solid")

Predicting Range with the model

Here we fit the model for data with Range_M < 70000. Then we make prediction for K12VE gun applying new model.

LRG2 <- LRG%>%filter(Range_M<70000)
fit.LRG.22 <- lm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG2))
summary(fit.LRG.22)
## 
## Call:
## lm(formula = Range_M ~ Velocity_MPS + Barrel_M + Elevation, data = na.omit(LRG2))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14153.6  -1985.0    -60.7   2916.1   7010.3 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -44325.420   6863.817  -6.458 2.69e-06 ***
## Velocity_MPS     59.200      7.428   7.970 1.24e-07 ***
## Barrel_M        736.305    182.142   4.042 0.000637 ***
## Elevation       411.624    116.034   3.547 0.002020 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5075 on 20 degrees of freedom
## Multiple R-squared:  0.8804, Adjusted R-squared:  0.8624 
## F-statistic: 49.06 on 3 and 20 DF,  p-value: 2.096e-09
predict(fit.LRG.22,newdata = data.frame(Velocity_MPS=1650,Barrel_M=33.3,Elevation=55),interval = "conf",level = 0.99)
##        fit     lwr      upr
## 1 100513.3 82826.9 118199.7

Not bad! \(P(82826.9<Range<118199.79)=0.99\).

LRG%>%filter(Name=="K12VE")

Conclusions

  1. The best fitting linear regression model is Range_M~Velocity_MPS+Barrel_M+Elevation.
  2. Applying model for K12VE German gun on data excluding 70 km range produced relevant results in terms of 99% confidence interval \(P(82826.9<Range<118199.79)=0.99\).