German Dora
All data for long range guns (LRG) is here: https://en.wikipedia.org/wiki/List_of_railway_artillery Some interesting statistics is here:http://rpubs.com/alex-lev/357849
library(readxl)
library(ggplot2)
library(GGally)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:GGally':
##
## nasa
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(broom)
library(tidyr)
library(rgl)
library(rglwidget)
## The functions in the rglwidget package have been moved to rgl.
library(pca3d)
library(scatterplot3d)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
LRG <- read_excel("LRG.xlsx")
LRG
LRG2 <- LRG%>%select(WW,Range_M,Velocity_MPS,Weight_t,Caliber_mm,Barrel_M,Elevation)%>%mutate(WW_=ifelse(WW==1,"WW1","WW2"))
ggpairs(LRG2[,-1],title = "Correlations for guns data by WW",mapping=ggplot2::aes(colour = WW_))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
cor(na.omit(LRG2[,2:7]))
## Range_M Velocity_MPS Weight_t Caliber_mm Barrel_M
## Range_M 1.00000000 0.9234430 0.3306489 0.07395537 0.7431743
## Velocity_MPS 0.92344302 1.0000000 0.1472641 -0.19241415 0.5803611
## Weight_t 0.33064892 0.1472641 1.0000000 0.82209028 0.7026894
## Caliber_mm 0.07395537 -0.1924142 0.8220903 1.00000000 0.5431284
## Barrel_M 0.74317428 0.5803611 0.7026894 0.54312844 1.0000000
## Elevation 0.49380319 0.3230183 0.2787904 0.18491938 0.3140588
## Elevation
## Range_M 0.4938032
## Velocity_MPS 0.3230183
## Weight_t 0.2787904
## Caliber_mm 0.1849194
## Barrel_M 0.3140588
## Elevation 1.0000000
LRG3 <- LRG%>%select(Country,Range_M,Velocity_MPS,Weight_t,Caliber_mm,Barrel_M,Elevation)%>%
arrange(Country)
LRG3 <- tibble::as_tibble(LRG3)
fit.pca <- prcomp(na.omit(LRG3[,-1]),scale. = T)
summary(fit.pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.7870 1.3217 0.8765 0.40627 0.31972 0.15569
## Proportion of Variance 0.5322 0.2911 0.1281 0.02751 0.01704 0.00404
## Cumulative Proportion 0.5322 0.8234 0.9514 0.97892 0.99596 1.00000
plot(fit.pca)
pca <- prcomp(na.omit(LRG3[,2:7]), scale.=TRUE)
gr <- factor(LRG3$Country)
summary(gr)
## France GB Germany Italy Japan Russia USA
## 4 6 18 1 1 2 5
pca2d( pca, group= gr,legend = "topleft", biplot=TRUE, biplot.vars=5)
fit.LRG <- lm(Range_M~WW+Velocity_MPS+Weight_t+Caliber_mm+Barrel_M+Elevation,data = na.omit(LRG))
summary(fit.LRG)
##
## Call:
## lm(formula = Range_M ~ WW + Velocity_MPS + Weight_t + Caliber_mm +
## Barrel_M + Elevation, data = na.omit(LRG))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10227.2 -1634.6 89.5 3105.2 10202.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62912.053 9292.879 -6.770 2.42e-06 ***
## WW -726.407 2567.009 -0.283 0.78042
## Velocity_MPS 76.864 9.463 8.122 1.97e-07 ***
## Weight_t -13.845 8.797 -1.574 0.13292
## Caliber_mm 31.056 18.587 1.671 0.11204
## Barrel_M 698.205 338.019 2.066 0.05357 .
## Elevation 390.454 127.327 3.067 0.00665 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5289 on 18 degrees of freedom
## Multiple R-squared: 0.9541, Adjusted R-squared: 0.9388
## F-statistic: 62.38 on 6 and 18 DF, p-value: 4.556e-11
avPlots(fit.LRG)
crPlots(fit.LRG)
durbinWatsonTest(fit.LRG)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1066984 1.733899 0.436
## Alternative hypothesis: rho != 0
shapiro.test(fit.LRG$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit.LRG$residuals
## W = 0.95008, p-value = 0.2518
par(mfrow=c(2,2))
plot(fit.LRG)
fit.LRG.2 <- lm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG))
summary(fit.LRG.2)
##
## Call:
## lm(formula = Range_M ~ Velocity_MPS + Barrel_M + Elevation, data = na.omit(LRG))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14849.4 -2322.2 387.2 3071.6 9218.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -51415.65 5924.33 -8.679 2.18e-08 ***
## Velocity_MPS 67.57 6.11 11.059 3.23e-10 ***
## Barrel_M 815.78 186.01 4.386 0.000258 ***
## Elevation 399.48 121.91 3.277 0.003599 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5341 on 21 degrees of freedom
## Multiple R-squared: 0.9454, Adjusted R-squared: 0.9376
## F-statistic: 121.2 on 3 and 21 DF, p-value: 2.023e-13
shapiro.test(fit.LRG.2$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit.LRG.2$residuals
## W = 0.95459, p-value = 0.3173
plot(fit.LRG.2)
anova(fit.LRG,fit.LRG.2)
AIC(fit.LRG,fit.LRG.2)
So fit.LRG.2 is optimal model out of two.
LRG3D <- LRG%>%select(Velocity_MPS,Barrel_M,Range_M)
fit.lm.3d <- lm(log(LRG3D$Range_M)~LRG3D$Velocity_MPS+LRG3D$Barrel_M)
s3d <- scatterplot3d(x = LRG3D$Velocity_MPS,y = LRG3D$Barrel_M,
z = log(LRG3D$Range_M),color = "blue",
pch = 16,angle = 55,type = "p",
main = "Long range guns linear regression plane",
xlab = "Velocity,M/S",ylab = "Barrel,M",zlab = "log(Range,M)")
s3d$plane3d(fit.lm.3d,draw_polygon = T,lty.box = "solid")
LRG32D <- LRG%>%select(Elevation,Barrel_M,Range_M)
fit.lm.32d <- lm(log(LRG32D$Range_M)~LRG32D$Elevation+LRG32D$Barrel_M)
s32d <- scatterplot3d(x = LRG32D$Elevation,y = LRG32D$Barrel_M,
z = log(LRG32D$Range_M),color = "magenta",
pch = 16,angle = 55,type = "p",
main = "Long range guns linear regression plane",
xlab = "Elevation,grad",ylab = "Barrel,M",zlab = "log(Range,M)")
s32d$plane3d(fit.lm.32d,draw_polygon = T,lty.box = "solid")
Here we fit the model for data with Range_M < 70000. Then we make prediction for K12VE gun applying new model.
LRG2 <- LRG%>%filter(Range_M<70000)
fit.LRG.22 <- lm(Range_M~Velocity_MPS+Barrel_M+Elevation,data = na.omit(LRG2))
summary(fit.LRG.22)
##
## Call:
## lm(formula = Range_M ~ Velocity_MPS + Barrel_M + Elevation, data = na.omit(LRG2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14153.6 -1985.0 -60.7 2916.1 7010.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -44325.420 6863.817 -6.458 2.69e-06 ***
## Velocity_MPS 59.200 7.428 7.970 1.24e-07 ***
## Barrel_M 736.305 182.142 4.042 0.000637 ***
## Elevation 411.624 116.034 3.547 0.002020 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5075 on 20 degrees of freedom
## Multiple R-squared: 0.8804, Adjusted R-squared: 0.8624
## F-statistic: 49.06 on 3 and 20 DF, p-value: 2.096e-09
predict(fit.LRG.22,newdata = data.frame(Velocity_MPS=1650,Barrel_M=33.3,Elevation=55),interval = "conf",level = 0.99)
## fit lwr upr
## 1 100513.3 82826.9 118199.7
Not bad! \(P(82826.9<Range<118199.79)=0.99\).
LRG%>%filter(Name=="K12VE")
Range_M~Velocity_MPS+Barrel_M+Elevation.