U-576
During World War II, U-boat warfare was the major component of the Battle of the Atlantic, which lasted the duration of the war. Prime Minister Winston Churchill wrote “The only thing that really frightened me during the war was the U-boat peril.”
Here we can get some interesting data about German submarine losses during 1943, the climax of the Battle of the Atlantic: http://uboat.net/fates/losses/1943.htm. Let’s do it!
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lattice)
library(broom)
library(qcc)
## Package 'qcc', version 2.6
## Type 'citation("qcc")' for citing this R package in publications.
load(file = "ub1943.dat")
ub1943
## # A tibble: 236 <U+00D7> 7
## Month Uboat Plus Crew Dead Surv Cause
## <chr> <chr> <int> <int> <int> <int> <chr>
## 1 Jan U-337 1 47 47 0 Missing
## 2 Jan U-164 0 56 54 2 Aircraft
## 3 Jan U-224 0 46 45 1 Warship
## 4 Jan U-507 1 54 54 0 Aircraft
## 5 Jan U-553 1 47 47 0 Missing
## 6 Jan U-301 0 46 45 1 Warship
## 7 Jan U-519 1 50 50 0 Missing
## 8 Feb U-265 1 46 46 0 Aircraft
## 9 Feb U-187 0 54 9 45 Warship
## 10 Feb U-609 1 47 47 0 Warship
## # ... with 226 more rows
Note: \(Plus=1\) means that the entire u-boat crew was lost (100% dead): \(Crew - Dead=0\).
We want to estimate the losses caused by various factors: Accident, Aircraft and Warship. Do Month and Cause make any difference to the number of surviving crew members, both in mean values and in proportions? Let’s do it!
# U-boats lost in 1943, tallied per cause of loss (most frequent cause first)
ublost <- ub1943 %>%
  group_by(Cause) %>%
  count() %>%
  arrange(desc(n))
#ublost
#with(ublost,barplot(n,names.arg = Cause,main = "U-boats lost by cause"))
# pareto.chart() is fed a named vector: the counts labelled with their cause
ubl <- setNames(ublost$n, ublost$Cause)
pareto.chart(ubl, main = "U-boats lost by cause")
##
## Pareto chart analysis for ubl
## Frequency Cum.Freq. Percentage Cum.Percent.
## Aircraft 145 145 61.440678 61.44068
## Warship 65 210 27.542373 88.98305
## Missing 15 225 6.355932 95.33898
## Accident 11 236 4.661017 100.00000
# U-boats lost with the whole crew (Plus == 1), tallied per cause,
# most frequent cause first
ubdead <- ub1943 %>%
  filter(Plus == 1) %>%
  group_by(Cause) %>%
  count() %>%
  arrange(desc(n))
#ubdead
#with(ubdead,barplot(n,names.arg = Cause,main = "U-boats lost with 100% crew by cause"))
# Named count vector for the Pareto chart
ubd <- setNames(ubdead$n, ubdead$Cause)
pareto.chart(ubd, main = "U-boats lost with 100% crew by cause")
##
## Pareto chart analysis for ubd
## Frequency Cum.Freq. Percentage Cum.Percent.
## Aircraft 97 97 61.78344 61.78344
## Warship 45 142 28.66242 90.44586
## Missing 15 157 9.55414 100.00000
# U-boats lost with at least one survivor (Plus == 0), tallied per cause,
# most frequent cause first
ubsaved <- ub1943 %>%
  filter(Plus == 0) %>%
  group_by(Cause) %>%
  count() %>%
  arrange(desc(n))
#ubsaved
#with(ubsaved,barplot(n,names.arg = Cause,main = "U-boats lost with saved crew by cause"))
# Named count vector for the Pareto chart
ubs <- setNames(ubsaved$n, ubsaved$Cause)
pareto.chart(ubs, main = "U-boats lost with saved crew by cause")
##
## Pareto chart analysis for ubs
## Frequency Cum.Freq. Percentage Cum.Percent.
## Aircraft 48 48 60.75949 60.75949
## Warship 20 68 25.31646 86.07595
## Accident 11 79 13.92405 100.00000
# U-boats lost per calendar month, month with the most losses first
ubmonth <- ub1943 %>%
  group_by(Month) %>%
  count() %>%
  arrange(desc(n))
ubmonth
## # A tibble: 12 <U+00D7> 2
## Month n
## <chr> <int>
## 1 May 41
## 2 Jul 37
## 3 Oct 26
## 4 Aug 25
## 5 Nov 19
## 6 Feb 18
## 7 Apr 16
## 8 Jun 16
## 9 Mar 15
## 10 Dec 8
## 11 Sep 8
## 12 Jan 7
with(ubmonth,barplot(n,names.arg = Month,main = "U-boats lost by month"))
#Number of u-boats with 100% dead crew by month
ubmonthd<-filter(ub1943,Plus==1) %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubmonthd
## # A tibble: 12 <U+00D7> 2
## Month n
## <chr> <int>
## 1 May 33
## 2 Jul 20
## 3 Oct 17
## 4 Feb 14
## 5 Nov 14
## 6 Apr 12
## 7 Aug 12
## 8 Jun 11
## 9 Mar 11
## 10 Sep 5
## 11 Dec 4
## 12 Jan 4
with(ubmonthd,barplot(n,names.arg = Month,main = "U-boats with 100% dead crew lost by month "))
#Number of u-boats with saved crew by month
ubmonths<-filter(ub1943,Plus==0) %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubmonths
## # A tibble: 12 <U+00D7> 2
## Month n
## <chr> <int>
## 1 Jul 17
## 2 Aug 13
## 3 Oct 9
## 4 May 8
## 5 Jun 5
## 6 Nov 5
## 7 Apr 4
## 8 Dec 4
## 9 Feb 4
## 10 Mar 4
## 11 Jan 3
## 12 Sep 3
with(ubmonths,barplot(n,names.arg = Month,main = "U-boats with saved crew lost by month "))
U-boat under air attack
#Overall U-boat losses by aircraft and by month
#Number of u-boats with 100% dead crew by Aircraft and by Month
ubair<-filter(ub1943,Plus==1, Cause=="Aircraft") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubair
## # A tibble: 12 <U+00D7> 2
## Month n
## <chr> <int>
## 1 May 19
## 2 Jul 16
## 3 Oct 13
## 4 Aug 8
## 5 Feb 8
## 6 Apr 7
## 7 Jun 7
## 8 Nov 7
## 9 Mar 6
## 10 Sep 3
## 11 Dec 2
## 12 Jan 1
with(ubair,barplot(n,names.arg = Month,main = "U-boats lost by aircraft with 100% dead crew"))
#Number of u-boats lost with saved crew by Aircraft and by Month
ubairs<-filter(ub1943,Plus==0,Cause=="Aircraft") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubairs
## # A tibble: 12 <U+00D7> 2
## Month n
## <chr> <int>
## 1 Jul 15
## 2 Aug 9
## 3 Oct 7
## 4 May 5
## 5 Jun 3
## 6 Apr 2
## 7 Nov 2
## 8 Dec 1
## 9 Feb 1
## 10 Jan 1
## 11 Mar 1
## 12 Sep 1
with(ubairs,barplot(n,names.arg = Month,main = "U-boats lost by aircraft with saved crew"))
U-boat and destroyer
#Overall U-boat losses by warships and by month
#Number of u-boats with 100% dead crew by Warship and by Month
ubship<-filter(ub1943,Plus==1, Cause=="Warship") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubship
## # A tibble: 10 <U+00D7> 2
## Month n
## <chr> <int>
## 1 May 13
## 2 Feb 6
## 3 Nov 6
## 4 Jun 4
## 5 Mar 4
## 6 Apr 3
## 7 Aug 3
## 8 Oct 3
## 9 Jul 2
## 10 Sep 1
with(ubship,barplot(n,names.arg = Month,main = "U-boats lost by warships with 100% dead crew"))
#Number of u-boats lost with saved crew by Warship and by Month
ubships<-filter(ub1943,Plus==0,Cause=="Warship") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubships
## # A tibble: 11 <U+00D7> 2
## Month n
## <chr> <int>
## 1 Apr 2
## 2 Aug 2
## 3 Dec 2
## 4 Feb 2
## 5 Jan 2
## 6 Jul 2
## 7 Jun 2
## 8 Mar 2
## 9 Oct 2
## 10 May 1
## 11 Nov 1
with(ubships,barplot(n,names.arg = Month,main = "U-boats lost by warships with saved crew"))
U-boat wreck
#Overall U-boat losses by accident and by month
#Number of u-boats lost with 100% dead crew by Accident and by Month
ubacc<-filter(ub1943,Plus==1,Cause=="Accident") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubacc
## # A tibble: 0 <U+00D7> 2
## # ... with 2 variables: Month <chr>, n <int>
#empty
#Number of u-boats lost with saved crew by Accident and by Month
ubaccs<-filter(ub1943,Plus==0,Cause=="Accident") %>% group_by(Month) %>% count() %>% arrange(desc(n))
ubaccs
## # A tibble: 7 <U+00D7> 2
## Month n
## <chr> <int>
## 1 Aug 2
## 2 May 2
## 3 Nov 2
## 4 Sep 2
## 5 Dec 1
## 6 Feb 1
## 7 Mar 1
with(ubaccs,barplot(n,names.arg = Month,main = "U-boats lost by accident with saved crew"))
# Overall U-boat losses by missing and by month
# Number of missing u-boats with 100% dead crew per month, largest count first
ubmiss <- ub1943 %>%
  filter(Plus == 1, Cause == "Missing") %>%
  group_by(Month) %>%
  count() %>%
  arrange(desc(n))
ubmiss
## # A tibble: 10 <U+00D7> 2
## Month n
## <chr> <int>
## 1 Jan 3
## 2 Apr 2
## 3 Dec 2
## 4 Jul 2
## 5 Aug 1
## 6 Mar 1
## 7 May 1
## 8 Nov 1
## 9 Oct 1
## 10 Sep 1
with(ubmiss,barplot(n,names.arg = Month,main = "U-boats lost by missing with 100% dead crew"))
#number of all u-boats lost in 1943
count(ub1943)
## # A tibble: 1 <U+00D7> 1
## n
## <int>
## 1 236
#number of u-boats with 100% dead crew
sum(ub1943$Plus)
## [1] 157
#overall probability crew would not survive in lost u-boat
sum(ub1943$Plus)/count(ub1943)
## n
## 1 0.6652542
#overall probability some members of the crew would survive in lost u-boat
1 - sum(ub1943$Plus)/count(ub1943)
## n
## 1 0.3347458
# Empirical probability that a lost u-boat was lost to aircraft:
# boats with Cause == "Aircraft" divided by all boats in the table
nub_all <- ub1943 %>%
  count()                       # one-row tibble, column n = total boats lost
nub_air <- ub1943 %>%
  filter(Cause == "Aircraft") %>%
  count()                       # one-row tibble, column n = boats lost to aircraft
prob_air <- nub_air$n / nub_all$n
prob_air
## [1] 0.6144068
# Empirical probability that a lost u-boat was lost to a warship:
# boats with Cause == "Warship" divided by all boats (nub_all, set earlier)
nub_ship <- ub1943 %>%
  filter(Cause == "Warship") %>%
  count()
prob_ship <- nub_ship$n / nub_all$n
prob_ship
## [1] 0.2754237
#Probability u-boat would be lost by accident or by missing
prob_acc_mis<-1 - prob_air - prob_ship
prob_acc_mis
## [1] 0.1101695
# Conditional probabilities
# P(100% of the crew dead | u-boat lost to aircraft):
# aircraft losses with Plus == 1 divided by all aircraft losses (nub_air)
nub_air_all <- ub1943 %>%
  filter(Cause == "Aircraft", Plus == 1) %>%
  count()
prob_air_all <- nub_air_all$n / nub_air$n
prob_air_all
## [1] 0.6689655
# P(100% of the crew dead | u-boat lost to a warship):
# warship losses with Plus == 1 divided by all warship losses (nub_ship)
nub_ship_all <- ub1943 %>%
  filter(Cause == "Warship", Plus == 1) %>%
  count()
prob_ship_all <- nub_ship_all$n / nub_ship$n
prob_ship_all
## [1] 0.6923077
# Probability 100% crew would be dead if u-boat lost by accident or by missing
# Denominator: every u-boat lost by accident or missing, whatever the crew's fate.
# Numerator: those of them that were lost with the whole crew (Plus == 1).
# The totals used to be assembled from "Missing with Plus == 1" and
# "Accident with Plus == 0" only; that matched the true totals just because,
# in this data, no accident killed a whole crew and no missing boat had
# survivors. Counting the groups directly gives the same value here and stays
# correct if the data change.
nub_miss_all <- ub1943 %>%
  filter(Cause == "Missing", Plus == 1) %>%
  count()                                   # missing boats lost with all hands
nub_acc <- ub1943 %>%
  filter(Cause == "Accident") %>%
  count()                                   # all boats lost by accident
nub_acc_miss <- ub1943 %>%
  filter(Cause %in% c("Accident", "Missing")) %>%
  nrow()                                    # all boats lost by accident or missing
nub_acc_miss_all <- ub1943 %>%
  filter(Cause %in% c("Accident", "Missing"), Plus == 1) %>%
  nrow()                                    # ... of which lost with all hands
prob_acc_mis__all <- nub_acc_miss_all / nub_acc_miss
prob_acc_mis__all
## [1] 0.5769231
# Bayes' theorem
# Law of total probability: chance that a lost u-boat went down with 100% of
# its crew, summed over the causes (aircraft, warship, accident or missing)
prob_all <- prob_air * prob_air_all +
  prob_ship * prob_ship_all +
  prob_acc_mis * prob_acc_mis__all
# Posterior P(aircraft | 100% crew loss)
#   = P(aircraft) * P(100% crew loss | aircraft) / P(100% crew loss)
prob_all_air <- prob_air * prob_air_all / prob_all
prob_all_air
## [1] 0.6178344
# Posterior P(warship | 100% crew loss)
#   = P(warship) * P(100% crew loss | warship) / P(100% crew loss)
prob_all_ship <- prob_ship * prob_ship_all / prob_all
prob_all_ship
## [1] 0.2866242
# Posterior P(accident or missing | 100% crew loss)
#   = P(accident or missing) * P(100% crew loss | accident or missing) / P(100% crew loss)
prob_all_acc_mis <- prob_acc_mis * prob_acc_mis__all / prob_all
prob_all_acc_mis
## [1] 0.0955414
#Check for sum = 1
prob_all_air + prob_all_ship + prob_all_acc_mis
## [1] 1
# Law of total probability for the complementary event: chance that at least
# some members of the crew of a lost u-boat survived, summed over the causes
# (aircraft, warship, accident or missing)
prob_not_all <- prob_air * (1 - prob_air_all) +
  prob_ship * (1 - prob_ship_all) +
  prob_acc_mis * (1 - prob_acc_mis__all)
prob_not_all
## [1] 0.3347458
#Check for sum =1
prob_not_all + prob_all
## [1] 1
# Posterior P(aircraft | some of the crew survived)
#   = P(aircraft) * P(survivors | aircraft) / P(survivors)
prob_not_all_air <- prob_air * (1 - prob_air_all) / prob_not_all
prob_not_all_air
## [1] 0.6075949
# Posterior P(warship | some of the crew survived)
#   = P(warship) * P(survivors | warship) / P(survivors)
prob_not_all_ship <- prob_ship * (1 - prob_ship_all) / prob_not_all
prob_not_all_ship
## [1] 0.2531646
# Posterior P(accident or missing | some of the crew survived)
#   = P(accident or missing) * P(survivors | accident or missing) / P(survivors)
prob_not_all_acc_mis <- prob_acc_mis * (1 - prob_acc_mis__all) / prob_not_all
prob_not_all_acc_mis
## [1] 0.1392405
#Check for sum = 1
prob_not_all_air + prob_not_all_ship + prob_not_all_acc_mis
## [1] 1
# the summary for absolute values of the survived members of the crews
filter(ub1943,Plus==0) %>% summarise(min(Surv), mean(Surv),median(Surv), max(Surv))
## # A tibble: 1 <U+00D7> 4
## `min(Surv)` `mean(Surv)` `median(Surv)` `max(Surv)`
## <int> <dbl> <int> <int>
## 1 1 23.25316 18 64
# the summary for survived members of the crews - u-boats lost by aircraft
filter(ub1943,Plus==0, Cause=="Aircraft") %>% summarise(min(Surv), mean(Surv),median(Surv), max(Surv))
## # A tibble: 1 <U+00D7> 4
## `min(Surv)` `mean(Surv)` `median(Surv)` `max(Surv)`
## <int> <dbl> <dbl> <int>
## 1 1 23.625 18 64
# the summary for survived members of the crews - u-boats lost by warships
filter(ub1943,Plus==0, Cause=="Warship") %>% summarise(min(Surv), mean(Surv),median(Surv), max(Surv))
## # A tibble: 1 <U+00D7> 4
## `min(Surv)` `mean(Surv)` `median(Surv)` `max(Surv)`
## <int> <dbl> <dbl> <int>
## 1 1 22.95 23.5 51
# the summary for survived members of the crews - u-boats lost by accident
filter(ub1943,Plus==0, Cause=="Accident") %>% summarise(min(Surv), mean(Surv),median(Surv), max(Surv))
## # A tibble: 1 <U+00D7> 4
## `min(Surv)` `mean(Surv)` `median(Surv)` `max(Surv)`
## <int> <dbl> <int> <int>
## 1 3 22.18182 16 49
histogram(~Dead|Cause, data=ub1943, type = "count")
# Model of the number of survivors on crew size, month and cause.
# glm() is called without a `family` argument, so its default (gaussian)
# family is used.
ub_glm <- glm(formula = Surv ~ Crew + Month + Cause, data = ub1943)
# Coefficient table: estimate, std.error, statistic and p.value per term
tidy(ub_glm)
## term estimate std.error statistic p.value
## 1 (Intercept) 17.1345080 10.9305985 1.5675727 0.1184180755
## 2 Crew 0.1268852 0.2106235 0.6024265 0.5475106856
## 3 MonthAug 4.8808016 4.5928461 1.0626965 0.2890847010
## 4 MonthDec 12.6633980 6.2988250 2.0104381 0.0456055974
## 5 MonthFeb -4.3858084 4.9480903 -0.8863639 0.3763893458
## 6 MonthJan -5.3921571 6.5662878 -0.8211881 0.4124281323
## 7 MonthJul -0.9539797 4.3077526 -0.2214565 0.8249424338
## 8 MonthJun -4.7446366 5.0599448 -0.9376854 0.3494343410
## 9 MonthMar -6.8066927 5.1379327 -1.3247921 0.1866145288
## 10 MonthMay -5.8375464 4.2215992 -1.3827808 0.1681340057
## 11 MonthNov -2.9808652 4.9179196 -0.6061232 0.5450578196
## 12 MonthOct -5.0770799 4.5500898 -1.1158197 0.2657166065
## 13 MonthSep -0.3782765 6.2704553 -0.0603268 0.9519501230
## 14 CauseAircraft -13.5745010 4.8250720 -2.8133261 0.0053470530
## 15 CauseMissing -22.7293528 5.9149559 -3.8426918 0.0001592487
## 16 CauseWarship -13.3808374 4.9262148 -2.7162513 0.0071274190
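Note: The number of survivors (Surv) does not depend significantly on Crew (number of the crew members) - \(p.value=0.547\), but does depend on Month=December - \(p.value=0.045\) and on Cause (Aircraft - \(p.value=0.005\), Warship - \(p.value=0.007\)). The coefficients are differences from the reference levels that are absent from the table (Month=Apr, Cause=Accident), not from the overall mean value \(23.25\), which was computed only for u-boats with survivors. For December we see an extra \(12.6\) survivors relative to April, while for Aircraft we see \(-13.57\) and for Warship \(-13.38\) relative to Accident. Missing has the largest negative effect, \(-22.72\) with \(p.value=0.0001\), which is consistent with missing u-boats having no survivors at all.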
# the summary for proportion of the survived members of the crews
filter(ub1943,Plus==0) %>% summarise(min(Surv/Crew), mean(Surv/Crew),median(Surv/Crew), max(Surv/Crew))
## # A tibble: 1 <U+00D7> 4
## `min(Surv/Crew)` `mean(Surv/Crew)` `median(Surv/Crew)` `max(Surv/Crew)`
## <dbl> <dbl> <dbl> <dbl>
## 1 0.01886792 0.461866 0.4313725 1
# the summary for proportion of the survived members of the crews - u-boats lost by aircraft
filter(ub1943,Plus==0, Cause=="Aircraft") %>% summarise(min(Surv/Crew), mean(Surv/Crew),median(Surv/Crew), max(Surv/Crew))
## # A tibble: 1 <U+00D7> 4
## `min(Surv/Crew)` `mean(Surv/Crew)` `median(Surv/Crew)` `max(Surv/Crew)`
## <dbl> <dbl> <dbl> <dbl>
## 1 0.01886792 0.4561311 0.3831522 1
# the summary for proportion of the survived members of the crews - u-boats lost by warships
filter(ub1943,Plus==0, Cause=="Warship") %>% summarise(min(Surv/Crew), mean(Surv/Crew),median(Surv/Crew), max(Surv/Crew))
## # A tibble: 1 <U+00D7> 4
## `min(Surv/Crew)` `mean(Surv/Crew)` `median(Surv/Crew)` `max(Surv/Crew)`
## <dbl> <dbl> <dbl> <dbl>
## 1 0.01923077 0.4546099 0.4673913 1
# the summary for proportion of the survived members of the crews - u-boats lost by accident
filter(ub1943,Plus==0, Cause=="Accident") %>% summarise(min(Surv/Crew), mean(Surv/Crew),median(Surv/Crew), max(Surv/Crew))
## # A tibble: 1 <U+00D7> 4
## `min(Surv/Crew)` `mean(Surv/Crew)` `median(Surv/Crew)` `max(Surv/Crew)`
## <dbl> <dbl> <dbl> <dbl>
## 1 0.06382979 0.5000842 0.4324324 1
# Scatter plots of survivors and per-month loss counts, split by cause of loss.
# Built with ggplot() instead of qplot(), which is deprecated as of ggplot2 3.4.0.
# match(Month, month.abb) converts the month abbreviation to its number (1-12).

# Survivors against dead per u-boat; colour shows the share of the crew that survived
ggplot(ub1943, aes(Dead, Surv, colour = Surv / Crew, shape = Cause)) +
  geom_point() +
  labs(x = "Number of dead", y = "Number of survived",
       title = "Survived versus dead")

# Number of survivors per u-boat against month of loss
ggplot(ub1943, aes(match(Month, month.abb), Surv, colour = factor(Cause))) +
  geom_point() +
  labs(x = "Month (1 - January,..12 - December)", y = "Number of survived",
       title = "Survived by cause")

# Proportion of the crew that survived against month of loss
ggplot(ub1943, aes(match(Month, month.abb), Surv / Crew, colour = factor(Cause))) +
  geom_point() +
  labs(x = "Month (1 - January,..12 - December)", y = "Proportion of survived",
       title = "Proportions of survived by cause")

# Stacked bars: the bar height is the number of rows (lost u-boats) per month,
# not a number of survivors, so the y label and title say so
ggplot(ub1943, aes(match(Month, month.abb), fill = Cause)) +
  geom_bar() +
  labs(x = "Month (1 - January,..12 - December)", y = "Number of u-boats lost",
       title = "Stacked diagram of u-boats lost by cause")

# Frequency polygons of the same row counts, one line per cause
# NOTE(review): bins = 7 spreads 12 months over 7 bins, so bins do not line up
# with months; binwidth = 1 may be what was intended - confirm
ggplot(ub1943, aes(match(Month, month.abb), group = Cause, colour = Cause)) +
  geom_freqpoly(bins = 7) +
  labs(x = "Month (1 - January,..12 - December)", y = "Number of u-boats lost",
       title = "Frequency polygon of u-boats lost by cause")
# Violin plots (each violin scaled to the same width) with the individual
# u-boats overlaid as horizontally jittered points.

# Number of survivors per u-boat, by cause
ggplot(ub1943, aes(x = factor(Cause), y = Surv)) +
  geom_violin(aes(fill = factor(Cause)), scale = "width") +
  geom_jitter(height = 0) +
  labs(x = "Cause", y = "Number of survived",
       title = "Number of survived by cause")

# Number of survivors per u-boat, by month
ggplot(ub1943, aes(x = factor(Month), y = Surv)) +
  geom_violin(aes(fill = factor(Month)), scale = "width") +
  geom_jitter(height = 0) +
  labs(x = "Month", y = "Number of survived",
       title = "Number of survived by month")

# Proportion of the crew that survived, by cause
ggplot(ub1943, aes(x = factor(Cause), y = Surv / Crew)) +
  geom_violin(aes(fill = factor(Cause)), scale = "width") +
  geom_jitter(height = 0) +
  labs(x = "Cause", y = "Proportion of survived",
       title = "Proportion of survived by cause")

# Proportion of the crew that survived, by month
ggplot(ub1943, aes(x = factor(Month), y = Surv / Crew)) +
  geom_violin(aes(fill = factor(Month)), scale = "width") +
  geom_jitter(height = 0) +
  labs(x = "Month", y = "Proportion of survived",
       title = "Proportion of survived by month")
with(ub1943,cor.test(match(Month,month.abb),Surv)) # Survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv
## t = 2.4601, df = 234, p-value = 0.01461
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03172266 0.28078710
## sample estimates:
## cor
## 0.1587799
with(ub1943,cor.test(match(Month,month.abb),Surv/Crew)) # Proportion of survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv/Crew
## t = 2.4808, df = 234, p-value = 0.01381
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.03305795 0.28201794
## sample estimates:
## cor
## 0.1600826
#U-boat losses by aircraft
air<-filter(ub1943,Cause=="Aircraft")
with(air,cor.test(match(Month,month.abb),Surv)) # Survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv
## t = 1.1663, df = 143, p-value = 0.2454
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06699954 0.25602787
## sample estimates:
## cor
## 0.09706984
with(air,cor.test(match(Month,month.abb),Surv/Crew)) # Proportion of survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv/Crew
## t = 1.1444, df = 143, p-value = 0.2544
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.06881757 0.25432034
## sample estimates:
## cor
## 0.09526028
#U-boat losses by warship
ship<-filter(ub1943,Cause=="Warship")
with(ship,cor.test(match(Month,month.abb),Surv)) # Survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv
## t = 1.5469, df = 63, p-value = 0.1269
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05517639 0.41579631
## sample estimates:
## cor
## 0.1912971
with(ship,cor.test(match(Month,month.abb),Surv/Crew)) # Proportion of survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv/Crew
## t = 1.5437, df = 63, p-value = 0.1277
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05558137 0.41546026
## sample estimates:
## cor
## 0.1909057
#U-boat losses by accident
acc<-filter(ub1943,Cause=="Accident")
with(acc,cor.test(match(Month,month.abb),Surv)) # Survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv
## t = 2.0306, df = 9, p-value = 0.07286
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05925847 0.86840996
## sample estimates:
## cor
## 0.5605425
with(acc,cor.test(match(Month,month.abb),Surv/Crew)) # Proportion of survived versus month
##
## Pearson's product-moment correlation
##
## data: match(Month, month.abb) and Surv/Crew
## t = 1.8224, df = 9, p-value = 0.1017
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1172064 0.8532972
## sample estimates:
## cor
## 0.5191711
# Survivors (Surv) against crew size (Crew) with a loess smoother,
# one panel per cause of loss
ggplot(ub1943, aes(x = Crew, y = Surv)) +
  geom_jitter(height = 0) +
  geom_smooth(method = loess) +
  facet_wrap(~ Cause)
# The same scatter and loess smoother, one panel per month
ggplot(ub1943, aes(x = Crew, y = Surv)) +
  geom_jitter(height = 0) +
  geom_smooth(method = loess) +
  facet_wrap(~ Month)
# Proportion of the crew that survived, summarised per cause.
# Only u-boats with at least one survivor (Plus == 0) are included.
# Columns: minimum, mean and maximum, standard deviation, and `var`, which
# holds sd/mean (the coefficient of variation, not a variance).
by_cause_0_ <- ub1943 %>%
  filter(Plus == 0) %>%
  mutate(share = Surv / Crew) %>%
  group_by(Cause) %>%
  summarise(
    min = min(share),
    mean = mean(share),
    max = max(share),
    sd = sd(share),
    var = sd / mean
  )
by_cause_0_
## # A tibble: 3 <U+00D7> 6
## Cause min mean max sd var
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Accident 0.06382979 0.5000842 1 0.3788756 0.7576236
## 2 Aircraft 0.01886792 0.4561311 1 0.3538102 0.7756765
## 3 Warship 0.01923077 0.4546099 1 0.3143278 0.6914232
# Absolute number of survivors, summarised per month.
# Only u-boats with at least one survivor (Plus == 0) are included.
# Columns: minimum, mean and maximum, standard deviation, and `var`, which
# holds sd/mean (the coefficient of variation, not a variance).
by_month_0 <- ub1943 %>%
  filter(Plus == 0) %>%
  group_by(Month) %>%
  summarise(
    min = min(Surv),
    mean = mean(Surv),
    max = max(Surv),
    sd = sd(Surv),
    var = sd / mean
  )
# Shown in descending order of the mean
by_month_0 %>% arrange(desc(mean))
## # A tibble: 12 <U+00D7> 6
## Month min mean max sd var
## <chr> <int> <dbl> <int> <dbl> <dbl>
## 1 Dec 34 45.000000 51 7.6157731 0.1692394
## 2 Apr 12 35.250000 50 16.3171689 0.4628984
## 3 Nov 7 31.600000 48 18.3657290 0.5811940
## 4 Sep 6 31.000000 49 22.3383079 0.7205906
## 5 Aug 4 30.076923 53 15.0856529 0.5015690
## 6 Feb 11 27.250000 45 18.8038117 0.6900481
## 7 May 3 22.875000 47 16.5480901 0.7234138
## 8 Jul 2 18.941176 64 18.2772762 0.9649494
## 9 Jun 1 17.200000 30 10.5214068 0.6117097
## 10 Oct 1 13.444444 49 16.4705124 1.2250794
## 11 Mar 4 12.250000 20 7.1355915 0.5824973
## 12 Jan 1 1.333333 2 0.5773503 0.4330127
# Proportion of the crew that survived, summarised per month.
# Only u-boats with at least one survivor (Plus == 0) are included.
# Columns: minimum, mean and maximum, standard deviation, and `var`, which
# holds sd/mean (the coefficient of variation, not a variance).
by_month_0_ <- ub1943 %>%
  filter(Plus == 0) %>%
  mutate(share = Surv / Crew) %>%
  group_by(Month) %>%
  summarise(
    min = min(share),
    mean = mean(share),
    max = max(share),
    sd = sd(share),
    var = sd / mean
  )
# Shown in descending order of the mean
by_month_0_ %>% arrange(desc(mean))
## # A tibble: 12 <U+00D7> 6
## Month min mean max sd var
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Dec 0.68000000 0.86491525 1.00000000 0.16120144 0.1863783
## 2 Apr 0.22222222 0.69328704 1.00000000 0.33166539 0.4783955
## 3 Sep 0.13953488 0.67441860 1.00000000 0.46685721 0.6922365
## 4 Nov 0.14000000 0.65373655 1.00000000 0.40108602 0.6135285
## 5 Aug 0.08695652 0.61247751 0.98148148 0.29934951 0.4887518
## 6 Feb 0.23404255 0.53662658 0.84000000 0.34647337 0.6456508
## 7 May 0.06382979 0.45018692 0.87037037 0.29897361 0.6641099
## 8 Jun 0.01923077 0.34884431 0.62500000 0.22394223 0.6419546
## 9 Jul 0.04444444 0.34511255 0.98461538 0.32242032 0.9342469
## 10 Mar 0.08888889 0.28689832 0.43478261 0.17450935 0.6082620
## 11 Oct 0.01886792 0.26828219 1.00000000 0.33258006 1.2396651
## 12 Jan 0.02173913 0.02639752 0.03571429 0.00806856 0.3056560
#by month and by cause
#Aircraft absolute values of the survived members by month
by_month_air_0<-filter(ub1943,Plus==0, Cause=="Aircraft") %>% group_by(Month) %>% summarise(min=min(Surv), mean=mean(Surv), max=max(Surv),sd=sd(Surv), var=sd/mean)
arrange(by_month_air_0, desc(mean))# in descending order by mean
## # A tibble: 12 <U+00D7> 6
## Month min mean max sd var
## <chr> <int> <dbl> <int> <dbl> <dbl>
## 1 Sep 49 49.00000 49 NaN NaN
## 2 Dec 46 46.00000 46 NaN NaN
## 3 Nov 42 45.00000 48 4.242641 0.0942809
## 4 Apr 38 44.00000 50 8.485281 0.1928473
## 5 Feb 42 42.00000 42 NaN NaN
## 6 May 9 28.60000 47 16.876018 0.5900706
## 7 Aug 4 28.22222 53 17.419657 0.6172319
## 8 Jul 2 18.66667 64 18.561353 0.9943582
## 9 Jun 16 18.33333 21 2.516611 0.1372697
## 10 Oct 1 10.85714 49 17.883219 1.6471386
## 11 Mar 9 9.00000 9 NaN NaN
## 12 Jan 2 2.00000 2 NaN NaN
#Aircraft proportion of the survived members by month
by_month_air_0_<-filter(ub1943,Plus==0, Cause=="Aircraft") %>% group_by(Month) %>% summarise(min=min(Surv/Crew), mean=mean(Surv/Crew), max=max(Surv/Crew),sd=sd(Surv/Crew), var=sd/mean)
arrange(by_month_air_0_, desc(mean)) # in descending order by mean
## # A tibble: 12 <U+00D7> 6
## Month min mean max sd var
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sep 1.00000000 1.00000000 1.00000000 NA NA
## 2 Nov 0.84000000 0.90979592 0.97959184 0.09870633 0.1084928
## 3 Apr 0.79166667 0.89583333 1.00000000 0.14731391 0.1644434
## 4 Feb 0.84000000 0.84000000 0.84000000 NA NA
## 5 Dec 0.77966102 0.77966102 0.77966102 NA NA
## 6 Aug 0.08695652 0.55873673 0.98148148 0.33539502 0.6002738
## 7 May 0.18367347 0.55413175 0.87037037 0.28880991 0.5211936
## 8 Jun 0.27118644 0.36666360 0.43750000 0.08585118 0.2341415
## 9 Jul 0.04444444 0.33264646 0.98461538 0.31878304 0.9583239
## 10 Oct 0.01886792 0.22102754 1.00000000 0.36645554 1.6579633
## 11 Mar 0.19148936 0.19148936 0.19148936 NA NA
## 12 Jan 0.03571429 0.03571429 0.03571429 NA NA
#Warship absolute values of the survived members by month
by_month_ship_0<-filter(ub1943,Plus==0, Cause=="Warship") %>% group_by(Month) %>% summarise(min=min(Surv), mean=mean(Surv), max=max(Surv),sd=sd(Surv), var=sd/mean)
arrange(by_month_ship_0, desc(mean)) # in descending order by mean
## # A tibble: 11 <U+00D7> 6
## Month min mean max sd var
## <chr> <int> <dbl> <int> <dbl> <dbl>
## 1 Dec 34 42.5 51 12.020815 0.28284271
## 2 Aug 37 38.0 39 1.414214 0.03721615
## 3 Feb 11 28.0 45 24.041631 0.85862966
## 4 May 28 28.0 28 NaN NaN
## 5 Apr 12 26.5 41 20.506097 0.77381497
## 6 Oct 18 22.5 27 6.363961 0.28284271
## 7 Jul 5 21.0 37 22.627417 1.07749605
## 8 Nov 17 17.0 17 NaN NaN
## 9 Jun 1 15.5 30 20.506097 1.32297398
## 10 Mar 4 12.0 20 11.313708 0.94280904
## 11 Jan 1 1.0 1 0.000000 0.00000000
#Warship proportion of the survived members by month
by_month_ship_0_<-filter(ub1943,Plus==0, Cause=="Warship") %>% group_by(Month) %>% summarise(min=min(Surv/Crew), mean=mean(Surv/Crew), max=max(Surv/Crew),sd=sd(Surv/Crew), var=sd/mean)
arrange(by_month_ship_0_,desc(mean)) # in descending order by mean
## # A tibble: 11 <U+00D7> 6
## Month min mean max sd var
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Dec 0.68000000 0.84000000 1.00000000 0.22627417 0.2693740
## 2 Aug 0.68518519 0.75748621 0.82978723 0.10224909 0.1349848
## 3 May 0.58333333 0.58333333 0.58333333 NA NA
## 4 Feb 0.23404255 0.53368794 0.83333333 0.42376257 0.7940269
## 5 Apr 0.22222222 0.49074074 0.75925926 0.37974253 0.7738150
## 6 Jul 0.10638298 0.43860816 0.77083333 0.46983735 1.0712007
## 7 Oct 0.36734694 0.43367347 0.50000000 0.09379988 0.2162915
## 8 Jun 0.01923077 0.32211538 0.62500000 0.42834353 1.3297829
## 9 Nov 0.30909091 0.30909091 0.30909091 NA NA
## 10 Mar 0.08888889 0.26183575 0.43478261 0.24458379 0.9341115
## 11 Jan 0.02173913 0.02173913 0.02173913 0.00000000 0.0000000
#Accident absolute values of the survived members by month
by_month_accd_0<-filter(ub1943,Plus==0, Cause=="Accident") %>% group_by(Month) %>% summarise(min=min(Surv), mean=mean(Surv), max=max(Surv),sd=sd(Surv), var=sd/mean)
arrange(by_month_accd_0, desc(mean))# in descending order by mean
## # A tibble: 7 <U+00D7> 6
## Month min mean max sd var
## <chr> <int> <dbl> <int> <dbl> <dbl>
## 1 Dec 49 49.0 49 NaN NaN
## 2 Aug 22 30.5 39 12.020815 0.3941251
## 3 Nov 7 25.5 44 26.162951 1.0259981
## 4 Sep 6 22.0 38 22.627417 1.0285190
## 5 Mar 16 16.0 16 NaN NaN
## 6 Feb 11 11.0 11 NaN NaN
## 7 May 3 6.0 9 4.242641 0.7071068
#Accident proportions of the survived members by month
by_month_accd_0_<-filter(ub1943,Plus==0, Cause=="Accident") %>% group_by(Month) %>% summarise(min=min(Surv/Crew), mean=mean(Surv/Crew), max=max(Surv/Crew),sd=sd(Surv/Crew), var=sd/mean)
arrange(by_month_accd_0_, desc(mean))# in descending order by mean
## # A tibble: 7 <U+00D7> 6
## Month min mean max sd var
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Dec 1.00000000 1.0000000 1.0000000 NA NA
## 2 Aug 0.51162791 0.7093023 0.9069767 0.27955384 0.3941251
## 3 Nov 0.14000000 0.5700000 1.0000000 0.60811183 1.0668629
## 4 Sep 0.13953488 0.5116279 0.8837209 0.52621900 1.0285190
## 5 Mar 0.43243243 0.4324324 0.4324324 NA NA
## 6 Feb 0.23913043 0.2391304 0.2391304 NA NA
## 7 May 0.06382979 0.1237516 0.1836735 0.08474228 0.6847771
# Crew members lost (Dead) on u-boats whose cause of loss is "Missing",
# summarised per month: minimum, mean, maximum, standard deviation and
# `var` = sd/mean (coefficient of variation)
by_month_mis <- ub1943 %>%
  filter(Cause == "Missing") %>%
  group_by(Month) %>%
  summarise(
    min = min(Dead),
    mean = mean(Dead),
    max = max(Dead),
    sd = sd(Dead),
    var = sd / mean
  )
# Shown in descending order of the mean
by_month_mis %>% arrange(desc(mean))
## # A tibble: 10 <U+00D7> 6
## Month min mean max sd var
## <chr> <int> <dbl> <int> <dbl> <dbl>
## 1 Aug 52 52.0 52 NaN NaN
## 2 Dec 49 52.0 55 4.2426407 0.08158924
## 3 Sep 51 51.0 51 NaN NaN
## 4 Nov 50 50.0 50 NaN NaN
## 5 Oct 49 49.0 49 NaN NaN
## 6 Jan 47 48.0 50 1.7320508 0.03608439
## 7 Apr 47 47.5 48 0.7071068 0.01488646
## 8 Jul 46 47.0 48 1.4142136 0.03008965
## 9 May 47 47.0 47 NaN NaN
## 10 Mar 46 46.0 46 NaN NaN
#Test for normality
shapiro.test(by_month_air_0$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_air_0$mean
## W = 0.90676, p-value = 0.1939
shapiro.test(by_month_air_0_$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_air_0_$mean
## W = 0.9315, p-value = 0.3963
shapiro.test(by_month_ship_0$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_ship_0$mean
## W = 0.98097, p-value = 0.9713
shapiro.test(by_month_ship_0_$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_ship_0_$mean
## W = 0.978, p-value = 0.954
shapiro.test(by_month_accd_0$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_accd_0$mean
## W = 0.94889, p-value = 0.7196
shapiro.test(by_month_accd_0_$mean)
##
## Shapiro-Wilk normality test
##
## data: by_month_accd_0_$mean
## W = 0.97878, p-value = 0.9534
#F Test to Compare Two Variances
#Warships and aircraft
var.test(by_month_ship_0$mean, by_month_air_0$mean)
##
## F test to compare two variances
##
## data: by_month_ship_0$mean and by_month_air_0$mean
## F = 0.49941, num df = 10, denom df = 11, p-value = 0.2841
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1416498 1.8302967
## sample estimates:
## ratio of variances
## 0.4994106
var.test(by_month_ship_0_$mean, by_month_air_0_$mean)
##
## F test to compare two variances
##
## data: by_month_ship_0_$mean and by_month_air_0_$mean
## F = 0.49675, num df = 10, denom df = 11, p-value = 0.2805
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1408937 1.8205280
## sample estimates:
## ratio of variances
## 0.4967451
#Warships and accidents
var.test(by_month_ship_0$mean, by_month_accd_0$mean)
##
## F test to compare two variances
##
## data: by_month_ship_0$mean and by_month_accd_0$mean
## F = 0.67249, num df = 10, denom df = 6, p-value = 0.5518
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1231361 2.7384509
## sample estimates:
## ratio of variances
## 0.6724859
var.test(by_month_ship_0_$mean, by_month_accd_0_$mean)
##
## F test to compare two variances
##
## data: by_month_ship_0_$mean and by_month_accd_0_$mean
## F = 0.6192, num df = 10, denom df = 6, p-value = 0.4794
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1133795 2.5214715
## sample estimates:
## ratio of variances
## 0.6192019
#Aircraft and accidents
var.test(by_month_air_0$mean, by_month_accd_0$mean)
##
## F test to compare two variances
##
## data: by_month_air_0$mean and by_month_accd_0$mean
## F = 1.3466, num df = 11, denom df = 6, p-value = 0.7457
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2489129 5.2255267
## sample estimates:
## ratio of variances
## 1.346559
var.test(by_month_air_0_$mean, by_month_accd_0_$mean)
##
## F test to compare two variances
##
## data: by_month_air_0_$mean and by_month_accd_0_$mean
## F = 1.2465, num df = 11, denom df = 6, p-value = 0.8236
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2304202 4.8373033
## sample estimates:
## ratio of variances
## 1.246518
#Student test for mean values and proportions
#Warships and aircraft
t.test(by_month_ship_0$mean, by_month_air_0$mean)
##
## Welch Two Sample t-test
##
## data: by_month_ship_0$mean and by_month_air_0$mean
## t = -0.93669, df = 19.79, p-value = 0.3602
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -17.963811 6.835432
## sample estimates:
## mean of x mean of y
## 22.90909 28.47328
t.test(by_month_ship_0_$mean, by_month_air_0_$mean)
##
## Welch Two Sample t-test
##
## data: by_month_ship_0_$mean and by_month_air_0_$mean
## t = -0.88359, df = 19.767, p-value = 0.3875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3473377 0.1407472
## sample estimates:
## mean of x mean of y
## 0.4538465 0.5571417
#Warships and accidents
t.test(by_month_ship_0$mean, by_month_accd_0$mean)
##
## Welch Two Sample t-test
##
## data: by_month_ship_0$mean and by_month_accd_0$mean
## t = 0.0080615, df = 11.023, p-value = 0.9937
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.1275 14.2314
## sample estimates:
## mean of x mean of y
## 22.90909 22.85714
t.test(by_month_ship_0_$mean, by_month_accd_0_$mean)
##
## Welch Two Sample t-test
##
## data: by_month_ship_0_$mean and by_month_accd_0_$mean
## t = -0.44857, df = 10.666, p-value = 0.6627
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3464846 0.2295362
## sample estimates:
## mean of x mean of y
## 0.4538465 0.5123207
#Aircraft and accidents
t.test(by_month_air_0$mean, by_month_accd_0$mean)
##
## Welch Two Sample t-test
##
## data: by_month_air_0$mean and by_month_accd_0$mean
## t = 0.7794, df = 14.311, p-value = 0.4484
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.807042 21.039317
## sample estimates:
## mean of x mean of y
## 28.47328 22.85714
t.test(by_month_air_0_$mean, by_month_accd_0_$mean)
##
## Welch Two Sample t-test
##
## data: by_month_air_0_$mean and by_month_accd_0_$mean
## t = 0.3089, df = 13.892, p-value = 0.762
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.266608 0.356250
## sample estimates:
## mean of x mean of y
## 0.5571417 0.5123207
So what are all these math, tables and diagrams about? Do we see something interesting?