library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
url = 'https://raw.githubusercontent.com/AlphaCurse/Military-Plane/main/militaryplaneacceptance.csv'
df = read.csv(url)
head(df)
## ï..Year.or.Month Total VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber
## 1 Grand Total 299,230 3,764 31,890 22,110 40,646
## 2
## 3
## 4 Annually
## 5 1940 6,028 - 61 95 1,038
## 6 1941 19,445 1 318 865 2,935
## Fighters Reconnaissance Transports Trainers Communications
## 1 100,554 3,981 24,059 58,568 13,658
## 2
## 3
## 4
## 5 1,689 123 290 2,731 1
## 6 4,421 727 532 9,376 270
Lets remove rows 2, 3 and 4 to bring the data together. Additionally, rename column 1 to Year and remove the month from the last element of that column.
df = df %>%
filter(!row_number() %in% c(2, 3, 4))
colnames(df)[1] = 'Year'
df$Year[7] = 1945
df
## Year Total VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber
## 1 Grand Total 299,230 3,764 31,890 22,110 40,646
## 2 1940 6,028 - 61 95 1,038
## 3 1941 19,445 1 318 865 2,935
## 4 1942 47,675 3 2,615 4,122 5,894
## 5 1943 85,433 92 9,524 7,624 12,122
## 6 1944 95,272 1,161 15,173 6,782 11,892
## 7 1945 45,377 2,507 4,199 2,622 6,765
## Fighters Reconnaissance Transports Trainers Communications
## 1 100,554 3,981 24,059 58,568 13,658
## 2 1,689 123 290 2,731 1
## 3 4,421 727 532 9,376 270
## 4 10,780 1,468 1,985 17,632 3,176
## 5 24,005 734 7,013 19,942 4,377
## 6 38,895 261 9,834 7,578 3,696
## 7 20,764 668 4,405 1,309 2,138
Converting all character columns to integers for calculations.
df$Total = as.integer(gsub(",","",df$Total))
df$VH.Bombers = as.integer(gsub(",","",df$VH.Bombers))
## Warning: NAs introduced by coercion
df$Heavy.Bombers = as.integer(gsub(",","",df$Heavy.Bombers))
df$Medium.Bombers = as.integer(gsub(",","",df$Medium.Bombers))
df$Light.Bomber = as.integer(gsub(",","",df$Light.Bomber))
df$Fighters = as.integer(gsub(",","",df$Fighters))
df$Reconnaissance = as.integer(gsub(",","",df$Reconnaissance))
df$Transports = as.integer(gsub(",","",df$Transports))
df$Trainers = as.integer(gsub(",","",df$Trainers))
df$Communications = as.integer(gsub(",","",df$Communications))
head(df)
## Year Total VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber
## 1 Grand Total 299230 3764 31890 22110 40646
## 2 1940 6028 NA 61 95 1038
## 3 1941 19445 1 318 865 2935
## 4 1942 47675 3 2615 4122 5894
## 5 1943 85433 92 9524 7624 12122
## 6 1944 95272 1161 15173 6782 11892
## Fighters Reconnaissance Transports Trainers Communications
## 1 100554 3981 24059 58568 13658
## 2 1689 123 290 2731 1
## 3 4421 727 532 9376 270
## 4 10780 1468 1985 17632 3176
## 5 24005 734 7013 19942 4377
## 6 38895 261 9834 7578 3696
Lets replace the NA value with 0.
df$VH.Bombers[2] = 0
df$VH.Bombers = as.integer(df$VH.Bombers)
head(df)
## Year Total VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber
## 1 Grand Total 299230 3764 31890 22110 40646
## 2 1940 6028 0 61 95 1038
## 3 1941 19445 1 318 865 2935
## 4 1942 47675 3 2615 4122 5894
## 5 1943 85433 92 9524 7624 12122
## 6 1944 95272 1161 15173 6782 11892
## Fighters Reconnaissance Transports Trainers Communications
## 1 100554 3981 24059 58568 13658
## 2 1689 123 290 2731 1
## 3 4421 727 532 9376 270
## 4 10780 1468 1985 17632 3176
## 5 24005 734 7013 19942 4377
## 6 38895 261 9834 7578 3696
Since the analysis is of each aircraft, we can elimate the total column and grand total row.
new_df = df %>%
select(Year, VH.Bombers, Heavy.Bombers, Medium.Bombers, Light.Bomber, Fighters, Reconnaissance, Transports, Trainers, Communications)
head(new_df)
## Year VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber Fighters
## 1 Grand Total 3764 31890 22110 40646 100554
## 2 1940 0 61 95 1038 1689
## 3 1941 1 318 865 2935 4421
## 4 1942 3 2615 4122 5894 10780
## 5 1943 92 9524 7624 12122 24005
## 6 1944 1161 15173 6782 11892 38895
## Reconnaissance Transports Trainers Communications
## 1 3981 24059 58568 13658
## 2 123 290 2731 1
## 3 727 532 9376 270
## 4 1468 1985 17632 3176
## 5 734 7013 19942 4377
## 6 261 9834 7578 3696
new_df = new_df %>%
filter(!row_number() %in% c(1))
new_df
## Year VH.Bombers Heavy.Bombers Medium.Bombers Light.Bomber Fighters
## 1 1940 0 61 95 1038 1689
## 2 1941 1 318 865 2935 4421
## 3 1942 3 2615 4122 5894 10780
## 4 1943 92 9524 7624 12122 24005
## 5 1944 1161 15173 6782 11892 38895
## 6 1945 2507 4199 2622 6765 20764
## Reconnaissance Transports Trainers Communications
## 1 123 290 2731 1
## 2 727 532 9376 270
## 3 1468 1985 17632 3176
## 4 734 7013 19942 4377
## 5 261 9834 7578 3696
## 6 668 4405 1309 2138
For the VH Bomber airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$VH.Bombers)
## [1] 1 2 89 1069 1346
For the Heavy Bomber airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Heavy.Bombers)
## [1] 257 2297 6909 5649 -10974
For the Medium Bomber airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Medium.Bombers)
## [1] 770 3257 3502 -842 -4160
For the Light Bomber airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Light.Bomber)
## [1] 1897 2959 6228 -230 -5127
For the Fighter airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Fighters)
## [1] 2732 6359 13225 14890 -18131
For the Reconnaissance airplanes, the sharpest drop was from 1940 to 1942.
diff(new_df$Reconnaissance)
## [1] 604 741 -734 -473 407
For the Transport airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Transports)
## [1] 242 1453 5028 2821 -5429
For the Trainer airplanes, the sharpest drop was from 1942 to 1944.
diff(new_df$Trainers)
## [1] 6645 8256 2310 -12364 -6269
For the Communication airplanes, the sharpest drop was from 1943 to 1945.
diff(new_df$Communications)
## [1] 269 2906 1201 -681 -1558
From all of the drops, we can conclude majority of the airplanes had their sharpest drop between 1943 and 1945.