The purpose of this page was to test the relationship between accidents, number of engines and phases of flight. The Chi-Square test was used because the data are ranked or nominal. The data are available from Kaggle. The dplyr approach, from the tidyverse, was used to manipulate the initial flat file of 81,013 observations. The classes of data were reformatted through mutations. The data set was sanitized by removing rows with blank spaces, “unknown” or “other” designations, data elements that were not available (N/A), and zero recordings of injuries or engines. The 31 variables were reduced to eight: Injury Severity, Aircraft Damage, Number of Engines, Total Fatal Injuries, Total Serious Injuries, Total Minor Injuries, Total Uninjured and Broad Phase of Flight.
library(tidyr)
library(dplyr)
# Load the raw accident records, discard unusable rows, and keep only the
# eight columns used in the analysis.
# NOTE(review): drop_na() runs before select(), so a missing value in any
# column of the file removes the row, not just in the eight kept columns;
# confirm that this is intended.
data <- read.csv("AviationData.csv") %>%
  drop_na() %>%
  filter(
    !Broad.Phase.of.Flight %in% c("UNKNOWN", "", "OTHER"),
    !Aircraft.Damage %in% "",
    !Number.of.Engines %in% "0",
    !Injury.Severity %in% "Unavailable",
    !Total.Fatal.Injuries %in% "0"
  ) %>%
  select(
    Injury.Severity, Aircraft.Damage, Number.of.Engines,
    Total.Fatal.Injuries, Total.Serious.Injuries, Total.Minor.Injuries,
    Total.Uninjured, Broad.Phase.of.Flight
  )
The data were re-ranked to reduce complexity. Total fatal injuries were grouped into a three-level classification of one, two or more than two people. The number of engines was classified as single or twin. The nine phases of flight were consolidated into three categories: take-off, cruise and landing. The re-ranked variables were ordered to match the expected sequence of a flight and rendered as factors where appropriate.
# Recode the raw fields into the three analysis variables, appended to `data`
# in the order Fatalities, Engines, Phase.
data <- data %>%
  mutate(
    # Fatalities per accident: exactly one, exactly two, or anything else.
    Fatalities = ifelse(
      Total.Fatal.Injuries == 1, "One",
      ifelse(Total.Fatal.Injuries == 2, "Two", "Greater")
    ),
    # Alternative two-level coding, kept for reference:
    # Fatalities = ifelse(Total.Fatal.Injuries <= 1, "One", "Greater"),
    # Engine class; any engine count other than 1 or 2 becomes NA.
    Engines = case_when(
      Number.of.Engines == 1 ~ "Single",
      Number.of.Engines == 2 ~ "Twin"
    ),
    # Collapse the broad flight phases into three groups. Patterns are tried
    # top to bottom and the first match wins. The climb pattern previously
    # read "ClIMB" (lowercase l) and could never match; climb rows only ended
    # up in "Takeoff" through the fallback. Any phase not listed here still
    # falls back to "Takeoff".
    Phase = case_when(
      grepl("APPROACH", Broad.Phase.of.Flight) ~ "Landing",
      grepl("CLIMB", Broad.Phase.of.Flight) ~ "Takeoff",
      grepl("CRUISE", Broad.Phase.of.Flight) ~ "Cruise",
      grepl("DESCENT", Broad.Phase.of.Flight) ~ "Landing",
      grepl("GO-", Broad.Phase.of.Flight) ~ "Landing",
      grepl("LAND", Broad.Phase.of.Flight) ~ "Landing",
      grepl("MAN", Broad.Phase.of.Flight) ~ "Cruise",
      grepl("STAN", Broad.Phase.of.Flight) ~ "Takeoff",
      grepl("TAKE", Broad.Phase.of.Flight) ~ "Takeoff",
      TRUE ~ "Takeoff"
    )
  ) %>%
  # Convert the raw counts to factors only after the recodes above, which
  # compare them as numbers.
  mutate(
    Number.of.Engines = as.factor(Number.of.Engines),
    Total.Fatal.Injuries = as.factor(Total.Fatal.Injuries)
  ) %>%
  # Fix the level order so tables and plots follow the natural sequence.
  mutate(
    Phase = factor(Phase, levels = c("Takeoff", "Cruise", "Landing")),
    Engines = factor(Engines, levels = c("Single", "Twin")),
    Fatalities = factor(Fatalities, levels = c("One", "Two", "Greater"))
    # Levels for the two-level alternative: c("One", "Greater")
  )
#devtools::install_github("ggobi/ggally#266")
library(GGally)
#s <- svgstring()
# Pairs plot of the three recoded factors, coloured by engine class. The
# columns are selected by name (previously positions 9:11) so the plot does
# not silently change if columns are added to or removed from `data`.
ggpairs(
  data[c("Fatalities", "Engines", "Phase")],
  mapping = ggplot2::aes(colour = Engines)
)
#htmltools::HTML(s())
#invisible(dev.off())
Frequency tables were constructed from the remaining 138 observations for three relationships among incidents: Flight Phases & Engines, Fatalities & Flight Phases, and Fatalities & Engines.
library(kableExtra)
library(janitor)
# Frequency table of flight phase (columns) by engine class (rows), with row
# and column totals, rendered as a striped HTML table.
data %>%
  count(Engines, Phase, name = "freq") %>%
  # fill = 0L: a combination that never occurs counts as zero instead of NA,
  # so the totals below stay defined.
  spread(key = Phase, value = freq, fill = 0L) %>%
  mutate(Total = Takeoff + Cruise + Landing) %>% # row totals
  adorn_totals("row") %>% # column totals (janitor)
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Phases" = 3, ""))
| Engines | Takeoff | Cruise | Landing | Total |
|---|---|---|---|---|
| Single | 29 | 62 | 23 | 114 |
| Twin | 5 | 9 | 10 | 24 |
| Total | 34 | 71 | 33 | 138 |
# Frequency table of flight phase (columns) by fatality class (rows), with
# row and column totals, rendered as a striped HTML table.
data %>%
  count(Fatalities, Phase, name = "freq") %>%
  # fill = 0L: a combination that never occurs counts as zero instead of NA,
  # so the totals below stay defined.
  spread(key = Phase, value = freq, fill = 0L) %>%
  mutate(Total = Takeoff + Cruise + Landing) %>% # row totals
  adorn_totals("row") %>% # column totals (janitor)
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Phases" = 3, ""))
| Fatalities | Takeoff | Cruise | Landing | Total |
|---|---|---|---|---|
| One | 19 | 39 | 19 | 77 |
| Two | 11 | 21 | 6 | 38 |
| Greater | 4 | 11 | 8 | 23 |
| Total | 34 | 71 | 33 | 138 |
# Frequency table of fatality class (columns) by engine class (rows), with
# row and column totals, rendered as a striped HTML table.
data %>%
  count(Engines, Fatalities, name = "freq") %>%
  # fill = 0L: a combination that never occurs counts as zero instead of NA,
  # so the totals below stay defined.
  spread(key = Fatalities, value = freq, fill = 0L) %>%
  mutate(Total = One + Two + Greater) %>% # row totals
  adorn_totals("row") %>% # column totals (janitor)
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Fatalities" = 3, ""))
| Engines | One | Two | Greater | Total |
|---|---|---|---|---|
| Single | 67 | 32 | 15 | 114 |
| Twin | 10 | 6 | 8 | 24 |
| Total | 77 | 38 | 23 | 138 |
Mosaic plots display the proportional relationship between each pair of variables; each plot is followed by the associated chi-square test.
library(vcd)
# Engines x Phase: tabulate the counts, draw a shaded mosaic plot, and run
# Pearson's chi-squared test on the same pair of factors.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair.
array_EP <- data %>%
  count(Engines, Phase, name = "freq")
P_array_EP <- xtabs(freq ~ ., data = array_EP)
mosaic(P_array_EP, shade = TRUE)
chisq.test(data$Engines, data$Phase)
Pearson's Chi-squared test
data: data$Engines and data$Phase
X-squared = 5.0988, df = 2, p-value = 0.07813
# Engines x Fatalities: tabulate the counts, draw a shaded mosaic plot, and
# run Pearson's chi-squared test on the same pair of factors.
# count() replaces the deprecated group_by_(.dots = ...) call and the row
# count that was previously taken as length(Phase).
array_FE <- data %>%
  count(Engines, Fatalities, name = "freq")
P_array_FE <- xtabs(freq ~ ., data = array_FE)
mosaic(P_array_FE, shade = TRUE)
chisq.test(data$Engines, data$Fatalities)
Pearson's Chi-squared test
data: data$Engines and data$Fatalities
X-squared = 5.9496, df = 2, p-value = 0.05106
# Phase x Fatalities: tabulate the counts, draw a shaded mosaic plot, and run
# Pearson's chi-squared test on the same pair of factors.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair.
array_PF <- data %>%
  count(Phase, Fatalities, name = "freq")
P_array_PF <- xtabs(freq ~ ., data = array_PF)
mosaic(P_array_PF, shade = TRUE)
chisq.test(data$Phase, data$Fatalities)
Pearson's Chi-squared test
data: data$Phase and data$Fatalities
X-squared = 3.1562, df = 4, p-value = 0.532
# Build the three-way contingency table with dimensions in the order
# Phase, Engines, Fatalities, then run the Cochran-Mantel-Haenszel test on it.
# NOTE(review): the table() arguments are unnamed expressions, which is
# presumably why the printed output labels the dimensions A, B, C; confirm.
#CMHtest(df)
library(vcdExtra)
df<-table(data$Phase, data$Engines, data$Fatalities)
# The third dimension (Fatalities) acts as the stratum, per the
# stats::mantelhaen.test documentation.
mantelhaen.test(df)
Cochran-Mantel-Haenszel test
data: df
Cochran-Mantel-Haenszel M^2 = 4.1622, df = 2, p-value = 0.1248
# Print the three-way table flattened: Phase and Fatalities down the rows,
# Engines across the columns.
structable(df)
B Single Twin
A C
Takeoff One 17 2
Two 9 2
Greater 3 1
Cruise One 33 6
Two 19 2
Greater 10 1
Landing One 17 2
Two 4 2
Greater 2 6
# Conditional odds ratios for adjacent Phase pairs against Single:Twin within
# each Fatalities level, reported on the log scale with z tests.
summary(oddsratio(df))
z test of coefficients:
Estimate Std. Error z value Pr(>|z|)
Takeoff:Cruise/Single:Twin|One 0.43532 0.86936 0.5007 0.6166
Cruise:Landing/Single:Twin|One -0.43532 0.86936 -0.5007 0.6166
Takeoff:Cruise/Single:Twin|Two -0.74721 1.07877 -0.6927 0.4885
Cruise:Landing/Single:Twin|Two 1.55814 1.14133 1.3652 0.1722
Takeoff:Cruise/Single:Twin|Greater -1.20397 1.55991 -0.7718 0.4402
Cruise:Landing/Single:Twin|Greater 3.40120 1.32916 2.5589 0.0105 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Woolf test for homogeneity of the odds ratios across strata.
# NOTE(review): confirm that woolf_test() is applicable to this 3 x 2 x 3
# table and which 2 x 2 sub-tables it actually uses.
woolf_test(df)
Woolf-test on Homogeneity of Odds Ratios (no 3-Way assoc.)
data: df
X-squared = 0.77868, df = 2, p-value = 0.6775
# Three-way Engines x Phase x Fatalities counts, shown as a flat mosaic plot
# and as an interactive 3-D mosaic.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair.
# NOTE(review): `array` shares its name with base::array(); consider renaming
# it if nothing else depends on this object.
array <- data %>%
  count(Engines, Phase, Fatalities, name = "freq")
cube <- xtabs(freq ~ ., data = array)
mosaic(cube)
mosaic3d(
  cube,
  spacing = 0.01,
  alpha = 0.7,
  box = FALSE,
  interpolate = c(6, 1)
)
You must enable Javascript to view this page properly.
No statistically significant relationship was found between the number of fatalities, the number of engines and the phase of flight in plane accidents in the chi-square and Cochran-Mantel-Haenszel tests above, which is reassuring.