Data Preparation

The purpose of this page is to test the relationship between accident fatalities, number of engines and phase of flight. The Chi-Square test was used because the data are ranked (ordinal) or nominal. The data were obtained from Kaggle. The dplyr approach, from the tidyverse, was used to manipulate the initial flat file of 81,013 observations. The classes of data were reformatted through mutations. The data set was sanitized by removing rows with blank values, “unknown” or “other” designations, data elements that were not available (N/A) and zero recordings of fatal injuries or engines. The 31 variables were reduced to eight: Injury Severity, Aircraft Damage, Number of Engines, Total Fatal Injuries, Total Serious Injuries, Total Minor Injuries, Total Uninjured and Broad Phase of Flight.

library(tidyr)
library(dplyr)
 
# Load the raw accident records and reduce them to the analysis set.
# drop_na() runs before the column selection, so a row survives only if it is
# complete across every column of the file. The filter then removes rows with
# an unknown/blank/other flight phase, a blank damage rating, a zero engine
# count, an unavailable injury severity, or a zero fatality count. Finally,
# only the eight analysis columns are kept.
data <- read.csv("AviationData.csv") %>%
  drop_na() %>%
  filter(
    !(Broad.Phase.of.Flight %in% c("UNKNOWN", "", "OTHER")),
    !(Aircraft.Damage %in% ""),
    !(Number.of.Engines %in% "0"),
    !(Injury.Severity %in% "Unavailable"),
    !(Total.Fatal.Injuries %in% "0")
  ) %>%
  select(
    Injury.Severity,
    Aircraft.Damage,
    Number.of.Engines,
    Total.Fatal.Injuries,
    Total.Serious.Injuries,
    Total.Minor.Injuries,
    Total.Uninjured,
    Broad.Phase.of.Flight
  )

Simplify Ranking

The data were re-ranked to reduce complexity. Total fatal injuries were grouped into a ternary classification of one, two or more than two people. The number of engines was classified as a binary variable: single or twin engine. The nine phases of flight were consolidated into three categories: take-off, cruise and landing. The re-ranked variables were ordered to match the expected sequence of a flight and converted to factors where appropriate.

# Derive the three analysis factors from the raw columns.
# The new columns are appended in the order Fatalities, Engines, Phase, and the
# raw engine / fatality counts are converted to factors last so that the
# numeric comparisons below still see numbers.
data <- data %>%
  mutate(
    # Fatality count: exactly one, exactly two, or anything else ("Greater").
    # A missing count stays missing instead of falling into "Greater".
    # Alternative two-level split, kept for reference:
    #   Fatalities = ifelse(Total.Fatal.Injuries <= 1, "One", "Greater")
    Fatalities = factor(
      case_when(
        is.na(Total.Fatal.Injuries) ~ NA_character_,
        Total.Fatal.Injuries == 1 ~ "One",
        Total.Fatal.Injuries == 2 ~ "Two",
        TRUE ~ "Greater"
      ),
      levels = c("One", "Two", "Greater")
    ),
    # Engine class: only one- and two-engine aircraft are labelled; any other
    # engine count has no matching rule and becomes NA.
    Engines = factor(
      case_when(
        Number.of.Engines == 1 ~ "Single",
        Number.of.Engines == 2 ~ "Twin"
      ),
      levels = c("Single", "Twin")
    ),
    # Flight phase: the rules are tried top to bottom and the first matching
    # pattern wins. The climb pattern was previously misspelled ("ClIMB", with
    # a lowercase l) and never matched; climb rows only reached "Takeoff" via
    # the catch-all. It is now matched explicitly, with the same result.
    Phase = factor(
      case_when(
        grepl("APPROACH", Broad.Phase.of.Flight) ~ "Landing",
        grepl("CLIMB", Broad.Phase.of.Flight) ~ "Takeoff",
        grepl("CRUISE", Broad.Phase.of.Flight) ~ "Cruise",
        grepl("DESCENT", Broad.Phase.of.Flight) ~ "Landing",
        grepl("GO-", Broad.Phase.of.Flight) ~ "Landing",
        grepl("LAND", Broad.Phase.of.Flight) ~ "Landing",
        grepl("MAN", Broad.Phase.of.Flight) ~ "Cruise",
        grepl("STAN", Broad.Phase.of.Flight) ~ "Takeoff",
        grepl("TAKE", Broad.Phase.of.Flight) ~ "Takeoff",
        # Anything not matched above is treated as part of take-off.
        TRUE ~ "Takeoff"
      ),
      levels = c("Takeoff", "Cruise", "Landing")
    ),
    Number.of.Engines = as.factor(Number.of.Engines),
    Total.Fatal.Injuries = as.factor(Total.Fatal.Injuries)
  )

#data %>% mutate(Fatalities= factor(Fatalities, levels=c("One",  "Greater"))) -> data #set order
#devtools::install_github("ggobi/ggally#266")
library(GGally)
#s <- svgstring()
# Pairwise overview of the three derived factors, coloured by engine class.
# The columns are picked by name rather than by position (9:11) so the plot
# cannot silently pick up other columns if the upstream selection changes.
ggpairs(data[c("Fatalities", "Engines", "Phase")],
        mapping = ggplot2::aes(colour = Engines))

#htmltools::HTML(s())

#invisible(dev.off())

Frequency Tables

Frequency tables were constructed from the remaining 138 observations for three relationships: Flight Phases & Engines, Fatalities & Flight Phases, and Fatalities & Engines.

library(kableExtra)
library(janitor)
# Engine-by-phase frequency table with totals, rendered as a striped HTML table.
# tabyl() cross-tabulates the two factors directly, so an empty combination is
# reported as 0 instead of NA and the totals can never become NA.
# adorn_totals() then appends both the "Total" column and the "Total" row.
data %>%
  tabyl(Engines, Phase) %>%
  adorn_totals(c("row", "col")) %>%
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Phases" = 3, ""))
Phases
Engines Takeoff Cruise Landing Total
Single 29 62 23 114
Twin 5 9 10 24
Total 34 71 33 138
# Fatalities-by-phase frequency table with totals, rendered as a striped HTML
# table. tabyl() cross-tabulates the two factors directly, so an empty
# combination is reported as 0 instead of NA and the totals can never become
# NA. adorn_totals() appends both the "Total" column and the "Total" row.
data %>%
  tabyl(Fatalities, Phase) %>%
  adorn_totals(c("row", "col")) %>%
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Phases" = 3, ""))
Phases
Fatalities Takeoff Cruise Landing Total
One 19 39 19 77
Two 11 21 6 38
Greater 4 11 8 23
Total 34 71 33 138
# Engine-by-fatalities frequency table with totals, rendered as a striped HTML
# table. tabyl() cross-tabulates the two factors directly, so an empty
# combination is reported as 0 instead of NA and the totals can never become
# NA. adorn_totals() appends both the "Total" column and the "Total" row.
data %>%
  tabyl(Engines, Fatalities) %>%
  adorn_totals(c("row", "col")) %>%
  kable("html") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  add_header_above(c("", "Fatalities" = 3, ""))
Fatalities
Engines One Two Greater Total
Single 67 32 15 114
Twin 10 6 8 24
Total 77 38 23 138

Mosaic Plots & Chi-Square Test

Mosaic plots display the proportional relationship between each pair of variables; each plot is followed by the associated chi-square test.

library(vcd)
# Count the rows in each Engines x Phase combination.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair and
# returns an ungrouped table with the same Engines, Phase and freq columns.
array_EP <- data %>%
  count(Engines, Phase, name = "freq")

# Turn the long counts into a two-way contingency table; combinations with no
# rows are filled with 0.
P_array_EP <- xtabs(freq ~ ., data = array_EP)

# Mosaic plot of the contingency table with shading enabled.
mosaic(P_array_EP, shade = TRUE)

# Pearson chi-square test of independence between engine class and phase.
chisq.test(data$Engines,data$Phase)

    Pearson's Chi-squared test

data:  data$Engines and data$Phase
X-squared = 5.0988, df = 2, p-value = 0.07813
# Count the rows in each Engines x Fatalities combination.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair and
# returns an ungrouped table with the same Engines, Fatalities and freq columns.
array_FE <- data %>%
  count(Engines, Fatalities, name = "freq")

# Turn the long counts into a two-way contingency table; combinations with no
# rows are filled with 0.
P_array_FE <- xtabs(freq ~ ., data = array_FE)
mosaic(P_array_FE, shade = TRUE)

# Pearson chi-square test of independence between engine class and fatalities.
chisq.test(data$Engines, data$Fatalities)

    Pearson's Chi-squared test

data:  data$Engines and data$Fatalities
X-squared = 5.9496, df = 2, p-value = 0.05106
# Count the rows in each Phase x Fatalities combination.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair and
# returns an ungrouped table with the same Phase, Fatalities and freq columns.
array_PF <- data %>%
  count(Phase, Fatalities, name = "freq")

# Turn the long counts into a two-way contingency table; combinations with no
# rows are filled with 0.
P_array_PF <- xtabs(freq ~ ., data = array_PF)
mosaic(P_array_PF, shade = TRUE)

# Pearson chi-square test of independence between flight phase and fatalities.
chisq.test(data$Phase, data$Fatalities) 

    Pearson's Chi-squared test

data:  data$Phase and data$Fatalities
X-squared = 3.1562, df = 4, p-value = 0.532
#CMHtest(df)

Three Dimensional Array

library(vcdExtra)




# Three-way contingency table: Phase x Engines x Fatalities.
# The dimensions are named so that printouts of this table are labelled with
# the variable names instead of generic placeholders.
df <- table(
  Phase = data$Phase,
  Engines = data$Engines,
  Fatalities = data$Fatalities
)

# Cochran-Mantel-Haenszel test on the three-way table; the third dimension
# (Fatalities) acts as the stratifying variable.
mantelhaen.test(df)

    Cochran-Mantel-Haenszel test

data:  df
Cochran-Mantel-Haenszel M^2 = 4.1622, df = 2, p-value = 0.1248
# Print the three-way table as a flat, two-dimensional layout.
df %>% structable()
                B Single Twin
A       C                    
Takeoff One           17    2
        Two            9    2
        Greater        3    1
Cruise  One           33    6
        Two           19    2
        Greater       10    1
Landing One           17    2
        Two            4    2
        Greater        2    6
# Odds ratios computed from the three-way table, followed by the summary table
# of their coefficient tests.
df %>%
  oddsratio() %>%
  summary()

z test of coefficients:

                                   Estimate Std. Error z value Pr(>|z|)  
Takeoff:Cruise/Single:Twin|One      0.43532    0.86936  0.5007   0.6166  
Cruise:Landing/Single:Twin|One     -0.43532    0.86936 -0.5007   0.6166  
Takeoff:Cruise/Single:Twin|Two     -0.74721    1.07877 -0.6927   0.4885  
Cruise:Landing/Single:Twin|Two      1.55814    1.14133  1.3652   0.1722  
Takeoff:Cruise/Single:Twin|Greater -1.20397    1.55991 -0.7718   0.4402  
Cruise:Landing/Single:Twin|Greater  3.40120    1.32916  2.5589   0.0105 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Woolf test for homogeneity of odds ratios on the three-way table.
# NOTE(review): vcd appears to document woolf_test() for 2 x 2 x k tables, but
# df is built from three Phase levels x two Engines levels x three Fatalities
# levels. Confirm the test is meaningful for this shape.
woolf_test(df)

    Woolf-test on Homogeneity of Odds Ratios (no 3-Way assoc.)

data:  df
X-squared = 0.77868, df = 2, p-value = 0.6775
# Count the rows in each Engines x Phase x Fatalities combination.
# count() replaces the deprecated group_by_(.dots = ...) + summarize() pair and
# returns an ungrouped table with the same three factor columns plus freq.
# NOTE(review): the name `array` shadows base::array as a variable; it is kept
# because other code may refer to it.
array <- data %>%
  count(Engines, Phase, Fatalities, name = "freq")

# Three-way contingency table; combinations with no rows are filled with 0.
cube <- xtabs(freq ~ ., data = array)

# Flat mosaic plot of the three-way table.
mosaic(cube)

# Three-dimensional mosaic of the same table.
mosaic3d(
  cube,
  spacing = 0.01,
  alpha = 0.7,
  box = FALSE,
  interpolate = c(6, 1)
)

You must enable Javascript to view this page properly.

Conclusion

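No statistically significant relationship (at the 0.05 level) was found between the number of fatalities, the number of engines and the phase of flight in the pairwise chi-square tests or the Cochran-Mantel-Haenszel test, which is reassuring. The one exception is the conditional odds ratio for Cruise:Landing by engine class within the "Greater" fatalities group (p = 0.0105), which rests on very small cell counts and should be interpreted with caution.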