segregating long but non-risky landing distance from risky distance

Packages Required

library(tidyverse)  #to visualize, transform, input, tidy and join data
library(dplyr)      #data wrangling
library(stringr)    #string related functions
library(kableExtra) #to create HTML Table
library(DT)         #to preview the data sets
library(lubridate)  #to apply the date functions
library(xlsx)       #to load excel files
library(ROCR)       #to use ROC curves
library(faraway)    #to use the ilogit function

Long Landing

Loaded the datasets and did initial data cleaning (detailed steps were performed in the first assignment).

# Load both FAA extracts and stack them into one data frame. bind_rows()
# fills any column that is absent from one of the files with NA.
faa1 <- read.xlsx("FAA1.xls", sheetName = "FAA1")
faa2 <- read.xlsx("FAA2_2.xls", sheetName = "Sheet1")
faa <- bind_rows(faa1, faa2)

# Row indices of repeated observations, compared on every column except
# duration.
check <- faa %>%
  select(-duration) %>%
  duplicated() %>%
  which()

# Guard the drop: when there are no duplicates `check` is integer(0), and
# faa[-integer(0), ] selects zero rows instead of keeping all of them.
if (length(check) > 0) {
  faa <- faa[-check, ]
}

# Keep only observations inside the accepted ranges. A missing duration is
# tolerated here; it is imputed below.
faa_check <- faa %>%
  filter(
    duration > 40 | is.na(duration),
    speed_ground >= 30,
    speed_ground <= 140,
    height >= 6,
    distance < 6000
  )
faa <- faa_check

# Mean-impute missing durations into a separate column so the raw duration
# column stays untouched.
faa$duration_corrected <- ifelse(
  is.na(faa$duration),
  mean(faa$duration, na.rm = TRUE),
  faa$duration
)

Step 1:

# Build the two binary responses from landing distance (long: > 2500,
# risky: > 3000) and store aircraft as a factor.
faa <- faa %>%
  mutate(
    long.landing = as.factor(ifelse(distance > 2500, 1, 0)),
    risky.landing = as.factor(ifelse(distance > 3000, 1, 0)),
    aircraft = as.factor(aircraft)
  )

# The raw duration and distance columns are dropped once the derived columns
# exist, so later models cannot pick them up.
faa[c("duration", "distance")] <- NULL

Step 2: Histogram to show distribution of “long.landing”

# Bar chart of how many observations fall in each long.landing class.
ggplot(data = faa, mapping = aes(x = long.landing)) +
  geom_bar()

Only about 12% of the flights have a long landing.

# Share of observations in each long.landing class, rounded to two decimals.
round(prop.table(table(faa$long.landing)),2)
## 
##    0    1 
## 0.88 0.12

Step 3:

# One single-predictor logistic regression of long.landing per candidate
# variable. The formulas name columns and take the data through `data = faa`
# (instead of embedding faa$... vectors), so coefficient rows carry the plain
# column names and the fits can be used with predict(newdata = ...).
mdl_duration <- glm(long.landing ~ duration_corrected, family = "binomial", data = faa)
mdl_speedgrnd <- glm(long.landing ~ speed_ground, family = "binomial", data = faa)
mdl_height <- glm(long.landing ~ height, family = "binomial", data = faa)
mdl_pitch <- glm(long.landing ~ pitch, family = "binomial", data = faa)
mdl_nopasg <- glm(long.landing ~ no_pasg, family = "binomial", data = faa)
mdl_speedair <- glm(long.landing ~ speed_air, family = "binomial", data = faa)
mdl_aircraft <- glm(long.landing ~ aircraft, family = "binomial", data = faa)


# Estimate and p-value (columns 1 and 4 of the coefficient matrix) taken from
# the slope row (row 2) of each single-predictor model.
duration <- summary(mdl_duration)$coefficients[2, c(1, 4)]
speed_ground <- summary(mdl_speedgrnd)$coefficients[2, c(1, 4)]
height <- summary(mdl_height)$coefficients[2, c(1, 4)]
pitch <- summary(mdl_pitch)$coefficients[2, c(1, 4)]
no_pasg <- summary(mdl_nopasg)$coefficients[2, c(1, 4)]
speed_air <- summary(mdl_speedair)$coefficients[2, c(1, 4)]
aircraft_boeing <- summary(mdl_aircraft)$coefficients[2, c(1, 4)]
# Row 1 of the aircraft model is its intercept (the log-odds for the
# reference aircraft level), not a slope; it is reported under the
# "Aircraft-Airbus" label.
aircraft_airbus <- summary(mdl_aircraft)$coefficients[1, c(1, 4)]

# Unrounded estimates, in the same order as variable_names below.
estimates <- c(
  duration[1], speed_ground[1], height[1], pitch[1],
  no_pasg[1], speed_air[1], aircraft_boeing[1], aircraft_airbus[1]
)
coefficients <- round(estimates, digits = 3)

# Exponentiate the unrounded estimates: rounding first would carry the
# rounding error into the reported odds ratios.
odds_ratio <- round(exp(estimates), 3)

p_value <- c(
  duration[2], speed_ground[2], height[2], pitch[2],
  no_pasg[2], speed_air[2], aircraft_boeing[2], aircraft_airbus[2]
)
p_value <- round(p_value, digits = 3)

variable_names <- c(
  "Duration", "Ground Speed", "Height", "Pitch",
  "No. of Passengers", "Air Speed", "Aircraft-Boeing", "Aircraft-Airbus"
)

table_2 <- data.frame(variable_names, coefficients, odds_ratio, p_value)
# The sign is read from the unrounded estimate so that a small positive slope
# that rounds to 0 is not reported as "Negative".
table_2$slope_direction <- ifelse(estimates > 0, "Positive", "Negative")
# Most significant variables first.
table_2 <- table_2 %>%
  arrange(p_value)

table_2
##      variable_names coefficients odds_ratio p_value slope_direction
## 1      Ground Speed        0.472      1.603   0.000        Positive
## 2         Air Speed        0.512      1.669   0.000        Positive
## 3   Aircraft-Boeing        0.864      2.373   0.000        Positive
## 4   Aircraft-Airbus       -2.428      0.088   0.000        Negative
## 5             Pitch        0.401      1.493   0.047        Positive
## 6            Height        0.009      1.009   0.422        Positive
## 7 No. of Passengers       -0.007      0.993   0.606        Negative
## 8          Duration       -0.001      0.999   0.626        Negative

Step 4:

we see speed_ground, speed_air, aircraft type, pitch and height appear to be positively correlated to long_landing.

Let’s visualize it -

# Density-scaled histogram of one predictor, split by long.landing.
#   x: vector of predictor values, one per row of the global `faa`
#      (for example faa$height).
# Returns the ggplot object; bars for the two classes sit side by side.
check_plot <- function(x) {
  histogram_layer <- geom_histogram(aes(y = ..density..), position = "dodge")
  ggplot(data = faa, mapping = aes(x = x, fill = long.landing)) +
    histogram_layer
}

The probability of long landing increases with the increase in speed_ground

check_plot(faa$speed_ground)

Probability of long landing increases with increase in speed of air

check_plot(faa$speed_air)

Long landing isn’t affected by pitch of aircraft

check_plot(faa$pitch)

Long landing seem to be unaffected by height of aircraft

check_plot(faa$height)

Step 5:

I observed that ground speed, aircraft type and height are significant in the full model. Pitch is not significant at the 5% level here (p ≈ 0.079), although its p-value in the single-variable table was 0.047.

# Full logistic model for long.landing using every candidate predictor except
# speed_air, followed by its coefficient summary.
full_model <- glm(long.landing ~ aircraft + 
                    no_pasg + speed_ground + height + 
                    pitch  + duration_corrected, family = "binomial",
                  data = faa)
summary(full_model)
## 
## Call:
## glm(formula = long.landing ~ aircraft + no_pasg + speed_ground + 
##     height + pitch + duration_corrected, family = "binomial", 
##     data = faa)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.16087  -0.00052   0.00000   0.00000   2.32238  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.215e+02  2.500e+01  -4.858 1.19e-06 ***
## aircraftboeing      5.192e+00  1.200e+00   4.328 1.51e-05 ***
## no_pasg             3.423e-03  5.461e-02   0.063 0.950023    
## speed_ground        1.033e+00  2.082e-01   4.960 7.03e-07 ***
## height              2.531e-01  7.253e-02   3.490 0.000483 ***
## pitch               1.484e+00  8.454e-01   1.755 0.079247 .  
## duration_corrected  5.287e-03  7.864e-03   0.672 0.501355    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 622.778  on 830  degrees of freedom
## Residual deviance:  52.746  on 824  degrees of freedom
## AIC: 66.746
## 
## Number of Fisher Scoring iterations: 12

Step 6:

Based on AIC model, results seem to be consistent with the table observed.

# Modelling frame for long.landing: the other response (risky.landing) and
# speed_air are removed.
faa_clean <-  select(faa, -risky.landing , -speed_air)
# Lower bound of the search: intercept-only model.
model01 <- glm(long.landing ~ 1,data=faa_clean, family = "binomial")
# Upper bound of the search: all six remaining predictors.
model61 <- glm(long.landing ~ speed_ground + aircraft + height + no_pasg + duration_corrected + pitch,data=faa_clean, family = "binomial")
# Forward stepwise selection between the two bounds using stepAIC's default
# penalty (AIC).
model_1 <- MASS::stepAIC(model01,direction="forward",scope=list(upper=model61,lower=model01))
## Start:  AIC=624.78
## long.landing ~ 1
## 
##                      Df Deviance    AIC
## + speed_ground        1   115.47 119.47
## + aircraft            1   606.55 610.55
## + pitch               1   618.79 622.79
## <none>                    622.78 624.78
## + height              1   622.13 626.13
## + no_pasg             1   622.51 626.51
## + duration_corrected  1   622.54 626.54
## 
## Step:  AIC=119.47
## long.landing ~ speed_ground
## 
##                      Df Deviance     AIC
## + aircraft            1   84.665  90.665
## + height              1  100.459 106.459
## + pitch               1  105.527 111.527
## <none>                   115.470 119.470
## + duration_corrected  1  115.378 121.378
## + no_pasg             1  115.468 121.468
## 
## Step:  AIC=90.66
## long.landing ~ speed_ground + aircraft
## 
##                      Df Deviance    AIC
## + height              1   57.047 65.047
## + pitch               1   81.309 89.309
## <none>                    84.665 90.665
## + duration_corrected  1   83.164 91.164
## + no_pasg             1   84.219 92.219
## 
## Step:  AIC=65.05
## long.landing ~ speed_ground + aircraft + height
## 
##                      Df Deviance    AIC
## + pitch               1   53.204 63.204
## <none>                    57.047 65.047
## + duration_corrected  1   56.288 66.288
## + no_pasg             1   57.031 67.031
## 
## Step:  AIC=63.2
## long.landing ~ speed_ground + aircraft + height + pitch
## 
##                      Df Deviance    AIC
## <none>                    53.204 63.204
## + duration_corrected  1   52.750 64.750
## + no_pasg             1   53.204 65.204
summary(model_1)
## 
## Call:
## glm(formula = long.landing ~ speed_ground + aircraft + height + 
##     pitch, family = "binomial", data = faa_clean)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.20284  -0.00054   0.00000   0.00000   2.35719  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -119.77598   24.41821  -4.905 9.33e-07 ***
## speed_ground      1.02266    0.20290   5.040 4.65e-07 ***
## aircraftboeing    5.13443    1.18091   4.348 1.37e-05 ***
## height            0.25795    0.06861   3.760  0.00017 ***
## pitch             1.53751    0.84109   1.828  0.06755 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 622.778  on 830  degrees of freedom
## Residual deviance:  53.204  on 826  degrees of freedom
## AIC: 63.204
## 
## Number of Fisher Scoring iterations: 12

Step 7:

We further use BIC measure to find “best subsets” model. Pitch is not chosen by this model. This may be because BIC penalizes strongly

# Same forward search with penalty k = log(n), which turns the criterion into
# BIC; stepAIC's printed trace still labels the column "AIC".
model_2 <- MASS::stepAIC(model01,direction="forward",scope=list(upper=model61,lower=model01), k = log(nrow(faa_clean)))
## Start:  AIC=629.5
## long.landing ~ 1
## 
##                      Df Deviance    AIC
## + speed_ground        1   115.47 128.92
## + aircraft            1   606.55 620.00
## <none>                    622.78 629.50
## + pitch               1   618.79 632.24
## + height              1   622.13 635.58
## + no_pasg             1   622.51 635.96
## + duration_corrected  1   622.54 635.98
## 
## Step:  AIC=128.92
## long.landing ~ speed_ground
## 
##                      Df Deviance    AIC
## + aircraft            1   84.665 104.83
## + height              1  100.459 120.63
## + pitch               1  105.527 125.69
## <none>                   115.470 128.92
## + duration_corrected  1  115.378 135.54
## + no_pasg             1  115.468 135.64
## 
## Step:  AIC=104.83
## long.landing ~ speed_ground + aircraft
## 
##                      Df Deviance     AIC
## + height              1   57.047  83.937
## <none>                    84.665 104.832
## + pitch               1   81.309 108.200
## + duration_corrected  1   83.164 110.054
## + no_pasg             1   84.219 111.110
## 
## Step:  AIC=83.94
## long.landing ~ speed_ground + aircraft + height
## 
##                      Df Deviance    AIC
## <none>                    57.047 83.937
## + pitch               1   53.204 86.817
## + duration_corrected  1   56.288 89.901
## + no_pasg             1   57.031 90.644
summary(model_2)
## 
## Call:
## glm(formula = long.landing ~ speed_ground + aircraft + height, 
##     family = "binomial", data = faa_clean)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.43442  -0.00117   0.00000   0.00000   2.57435  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -102.95437   19.22882  -5.354 8.59e-08 ***
## speed_ground      0.92657    0.17242   5.374 7.70e-08 ***
## aircraftboeing    5.04813    1.11520   4.527 5.99e-06 ***
## height            0.23106    0.05959   3.877 0.000106 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 622.778  on 830  degrees of freedom
## Residual deviance:  57.047  on 827  degrees of freedom
## AIC: 65.047
## 
## Number of Fisher Scoring iterations: 11

Step 8:

The variables and their contribution in prediction of long landing is -

table_2
##      variable_names coefficients odds_ratio p_value slope_direction
## 1      Ground Speed        0.472      1.603   0.000        Positive
## 2         Air Speed        0.512      1.669   0.000        Positive
## 3   Aircraft-Boeing        0.864      2.373   0.000        Positive
## 4   Aircraft-Airbus       -2.428      0.088   0.000        Negative
## 5             Pitch        0.401      1.493   0.047        Positive
## 6            Height        0.009      1.009   0.422        Positive
## 7 No. of Passengers       -0.007      0.993   0.606        Negative
## 8          Duration       -0.001      0.999   0.626        Negative

The various plots that helped us understand the relationship between the variables better-

The probability of long landing increases with the increase in speed_ground

check_plot(faa$speed_ground)

Probability of long landing increases with increase in speed of air

check_plot(faa$speed_air)

Long landing isn’t affected by pitch of aircraft

check_plot(faa$pitch)

Long landing seem to be unaffected by height of aircraft

check_plot(faa$height)

Based on our analysis, our final model is: long.landing ~ speed_ground + aircraft + height

Risky Landing

Step 9: Repeating all the steps for risky landing—

# Bar chart of how many observations fall in each risky.landing class.
ggplot(data = faa, mapping = aes(x = risky.landing)) +
  geom_bar()

Only about 7% of the flights have a risky landing.

# Share of observations in each risky.landing class, rounded to two decimals.
round(prop.table(table(faa$risky.landing)),2)
## 
##    0    1 
## 0.93 0.07

The speed of ground, air speed and aircraft make seems to be likely to affect the risky landing.

##      variable_names coefficients odds_ratio p_value slope_direction
## 1      Ground Speed        0.614      1.848   0.000        Positive
## 2         Air Speed        0.870      2.387   0.000        Positive
## 3   Aircraft-Boeing        1.002      2.724   0.000        Positive
## 4   Aircraft-Airbus       -3.108      0.045   0.000        Negative
## 5             Pitch        0.371      1.449   0.143        Positive
## 6 No. of Passengers       -0.025      0.975   0.154        Negative
## 7          Duration       -0.001      0.999   0.674        Negative
## 8            Height       -0.002      0.998   0.871        Negative

In the table above, speed_ground, speed_air and aircraft type have significant positive coefficients for risky.landing; pitch is positive but not significant, and height is slightly negative.

Let’s visualize it -

# Density-scaled histogram of one predictor, split by risky.landing.
#   x: vector of predictor values, one per row of the global `faa`
#      (for example faa$height).
# Returns the ggplot object; bars for the two classes sit side by side.
check_plot_risky <- function(x) {
  histogram_layer <- geom_histogram(aes(y = ..density..), position = "dodge")
  ggplot(data = faa, mapping = aes(x = x, fill = risky.landing)) +
    histogram_layer
}

The probability of risky landing increases with the increase in speed_ground

check_plot_risky(faa$speed_ground)

Probability of risky landing increases with increase in speed of air

check_plot_risky(faa$speed_air)

Risky landing isn’t affected by pitch of aircraft

check_plot_risky(faa$pitch)

Risky landing seem to be unaffected by height of aircraft

check_plot_risky(faa$height)

I observed that speed of ground and make of aircraft are significant. Results seem to be consistent with our observations before

# Full logistic model for risky.landing using every candidate predictor
# except speed_air, followed by its coefficient summary.
# NOTE: this reuses the name full_model, replacing the long.landing fit.
full_model <- glm(risky.landing ~ aircraft + 
                    no_pasg + speed_ground + height + 
                    pitch  + duration_corrected, family = "binomial",
                  data = faa)
summary(full_model)
## 
## Call:
## glm(formula = risky.landing ~ aircraft + no_pasg + speed_ground + 
##     height + pitch + duration_corrected, family = "binomial", 
##     data = faa)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.46375  -0.00009   0.00000   0.00000   1.85765  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.035e+02  2.801e+01  -3.697 0.000218 ***
## aircraftboeing      4.457e+00  1.547e+00   2.881 0.003970 ** 
## no_pasg            -8.620e-02  6.035e-02  -1.428 0.153201    
## speed_ground        9.488e-01  2.465e-01   3.848 0.000119 ***
## height              4.310e-02  4.624e-02   0.932 0.351382    
## pitch               6.139e-01  7.982e-01   0.769 0.441878    
## duration_corrected  7.952e-04  1.224e-02   0.065 0.948209    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 436.043  on 830  degrees of freedom
## Residual deviance:  36.473  on 824  degrees of freedom
## AIC: 50.473
## 
## Number of Fisher Scoring iterations: 12

Results seem to be inconsistent with our observation before. Based on AIC measure, number of passengers seems to be a significant variable whereas if we look at the p_value, it doesn’t support our hypothesis.

# Modelling frame for risky.landing: the other response (long.landing) and
# speed_air are removed.
faa_clean2 <-  select(faa, -long.landing , -speed_air)
# Lower bound of the search: intercept-only model.
model_null2 <- glm(risky.landing ~ 1,data=faa_clean2, family = "binomial")
# Upper bound of the search: all six remaining predictors.
model_full2 <- glm(risky.landing ~ speed_ground + aircraft + height + no_pasg + duration_corrected + pitch,data=faa_clean2, family = "binomial")
# Forward stepwise selection between the two bounds using stepAIC's default
# penalty (AIC).
model_3 <- MASS::stepAIC(model_null2,direction="forward",scope=list(upper=model_full2,lower=model_null2))
## Start:  AIC=438.04
## risky.landing ~ 1
## 
##                      Df Deviance    AIC
## + speed_ground        1    58.93  62.93
## + aircraft            1   422.74 426.74
## + pitch               1   433.89 437.89
## + no_pasg             1   434.00 438.00
## <none>                    436.04 438.04
## + duration_corrected  1   435.86 439.86
## + height              1   436.02 440.02
## 
## Step:  AIC=62.93
## risky.landing ~ speed_ground
## 
##                      Df Deviance    AIC
## + aircraft            1   40.097 46.097
## + pitch               1   53.079 59.079
## <none>                    58.931 62.931
## + no_pasg             1   58.318 64.318
## + height              1   58.667 64.667
## + duration_corrected  1   58.883 64.883
## 
## Step:  AIC=46.1
## risky.landing ~ speed_ground + aircraft
## 
##                      Df Deviance    AIC
## + no_pasg             1   37.707 45.707
## <none>                    40.097 46.097
## + height              1   39.402 47.402
## + duration_corrected  1   39.884 47.884
## + pitch               1   39.928 47.928
## 
## Step:  AIC=45.71
## risky.landing ~ speed_ground + aircraft + no_pasg
## 
##                      Df Deviance    AIC
## <none>                    37.707 45.707
## + height              1   37.099 47.099
## + pitch               1   37.449 47.449
## + duration_corrected  1   37.693 47.693
summary(model_3)
## 
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft + no_pasg, 
##     family = "binomial", data = faa_clean2)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.33913  -0.00009   0.00000   0.00000   1.87810  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -99.90780   25.57993  -3.906 9.39e-05 ***
## speed_ground     0.94963    0.23559   4.031 5.56e-05 ***
## aircraftboeing   4.64188    1.47520   3.147  0.00165 ** 
## no_pasg         -0.08462    0.05732  -1.476  0.13987    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 436.043  on 830  degrees of freedom
## Residual deviance:  37.707  on 827  degrees of freedom
## AIC: 45.707
## 
## Number of Fisher Scoring iterations: 12

Number of passengers, which seemed to be significant when considering AIC as the method for variable selection seem to be not significant when considering BIC. This may be because BIC penalizes strongly

# Same forward search with penalty k = log(n), which turns the criterion into
# BIC; stepAIC's printed trace still labels the column "AIC".
model_4 <- MASS::stepAIC(model_null2,direction="forward",scope=list(upper=model_full2,lower=model_null2), k = log(nrow(faa_clean2)))
## Start:  AIC=442.77
## risky.landing ~ 1
## 
##                      Df Deviance    AIC
## + speed_ground        1    58.93  72.38
## + aircraft            1   422.74 436.18
## <none>                    436.04 442.77
## + pitch               1   433.89 447.34
## + no_pasg             1   434.00 447.45
## + duration_corrected  1   435.86 449.31
## + height              1   436.02 449.46
## 
## Step:  AIC=72.38
## risky.landing ~ speed_ground
## 
##                      Df Deviance    AIC
## + aircraft            1   40.097 60.264
## <none>                    58.931 72.376
## + pitch               1   53.079 73.247
## + no_pasg             1   58.318 78.486
## + height              1   58.667 78.835
## + duration_corrected  1   58.883 79.051
## 
## Step:  AIC=60.26
## risky.landing ~ speed_ground + aircraft
## 
##                      Df Deviance    AIC
## <none>                    40.097 60.264
## + no_pasg             1   37.707 64.597
## + height              1   39.402 66.292
## + duration_corrected  1   39.884 66.775
## + pitch               1   39.928 66.819
summary(model_4)
## 
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft, family = "binomial", 
##     data = faa_clean2)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.24398  -0.00011   0.00000   0.00000   1.61021  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -102.0772    24.7751  -4.120 3.79e-05 ***
## speed_ground      0.9263     0.2248   4.121 3.78e-05 ***
## aircraftboeing    4.0190     1.2494   3.217   0.0013 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 436.043  on 830  degrees of freedom
## Residual deviance:  40.097  on 828  degrees of freedom
## AIC: 46.097
## 
## Number of Fisher Scoring iterations: 12

Step 11: Conclusions for both the type of landings -

  1. Speed of ground, height and type of aircraft are significant predictors of long landing.

  2. Only speed of ground and type of aircraft are significant predictors of risky landing and height of aircraft seems to be not playing a role when landing is risky.

  3. The BIC-selected models have BIC 83.937 (long landing) and 60.264 (risky landing); their AIC values are 65.047 and 46.097 respectively.

  4. AIC of the AIC-selected model is 63.204 for long landing and 45.707 for risky landing.

Model Assessment

Step 12: ROC Curves

Plot for risky landing is marginally smoother than that for long landing.

# Frames holding the observed responses, with the other response and
# speed_air removed.
data1 <- select(faa, -risky.landing, -speed_air)
data2 <- select(faa, -long.landing, -speed_air)

# ROC for the long-landing model. predict() without newdata returns the
# linear predictor for the fitting rows, which ROCR uses as ranking scores.
pred1 <- prediction(predict(model_2), data1$long.landing)
roc1 <- performance(pred1, "tpr", "fpr")
# The title is set on the first plot() call: performance() has no use for
# `main`, and a plot() call with add = TRUE does not draw a title.
plot(roc1, main = "ROC for long landing and risky landing")

# ROC for the risky-landing model, overlaid on the same axes and coloured by
# cutoff so the two curves can be told apart.
pred2 <- prediction(predict(model_4), data2$risky.landing)
roc2 <- performance(pred2, "tpr", "fpr")
plot(roc2, add = TRUE, colorize = TRUE)

The AUC in case of long landing is 99.8% and that for risky landing is 99.9%.

#long landing
# Area under the long-landing ROC curve; the value is the first entry of the
# performance object's y.values slot.
auc_ROCR1 <- performance(pred1, "auc")
auc_ROCR1@y.values[[1]]
## [1] 0.998333
 #risky landing 
# Area under the risky-landing ROC curve; the value is the first entry of the
# performance object's y.values slot.
auc_ROCR2 <- performance(pred2, "auc")
auc_ROCR2@y.values[[1]]
## [1] 0.9986161

Step 13: Prediction

Given few parameters,

  • Aircraft = Boeing
  • Duration = 200
  • no_pasg = 80
  • speed_ground = 115
  • speed_air = 120
  • height = 40
  • pitch = 4

The long landing probability -

# Hypothetical flight to score. Columns the model does not use are simply
# ignored by predict().
new.ind <- data.frame(
  aircraft = "boeing",
  duration_corrected = 200,
  no_pasg = 80,
  speed_ground = 115,
  speed_air = 120,
  height = 40,
  pitch = 4
)

# Predict on the link (log-odds) scale with standard errors. The argument is
# spelled out as se.fit = TRUE rather than relying on partial matching of
# `se` and on the reassignable shorthand `T`.
pred1 <- predict(model_2, newdata = new.ind, type = "link", se.fit = TRUE)

# Build the +/- 1.96 standard-error interval on the link scale, then map it to
# probabilities with the inverse logit so the bounds stay inside [0, 1].
fit <- ilogit(pred1$fit)
upper <- ilogit(pred1$fit + (1.96 * pred1$se.fit))
lower <- ilogit(pred1$fit - (1.96 * pred1$se.fit))

cat("The confidence interval for long landing-",lower,"||", fit, "||", upper)
## The confidence interval for long landing- 0.999985 || 1 || 1

The risky landing probability -

# Hypothetical flight to score. Columns the model does not use are simply
# ignored by predict().
new.ind <- data.frame(
  aircraft = "boeing",
  duration_corrected = 200,
  no_pasg = 80,
  speed_ground = 115,
  speed_air = 120,
  height = 40,
  pitch = 4
)

# Predict on the link (log-odds) scale with standard errors. The argument is
# spelled out as se.fit = TRUE rather than relying on partial matching of
# `se` and on the reassignable shorthand `T`.
pred1 <- predict(model_4, newdata = new.ind, type = "link", se.fit = TRUE)

# Build the +/- 1.96 standard-error interval on the link scale, then map it to
# probabilities with the inverse logit so the bounds stay inside [0, 1].
fit <- ilogit(pred1$fit)
upper <- ilogit(pred1$fit + (1.96 * pred1$se.fit))
lower <- ilogit(pred1$fit - (1.96 * pred1$se.fit))

cat("The confidence interval for risky landing-",lower,"||", fit, "||", upper)
## The confidence interval for risky landing- 0.9874843 || 0.999789 || 0.9999965