Loading Libraries:

library(readxl)
library(tidyverse)
library(dplyr)
library(stringr)       
library(ggplot2)  
library(knitr) 
library(MASS)
library(car)
library(ROCR)
library(faraway)
library(pROC)
library(nnet)

Step 0.

# Read the two FAA workbooks from their local, machine-specific locations.
FAA1 <- read_excel(
  path = "C:/Users/Anil Palazzo/Desktop/school stuff/Masters/BANA7042-Statistical Modeling/Data/FAA1.xls"
)
FAA2 <- read_excel(
  path = "C:/Users/Anil Palazzo/Desktop/school stuff/Masters/BANA7042-Statistical Modeling/Data/FAA2.xls"
)

# Full outer join (all = TRUE). No `by` is given, so merge() matches on every
# column name the two sheets have in common; columns present in only one
# sheet are filled with NA for rows coming from the other.
FAA <- merge(x = FAA1, y = FAA2, all = TRUE)

# Restrict FAA to rows inside the accepted ranges. All conditions in a single
# filter() call are combined with AND, and a row is kept only when every
# condition is TRUE, so rows with NA in any of these columns are dropped too.
FAA <- filter(
  FAA,
  height >= 6,
  duration > 40,
  speed_air >= 30,
  speed_air <= 140,
  speed_ground >= 30,
  speed_ground <= 140,
  distance < 6000
)

# Recode landing distance into a three-level numeric response Y:
#   1 = distance < 1000, 2 = 1000 <= distance < 2500, 3 = everything else.
# case_when() evaluates its branches in order, so the second branch is only
# reached when the first one did not match.
FAA <- mutate(
  FAA,
  Y = case_when(
    distance < 1000 ~ 1,
    distance < 2500 ~ 2,
    TRUE ~ 3
  )
)

# Drop the raw distance now that Y replaces it. The dplyr:: prefix is needed
# because MASS (loaded after dplyr) masks select().
FAA <- dplyr::select(FAA, -distance)

# Put a copy of FAA's columns on the search path so they can be referenced by
# bare name. The copy is taken at this point: later changes to FAA are not
# reflected in the attached columns.
# NOTE(review): the model calls in this section pass FAA explicitly, so this
# attach() looks unnecessary here and risks name masking; confirm no later
# code relies on it before removing.
attach(FAA)

Q2.

# Poisson regression of the passenger count on every other column of FAA
# (the `.` on the right-hand side expands to all remaining variables).
model_1 <- glm(
  formula = no_pasg ~ .,
  family = poisson,
  data = FAA
)

# Coefficient table, deviances and AIC for the full model.
summary(model_1)
## 
## Call:
## glm(formula = no_pasg ~ ., family = poisson, data = FAA)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     4.150e+00  1.459e-01  28.448   <2e-16 ***
## aircraftboeing -1.244e-02  2.111e-02  -0.589    0.556    
## speed_ground    7.798e-04  6.178e-03   0.126    0.900    
## speed_air      -8.324e-04  6.310e-03  -0.132    0.895    
## height         -5.875e-05  1.028e-03  -0.057    0.954    
## pitch          -4.671e-03  1.800e-02  -0.260    0.795    
## duration       -1.742e-04  1.961e-04  -0.889    0.374    
## Y               5.709e-04  2.971e-02   0.019    0.985    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 162.74  on 194  degrees of freedom
## Residual deviance: 161.31  on 187  degrees of freedom
## AIC: 1332.7
## 
## Number of Fisher Scoring iterations: 4
# AIC-based stepwise search starting from the full model. With no `scope`
# supplied, step() only ever drops terms, i.e. it runs backward elimination;
# the direction is spelled out here to make that explicit.
step(object = model_1, direction = "backward")
## Start:  AIC=1332.69
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch + 
##     duration + Y
## 
##                Df Deviance    AIC
## - Y             1   161.31 1330.7
## - height        1   161.31 1330.7
## - speed_ground  1   161.32 1330.7
## - speed_air     1   161.32 1330.7
## - pitch         1   161.38 1330.8
## - aircraft      1   161.65 1331.0
## - duration      1   162.10 1331.5
## <none>              161.31 1332.7
## 
## Step:  AIC=1330.69
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch + 
##     duration
## 
##                Df Deviance    AIC
## - height        1   161.31 1328.7
## - speed_ground  1   161.32 1328.7
## - speed_air     1   161.32 1328.7
## - pitch         1   161.38 1328.8
## - aircraft      1   161.67 1329.0
## - duration      1   162.10 1329.5
## <none>              161.31 1330.7
## 
## Step:  AIC=1328.69
## no_pasg ~ aircraft + speed_ground + speed_air + pitch + duration
## 
##                Df Deviance    AIC
## - speed_ground  1   161.33 1326.7
## - speed_air     1   161.33 1326.7
## - pitch         1   161.38 1326.8
## - aircraft      1   161.67 1327.0
## - duration      1   162.11 1327.5
## <none>              161.31 1328.7
## 
## Step:  AIC=1326.71
## no_pasg ~ aircraft + speed_air + pitch + duration
## 
##             Df Deviance    AIC
## - speed_air  1   161.33 1324.7
## - pitch      1   161.40 1324.8
## - aircraft   1   161.69 1325.1
## - duration   1   162.18 1325.6
## <none>           161.33 1326.7
## 
## Step:  AIC=1324.71
## no_pasg ~ aircraft + pitch + duration
## 
##            Df Deviance    AIC
## - pitch     1   161.40 1322.8
## - aircraft  1   161.69 1323.1
## - duration  1   162.18 1323.6
## <none>          161.33 1324.7
## 
## Step:  AIC=1322.78
## no_pasg ~ aircraft + duration
## 
##            Df Deviance    AIC
## - aircraft  1   161.97 1321.3
## - duration  1   162.24 1321.6
## <none>          161.40 1322.8
## 
## Step:  AIC=1321.35
## no_pasg ~ duration
## 
##            Df Deviance    AIC
## - duration  1   162.74 1320.1
## <none>          161.97 1321.3
## 
## Step:  AIC=1320.12
## no_pasg ~ 1
## 
## Call:  glm(formula = no_pasg ~ 1, family = poisson, data = FAA)
## 
## Coefficients:
## (Intercept)  
##       4.091  
## 
## Degrees of Freedom: 194 Total (i.e. Null);  194 Residual
## Null Deviance:       162.7 
## Residual Deviance: 162.7     AIC: 1320
# Single-term deletions from the full model: for each predictor, compare the
# model without it against model_1 using a likelihood-ratio test.
drop1(object = model_1, test = "LRT")
## Single term deletions
## 
## Model:
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch + 
##     duration + Y
##              Df Deviance    AIC     LRT Pr(>Chi)
## <none>            161.31 1332.7                 
## aircraft      1   161.65 1331.0 0.34706   0.5558
## speed_ground  1   161.32 1330.7 0.01593   0.8996
## speed_air     1   161.32 1330.7 0.01740   0.8951
## height        1   161.31 1330.7 0.00327   0.9544
## pitch         1   161.38 1330.8 0.06734   0.7953
## duration      1   162.10 1331.5 0.79052   0.3739
## Y             1   161.31 1330.7 0.00037   0.9847

The summary of the fitted Poisson model shows that none of the predictors is statistically significant for the number of passengers (every p-value exceeds 0.37). Backward stepwise selection by AIC removed the predictors one at a time, lowering the AIC from 1332.7 to 1320.1 and leaving only the intercept in the final model. The likelihood-ratio tests from drop1() agree: dropping any single predictor does not significantly change the deviance. Together, these results indicate that the available variables are not useful for predicting the number of passengers on board, since removing any of them does not worsen the model’s fit.