Loading Libraries:
library(readxl)
library(tidyverse)
library(dplyr)
library(stringr)
library(ggplot2)
library(knitr)
library(MASS)
library(car)
library(ROCR)
library(faraway)
library(pROC)
library(nnet)
# Import the two FAA landing workbooks.
FAA1 <- read_excel(
  "C:/Users/Anil Palazzo/Desktop/school stuff/Masters/BANA7042-Statistical Modeling/Data/FAA1.xls"
)
FAA2 <- read_excel(
  "C:/Users/Anil Palazzo/Desktop/school stuff/Masters/BANA7042-Statistical Modeling/Data/FAA2.xls"
)

# Build the analysis data set in one pipeline:
#   1. merge() is called without `by`, so it joins on every column name the
#      two sheets share; all = TRUE keeps rows found in only one of them.
#   2. filter() keeps rows that satisfy every range check at once. Rows where
#      a check evaluates to NA are dropped as well.
#   3. Y bins landing distance into three ordered classes. case_when() tests
#      its arms in order, so the second arm only sees distance >= 1000.
#   4. The raw distance column is removed once Y has been derived from it.
#      select() is namespaced because MASS is loaded after dplyr.
FAA <- merge(x = FAA1, y = FAA2, all = TRUE) %>%
  filter(
    height >= 6,
    duration > 40,
    speed_air >= 30,
    speed_air <= 140,
    speed_ground >= 30,
    speed_ground <= 140,
    distance < 6000
  ) %>%
  mutate(
    Y = case_when(
      distance < 1000 ~ 1,
      distance < 2500 ~ 2,
      TRUE ~ 3
    )
  ) %>%
  dplyr::select(-distance)

# NOTE(review): attach() places FAA's columns on the search path. The model
# calls visible in this section pass the data explicitly, so confirm whether
# anything later in the file still relies on this before removing it.
attach(what = FAA)
# Poisson regression of passenger count on every other column of FAA
# (the `.` in the formula expands to all remaining columns).
model_1 <- glm(
  formula = no_pasg ~ .,
  family = poisson,
  data = FAA
)

# Coefficient table, deviances and AIC for the full model.
summary(object = model_1)
##
## Call:
## glm(formula = no_pasg ~ ., family = poisson, data = FAA)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.150e+00 1.459e-01 28.448 <2e-16 ***
## aircraftboeing -1.244e-02 2.111e-02 -0.589 0.556
## speed_ground 7.798e-04 6.178e-03 0.126 0.900
## speed_air -8.324e-04 6.310e-03 -0.132 0.895
## height -5.875e-05 1.028e-03 -0.057 0.954
## pitch -4.671e-03 1.800e-02 -0.260 0.795
## duration -1.742e-04 1.961e-04 -0.889 0.374
## Y 5.709e-04 2.971e-02 0.019 0.985
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 162.74 on 194 degrees of freedom
## Residual deviance: 161.31 on 187 degrees of freedom
## AIC: 1332.7
##
## Number of Fisher Scoring iterations: 4
# AIC-based stepwise selection starting from the full model; the trace of
# each step and the final selected model are printed.
step(object = model_1)
## Start: AIC=1332.69
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch +
## duration + Y
##
## Df Deviance AIC
## - Y 1 161.31 1330.7
## - height 1 161.31 1330.7
## - speed_ground 1 161.32 1330.7
## - speed_air 1 161.32 1330.7
## - pitch 1 161.38 1330.8
## - aircraft 1 161.65 1331.0
## - duration 1 162.10 1331.5
## <none> 161.31 1332.7
##
## Step: AIC=1330.69
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch +
## duration
##
## Df Deviance AIC
## - height 1 161.31 1328.7
## - speed_ground 1 161.32 1328.7
## - speed_air 1 161.32 1328.7
## - pitch 1 161.38 1328.8
## - aircraft 1 161.67 1329.0
## - duration 1 162.10 1329.5
## <none> 161.31 1330.7
##
## Step: AIC=1328.69
## no_pasg ~ aircraft + speed_ground + speed_air + pitch + duration
##
## Df Deviance AIC
## - speed_ground 1 161.33 1326.7
## - speed_air 1 161.33 1326.7
## - pitch 1 161.38 1326.8
## - aircraft 1 161.67 1327.0
## - duration 1 162.11 1327.5
## <none> 161.31 1328.7
##
## Step: AIC=1326.71
## no_pasg ~ aircraft + speed_air + pitch + duration
##
## Df Deviance AIC
## - speed_air 1 161.33 1324.7
## - pitch 1 161.40 1324.8
## - aircraft 1 161.69 1325.1
## - duration 1 162.18 1325.6
## <none> 161.33 1326.7
##
## Step: AIC=1324.71
## no_pasg ~ aircraft + pitch + duration
##
## Df Deviance AIC
## - pitch 1 161.40 1322.8
## - aircraft 1 161.69 1323.1
## - duration 1 162.18 1323.6
## <none> 161.33 1324.7
##
## Step: AIC=1322.78
## no_pasg ~ aircraft + duration
##
## Df Deviance AIC
## - aircraft 1 161.97 1321.3
## - duration 1 162.24 1321.6
## <none> 161.40 1322.8
##
## Step: AIC=1321.35
## no_pasg ~ duration
##
## Df Deviance AIC
## - duration 1 162.74 1320.1
## <none> 161.97 1321.3
##
## Step: AIC=1320.12
## no_pasg ~ 1
##
## Call: glm(formula = no_pasg ~ 1, family = poisson, data = FAA)
##
## Coefficients:
## (Intercept)
## 4.091
##
## Degrees of Freedom: 194 Total (i.e. Null); 194 Residual
## Null Deviance: 162.7
## Residual Deviance: 162.7 AIC: 1320
# Single-term deletions from the full model, each assessed with a
# likelihood-ratio test.
drop1(object = model_1, test = "LRT")
## Single term deletions
##
## Model:
## no_pasg ~ aircraft + speed_ground + speed_air + height + pitch +
## duration + Y
## Df Deviance AIC LRT Pr(>Chi)
## <none> 161.31 1332.7
## aircraft 1 161.65 1331.0 0.34706 0.5558
## speed_ground 1 161.32 1330.7 0.01593 0.8996
## speed_air 1 161.32 1330.7 0.01740 0.8951
## height 1 161.31 1330.7 0.00327 0.9544
## pitch 1 161.38 1330.8 0.06734 0.7953
## duration 1 162.10 1331.5 0.79052 0.3739
## Y 1 161.31 1330.7 0.00037 0.9847
As the summary of the Poisson model shows, none of the predictors has a statistically significant effect on the number of passengers: every p-value is at least 0.37. Furthermore, stepwise selection by AIC removed every predictor, leaving only the intercept in the final model, and the likelihood-ratio tests from drop1() agree that removing any single predictor does not significantly change the deviance. Taken together, these results indicate that the current variables are not useful for predicting the number of passengers on board, since removing any of them does not affect the model’s performance.