library(readxl)
library(dplyr)
library(ggplot2)
library(plyr)
library(tidyr)
#Importing Dataset(Excel)
FAA1 <- read_excel("C:/Users/Swagatam/Desktop/Statistical_Modeling/Week_1/FAA1.xls")
FAA2 <- read_excel("C:/Users/Swagatam/Desktop/Statistical_Modeling/Week_1/FAA2.xls")
#Checking Structure
str(FAA1)
## Classes 'tbl_df', 'tbl' and 'data.frame': 800 obs. of 8 variables:
## $ aircraft : chr "boeing" "boeing" "boeing" "boeing" ...
## $ duration : num 98.5 125.7 112 196.8 90.1 ...
## $ no_pasg : num 53 69 61 56 70 55 54 57 61 56 ...
## $ speed_ground: num 107.9 101.7 71.1 85.8 59.9 ...
## $ speed_air : num 109 103 NA NA NA ...
## $ height : num 27.4 27.8 18.6 30.7 32.4 ...
## $ pitch : num 4.04 4.12 4.43 3.88 4.03 ...
## $ distance : num 3370 2988 1145 1664 1050 ...
str(FAA2)
## Classes 'tbl_df', 'tbl' and 'data.frame': 150 obs. of 7 variables:
## $ aircraft : chr "boeing" "boeing" "boeing" "boeing" ...
## $ no_pasg : num 53 69 61 56 70 55 54 57 61 56 ...
## $ speed_ground: num 107.9 101.7 71.1 85.8 59.9 ...
## $ speed_air : num 109 103 NA NA NA ...
## $ height : num 27.4 27.8 18.6 30.7 32.4 ...
## $ pitch : num 4.04 4.12 4.43 3.88 4.03 ...
## $ distance : num 3370 2988 1145 1664 1050 ...
#Merging the datasets
FAA<-bind_rows(FAA1,FAA2)
FAA.uni<-unique(FAA[,-2],incomparables = F)
FAA.final<-join(FAA.uni,FAA[,c("pitch","duration")] , type = "left",by="pitch",match="first")
#Structure of Combined Dataset
str(FAA.final)
## 'data.frame': 850 obs. of 8 variables:
## $ pitch : num 4.04 4.12 4.43 3.88 4.03 ...
## $ aircraft : chr "boeing" "boeing" "boeing" "boeing" ...
## $ no_pasg : num 53 69 61 56 70 55 54 57 61 56 ...
## $ speed_ground: num 107.9 101.7 71.1 85.8 59.9 ...
## $ speed_air : num 109 103 NA NA NA ...
## $ height : num 27.4 27.8 18.6 30.7 32.4 ...
## $ distance : num 3370 2988 1145 1664 1050 ...
## $ duration : num 98.5 125.7 112 196.8 90.1 ...
summary(FAA.final)
## pitch aircraft no_pasg speed_ground
## Min. :2.284 Length:850 Min. :29.0 Min. : 27.74
## 1st Qu.:3.642 Class :character 1st Qu.:55.0 1st Qu.: 65.90
## Median :4.008 Mode :character Median :60.0 Median : 79.64
## Mean :4.009 Mean :60.1 Mean : 79.45
## 3rd Qu.:4.377 3rd Qu.:65.0 3rd Qu.: 92.06
## Max. :5.927 Max. :87.0 Max. :141.22
##
## speed_air height distance duration
## Min. : 90.00 Min. :-3.546 Min. : 34.08 Min. : 14.76
## 1st Qu.: 96.25 1st Qu.:23.314 1st Qu.: 883.79 1st Qu.:119.49
## Median :101.15 Median :30.093 Median :1258.09 Median :153.95
## Mean :103.80 Mean :30.144 Mean :1526.02 Mean :154.01
## 3rd Qu.:109.40 3rd Qu.:36.993 3rd Qu.:1936.95 3rd Qu.:188.91
## Max. :141.72 Max. :59.946 Max. :6533.05 Max. :305.62
## NA's :642 NA's :50
FAA.final<-FAA.final%>%
filter(duration>40|is.na(duration))
FAA.final<-FAA.final%>%
filter(height>=6|is.na(height))
FAA.final<-FAA.final%>%
filter(speed_ground>=30 & speed_ground<=140)
FAA.final<-FAA.final%>%
filter(distance<6000)
str(FAA.final)
## 'data.frame': 831 obs. of 8 variables:
## $ pitch : num 4.04 4.12 4.43 3.88 4.03 ...
## $ aircraft : chr "boeing" "boeing" "boeing" "boeing" ...
## $ no_pasg : num 53 69 61 56 70 55 54 57 61 56 ...
## $ speed_ground: num 107.9 101.7 71.1 85.8 59.9 ...
## $ speed_air : num 109 103 NA NA NA ...
## $ height : num 27.4 27.8 18.6 30.7 32.4 ...
## $ distance : num 3370 2988 1145 1664 1050 ...
## $ duration : num 98.5 125.7 112 196.8 90.1 ...
summary(FAA.final)
## pitch aircraft no_pasg speed_ground
## Min. :2.284 Length:831 Min. :29.00 Min. : 33.57
## 1st Qu.:3.640 Class :character 1st Qu.:55.00 1st Qu.: 66.20
## Median :4.001 Mode :character Median :60.00 Median : 79.79
## Mean :4.005 Mean :60.06 Mean : 79.54
## 3rd Qu.:4.370 3rd Qu.:65.00 3rd Qu.: 91.91
## Max. :5.927 Max. :87.00 Max. :132.78
##
## speed_air height distance duration
## Min. : 90.00 Min. : 6.228 Min. : 41.72 Min. : 41.95
## 1st Qu.: 96.23 1st Qu.:23.530 1st Qu.: 893.28 1st Qu.:119.63
## Median :101.12 Median :30.167 Median :1262.15 Median :154.28
## Mean :103.48 Mean :30.458 Mean :1522.48 Mean :154.78
## 3rd Qu.:109.36 3rd Qu.:37.004 3rd Qu.:1936.63 3rd Qu.:189.66
## Max. :132.91 Max. :59.946 Max. :5381.96 Max. :305.62
## NA's :628 NA's :50
#Step 1
FAA.final<-FAA.final%>%
mutate(FAA.final, long.landing = ifelse(distance > 2500, 1, 0))
FAA.final<-FAA.final%>%
mutate(FAA.final, risky.landing = ifelse(distance > 3000, 1, 0))
FAA.final<-FAA.final%>%
select(-distance)
ggplot(FAA.final,aes(long.landing))+geom_histogram(bins=3,fill="red")
l<-lapply( FAA.final[,c(-8,-9)], function(x) summary(glm(long.landing ~ x,data=FAA.final,family=binomial))$coefficients[2,c(1,4)])
l1<-data.frame(l)
l2<-t(l1)
l3<-data.frame(l2)
l3<-l3%>%
mutate(Direction=ifelse(Estimate>=0,"Positive","Negative"))
l3<-l3%>%
mutate(OddsRatio=exp(Estimate))
l3
## Estimate Pr...z.. Direction OddsRatio
## pitch 0.400527824 4.664982e-02 Positive 1.4926123
## aircraft 0.864119860 8.398591e-05 Positive 2.3729167
## no_pasg -0.007256406 6.058565e-01 Negative 0.9927699
## speed_ground 0.472345752 3.935339e-14 Positive 1.6037518
## speed_air 0.512321766 4.334124e-11 Positive 1.6691621
## height 0.008623997 4.218576e-01 Positive 1.0086613
## duration -0.001070492 6.305122e-01 Negative 0.9989301
#Significant Factors: Speed_Ground,Speed_Air,Aircraft
#Pitch
ggplot(FAA.final, aes(x=pitch, fill=as.factor(long.landing))) +
geom_histogram(position="dodge")+
theme(legend.position="top")
#Speed_Air
ggplot(FAA.final, aes(x=speed_air, fill=as.factor(long.landing))) +
geom_histogram(position="dodge")+
theme(legend.position="top")
#Speed_Ground
ggplot(FAA.final, aes(x=speed_ground, fill=as.factor(long.landing))) +
geom_histogram(position="dodge")+
theme(legend.position="top")
* The long landing =0 data for pitch has a normal distribution * The long landing =0 data for speed_air has a right skewed distribution * The long landing =0 data for speed_ground has a right skewed distribution and follows speed_air due to high collinearity
full.model <- glm(long.landing ~ .,data=FAA.final,family=binomial)
summary(full.model)
##
## Call:
## glm(formula = long.landing ~ ., family = binomial, data = FAA.final)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.48513 -0.01382 0.00000 0.00000 1.56909
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.959e+02 5.627e+01 -3.481 0.000499 ***
## pitch 1.463e+00 1.057e+00 1.384 0.166281
## aircraftboeing 8.766e+00 2.628e+00 3.335 0.000852 ***
## no_pasg -7.327e-02 7.015e-02 -1.044 0.296317
## speed_ground -2.247e-01 3.842e-01 -0.585 0.558636
## speed_air 1.980e+00 7.098e-01 2.790 0.005277 **
## height 4.216e-01 1.431e-01 2.946 0.003221 **
## duration 3.121e-04 1.046e-02 0.030 0.976209
## risky.landing 1.113e+01 2.124e+03 0.005 0.995819
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 270.199 on 194 degrees of freedom
## Residual deviance: 32.898 on 186 degrees of freedom
## (636 observations deleted due to missingness)
## AIC: 50.898
##
## Number of Fisher Scoring iterations: 20
#Encoding
FAA.final<-FAA.final%>%
mutate(aircraft.binary=ifelse(aircraft=="airbus",0,1))
#Remove Speed_Air
FAA.final.new<-FAA.final%>%
select(-speed_air)
good.model<-glm(long.landing ~pitch+aircraft.binary+speed_ground,data=FAA.final.new,family=binomial)
summary(good.model)
##
## Call:
## glm(formula = long.landing ~ pitch + aircraft.binary + speed_ground,
## family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.11589 -0.01116 -0.00026 0.00000 2.40741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -67.92855 10.48408 -6.479 9.22e-11 ***
## pitch 1.06599 0.60389 1.765 0.0775 .
## aircraft.binary 3.04348 0.73345 4.150 3.33e-05 ***
## speed_ground 0.61471 0.09184 6.694 2.18e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.778 on 830 degrees of freedom
## Residual deviance: 81.309 on 827 degrees of freedom
## AIC: 89.309
##
## Number of Fisher Scoring iterations: 10
null.model.new<- glm(long.landing ~ 1,data=FAA.final.new,family=binomial)
full.model.new <- glm(long.landing ~ .,data=FAA.final.new,family=binomial)
model.AIC<-step(null.model.new,scope=list(lower=null.model.new, upper=full.model.new),trace=0,direction = "forward")
summary(model.AIC)
##
## Call:
## glm(formula = long.landing ~ speed_ground + aircraft + height +
## pitch, family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.20284 -0.00054 0.00000 0.00000 2.35719
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -119.77598 24.41821 -4.905 9.33e-07 ***
## speed_ground 1.02266 0.20290 5.040 4.65e-07 ***
## aircraftboeing 5.13443 1.18091 4.348 1.37e-05 ***
## height 0.25795 0.06861 3.760 0.00017 ***
## pitch 1.53751 0.84109 1.828 0.06755 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.778 on 830 degrees of freedom
## Residual deviance: 53.204 on 826 degrees of freedom
## AIC: 63.204
##
## Number of Fisher Scoring iterations: 12
#only aircraft and speed_ground
summary(full.model.new)
##
## Call:
## glm(formula = long.landing ~ ., family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.10283 -0.00089 0.00000 0.00000 2.21181
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.131e+02 2.399e+01 -4.715 2.42e-06 ***
## pitch 1.197e+00 8.521e-01 1.404 0.16019
## aircraftboeing 4.994e+00 1.189e+00 4.200 2.67e-05 ***
## no_pasg 9.929e-03 5.550e-02 0.179 0.85803
## speed_ground 9.632e-01 2.001e-01 4.815 1.47e-06 ***
## height 2.356e-01 7.174e-02 3.284 0.00102 **
## duration 5.393e-03 7.649e-03 0.705 0.48077
## risky.landing 1.522e+01 2.566e+03 0.006 0.99527
## aircraft.binary NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 597.692 on 780 degrees of freedom
## Residual deviance: 50.718 on 773 degrees of freedom
## (50 observations deleted due to missingness)
## AIC: 66.718
##
## Number of Fisher Scoring iterations: 20
model.BIC<-step(null.model.new,scope=list(lower=null.model.new, upper=full.model.new),k=log(nrow(FAA.final.new)))
## Start: AIC=629.5
## long.landing ~ 1
##
## Df Deviance AIC
## + speed_ground 1 107.40 145.93
## + risky.landing 1 309.08 347.61
## + aircraft 1 583.49 622.02
## + aircraft.binary 1 583.49 622.02
## <none> 597.69 629.50
## + pitch 1 595.08 633.61
## + height 1 597.29 635.82
## + no_pasg 1 597.46 635.99
## + duration 1 597.46 635.99
##
## Step: AIC=128.92
## long.landing ~ speed_ground
##
## Df Deviance AIC
## + aircraft 1 78.16 106.40
## + aircraft.binary 1 78.16 106.40
## + height 1 95.06 123.30
## + pitch 1 97.01 125.24
## <none> 115.47 128.92
## + risky.landing 1 104.66 132.90
## + duration 1 107.30 135.53
## + no_pasg 1 107.37 135.61
## - speed_ground 1 622.78 629.50
##
## Step: AIC=104.83
## long.landing ~ speed_ground + aircraft
##
## Df Deviance AIC
## + height 1 54.40 87.79
## <none> 84.66 104.83
## + pitch 1 75.18 108.57
## + duration 1 76.64 110.03
## + risky.landing 1 77.65 111.04
## + no_pasg 1 77.82 111.22
## - aircraft 1 115.47 128.92
## - speed_ground 1 606.55 620.00
##
## Step: AIC=83.94
## long.landing ~ speed_ground + aircraft + height
##
## Df Deviance AIC
## <none> 57.05 83.94
## + pitch 1 51.58 87.84
## + risky.landing 1 53.63 89.89
## + duration 1 53.68 89.94
## + no_pasg 1 54.40 90.66
## - height 1 84.66 104.83
## - aircraft 1 100.46 120.63
## - speed_ground 1 605.79 625.96
summary(model.BIC)
##
## Call:
## glm(formula = long.landing ~ speed_ground + aircraft + height,
## family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.43442 -0.00117 0.00000 0.00000 2.57435
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.95437 19.22882 -5.354 8.59e-08 ***
## speed_ground 0.92657 0.17242 5.374 7.70e-08 ***
## aircraftboeing 5.04813 1.11520 4.527 5.99e-06 ***
## height 0.23106 0.05959 3.877 0.000106 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.778 on 830 degrees of freedom
## Residual deviance: 57.047 on 827 degrees of freedom
## AIC: 65.047
##
## Number of Fisher Scoring iterations: 11
#Keep aircraft,speed_ground,height
ggplot(FAA.final,aes(risky.landing))+geom_histogram(bins=3,fill="red")
l.risk<-lapply( FAA.final[,c(-8,-9,-10)], function(x) summary(glm(risky.landing ~ x,data=FAA.final,family=binomial))$coefficients[2,c(1,4)])
l1.risk<-data.frame(l.risk)
l2.risk<-t(l1.risk)
l3.risk<-data.frame(l2.risk)
l3.risk<-l3.risk%>%
mutate(Direction=ifelse(Estimate>=0,"Positive","Negative"))
l3.risk<-l3.risk%>%
mutate(OddsRatio=exp(Estimate))
l3.risk
## Estimate Pr...z.. Direction OddsRatio
## pitch 0.371071969 1.432961e-01 Positive 1.4492874
## aircraft 1.001775330 4.560563e-04 Positive 2.7231120
## no_pasg -0.025379344 1.536237e-01 Negative 0.9749400
## speed_ground 0.614218747 6.898006e-08 Positive 1.8482121
## speed_air 0.870401902 3.728032e-06 Positive 2.3878703
## height -0.002218606 8.705917e-01 Negative 0.9977839
## duration -0.001151836 6.801987e-01 Negative 0.9988488
#Significant Factors:Speed_Ground,Speed_Air,Aircraft
#Speed_Air
ggplot(FAA.final, aes(x=speed_air, fill=as.factor(risky.landing))) +
geom_histogram(position="dodge")+
theme(legend.position="top")
#Speed_Ground
ggplot(FAA.final, aes(x=speed_ground, fill=as.factor(risky.landing))) +
geom_histogram(position="dodge")+
theme(legend.position="top")
* The Speed_Air Variable is Right Skewed * The Speed_Air Variable is Normally distributed for risky.landing=0
full.model.risk <- glm(risky.landing ~ .,data=FAA.final,family=binomial)
summary(full.model.risk)
##
## Call:
## glm(formula = risky.landing ~ ., family = binomial, data = FAA.final)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.97055 0.00000 0.00000 0.00001 2.22865
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.613e+02 3.289e+03 -0.049 0.9609
## pitch -1.328e+00 1.435e+00 -0.925 0.3549
## aircraftboeing 7.217e+00 3.034e+00 2.378 0.0174 *
## no_pasg -1.171e-01 9.731e-02 -1.203 0.2289
## speed_ground -1.770e-01 5.059e-01 -0.350 0.7264
## speed_air 1.615e+00 6.586e-01 2.452 0.0142 *
## height 4.372e-02 5.844e-02 0.748 0.4543
## duration 2.012e-03 1.587e-02 0.127 0.8991
## long.landing 1.358e+01 3.289e+03 0.004 0.9967
## aircraft.binary NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 240.724 on 194 degrees of freedom
## Residual deviance: 22.095 on 186 degrees of freedom
## (636 observations deleted due to missingness)
## AIC: 40.095
##
## Number of Fisher Scoring iterations: 21
good.model.risk<-glm(risky.landing ~ aircraft.binary+speed_ground,data=FAA.final.new,family=binomial)
summary(good.model)
##
## Call:
## glm(formula = long.landing ~ pitch + aircraft.binary + speed_ground,
## family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.11589 -0.01116 -0.00026 0.00000 2.40741
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -67.92855 10.48408 -6.479 9.22e-11 ***
## pitch 1.06599 0.60389 1.765 0.0775 .
## aircraft.binary 3.04348 0.73345 4.150 3.33e-05 ***
## speed_ground 0.61471 0.09184 6.694 2.18e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.778 on 830 degrees of freedom
## Residual deviance: 81.309 on 827 degrees of freedom
## AIC: 89.309
##
## Number of Fisher Scoring iterations: 10
null.model.new.risk<- glm(risky.landing ~ 1,data=FAA.final.new,family=binomial)
full.model.new.risk<- glm(risky.landing ~ .,data=FAA.final.new,family=binomial)
model.AIC.risk<-step(null.model.new.risk,scope=list(lower=null.model.new.risk, upper=full.model.new.risk),trace=0,direction = "forward")
summary(model.AIC.risk)
##
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft + no_pasg,
## family = binomial, data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.33913 -0.00009 0.00000 0.00000 1.87810
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -99.90780 25.57993 -3.906 9.39e-05 ***
## speed_ground 0.94963 0.23559 4.031 5.56e-05 ***
## aircraftboeing 4.64188 1.47520 3.147 0.00165 **
## no_pasg -0.08462 0.05732 -1.476 0.13987
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 37.707 on 827 degrees of freedom
## AIC: 45.707
##
## Number of Fisher Scoring iterations: 12
#only aircraft and speed_ground
model.BIC.risk<-step(null.model.new.risk,scope=list(lower=null.model.new.risk, upper=full.model.new.risk),k=log(nrow(FAA.final.new)))
## Start: AIC=442.77
## risky.landing ~ 1
##
## Df Deviance AIC
## + speed_ground 1 57.99 84.26
## + long.landing 1 134.60 160.87
## + aircraft 1 412.07 438.34
## + aircraft.binary 1 412.07 438.34
## <none> 423.22 442.77
## + no_pasg 1 421.18 447.45
## + pitch 1 421.54 447.82
## + duration 1 423.04 449.32
## + height 1 423.13 449.40
##
## Step: AIC=72.38
## risky.landing ~ speed_ground
##
## Df Deviance AIC
## + aircraft 1 39.96 61.07
## + aircraft.binary 1 39.96 61.07
## <none> 58.93 72.38
## + pitch 1 51.63 72.74
## + long.landing 1 53.53 74.64
## + no_pasg 1 57.18 78.29
## + height 1 57.79 78.90
## + duration 1 57.95 79.06
## - speed_ground 1 436.04 442.77
##
## Step: AIC=60.26
## risky.landing ~ speed_ground + aircraft
##
## Df Deviance AIC
## <none> 40.10 60.26
## + no_pasg 1 37.56 64.59
## + height 1 39.30 66.33
## + long.landing 1 39.46 66.49
## + duration 1 39.76 66.79
## + pitch 1 39.78 66.81
## - aircraft 1 58.93 72.38
## - speed_ground 1 422.74 436.18
summary(model.BIC.risk)
##
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft, family = binomial,
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.24398 -0.00011 0.00000 0.00000 1.61021
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.0772 24.7751 -4.120 3.79e-05 ***
## speed_ground 0.9263 0.2248 4.121 3.78e-05 ***
## aircraftboeing 4.0190 1.2494 3.217 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 40.097 on 828 degrees of freedom
## AIC: 46.097
##
## Number of Fisher Scoring iterations: 12
#only aircraft and speed_ground
mylogit <- glm(formula = long.landing ~ speed_ground + aircraft, family = binomial,
data = FAA.final.new)
mylogit.risky <- glm(formula = risky.landing ~ speed_ground + aircraft, family = binomial,
data = FAA.final.new)
summary(mylogit)
##
## Call:
## glm(formula = long.landing ~ speed_ground + aircraft, family = binomial,
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.28368 -0.01418 -0.00039 0.00000 2.56541
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -60.77049 8.67075 -7.009 2.41e-12 ***
## speed_ground 0.58534 0.08441 6.934 4.08e-12 ***
## aircraftboeing 3.23679 0.71189 4.547 5.45e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 622.778 on 830 degrees of freedom
## Residual deviance: 84.665 on 828 degrees of freedom
## AIC: 90.665
##
## Number of Fisher Scoring iterations: 10
summary(mylogit.risky)
##
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft, family = binomial,
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.24398 -0.00011 0.00000 0.00000 1.61021
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.0772 24.7751 -4.120 3.79e-05 ***
## speed_ground 0.9263 0.2248 4.121 3.78e-05 ***
## aircraftboeing 4.0190 1.2494 3.217 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 40.097 on 828 degrees of freedom
## AIC: 46.097
##
## Number of Fisher Scoring iterations: 12
library(ROCR)
#Long_AUC
pred1 <- prediction(predict(mylogit), FAA.final.new$long.landing)
perf1 <- performance(pred1,"tpr","fpr")
plot(perf1,colorize=TRUE)
unlist(slot(performance(pred1, "auc"), "y.values"))
## [1] 0.9964526
#Risky_AUC
pred.risky <- prediction(predict(mylogit.risky ), FAA.final.new$risky.landing)
perf.risky <- performance(pred.risky ,"tpr","fpr")
plot(perf.risky ,colorize=TRUE)
unlist(slot(performance(pred.risky , "auc"), "y.values"))
## [1] 0.9986161
new.data <- data.frame(aircraft = 'boeing', duration=200, no_pasg=80, speed_ground=115,
speed_air=120,
height=40, pitch=4)
#Long_Prob
prob.long <- predict(mylogit, newdata=new.data, type="response",se=T)
CI.long <- c((prob.long$fit-
(1.96*prob.long$se.fit)),(prob.long$fit+(1.96*prob.long$se.fit)))
#Risky_Prob
prob.risky <- predict(mylogit.risky, newdata=new.data, type="response",se=T)
CI.risky <- c((prob.risky$fit-
(1.96*prob.risky$se.fit)),(prob.risky$fit+(1.96*prob.risky$se.fit)))
prob.long
## $fit
## 1
## 0.9999434
##
## $se.fit
## 1
## 8.630536e-05
##
## $residual.scale
## [1] 1
CI.long
## 1 1
## 0.9997743 1.0001126
prob.risky
## $fit
## 1
## 0.999789
##
## $se.fit
## 1
## 0.0004408114
##
## $residual.scale
## [1] 1
CI.risky
## 1 1
## 0.998925 1.000653
Note: * The new data point was fitted into the model. * The results for the Probabilities , Standard Errors and Confidence Intervals are reported below:
For LONG LANDINGS * $fit - 0.9999434 * $se.fit - 8.630536e-05 * CI.long: 0.9997743 to 1.0001126
For RISKY LANDINGS * $fit - 0.999789 * $se.fit - 0.0004408114 * CI.risky: 0.998925 to 1.000653
#Step 14
probit.risky <- glm(risky.landing ~ aircraft+speed_ground,
family=binomial (link = "probit"),
data=FAA.final.new)
hazard.risky <- glm(risky.landing ~ aircraft+speed_ground,
family=binomial (link = "cloglog"),
data=FAA.final.new)
logit.risky <- glm(formula = risky.landing ~ speed_ground + aircraft,
family = binomial,
data = FAA.final.new)
summary(probit.risky)
##
## Call:
## glm(formula = risky.landing ~ aircraft + speed_ground, family = binomial(link = "probit"),
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.210 0.000 0.000 0.000 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -58.6931 13.3133 -4.409 1.04e-05 ***
## aircraftboeing 2.3567 0.7016 3.359 0.000782 ***
## speed_ground 0.5322 0.1207 4.411 1.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 39.436 on 828 degrees of freedom
## AIC: 45.436
##
## Number of Fisher Scoring iterations: 14
summary(hazard.risky)
##
## Call:
## glm(formula = risky.landing ~ aircraft + speed_ground, family = binomial(link = "cloglog"),
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.24103 -0.00183 -0.00004 0.00000 1.67963
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -69.2654 14.7396 -4.699 2.61e-06 ***
## aircraftboeing 2.8984 0.8002 3.622 0.000292 ***
## speed_ground 0.6221 0.1326 4.690 2.74e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 41.443 on 828 degrees of freedom
## AIC: 47.443
##
## Number of Fisher Scoring iterations: 13
summary(logit.risky)
##
## Call:
## glm(formula = risky.landing ~ speed_ground + aircraft, family = binomial,
## data = FAA.final.new)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.24398 -0.00011 0.00000 0.00000 1.61021
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.0772 24.7751 -4.120 3.79e-05 ***
## speed_ground 0.9263 0.2248 4.121 3.78e-05 ***
## aircraftboeing 4.0190 1.2494 3.217 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 436.043 on 830 degrees of freedom
## Residual deviance: 40.097 on 828 degrees of freedom
## AIC: 46.097
##
## Number of Fisher Scoring iterations: 12
#Logit AUC
pred.risky.logit <- prediction(predict(logit.risky ), FAA.final.new$risky.landing)
perf.risky.logit <- performance(pred.risky.logit ,"tpr","fpr")
plot(perf.risky.logit ,colorize=TRUE)
unlist(slot(performance(pred.risky.logit , "auc"), "y.values"))
## [1] 0.9986161
#Probit AUC
pred.risky.probit <- prediction(predict(probit.risky ), FAA.final.new$risky.landing)
perf.risky.probit <- performance(pred.risky.probit ,"tpr","fpr")
plot(perf.risky.probit ,colorize=TRUE)
unlist(slot(performance(pred.risky.probit , "auc"), "y.values"))
## [1] 0.9986161
#Hazard AUC
pred.risky.hazard <- prediction(predict(hazard.risky ), FAA.final.new$risky.landing)
perf.risky.hazard <- performance(pred.risky.hazard ,"tpr","fpr")
plot(perf.risky.hazard ,colorize=TRUE)
unlist(slot(performance(pred.risky.hazard , "auc"), "y.values"))
## [1] 0.9985736
pred.logit <- predict(logit.risky, type = "response")
pred.probit <- predict(probit.risky, type = "response")
pred.hazard <- predict(hazard.risky, type = "response")
head(sort(pred.logit, decreasing = TRUE),5)
## 362 307 64 387 408
## 1 1 1 1 1
head(sort(pred.probit, decreasing = TRUE),5)
## 56 64 134 176 179
## 1 1 1 1 1
head(sort(pred.hazard, decreasing = TRUE),5)
## 19 29 30 56 64
## 1 1 1 1 1
#Probit
prob.risky.probit <- predict(probit.risky, newdata=new.data, type="response",se=T)
prob.risky.probit
## $fit
## 1
## 0.9999994
##
## $se.fit
## 1
## 3.153557e-06
##
## $residual.scale
## [1] 1
CI.risky.probit <- c((prob.risky.probit$fit-
(1.96*prob.risky.probit$se.fit)),(prob.risky.probit$fit+(1.96*prob.risky.probit$se.fit)))
CI.risky.probit
## 1 1
## 0.9999933 1.0000056
#Hazard
prob.risky.hazard <- predict(hazard.risky, newdata=new.data, type="response",se=T)
prob.risky.hazard
## $fit
## 1
## 1
##
## $se.fit
## 1
## 2.605522e-16
##
## $residual.scale
## [1] 1
CI.risky.hazard <- c((prob.risky.hazard$fit-
(1.96*prob.risky.hazard$se.fit)),(prob.risky.hazard$fit+(1.96*prob.risky.hazard$se.fit)))
CI.risky.hazard
## 1 1
## 1 1