Reading the Data
library(psych)
library(dplyr)
library(data.table)
# Restore the saved objects from the .rda file into the session. load()
# returns only the names of the restored objects, so its result is not kept.
# NOTE(review): FDelay.dt is expected to be one of the restored objects.
load("FDelay.rda")
# Work on the first 150,000 rows; head() does not pad with NA rows when the
# table holds fewer rows than requested.
delay_df <- head(FDelay.dt, 150000L)
Dimensions of the Data
# Print the number of rows and columns of the working subset.
dim(delay_df)
## [1] 150000 40
Column Names
# List the column names of the working subset in positional order.
colnames(delay_df)
## [1] "V1" "FlightDate" "Reporting_Airline"
## [4] "Origin" "Dest" "CRSDepTime"
## [7] "DepTime" "DepDelay" "DepDelayMinutes"
## [10] "DepDel15" "DepartureDelayGroups" "DepTimeBlk"
## [13] "CRSArrTime" "ArrTime" "ArrDelay"
## [16] "ArrDelayMinutes" "ArrDel15" "ArrivalDelayGroups"
## [19] "ArrTimeBlk" "CRSElapsedTime" "ActualElapsedTime"
## [22] "AirTime" "Distance" "WEEKEND"
## [25] "PMDEP" "PMARR" "Southwest"
## [28] "American" "Delta" "United"
## [31] "Alaska" "Jetblue" "Skywest"
## [34] "Others" "DepStatus" "ArrStatus"
## [37] "airline" "timewindowdep" "timewindowarr"
## [40] "daywindow"
Summary Statistics
# Show the storage type and the first few values of every column.
str(delay_df)
## Classes 'data.table' and 'data.frame': 150000 obs. of 40 variables:
## $ V1 : int 7 8 11 12 13 14 16 18 20 25 ...
## $ FlightDate : Factor w/ 92 levels "2018-10-01","2018-10-02",..: 21 22 25 26 28 29 31 2 4 10 ...
## $ Reporting_Airline : Factor w/ 17 levels "9E","AA","AS",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Origin : Factor w/ 350 levels "ABE","ABI","ABQ",..: 183 183 183 183 183 183 183 215 215 215 ...
## $ Dest : Factor w/ 350 levels "ABE","ABI","ABQ",..: 71 71 71 71 71 71 71 71 71 71 ...
## $ CRSDepTime : int 1123 1123 1123 1123 1123 1123 1123 846 846 846 ...
## $ DepTime : int 1124 1117 1358 1125 1248 1211 1133 841 846 844 ...
## $ DepDelay : int 1 -6 155 2 85 48 10 -5 0 -2 ...
## $ DepDelayMinutes : int 1 0 155 2 85 48 10 0 0 0 ...
## $ DepDel15 : int 0 0 1 0 1 1 0 0 0 0 ...
## $ DepartureDelayGroups: int 0 -1 10 0 5 3 0 -1 0 -1 ...
## $ DepTimeBlk : Factor w/ 19 levels "0001-0559","0600-0659",..: 7 7 7 7 7 7 7 4 4 4 ...
## $ CRSArrTime : int 1910 1910 1910 1910 1910 1910 1910 1053 1050 1050 ...
## $ ArrTime : int 1919 1927 2133 1922 2009 1940 1913 1054 1059 1118 ...
## $ ArrDelay : int 9 17 143 12 59 30 3 1 9 28 ...
## $ ArrDelayMinutes : int 9 17 143 12 59 30 3 1 9 28 ...
## $ ArrDel15 : int 0 1 1 0 1 1 0 0 0 1 ...
## $ ArrivalDelayGroups : int 0 1 9 0 3 2 0 0 0 1 ...
## $ ArrTimeBlk : Factor w/ 19 levels "0001-0559","0600-0659",..: 15 15 15 15 15 15 15 6 6 6 ...
## $ CRSElapsedTime : int 287 287 287 287 287 287 287 127 124 124 ...
## $ ActualElapsedTime : int 295 310 275 297 261 269 280 133 133 154 ...
## $ AirTime : int 250 244 246 257 234 241 256 96 97 100 ...
## $ Distance : int 2125 2125 2125 2125 2125 2125 2125 650 650 650 ...
## $ WEEKEND : int 1 0 0 0 1 0 0 0 0 0 ...
## $ PMDEP : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PMARR : int 1 1 1 1 1 1 1 0 0 0 ...
## $ Southwest : int 0 0 0 0 0 0 0 0 0 0 ...
## $ American : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Delta : int 0 0 0 0 0 0 0 0 0 0 ...
## $ United : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Alaska : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Jetblue : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Skywest : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Others : int 0 0 0 0 0 0 0 0 0 0 ...
## $ DepStatus : Factor w/ 2 levels "DelayedonDeparture",..: 1 2 1 1 1 1 1 2 2 2 ...
## $ ArrStatus : Factor w/ 2 levels "DelayedonArrival",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ airline : Factor w/ 11 levels "Alaska","American",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ timewindowdep : Factor w/ 2 levels "AM","PM": 1 1 1 1 1 1 1 1 1 1 ...
## $ timewindowarr : Factor w/ 2 levels "AM","PM": 2 2 2 2 2 2 2 1 1 1 ...
## $ daywindow : Factor w/ 2 levels "Weekday","Weekend": 2 1 1 1 2 1 1 1 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Per-column summary: quantiles and mean for numeric columns, level counts
# for factor columns.
summary(delay_df)
## V1 FlightDate Reporting_Airline Origin
## Min. : 7 2018-10-15: 9236 WN :20576 ATL : 7971
## 1st Qu.:115988 2018-10-12: 6547 AA :20539 ORD : 7884
## Median :227201 2018-10-18: 6223 DL :20071 DFW : 7412
## Mean :226170 2018-10-05: 6192 OO :15721 CLT : 5120
## 3rd Qu.:340176 2018-10-19: 6103 UA :10987 BOS : 4907
## Max. :449631 2018-10-16: 6046 B6 : 9063 DEN : 4562
## (Other) :109653 (Other):53043 (Other):112144
## Dest CRSDepTime DepTime DepDelay
## ATL : 7979 Min. : 1 Min. : 1 Min. : -28.00
## ORD : 6881 1st Qu.:1000 1st Qu.:1017 1st Qu.: -2.00
## DFW : 6694 Median :1405 Median :1429 Median : 9.00
## LAX : 4996 Mean :1378 Mean :1407 Mean : 27.99
## DEN : 4929 3rd Qu.:1754 3rd Qu.:1816 3rd Qu.: 34.00
## IAH : 4895 Max. :2359 Max. :2400 Max. :2109.00
## (Other):113626
## DepDelayMinutes DepDel15 DepartureDelayGroups DepTimeBlk
## Min. : 0.00 Min. :0.0000 Min. :-2.000 1700-1759:10795
## 1st Qu.: 0.00 1st Qu.:0.0000 1st Qu.:-1.000 1800-1859:10199
## Median : 9.00 Median :0.0000 Median : 0.000 1200-1259:10178
## Mean : 29.46 Mean :0.4279 Mean : 1.152 1600-1659: 9831
## 3rd Qu.: 34.00 3rd Qu.:1.0000 3rd Qu.: 2.000 1500-1559: 9713
## Max. :2109.00 Max. :1.0000 Max. :12.000 1400-1459: 9640
## (Other) :89644
## CRSArrTime ArrTime ArrDelay ArrDelayMinutes
## Min. : 1 Min. : 1 Min. : 1.00 Min. : 1.00
## 1st Qu.:1150 1st Qu.:1144 1st Qu.: 6.00 1st Qu.: 6.00
## Median :1608 Median :1610 Median : 14.00 Median : 14.00
## Mean :1547 Mean :1529 Mean : 32.81 Mean : 32.81
## 3rd Qu.:1944 3rd Qu.:1953 3rd Qu.: 35.00 3rd Qu.: 35.00
## Max. :2400 Max. :2400 Max. :2153.00 Max. :2153.00
##
## ArrDel15 ArrivalDelayGroups ArrTimeBlk CRSElapsedTime
## Min. :0.000 Min. : 0.000 1900-1959:10704 Min. :-99.0
## 1st Qu.:0.000 1st Qu.: 0.000 1800-1859:10561 1st Qu.: 89.0
## Median :0.000 Median : 0.000 2100-2159:10306 Median :125.0
## Mean :0.493 Mean : 1.562 1700-1759:10088 Mean :143.2
## 3rd Qu.:1.000 3rd Qu.: 2.000 1600-1659: 9814 3rd Qu.:172.0
## Max. :1.000 Max. :12.000 2000-2059: 9553 Max. :655.0
## (Other) :88974
## ActualElapsedTime AirTime Distance WEEKEND
## Min. : 17 Min. : 8.0 Min. : 31.0 Min. :0.0000
## 1st Qu.: 93 1st Qu.: 63.0 1st Qu.: 383.0 1st Qu.:0.0000
## Median :129 Median : 99.0 Median : 667.0 Median :0.0000
## Mean :148 Mean :117.9 Mean : 826.8 Mean :0.2293
## 3rd Qu.:180 3rd Qu.:149.0 3rd Qu.:1061.0 3rd Qu.:0.0000
## Max. :684 Max. :653.0 Max. :4983.0 Max. :1.0000
##
## PMDEP PMARR Southwest American
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.6361 Mean :0.7395 Mean :0.1372 Mean :0.1369
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Delta United Alaska Jetblue
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1338 Mean :0.07325 Mean :0.05351 Mean :0.06042
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
##
## Skywest Others DepStatus
## Min. :0.0000 Min. :0.0000 DelayedonDeparture:96727
## 1st Qu.:0.0000 1st Qu.:0.0000 OntimeDep :53273
## Median :0.0000 Median :0.0000
## Mean :0.1048 Mean :0.3001
## 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
##
## ArrStatus airline timewindowdep timewindowarr
## DelayedonArrival:150000 Others :35995 AM:54590 AM: 39082
## OntimeArr : 0 Southwest:20576 PM:95410 PM:110918
## American :20539
## Delta :20071
## Skywest :15721
## United :10987
## (Other) :26111
## daywindow
## Weekday:115602
## Weekend: 34398
##
##
##
##
##
Descriptive Statistics
# Descriptive statistics per column, keeping result columns 1-5, 8 and 9
# (vars, n, mean, sd, median, min, max). The call is namespace-qualified
# because Hmisc, attached later in this script, masks describe(); an
# unqualified call would pick up Hmisc's version when re-run in one session.
psych::describe(delay_df)[, c(1:5, 8, 9)]
## vars n mean sd median min max
## V1 1 150000 226169.94 130786.75 227201 7 449631
## FlightDate* 2 150000 15.36 8.55 15 1 31
## Reporting_Airline* 3 150000 9.17 5.39 9 1 17
## Origin* 4 150000 167.91 93.63 173 1 350
## Dest* 5 150000 171.07 93.92 181 1 350
## CRSDepTime 6 150000 1378.39 471.41 1405 1 2359
## DepTime 7 150000 1406.70 490.03 1429 1 2400
## DepDelay 8 150000 27.99 64.56 9 -28 2109
## DepDelayMinutes 9 150000 29.46 63.83 9 0 2109
## DepDel15 10 150000 0.43 0.49 0 0 1
## DepartureDelayGroups 11 150000 1.15 2.84 0 -2 12
## DepTimeBlk* 12 150000 9.53 4.67 10 1 19
## CRSArrTime 13 150000 1546.62 501.90 1608 1 2400
## ArrTime 14 150000 1529.05 550.74 1610 1 2400
## ArrDelay 15 150000 32.81 62.78 14 1 2153
## ArrDelayMinutes 16 150000 32.81 62.78 14 1 2153
## ArrDel15 17 150000 0.49 0.50 0 0 1
## ArrivalDelayGroups 18 150000 1.56 2.63 0 0 12
## ArrTimeBlk* 19 150000 11.24 4.84 12 1 19
## CRSElapsedTime 20 150000 143.16 74.03 125 -99 655
## ActualElapsedTime 21 150000 147.99 76.03 129 17 684
## AirTime 22 150000 117.91 74.18 99 8 653
## Distance 23 150000 826.75 606.55 667 31 4983
## WEEKEND 24 150000 0.23 0.42 0 0 1
## PMDEP 25 150000 0.64 0.48 1 0 1
## PMARR 26 150000 0.74 0.44 1 0 1
## Southwest 27 150000 0.14 0.34 0 0 1
## American 28 150000 0.14 0.34 0 0 1
## Delta 29 150000 0.13 0.34 0 0 1
## United 30 150000 0.07 0.26 0 0 1
## Alaska 31 150000 0.05 0.23 0 0 1
## Jetblue 32 150000 0.06 0.24 0 0 1
## Skywest 33 150000 0.10 0.31 0 0 1
## Others 34 150000 0.30 0.46 0 0 1
## DepStatus* 35 150000 1.36 0.48 1 1 2
## ArrStatus* 36 150000 1.00 0.00 1 1 1
## airline* 37 150000 5.99 2.97 7 1 11
## timewindowdep* 38 150000 1.64 0.48 2 1 2
## timewindowarr* 39 150000 1.74 0.44 2 1 2
## daywindow* 40 150000 1.23 0.42 1 1 2
Percentage of Delayed flights at Departure
# Count flights per value of the departure-delay flag, then print each
# value's share of all flights in percent (two decimals).
table.dep.delay <- table(delay_df$DepDel15)
round(100 * prop.table(table.dep.delay), digits = 2)
##
## 0 1
## 57.21 42.79
Bar Chart of Flight Delay at Departure
# Percentage share of each departure-delay flag value (two decimals).
tab1 <- round(100 * prop.table(table.dep.delay), digits = 2)
# One bar per flag value. barplot() returns the bar midpoints, which are
# reused below to position the value labels.
bp <- barplot(
  tab1,
  main = "Percentage of delay",
  xlab = "Departure Delay (No/Yes)",
  ylab = "Percent (%)",
  col = c("lightblue", "red"),
  legend.text = rownames(tab1),
  beside = TRUE,
  ylim = c(0, 90)
)
# Write each percentage just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab1, 1), pos = 3, cex = 1)
Percentage of Delayed flights at Arrival
# Count flights per value of the arrival-delay flag, then print each
# value's share of all flights in percent (two decimals).
table.arr.delay <- table(delay_df$ArrDel15)
round(100 * prop.table(table.arr.delay), digits = 2)
##
## 0 1
## 50.7 49.3
Bar Chart of Flight Delay at Arrival
# Percentage share of each arrival-delay flag value (two decimals).
tab2 <- round(prop.table(table.arr.delay) * 100, 2)
# Bar plot of the arrival percentages. Bar heights and legend labels are
# both taken from tab2 so the chart matches its arrival title and the value
# labels drawn below.
bp <- barplot(tab2,
              xlab = "Arrival Delay (No/Yes)", ylab = "Percent (%)",
              main = "Percentage of arrival",
              col = c("lightblue", "red"),
              legend.text = rownames(tab2),
              beside = TRUE,
              ylim = c(0, 90))
# Write each percentage just above the baseline of its bar.
text(bp, 0, round(tab2, 1), cex = 1, pos = 3)
Percentage of Flight Delay at Departure on weekend vs weekdays
# Cross-tabulate the WEEKEND flag (rows) against the departure-delay flag
# (columns) and convert each row to percentages (two decimals).
t1 <- table(delay_df$WEEKEND, delay_df$DepDel15)
t1 <- prop.table(t1, 1)
t1 <- round(t1 * 100, 2)
# Plot the second column (DepDel15 == 1) for each WEEKEND value. The legend
# labels come from this table's own row names rather than from an unrelated
# table.
bp <- barplot(t1[, 2],
              xlab = "Is weekend (No/Yes)", ylab = "Percent dept delay(%)",
              main = "Percentage of dept delay split by is weekend",
              col = c("lightblue", "red"),
              legend.text = rownames(t1),
              beside = TRUE,
              ylim = c(0, 90))
# Write each percentage just above the baseline of its bar.
text(bp, 0, round(t1[, 2], 1), cex = 1, pos = 3)
Percentage of Flight Delay at Arrival on weekend vs weekdays
# Row-wise percentages (two decimals) of the arrival-delay flag within each
# value of the WEEKEND flag; the table is printed.
t2 <- table(delay_df$WEEKEND, delay_df$ArrDel15)
t2 <- round(100 * prop.table(t2, margin = 1), digits = 2)
t2
##
## 0 1
## 0 50.80 49.20
## 1 50.35 49.65
# Bar plot of the second column of t2 (ArrDel15 == 1) for each WEEKEND
# value. The legend labels come from t2's own row names rather than from an
# unrelated table.
bp <- barplot(t2[, 2],
              xlab = "Is weekend (No/Yes)", ylab = "Percent arrival delay(%)",
              main = "Percentage of arrival delay split by is weekend",
              col = c("lightblue", "red"),
              legend.text = rownames(t2),
              beside = TRUE,
              ylim = c(0, 90))
# Write each percentage just above the baseline of its bar.
text(bp, 0, round(t2[, 2], 1), cex = 1, pos = 3)
Table of Delay at Departure and Arrival respectively
# Row-wise percentages (two decimals): for each departure-delay flag value
# (rows), the split across arrival-delay flag values (columns).
t3 <- table(delay_df$DepDel15, delay_df$ArrDel15)
t3 <- round(100 * prop.table(t3, margin = 1), digits = 2)
t3
##
## 0 1
## 0 76.37 23.63
## 1 16.38 83.62
Bar Chart of % of Departure Delays by Airlines
# Airline (rows) by departure-delay flag (columns), as row proportions.
t4 <- prop.table(table(delay_df$airline, delay_df$DepDel15), margin = 1)
# Same table in percent, rounded to two decimals; printed below.
tab5 <- round(100 * t4, digits = 2)
tab5
##
## 0 1
## Alaska 62.91 37.09
## American 56.52 43.48
## Delta 63.91 36.09
## Frontier 37.28 62.72
## Hawaiian 76.25 23.75
## Jetblue 50.86 49.14
## Others 56.23 43.77
## Skywest 60.64 39.36
## Southwest 52.35 47.65
## Spirit 57.59 42.41
## United 60.21 39.79
# One bar per airline showing the second column of tab5 (DepDel15 == 1).
# las = 2 turns the axis labels perpendicular to the axis.
# NOTE(review): args.legend is only used by barplot() when legend.text is
# supplied, so no legend is drawn here.
bp <- barplot(
  tab5[, 2],
  beside = TRUE,
  main = "Bar Chart for % of departure delays by Airline",
  xlab = "Airline",
  ylab = "Percent (%)",
  col = c("lightblue"),
  ylim = c(0, 100),
  las = 2,
  args.legend = list(title = "Joined", x = "topright", cex = .7)
)
# Write each percentage just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab5[, 2], 1), pos = 3, cex = 1)
Bar Chart of % of Arrival Delays by Airlines
# Airline (rows) by arrival-delay flag (columns), as row percentages
# rounded to two decimals.
tab6 <- table(delay_df$airline, delay_df$ArrDel15)
tab6 <- round(100 * prop.table(tab6, margin = 1), digits = 2)
# One bar per airline showing the second column of tab6 (ArrDel15 == 1).
# NOTE(review): args.legend is only used by barplot() when legend.text is
# supplied, so no legend is drawn here.
bp <- barplot(
  tab6[, 2],
  beside = TRUE,
  main = "Bar Chart for % of arrival delays by Airline",
  xlab = "Airline",
  ylab = "Percent (%)",
  col = c("pink"),
  ylim = c(0, 100),
  las = 2,
  args.legend = list(title = "Joined", x = "topright", cex = .7)
)
# Write each percentage just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab6[, 2], 1), pos = 3, cex = 1)
Bar Chart for % of departure delays by PM dept
# PMDEP flag (rows) by departure-delay flag (columns), as row proportions.
t7 <- prop.table(table(delay_df$PMDEP, delay_df$DepDel15), margin = 1)
# Same table in percent, rounded to two decimals; printed below.
tab7 <- round(100 * t7, digits = 2)
tab7
##
## 0 1
## 0 67.97 32.03
## 1 51.06 48.94
# One bar per PMDEP value showing the second column of tab7 (DepDel15 == 1).
# NOTE(review): args.legend is only used by barplot() when legend.text is
# supplied, so no legend is drawn here.
bp <- barplot(
  tab7[, 2],
  beside = TRUE,
  main = "Bar Chart for % of departure delays by PM dept",
  xlab = "Is Dept in PM",
  ylab = "Percent (%)",
  col = c("lightgreen"),
  ylim = c(0, 100),
  args.legend = list(title = "Joined", x = "topright", cex = .7)
)
# Write each percentage just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab7[, 2], 1), pos = 3, cex = 1)
Bar Chart for % of arrival delays by PM arrival
# PMARR flag (rows) by arrival-delay flag (columns), as row proportions.
t8 <- table(delay_df$PMARR, delay_df$ArrDel15)
t8 <- prop.table(t8, 1)
# Convert the arrival table t8 to percent (two decimals). The source table
# must be t8; using the departure table t7 here would only reproduce tab7.
tab8 <- round(t8 * 100, 2)
tab8
##
## 0 1
## 0 67.97 32.03
## 1 51.06 48.94
# One bar per row of tab8 showing its second column.
# NOTE(review): args.legend is only used by barplot() when legend.text is
# supplied, so no legend is drawn here.
bp <- barplot(
  tab8[, 2],
  beside = TRUE,
  main = "Bar Chart for % of arrival delays by PM arrival",
  xlab = "Is Arr in PM",
  ylab = "Percent (%)",
  col = c("yellow"),
  ylim = c(0, 100),
  args.legend = list(title = "Joined", x = "topright", cex = .7)
)
# Write each percentage just above the baseline of its bar.
text(x = bp, y = 0, labels = round(tab8[, 2], 1), pos = 3, cex = 1)
Mean of Delay (in mins) at Departure grouped by Airlines
# Mean of DepDelayMinutes for each airline, one row per airline.
summarise(
  group_by(delay_df, airline),
  mean_Dep_delay = mean(DepDelayMinutes)
)
## # A tibble: 11 x 2
## airline mean_Dep_delay
## <fct> <dbl>
## 1 Alaska 21.3
## 2 American 31.9
## 3 Delta 22.6
## 4 Frontier 44.8
## 5 Hawaiian 15.4
## 6 Jetblue 32.8
## 7 Others 33.9
## 8 Skywest 33.3
## 9 Southwest 23.6
## 10 Spirit 30.2
## 11 United 26.9
Mean of Delay (in mins) at Arrival grouped by Airlines
# Mean of ArrDelayMinutes for each airline, one row per airline.
summarise(
  group_by(delay_df, airline),
  mean_Arr_delay = mean(ArrDelayMinutes)
)
## # A tibble: 11 x 2
## airline mean_Arr_delay
## <fct> <dbl>
## 1 Alaska 27.3
## 2 American 36.0
## 3 Delta 25.0
## 4 Frontier 45.2
## 5 Hawaiian 18.4
## 6 Jetblue 36.4
## 7 Others 38.3
## 8 Skywest 37.7
## 9 Southwest 23.0
## 10 Spirit 33.3
## 11 United 32.3
Box Plots and Mean Plots of Average distance Split by Delay at Departure and Arrival Respectively
# Mean flight distance for each value of the departure-delay flag.
dist_deptDelay <- aggregate(
  x = delay_df$Distance,
  by = list(delay_df$DepDel15),
  FUN = mean
)
# Print the result with readable column headers (the stored object keeps
# its default names).
setNames(dist_deptDelay, c("Is Delayed", "Av. distance"))
## Is Delayed Av. distance
## 1 0 837.4344
## 2 1 812.4697
library(gplots)
## Warning: package 'gplots' was built under R version 3.5.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Distance versus the departure-delay flag: group means with labels, then
# the distribution per group as a boxplot.
plotmeans(
  delay_df$Distance ~ delay_df$DepDel15,
  data = delay_df,
  main = "Mean Plot for average distance, Split by Departure delay ",
  col = "Red",
  mean.labels = TRUE
)
boxplot(
  delay_df$Distance ~ delay_df$DepDel15,
  col = c("red", "light blue"),
  main = "Boxplot for distance Split by Departure Delay"
)
# Same two plots, split by the arrival-delay flag.
plotmeans(
  delay_df$Distance ~ delay_df$ArrDel15,
  data = delay_df,
  main = "Mean Plot for average distance, Split by Arrival delay ",
  col = "Red",
  mean.labels = TRUE
)
boxplot(
  delay_df$Distance ~ delay_df$ArrDel15,
  col = c("red", "light blue"),
  main = "Boxplot for distance Split by Arrival Delayed"
)
Box Plots and Mean Plots of Average airtime Split by Delay at Departure and Arrival Respectively
# Mean air time for each value of the departure-delay flag.
airtime_deptDelay <- aggregate(
  x = delay_df$AirTime,
  by = list(delay_df$DepDel15),
  FUN = mean
)
# Print the result with readable column headers (the stored object keeps
# its default names).
setNames(airtime_deptDelay, c("Is Delayed", "Av. airtime (min)"))
## Is Delayed Av. airtime (min)
## 1 0 122.0722
## 2 1 112.3394
# Air time versus the departure-delay flag: group means with labels, then
# the distribution per group as a boxplot.
plotmeans(
  delay_df$AirTime ~ delay_df$DepDel15,
  data = delay_df,
  main = "Mean Plot for average airtime, Split by Departure delay ",
  col = "Red",
  mean.labels = TRUE
)
boxplot(
  delay_df$AirTime ~ delay_df$DepDel15,
  col = c("red", "light blue"),
  main = "Boxplot for airtime Split by Departure delay"
)
# Same two plots, split by the arrival-delay flag.
plotmeans(
  delay_df$AirTime ~ delay_df$ArrDel15,
  data = delay_df,
  main = "Mean Plot for average airtime, Split by Arrival delay ",
  col = "Red",
  mean.labels = TRUE
)
boxplot(
  delay_df$AirTime ~ delay_df$ArrDel15,
  col = c("red", "light blue"),
  main = "Boxplot for airtime Split by arrival delay"
)
Correlation Matrix for all the continuous variables
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.5.2
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, units
# Pairwise correlations between the columns at positions 8-11, 15-18 and
# 20-26 of delay_df; the matrix is stored and printed.
cormatrix <- cor(x = delay_df[, c(8:11, 15:18, 20:26)])
print(cormatrix)
## DepDelay DepDelayMinutes DepDel15
## DepDelay 1.000000000 0.999145232 0.498637497
## DepDelayMinutes 0.999145232 1.000000000 0.484430181
## DepDel15 0.498637497 0.484430181 1.000000000
## DepartureDelayGroups 0.834132288 0.826162116 0.691874332
## ArrDelay 0.971283267 0.974144521 0.408441370
## ArrDelayMinutes 0.971283267 0.974144521 0.408441370
## ArrDel15 0.394078628 0.386050026 0.593690464
## ArrivalDelayGroups 0.800479548 0.798676773 0.543869864
## CRSElapsedTime -0.016983434 -0.016088038 -0.020575366
## ActualElapsedTime -0.063492528 -0.059531305 -0.106137424
## AirTime -0.042818798 -0.040864837 -0.064918918
## Distance -0.019136106 -0.018767724 -0.020364160
## WEEKEND 0.009658715 0.009635365 0.005054569
## PMDEP 0.068367134 0.062745627 0.164463444
## PMARR 0.061630591 0.055796584 0.150615342
## DepartureDelayGroups ArrDelay ArrDelayMinutes
## DepDelay 0.834132288 0.971283267 0.971283267
## DepDelayMinutes 0.826162116 0.974144521 0.974144521
## DepDel15 0.691874332 0.408441370 0.408441370
## DepartureDelayGroups 1.000000000 0.776933514 0.776933514
## ArrDelay 0.776933514 1.000000000 1.000000000
## ArrDelayMinutes 0.776933514 1.000000000 1.000000000
## ArrDel15 0.528994065 0.429015037 0.429015037
## ArrivalDelayGroups 0.924335190 0.827853634 0.827853634
## CRSElapsedTime -0.016000497 -0.010534275 -0.010534275
## ActualElapsedTime -0.082238284 -0.009117905 -0.009117905
## AirTime -0.051596505 -0.019277170 -0.019277170
## Distance -0.017806462 -0.015473569 -0.015473569
## WEEKEND 0.008850742 0.010543277 0.010543277
## PMDEP 0.108670171 0.041549168 0.041549168
## PMARR 0.100819019 0.037355466 0.037355466
## ArrDel15 ArrivalDelayGroups CRSElapsedTime
## DepDelay 0.394078628 0.800479548 -0.016983434
## DepDelayMinutes 0.386050026 0.798676773 -0.016088038
## DepDel15 0.593690464 0.543869864 -0.020575366
## DepartureDelayGroups 0.528994065 0.924335190 -0.016000497
## ArrDelay 0.429015037 0.827853634 -0.010534275
## ArrDelayMinutes 0.429015037 0.827853634 -0.010534275
## ArrDel15 1.000000000 0.603536524 0.024062663
## ArrivalDelayGroups 0.603536524 1.000000000 -0.007515994
## CRSElapsedTime 0.024062663 -0.007515994 1.000000000
## ActualElapsedTime 0.043094394 -0.003349743 0.979377903
## AirTime 0.018436956 -0.018606153 0.988311246
## Distance 0.015607520 -0.013820487 0.986639258
## WEEKEND 0.003799525 0.011259646 0.020933702
## PMDEP 0.093064263 0.065319341 -0.057943194
## PMARR 0.084134268 0.061051118 -0.002190770
## ActualElapsedTime AirTime Distance
## DepDelay -0.063492528 -0.04281880 -0.019136106
## DepDelayMinutes -0.059531305 -0.04086484 -0.018767724
## DepDel15 -0.106137424 -0.06491892 -0.020364160
## DepartureDelayGroups -0.082238284 -0.05159651 -0.017806462
## ArrDelay -0.009117905 -0.01927717 -0.015473569
## ArrDelayMinutes -0.009117905 -0.01927717 -0.015473569
## ArrDel15 0.043094394 0.01843696 0.015607520
## ArrivalDelayGroups -0.003349743 -0.01860615 -0.013820487
## CRSElapsedTime 0.979377903 0.98831125 0.986639258
## ActualElapsedTime 1.000000000 0.98275624 0.964160018
## AirTime 0.982756243 1.00000000 0.982260314
## Distance 0.964160018 0.98226031 1.000000000
## WEEKEND 0.020892406 0.02271114 0.024233121
## PMDEP -0.080159605 -0.06494807 -0.053403405
## PMARR -0.023620865 -0.01248622 0.002529857
## WEEKEND PMDEP PMARR
## DepDelay 0.009658715 0.068367134 0.061630591
## DepDelayMinutes 0.009635365 0.062745627 0.055796584
## DepDel15 0.005054569 0.164463444 0.150615342
## DepartureDelayGroups 0.008850742 0.108670171 0.100819019
## ArrDelay 0.010543277 0.041549168 0.037355466
## ArrDelayMinutes 0.010543277 0.041549168 0.037355466
## ArrDel15 0.003799525 0.093064263 0.084134268
## ArrivalDelayGroups 0.011259646 0.065319341 0.061051118
## CRSElapsedTime 0.020933702 -0.057943194 -0.002190770
## ActualElapsedTime 0.020892406 -0.080159605 -0.023620865
## AirTime 0.022711138 -0.064948066 -0.012486220
## Distance 0.024233121 -0.053403405 0.002529857
## WEEKEND 1.000000000 -0.005221578 0.002683795
## PMDEP -0.005221578 1.000000000 0.661975271
## PMARR 0.002683795 0.661975271 1.000000000
# Draw the correlation matrix `cormatrix` as a corrplot graphic.
library(corrplot)
## corrplot 0.84 loaded
# Disabled alternative kept for reference: a plot() of a wider column
# selection with reduced margins.
#par(mar=rep(2,4))
#plot(delay_df[,c(1,6:11,13:18,20:34)])
corrplot(cormatrix)
Scatter Plots
# ggplot2 only needs to be attached once for all four plots.
library(ggplot2)
# The delay flags are stored as integer 0/1 columns. They are wrapped in
# factor() so that ggplot2 uses a two-colour discrete scale instead of a
# continuous gradient; labs() keeps the legend title equal to the column name.

# Actual elapsed time per airline, coloured by the departure-delay flag.
ggplot(delay_df, aes(airline, ActualElapsedTime, color = factor(DepDel15))) +
  geom_point() +
  labs(color = "DepDel15")
# Actual elapsed time per airline, coloured by the arrival-delay flag.
ggplot(delay_df, aes(airline, ActualElapsedTime, color = factor(ArrDel15))) +
  geom_point() +
  labs(color = "ArrDel15")
# Distance per airline, coloured by the departure-delay flag.
ggplot(delay_df, aes(airline, Distance, color = factor(DepDel15))) +
  geom_point() +
  labs(color = "DepDel15")
# Distance per airline, coloured by the arrival-delay flag.
ggplot(delay_df, aes(airline, Distance, color = factor(ArrDel15))) +
  geom_point() +
  labs(color = "ArrDel15")