Introduction

This page explores the non-linear relationship between temperature and weather-related flight delays in Boston by plotting both as time series.


Load the required packages (install them first if they are missing)

library(readr)
suppressMessages(library(zoo))
suppressMessages(library(dplyr))
suppressMessages(library(xts))
suppressMessages(library(plotly))

Load the Boston flight dataset (BOS_airport.csv) from a local directory and name it flights

# Read the Boston airport CSV; the absolute path is specific to the author's machine.
BOS_airport <- read_csv(file = "C:/Users/User/Desktop/BOS_airport.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   carrier = col_character(),
##   carrier_name = col_character(),
##   airport = col_character(),
##   airport_name = col_character(),
##   carrier_ct = col_double(),
##   weather_ct = col_double(),
##   nas_ct = col_double(),
##   security_ct = col_double(),
##   late_aircraft_ct = col_double()
## )
## See spec(...) for full column specifications.
# Work on the data under the shorter name `flights`.
flights <- BOS_airport

# Preview the first ten rows.
head(flights, n = 10)
## # A tibble: 10 × 21
##     year month carrier                 carrier_name airport
##    <int> <int>   <chr>                        <chr>   <chr>
## 1   2003     6      AA       American Airlines Inc.     BOS
## 2   2003     6      AS         Alaska Airlines Inc.     BOS
## 3   2003     6      CO   Continental Air Lines Inc.     BOS
## 4   2003     6      DH      Atlantic Coast Airlines     BOS
## 5   2003     6      DL         Delta Air Lines Inc.     BOS
## 6   2003     6      EV  Atlantic Southeast Airlines     BOS
## 7   2003     6      FL  AirTran Airways Corporation     BOS
## 8   2003     6      HP   America West Airlines Inc.     BOS
## 9   2003     6      MQ American Eagle Airlines Inc.     BOS
## 10  2003     6      NW      Northwest Airlines Inc.     BOS
## # ... with 16 more variables: airport_name <chr>, arr_flights <int>,
## #   arr_del15 <int>, carrier_ct <dbl>, weather_ct <dbl>, nas_ct <dbl>,
## #   security_ct <dbl>, late_aircraft_ct <dbl>, arr_cancelled <int>,
## #   arr_diverted <int>, arr_delay <int>, carrier_delay <int>,
## #   weather_delay <int>, nas_delay <int>, security_delay <int>,
## #   late_aircraft_delay <int>

Convert the year and month columns into a date string that the “as.Date” function accepts

# Build one Date per row from year and month, pinned to the first day of the
# month, since the data carries no day component.
flights$date <- as.Date(sprintf("%d-%02d-01", flights$year, flights$month))
# Constant day-of-month helper column (always 1).
flights$days <- 1

# Move the new date column to the front of the table.
flights <- select(flights, date, everything())
# Several rows share a date because each airline is recorded separately.
head(flights, n = 10)
## # A tibble: 10 × 23
##          date  year month carrier                 carrier_name airport
##        <date> <int> <int>   <chr>                        <chr>   <chr>
## 1  2003-06-01  2003     6      AA       American Airlines Inc.     BOS
## 2  2003-06-01  2003     6      AS         Alaska Airlines Inc.     BOS
## 3  2003-06-01  2003     6      CO   Continental Air Lines Inc.     BOS
## 4  2003-06-01  2003     6      DH      Atlantic Coast Airlines     BOS
## 5  2003-06-01  2003     6      DL         Delta Air Lines Inc.     BOS
## 6  2003-06-01  2003     6      EV  Atlantic Southeast Airlines     BOS
## 7  2003-06-01  2003     6      FL  AirTran Airways Corporation     BOS
## 8  2003-06-01  2003     6      HP   America West Airlines Inc.     BOS
## 9  2003-06-01  2003     6      MQ American Eagle Airlines Inc.     BOS
## 10 2003-06-01  2003     6      NW      Northwest Airlines Inc.     BOS
## # ... with 17 more variables: airport_name <chr>, arr_flights <int>,
## #   arr_del15 <int>, carrier_ct <dbl>, weather_ct <dbl>, nas_ct <dbl>,
## #   security_ct <dbl>, late_aircraft_ct <dbl>, arr_cancelled <int>,
## #   arr_diverted <int>, arr_delay <int>, carrier_delay <int>,
## #   weather_delay <int>, nas_delay <int>, security_delay <int>,
## #   late_aircraft_delay <int>, days <dbl>
# Earliest and latest month covered by the flight data.
range(flights[["date"]])
## [1] "2003-06-01" "2016-09-01"

Select the date and weather_delay columns, then convert them into an xts object indexed by date

Weather Flight Delays column indicates the delay caused by the weather

# Keep only the date and the weather-caused delay. The columns are chosen by
# name rather than by position (1 and 19) so that a change in the CSV layout
# cannot silently pick up a different column.
weather_flights_delays <- select(flights, date, weather_delay)

# Convert to an xts series: weather_delay values indexed by their date.
weather_flights_delays_xts <- as.xts(
  weather_flights_delays["weather_delay"],
  order.by = weather_flights_delays$date
)
head(weather_flights_delays_xts, n = 10)
##            weather_delay
## 2003-06-01          1783
## 2003-06-01             0
## 2003-06-01           194
## 2003-06-01           312
## 2003-06-01           275
## 2003-06-01             0
## 2003-06-01            25
## 2003-06-01             0
## 2003-06-01           317
## 2003-06-01           550

Sum the weather_delay values of all carriers for each month

# Collapse the per-carrier rows into a single total for each calendar month.
weather_flights_delays_xts <- apply.monthly(
  weather_flights_delays_xts,
  FUN = sum
)
# Give the aggregated column a descriptive name.
colnames(weather_flights_delays_xts) <- "weather_flights_delays"
head(weather_flights_delays_xts, n = 10)
##            weather_flights_delays
## 2003-06-01                   4160
## 2003-07-01                   5024
## 2003-08-01                   7368
## 2003-09-01                   1751
## 2003-10-01                    653
## 2003-11-01                   1514
## 2003-12-01                   4979
## 2004-01-01                   4580
## 2004-02-01                   3425
## 2004-03-01                   2249

Fetch Boston temperature data from URL

library("weatherData")
# Download one year of weather for station "BOS" at a time (2006 through
# 2016), then stack the yearly tables into a single data frame.
weatherData2006_2016 <- do.call(
  what = rbind,
  args = lapply(
    X = 2006:2016,
    FUN = function(yr) {
      getWeatherForYear(station_id = "BOS", year = yr)
    }
  )
)
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC" 
##  [1] "CET"                        "Max_TemperatureC"          
##  [3] "Mean_TemperatureC"          "Min_TemperatureC"          
##  [5] "Dew_PointC"                 "MeanDew_PointC"            
##  [7] "Min_DewpointC"              "Max_Humidity"              
##  [9] "Mean_Humidity"              "Min_Humidity"              
## [11] "Max_Sea_Level_PressurehPa"  "Mean_Sea_Level_PressurehPa"
## [13] "Min_Sea_Level_PressurehPa"  "Max_VisibilityKm"          
## [15] "Mean_VisibilityKm"          "Min_VisibilitykM"          
## [17] "Max_Wind_SpeedKm_h"         "Mean_Wind_SpeedKm_h"       
## [19] "Max_Gust_SpeedKm_h"         "Precipitationmm"           
## [21] "CloudCover"                 "Events"                    
## [23] "WindDirDegrees"            
## [1] "Date"              "Max_TemperatureC"  "Mean_TemperatureC"
## [4] "Min_TemperatureC"
# Preview the first ten daily weather records.
head(weatherData2006_2016, n = 10)
##          Date Max_TemperatureC Mean_TemperatureC Min_TemperatureC
## 1  2006-01-01                8                 6                3
## 2  2006-01-02                8                 4                2
## 3  2006-01-03                8                 4                2
## 4  2006-01-04                5                 3                2
## 5  2006-01-05                4                 2                0
## 6  2006-01-06                6                 3                1
## 7  2006-01-07                6                 3                1
## 8  2006-01-08                3                 1               -1
## 9  2006-01-09                3                 0               -3
## 10 2006-01-10                2                -1               -4

Set every date to the first day of its month so that it is consistent with the index of weather_flights_delays_xts

 # Collapse every daily date to the first day of its month: keep the "YYYY-MM-"
 # prefix of the date and append the constant day held in the helper column
 # `days` (always 1).
 weatherData2006_2016$days <- 1
 weatherData2006_2016$Date <- as.Date(paste0(
   substring(as.character(weatherData2006_2016$Date), 1, 8),
   sprintf("%02d", weatherData2006_2016$days)
 ))
 # Fill gaps in the mean temperature with the last observed value.
 # na.rm = FALSE keeps any leading NAs in place so the result still has one
 # value per row; the default (na.rm = TRUE) drops them, and the shortened
 # vector could then not be assigned back to the column.
 weatherData2006_2016$Mean_TemperatureC <- na.locf(
   weatherData2006_2016$Mean_TemperatureC,
   na.rm = FALSE
 )
 summary(weatherData2006_2016)
##       Date            Max_TemperatureC Mean_TemperatureC Min_TemperatureC 
##  Min.   :2006-01-01   Min.   :-7.00    Min.   :-9.00     Min.   :-15.000  
##  1st Qu.:2008-10-01   1st Qu.:10.00    1st Qu.: 6.00     1st Qu.:  1.000  
##  Median :2011-07-01   Median :17.00    Median :11.00     Median :  6.000  
##  Mean   :2011-06-17   Mean   :16.94    Mean   :11.36     Mean   :  6.097  
##  3rd Qu.:2014-04-01   3rd Qu.:24.00    3rd Qu.:18.00     3rd Qu.: 12.000  
##  Max.   :2016-12-01   Max.   :37.00    Max.   :28.00     Max.   : 25.000  
##       days  
##  Min.   :1  
##  1st Qu.:1  
##  Median :1  
##  Mean   :1  
##  3rd Qu.:1  
##  Max.   :1
 # Drop every row that still contains a missing value in any column.
 weatherData2006_2016 <- na.omit(weatherData2006_2016)
 # All dates within a month now share the same month-start value.
 head(weatherData2006_2016[["Date"]], n = 10)
##  [1] "2006-01-01" "2006-01-01" "2006-01-01" "2006-01-01" "2006-01-01"
##  [6] "2006-01-01" "2006-01-01" "2006-01-01" "2006-01-01" "2006-01-01"

Convert weatherData2006_2016 to xts format, then calculate the monthly average temperature (the 2006-08 to 2016-08 window is selected in the next step)

# Turn every column except Date into an xts object indexed by the month-start
# Date. The Date column is excluded by name instead of by position.
weatherData2006_2016_xts <- xts(
  weatherData2006_2016[names(weatherData2006_2016) != "Date"],
  order.by = weatherData2006_2016$Date
)
# Average the daily mean temperature within each month. The column is picked
# by name so the result does not depend on column order.
Monthly_Temp_Mean <- apply.monthly(
  weatherData2006_2016_xts[, "Mean_TemperatureC"],
  mean
)
head(Monthly_Temp_Mean, n = 10)
##            Mean_TemperatureC
## 2006-01-01        -0.7741935
## 2006-02-01         1.2142857
## 2006-03-01         5.1935484
## 2006-04-01        10.3000000
## 2006-05-01        13.8387097
## 2006-06-01        17.1666667
## 2006-07-01        19.6129032
## 2006-08-01        19.3548387
## 2006-09-01        16.4000000
## 2006-10-01        12.0967742

Plot the monthly weather delays and the monthly average temperature individually

# Restrict both monthly series to the same window, 2006-08-01 to 2016-08-01.
Total_Number_of_Weather_flight_delays <-
  weather_flights_delays_xts["2006-08-01/2016-08-01"]
Monthly_Average_Temperature_Boston <-
  Monthly_Temp_Mean["2006-08-01/2016-08-01"]

# Weather delays over time.
plot.xts(
  Total_Number_of_Weather_flight_delays,
  major.format = "%y",
  xlab = "Year",
  ylab = "Flights"
)

# Average temperature over time.
plot.xts(
  Monthly_Average_Temperature_Boston,
  major.format = "%y",
  xlab = "Year",
  ylab = "Temperature"
)

Plot the monthly weather delays and the monthly average temperature together

# Join the two monthly series on their shared date index.
Weather_Flights_Delay_Vs_Temperature <- merge(
  Total_Number_of_Weather_flight_delays,
  Monthly_Average_Temperature_Boston
)
colnames(Weather_Flights_Delay_Vs_Temperature) <- c("Flights", "Temperature")

# Draw the two series in stacked panels that share the time axis. The
# placeholder subtitle "My Sub-title" has been removed so it no longer appears
# on the figure.
plot.zoo(
  Weather_Flights_Delay_Vs_Temperature,
  plot.type = "multiple",
  xlab = "Year",
  col = c("blue", "red"),
  main = "Weather Flights Delay VS. Temperature in Boston"
)