library(nycflights13)
library(tidyverse)

13.4.6 (1)

compute average delay by destination

# Bind the package datasets to names in the current environment.
flights <- nycflights13::flights
airports <- nycflights13::airports

# Mean departure delay per destination; flights with no recorded departure
# delay are left out of the average.
delay <- flights %>%
  drop_na(dep_delay) %>%
  group_by(dest) %>%
  summarise(avg_delay = mean(dep_delay))

# Attach airport coordinates to every destination in `delay` and draw one
# point per destination, coloured by its mean delay, on top of state outlines.
ggplot(
  data = right_join(airports, delay, by = c("faa" = "dest")),
  mapping = aes(x = lon, y = lat, color = avg_delay)
) +
  borders("state") +
  geom_point() +
  coord_quickmap()

Make 2 dataframes with lon and lat columns:

# Both lookup tables start out as the same three airport columns
# (airport code plus coordinates), so build it once and copy it.
origin_locations <- select(airports, faa, lon, lat)
dest_locations <- origin_locations

change names to origin and destination:

# Prefix the coordinate columns so both tables can be joined onto the same
# flights table without their column names colliding.
origin_locations <- origin_locations %>%
  rename(origin_lon = lon, origin_lat = lat)
dest_locations <- dest_locations %>%
  rename(dest_lon = lon, dest_lat = lat)

join locations to new flights dataframe:

# Add origin coordinates, then destination coordinates, to every flight.
# Left joins keep all flights even when an airport code has no match.
flights2 <- flights %>%
  left_join(origin_locations, by = c("origin" = "faa")) %>%
  left_join(dest_locations, by = c("dest" = "faa"))

# Show the first rows of the combined table.
flights2 %>% head()

Compute average arrival delay for each plane:

# Bind the planes dataset to a name in the current environment.
planes <- nycflights13::planes

# Mean arrival delay per tail number; flights with no recorded arrival delay
# are left out of the average.
plane_delays <- flights %>%
  drop_na(arr_delay) %>%
  group_by(tailnum) %>%
  summarise(mean_delay = mean(arr_delay))

# Attach the plane metadata to each tail number's mean delay. Tail numbers
# with no entry in `planes` are kept, with NA metadata.
plane_data <- left_join(plane_delays, planes, by = "tailnum")

Overall there is very little correlation between a plane's year and its average arrival delay, but to the extent that there is one, it seems like newer planes are subject to longer delays

# Combine every flight with the weather observation for its origin airport
# and hour. `airport_weather` feeds this plot and the weather plots that
# follow, so it has to exist before any of them run.
# full_join also keeps weather hours that have no flights; those rows have no
# arr_delay and are dropped by ggplot when plotting.
airport_weather <- flights %>%
  full_join(weather, by = c("origin", "time_hour"))

# Arrival delay against humidity, one point per flight.
airport_weather %>%
  ggplot() +
  geom_point(mapping = aes(x = humid, y = arr_delay))

Delay time is weakly correlated with humidity

# Arrival delay against humidity, one point per flight.
ggplot(airport_weather, aes(x = humid, y = arr_delay)) +
  geom_point()

# Arrival delay against pressure, one point per flight.
ggplot(airport_weather, aes(x = pressure, y = arr_delay)) +
  geom_point()

No clear correlation between pressure and arrival delay

# Arrival delay against temperature, one point per flight.
ggplot(airport_weather, aes(x = temp, y = arr_delay)) +
  geom_point()

No clear correlation between temperature and delay

# Mean arrival delay per destination for flights dated 2013-06-13.
# year, month and day hold numbers, so they are compared with numeric
# literals; comparing them with strings such as "6" only matches because R
# silently coerces the whole column to character first.
june_13_2013 <- flights %>%
  filter(year == 2013, month == 6, day == 13, !is.na(arr_delay)) %>%
  group_by(dest) %>%
  summarise(avg_delay = mean(arr_delay))

# Map each destination, encoding the mean delay in both colour and point size.
# right_join keeps every destination in june_13_2013, including any that have
# no matching row in airports.
airports %>%
  right_join(june_13_2013, by = c("faa" = "dest")) %>%
  ggplot(aes(lon, lat, color = avg_delay, size = avg_delay)) +
    borders("state") +
    geom_point() +
    coord_quickmap()

There were tornadoes in Ohio on June 13, 2013, and we see large delays in Ohio on that day. We also see large delays elsewhere, but this could be due in part to a ripple effect from the delays and weather in Ohio.

13.5.1

Flights with a missing tail number have scheduled arrival and departure times, but no actual arrival or departure times and no air time. My guess would be that they represent canceled flights.

# Planes (identified by tail number) with more than 100 flights.
# Flights with a missing tailnum are dropped first: grouped together they
# would be counted as a single NA "plane", and if that group passed the
# threshold every flight without a tail number would be kept below.
flights_over_100 <- flights %>%
  filter(!is.na(tailnum)) %>%
  count(tailnum, name = "total_flights") %>%
  filter(total_flights > 100)

# Keep only the flights flown by those planes. semi_join filters `flights`
# without adding columns or duplicating rows.
flights %>%
  semi_join(flights_over_100, by = "tailnum")
# Vehicle records whose make/model combination also appears in `common`.
common <- fueleconomy::common
vehicles <- fueleconomy::vehicles

# The join keys are spelled out instead of letting dplyr pick every shared
# column name (which also prints a "Joining, by = ..." message), so the join
# keeps matching on make and model even if the tables gain other columns in
# common.
common_vehicles <- inner_join(
  x = vehicles,
  y = common,
  by = c("make", "model")
)

Filter for 48 hours with most delay

# Total arrival delay accumulated in each scheduled hour, ignoring flights
# with no recorded arrival delay; keep the 48 hours with the largest totals.
worst_hours <- flights %>%
  drop_na(arr_delay) %>%
  group_by(time_hour) %>%
  summarise(delay_this_hour = sum(arr_delay)) %>%
  arrange(desc(delay_this_hour)) %>%
  slice_head(n = 48)

Join with weather data

# Weather observations recorded during the 48 worst hours. A single hour can
# match several weather rows, since `weather` also has an `origin` column.
weather_delay <- weather %>%
  inner_join(worst_hours, by = "time_hour")

Compute stats for this time period and compare with overall data

# Column-wise summary statistics of the weather during the worst-delay hours.
weather_delay %>% summary()
    origin               year          month            day             hour            temp            dewp           humid           wind_dir    
 Length:143         Min.   :2013   Min.   : 3.00   Min.   : 2.00   Min.   : 8.00   Min.   :30.92   Min.   :28.04   Min.   : 30.95   Min.   :  0.0  
 Class :character   1st Qu.:2013   1st Qu.: 6.00   1st Qu.: 8.00   1st Qu.:17.00   1st Qu.:71.51   1st Qu.:62.33   1st Qu.: 63.53   1st Qu.:140.0  
 Mode  :character   Median :2013   Median : 7.00   Median :13.00   Median :17.00   Median :77.00   Median :69.08   Median : 78.96   Median :180.0  
                    Mean   :2013   Mean   : 6.42   Mean   :16.03   Mean   :17.15   Mean   :74.28   Mean   :64.43   Mean   : 74.44   Mean   :180.7  
                    3rd Qu.:2013   3rd Qu.: 7.00   3rd Qu.:23.50   3rd Qu.:18.50   3rd Qu.:82.94   3rd Qu.:71.33   3rd Qu.: 88.25   3rd Qu.:220.0  
                    Max.   :2013   Max.   :10.00   Max.   :30.00   Max.   :20.00   Max.   :96.08   Max.   :75.20   Max.   :100.00   Max.   :360.0  
                                                                                                                                    NA's   :2      
   wind_speed      wind_gust         precip           pressure          visib         time_hour                   delay_this_hour
 Min.   : 0.00   Min.   :16.11   Min.   :0.00000   Min.   : 998.4   Min.   : 0.12   Min.   :2013-03-08 08:00:00   Min.   :5548   
 1st Qu.:10.36   1st Qu.:20.71   1st Qu.:0.00000   1st Qu.:1009.4   1st Qu.: 8.00   1st Qu.:2013-06-13 18:00:00   1st Qu.:5919   
 Median :12.66   Median :23.02   Median :0.00000   Median :1010.8   Median :10.00   Median :2013-07-07 19:00:00   Median :6442   
 Mean   :13.37   Mean   :23.62   Mean   :0.02874   Mean   :1011.2   Mean   : 8.20   Mean   :2013-06-29 07:47:49   Mean   :6737   
 3rd Qu.:16.11   3rd Qu.:25.32   3rd Qu.:0.01000   3rd Qu.:1014.4   3rd Qu.:10.00   3rd Qu.:2013-07-28 15:00:00   3rd Qu.:7336   
 Max.   :33.37   Max.   :41.43   Max.   :0.48000   Max.   :1020.8   Max.   :10.00   Max.   :2013-10-07 17:00:00   Max.   :8961   
                 NA's   :97                        NA's   :40                                                                    
# Column-wise summary statistics of all weather observations, as the baseline
# to compare the worst-delay hours against.
weather %>% summary()
    origin               year          month             day             hour            temp             dewp           humid           wind_dir    
 Length:26115       Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   : 0.00   Min.   : 10.94   Min.   :-9.94   Min.   : 12.74   Min.   :  0.0  
 Class :character   1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 6.00   1st Qu.: 39.92   1st Qu.:26.06   1st Qu.: 47.05   1st Qu.:120.0  
 Mode  :character   Median :2013   Median : 7.000   Median :16.00   Median :11.00   Median : 55.40   Median :42.08   Median : 61.79   Median :220.0  
                    Mean   :2013   Mean   : 6.504   Mean   :15.68   Mean   :11.49   Mean   : 55.26   Mean   :41.44   Mean   : 62.53   Mean   :199.8  
                    3rd Qu.:2013   3rd Qu.: 9.000   3rd Qu.:23.00   3rd Qu.:17.00   3rd Qu.: 69.98   3rd Qu.:57.92   3rd Qu.: 78.79   3rd Qu.:290.0  
                    Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :23.00   Max.   :100.04   Max.   :78.08   Max.   :100.00   Max.   :360.0  
                                                                                    NA's   :1        NA's   :1       NA's   :1        NA's   :460    
   wind_speed         wind_gust         precip            pressure          visib          time_hour                  
 Min.   :   0.000   Min.   :16.11   Min.   :0.000000   Min.   : 983.8   Min.   : 0.000   Min.   :2013-01-01 01:00:00  
 1st Qu.:   6.905   1st Qu.:20.71   1st Qu.:0.000000   1st Qu.:1012.9   1st Qu.:10.000   1st Qu.:2013-04-01 21:30:00  
 Median :  10.357   Median :24.17   Median :0.000000   Median :1017.6   Median :10.000   Median :2013-07-01 14:00:00  
 Mean   :  10.518   Mean   :25.49   Mean   :0.004469   Mean   :1017.9   Mean   : 9.255   Mean   :2013-07-01 18:26:37  
 3rd Qu.:  13.809   3rd Qu.:28.77   3rd Qu.:0.000000   3rd Qu.:1023.0   3rd Qu.:10.000   3rd Qu.:2013-09-30 13:00:00  
 Max.   :1048.361   Max.   :66.75   Max.   :1.210000   Max.   :1042.1   Max.   :10.000   Max.   :2013-12-30 18:00:00  
 NA's   :4          NA's   :20778                      NA's   :2729                                                   

Temperature, dewpoint, humidity, wind speed, and precipitation were all higher on average during times of greater delay

anti_join() returns all rows from x where there are no matching values in y, keeping just columns from x.

anti_join(flights, airports, by = c("dest" = "faa")) - returns the rows of flights whose destination is not listed in airports

anti_join(airports, flights, by = c("faa" = "dest")) - returns the rows of airports that were not the destination of any flight in flights

# Carrier and tail number of every flight.
planes_carriers <- flights %>%
  select(carrier, tailnum)

# Number of distinct carriers each plane has flown for. Flights with a
# missing tail number are excluded: they do not identify a plane, and grouped
# together they would show up as one NA "plane" shared by several carriers.
unique_carriers <- planes_carriers %>%
  filter(!is.na(tailnum)) %>%
  group_by(tailnum) %>%
  summarise(carriers = n_distinct(carrier))

# Planes that appear under more than one carrier.
unique_carriers %>% filter(carriers > 1)

It seems that some planes correspond to more than one carrier

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkobnljZmxpZ2h0czEzKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgoKMTMuNC42CigxKQoKY29tcHV0ZSBhdmVyYWdlIGRlbGF5IGJ5IGRlc3RpbmF0aW9uCgpgYGB7cn0KZmxpZ2h0cyA8LSBmbGlnaHRzCmRlbGF5IDwtCiAgZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKCFpcy5uYShkZXBfZGVsYXkpKSAlPiUKICBncm91cF9ieShkZXN0KSAlPiUKICBzdW1tYXJpc2UoYXZnX2RlbGF5ID0gbWVhbihkZXBfZGVsYXkpKQpgYGAKCgpgYGB7cn0KYWlycG9ydHMgPC0gYWlycG9ydHMKYWlycG9ydHMgJT4lIAogIHJpZ2h0X2pvaW4oZGVsYXksIGMoImZhYSIgPSAiZGVzdCIpKSAlPiUKICBnZ3Bsb3QoYWVzKGxvbiwgbGF0LCBjb2xvciA9IGF2Z19kZWxheSkpICsKICAgIGJvcmRlcnMoInN0YXRlIikgKwogICAgZ2VvbV9wb2ludCgpICsKICAgIGNvb3JkX3F1aWNrbWFwKCkKYGBgCgooMikKCk1ha2UgMiBkYXRhZnJhbWVzIHdpdGggbG9uIGFuZCBsYXQgY29sdW1uczoKCmBgYHtyfQpvcmlnaW5fbG9jYXRpb25zIDwtIGFpcnBvcnRzICU+JSBzZWxlY3QoYygiZmFhIiwgImxvbiIsICJsYXQiKSkKZGVzdF9sb2NhdGlvbnMgPC0gYWlycG9ydHMgJT4lIHNlbGVjdChjKCJmYWEiLCAibG9uIiwgImxhdCIpKQpgYGAKCmNoYW5nZSBuYW1lcyB0byBvcmlnaW4gYW5kIGRlc3RpbmF0aW9uOgoKYGBge3J9Cm9yaWdpbl9sb2NhdGlvbnMgPC0gcmVuYW1lKG9yaWdpbl9sb2NhdGlvbnMsIGMoIm9yaWdpbl9sb24iID0gbG9uLCAib3JpZ2luX2xhdCIgPSBsYXQpKQpkZXN0X2xvY2F0aW9ucyA8LSByZW5hbWUoZGVzdF9sb2NhdGlvbnMsIGMoImRlc3RfbG9uIiA9IGxvbiwgImRlc3RfbGF0IiA9IGxhdCkpCmBgYAoKam9pbiBsb2NhdGlvbnMgdG8gbmV3IGZsaWdodHMgZGF0YWZyYW1lOgoKYGBge3J9CmZsaWdodHMyIDwtIGZsaWdodHMgJT4lIGxlZnRfam9pbihvcmlnaW5fbG9jYXRpb25zLCBieSA9IGMoIm9yaWdpbiIgPSAiZmFhIikpCmZsaWdodHMyIDwtIGZsaWdodHMyICU+JSBsZWZ0X2pvaW4oZGVzdF9sb2NhdGlvbnMsIGJ5ID0gYygiZGVzdCIgPSAiZmFhIikpCmBgYAoKYGBge3J9CmhlYWQoZmxpZ2h0czIpCmBgYAoKKDMpCgpDb21wdXRlIGF2ZXJhZ2UgYXJyaXZhbCBkZWxheSBmb3IgZWFjaCBwbGFuZToKCmBgYHtyfQpwbGFuZV9kZWxheXMgPC0gZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKCFpcy5uYShhcnJfZGVsYXkpKSAlPiUKICBncm91cF9ieSh0YWlsbnVtKSAlPiUKICBzdW1tYXJpc2UobWVhbl9kZWxheSA9IG1lYW4oYXJyX2RlbGF5KSkKYGBgCgpgYGB7cn0KcGxhbmVzIDwtIHBsYW5lcwpwbGFuZV9kYXRhIDwtIHBsYW5lX2RlbGF5cyAlPiUgbGVmdF9qb2luKHBsYW5lcywgYnkgPSBjKCJ0YWlsbnVtIiA9ICJ0YWlsbnVtIikpCmBgYAoKYGBge3J9CnBsYW5lX2RhdGEgJT4lCiAgZ2dwbG90KCkgKyAKICBnZW9tX3BvaW50KG1hcHBp
bmcgPSBhZXMoeCA9IHllYXIsIHkgPSBtZWFuX2RlbGF5KSkKYGBgCgpPdmVyYWxsIHRoZXJlIGlzIHZlcnkgbGl0dGxlIGNvcnJlbGF0aW9uLCBidXQgdG8gdGhlIGV4dGVudCB0aGF0IHRoZXJlIGlzLAppdCBzZWVtcyBsaWtlIG5ld2VyIHBsYW5lcyBhcmUgc3ViamVjdCB0byBsb25nZXIgZGVsYXlzCgooNCkKCmBgYHtyfQphaXJwb3J0X3dlYXRoZXIgPC0gZmxpZ2h0cyAlPiUgZnVsbF9qb2luKHdlYXRoZXIsIGJ5ID0gYygib3JpZ2luIiA9ICJvcmlnaW4iLCAidGltZV9ob3VyIiA9ICJ0aW1lX2hvdXIiKSkKYGBgCgoKYGBge3J9CmFpcnBvcnRfd2VhdGhlciAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSBodW1pZCwgeSA9IGFycl9kZWxheSkpCmBgYAoKRGVsYXkgdGltZSBpcyB3ZWFrbHkgY29ycmVsYXRlZCB0byBodW1pZGl0eQoKYGBge3J9CmFpcnBvcnRfd2VhdGhlciAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSBodW1pZCwgeSA9IGFycl9kZWxheSkpCmBgYAoKYGBge3J9CmFpcnBvcnRfd2VhdGhlciAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSBwcmVzc3VyZSwgeSA9IGFycl9kZWxheSkpCmBgYAoKTm8gY2xlYXIgY29ycmVsYXRpb24gYmV0d2VlbiBwcmVzc3VyZSBhbmQgYXJyaXZhbCBkZWxheQoKYGBge3J9CmFpcnBvcnRfd2VhdGhlciAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSB0ZW1wLCB5ID0gYXJyX2RlbGF5KSkKYGBgCgpObyBjbGVhciBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRlbXBlcmF0dXJlIGFuZCBkZWxheQoKKDUpCgpgYGB7cn0KanVuZV8xM18yMDEzIDwtIGZsaWdodHMgJT4lCiAgZmlsdGVyKHllYXIgPT0gIjIwMTMiLCBtb250aCA9PSAiNiIsIGRheSA9PSAiMTMiLCAhaXMubmEoYXJyX2RlbGF5KSkgJT4lCiAgZ3JvdXBfYnkoZGVzdCkgJT4lCiAgc3VtbWFyaXNlKGF2Z19kZWxheSA9IG1lYW4oYXJyX2RlbGF5KSkKYGBgCgoKYGBge3J9CmFpcnBvcnRzICU+JSAKICByaWdodF9qb2luKGp1bmVfMTNfMjAxMywgYygiZmFhIiA9ICJkZXN0IikpICU+JQogIGdncGxvdChhZXMobG9uLCBsYXQsIGNvbG9yID0gYXZnX2RlbGF5LCBzaXplID0gYXZnX2RlbGF5KSkgKwogICAgYm9yZGVycygic3RhdGUiKSArCiAgICBnZW9tX3BvaW50KCkgKwogICAgY29vcmRfcXVpY2ttYXAoKQpgYGAKClRoZXJlIHdlcmUgVG9ybmFkb3MgaW4gT2hpbyBvbiBKdW5lIDEzLCAyMDEzLCBhbmQgd2Ugc2VlIGxhcmdlIGRlbGF5cyBpbiBPaGlvIG9uCnRoYXQgZGF5LiBXZSBhbHNvIHNlZSBsYXJnZSBkZWxheXMgZWxzZXdoZXJlLCBidXQgdGhpcyBjb3VsZCBiZSBkdWUgaW4gcGFydCB0byBhCnJpcHBsZSBlZmZlY3QgZnJvbSB0aGUgZGVsYXlzIGFuZCB3ZWF0aGVyIGluIE9oaW8uCgoKCjEzLjUuMQoKCigxKQoKYGBge3J9Cm1pc3NpbmdfbnVtIDwtIGZsaWdodHMgJT4lCiAg
ZmlsdGVyKGlzLm5hKHRhaWxudW0pKQpgYGAKClRoZXkgaGF2ZSBzY2hlZHVsZWQgYXJyaXZhbCBhbmQgZGVwYXJ0dXJlIHRpbWVzICwgYnV0IG5vIGFjdHVhbCBhcnJpdmFsIGFuZCAKZGVwYXJ0dXJlIHRpbWVzLCBvciBhaXIgdGltZS4gTXkgZ3Vlc3Mgd291bGQgYmUgdGhhdCB0aGV5IHJlcHJlc2VudCBjYW5jZWxlZApmbGlnaHRzLgoKKDIpCgpgYGB7cn0KZmxpZ2h0c19vdmVyXzEwMCA8LSBmbGlnaHRzICU+JQogIGdyb3VwX2J5KHRhaWxudW0pICU+JQogIHN1bW1hcmlzZSh0b3RhbF9mbGlnaHRzID0gbigpKSAlPiUKICBmaWx0ZXIodG90YWxfZmxpZ2h0cyA+IDEwMCkKZmxpZ2h0cyAlPiUKICBmaWx0ZXIodGFpbG51bSAlaW4lIGZsaWdodHNfb3Zlcl8xMDAkdGFpbG51bSkKYGBgCgooMykKCmBgYHtyfQpjb21tb24gPC0gZnVlbGVjb25vbXk6OmNvbW1vbgp2ZWhpY2xlcyA8LSBmdWVsZWNvbm9teTo6dmVoaWNsZXMKY29tbW9uX3ZlaGljbGVzIDwtIGlubmVyX2pvaW4oeCA9IHZlaGljbGVzLCB5ID0gY29tbW9uLCBieSA9IGMoIm1ha2UiLCAibW9kZWwiKSkKYGBgCgoKKDQpCgpGaWx0ZXIgZm9yIDQ4IGhvdXJzIHdpdGggbW9zdCBkZWxheQoKYGBge3J9CndvcnN0X2hvdXJzIDwtIGZsaWdodHMgJT4lIAogIGZpbHRlcighaXMubmEoYXJyX2RlbGF5KSkgJT4lCiAgZ3JvdXBfYnkodGltZV9ob3VyKSAlPiUKICBzdW1tYXJpc2UoZGVsYXlfdGhpc19ob3VyID0gc3VtKGFycl9kZWxheSkpICU+JQogIGFycmFuZ2UoZGVzYyhkZWxheV90aGlzX2hvdXIpKSAlPiUKICBoZWFkKDQ4KQpgYGAKCkpvaW4gd2l0aCB3ZWF0aGVyIGRhdGEKCmBgYHtyfQp3ZWF0aGVyX2RlbGF5IDwtIGlubmVyX2pvaW4oeCA9IHdlYXRoZXIsIHkgPSB3b3JzdF9ob3VycywgYnkgPSBjKCJ0aW1lX2hvdXIiKSkKYGBgCgpDb21wdXRlIHN0YXRzIGZvciB0aGlzIHRpbWUgcGVyaW9kIGFuZCBjb21wYXJlIHdpdGggb3ZlcmFsbCBkYXRhCgpgYGB7cn0Kc3VtbWFyeSh3ZWF0aGVyX2RlbGF5KQpzdW1tYXJ5KHdlYXRoZXIpCmBgYAoKVGVtcGVyYXR1cmUsIGRld3BvaW50LCBodW1pZCwgd2luZCBzcGVlZCwgYW5kIHByZWNpcGl0YXRpb24gd2VyZSBhbGwgaGlnaGVyIG9uCmF2ZXJhZ2UgZHVyaW5nIHRpbWVzIG9mIGdyZWF0ZXIgZGVsYXkKCig1KQoKYW50aV9qb2luKCkKcmV0dXJuIGFsbCByb3dzIGZyb20geCB3aGVyZSB0aGVyZSBhcmUgbm90IG1hdGNoaW5nIHZhbHVlcyBpbiB5LCBrZWVwaW5nIGp1c3QgY29sdW1ucyBmcm9tIHguCgphbnRpX2pvaW4oZmxpZ2h0cywgYWlycG9ydHMsIGJ5ID0gYygiZGVzdCIgPSAiZmFhIikpCiAtIHdpbGwgdGVsbCB5b3UgdGhlIGZsaWdodHMgZGF0YSBmb3IgZmxpZ2h0cyB3aGVyZSB0aGUgZGVzdGluYXRpb24gaXMgbm90IGluIGFpcnBvcnRzCgphbnRpX2pvaW4oYWlycG9ydHMsIGZsaWdodHMsIGJ5ID0gYygiZmFhIiA9ICJkZXN0IikpCiAtIHdpbGwgdGVsbCB5b3UgdGhlIGFpcnBvcnRzIGRhdGEgZm9yIGFpcnBvcnRzIHdo
ZXJlIG5vIGZsaWdodHMgZnJvbSB0aGUgZmxpZ2h0cyAKIGRhdGEgd2VyZSBkZXN0aW5lZAogCiAoNikKIApgYGB7cn0KcGxhbmVzX2NhcnJpZXJzIDwtIGZsaWdodHMgJT4lCiAgc2VsZWN0KGMoImNhcnJpZXIiLCAidGFpbG51bSIpKQpgYGAKIApgYGB7cn0KdW5pcXVlX2NhcnJpZXJzIDwtIHBsYW5lc19jYXJyaWVycyAlPiUKICBncm91cF9ieSh0YWlsbnVtKSAlPiUKICBzdW1tYXJpc2UoY2FycmllcnMgPSBsZW5ndGgodW5pcXVlKGNhcnJpZXIpKSkKCnVuaXF1ZV9jYXJyaWVycyAlPiUgZmlsdGVyKGNhcnJpZXJzID4gMSkKYGBgCkl0IHNlZW1zIHRoYXQgc29tZSBwbGFuZXMgY29ycmVzcG9uZCB0byBtb3JlIHRoYW4gb25lIGNhcnJpZXIK