library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.4
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
# Open each nycflights13 table used below in the data viewer for a quick
# visual inspection (interactive exploration only; no values are assigned).
tibble::view(flights)
tibble::view(airports)
tibble::view(planes)
# Destination and arrival delay for flights with a positive arrival delay.
# filter() drops rows where the condition is NA, so flights with a missing
# arr_delay are excluded as well.
flights1 <- flights %>%
  filter(arr_delay > 0) %>%
  select(dest, arr_delay)
# Mean arrival delay per destination, one row per `dest`.
# flights1 holds only positive, non-missing delays, so no na.rm is needed.
flights2 <- summarise(
  group_by(flights1, dest),
  arr_delay = mean(arr_delay)
)
# Attach the per-destination mean delay to the airport coordinates and map it.
#
# inner_join() is used instead of semi_join(): a semi join only filters
# `airports` and throws away every column of `flights2`, so the mean delay
# could never reach the plot. Because `flights2` has exactly one row per
# `dest`, the inner join keeps the same airports and simply adds the
# `arr_delay` column.
flights3 <- airports %>%
  inner_join(flights2, by = c("faa" = "dest"))

# One point per destination airport, coloured by its mean arrival delay,
# drawn over the state outlines supplied by borders("state").
ggplot(flights3, aes(lon, lat, colour = arr_delay)) +
  borders("state") +
  geom_point() +
  coord_quickmap()
### Add the location of the origin and destination (i.e. the lat and lon) to flights.
# Destination side: match flights$dest against airports$faa and append every
# airports column (name, lat, lon, ...). The result is printed, not stored.
left_join(flights, airports, by = c("dest" = "faa"))
## # A tibble: 336,776 x 26
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ... with 336,766 more rows, and 18 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## # name <chr>, lat <dbl>, lon <dbl>, alt <dbl>, tz <dbl>, dst <chr>,
## # tzone <chr>
# Origin side: match flights$origin against airports$faa and append every
# airports column (name, lat, lon, ...). The result is printed, not stored.
left_join(x = flights, y = airports, by = c(origin = "faa"))
## # A tibble: 336,776 x 26
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ... with 336,766 more rows, and 18 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## # name <chr>, lat <dbl>, lon <dbl>, alt <dbl>, tz <dbl>, dst <chr>,
## # tzone <chr>
# Flights with a positive arrival delay, paired with the `year` recorded for
# the aircraft in `planes`. Both tables have a `year` column, so the join
# suffixes them: year.x comes from flights, year.y from planes. Tail numbers
# with no match in `planes` keep year.y = NA.
ageplane <- left_join(flights, planes, by = "tailnum") %>%
  filter(arr_delay > 0) %>%
  select(year.y, tailnum, arr_delay)
# Mean positive arrival delay for each plane year (year.y); rows whose
# year.y is NA form their own group.
ageplane1 <- summarise(
  group_by(ageplane, year.y),
  arr_delay = mean(arr_delay)
)
# Pair every flight with the weather observation for its origin airport and
# scheduled hour. The key columns carry the same names in both tables, so an
# unnamed key vector is equivalent to spelling out "x" = "x" for each one.
# inner_join() drops flights that have no matching weather row.
weather_keys <- c("origin", "year", "month", "day", "hour")
weather1 <- inner_join(flights, weather, by = weather_keys)
# Mean arrival delay per destination for flights on 13 June 2013.
# na.rm = TRUE skips flights whose arr_delay is missing.
weather2 <- flights %>%
  filter(year == 2013 & month == 6 & day == 13) %>%
  group_by(dest) %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE))
# Narrow view of flights: year, tail number, and the departure/arrival
# times plus arrival delay.
fltail <- select(
  flights,
  year, tailnum, dep_time, arr_time, arr_delay
)
# Tail numbers that appear on at least 100 flights, with their flight count
# in `n`. Flights without a tail number are ignored.
#
# count(tailnum) is used instead of group_by(tailnum) %>% count(): the
# grouped form returns a tibble that is still grouped by tailnum (one group
# per row), which silently changes how later verbs behave. count(tailnum)
# yields the same rows and columns, ungrouped.
plane100 <- flights %>%
  filter(!is.na(tailnum)) %>%
  count(tailnum) %>%
  filter(n >= 100)
# Flights whose destination code has no entry in airports$faa.
flights %>%
  anti_join(airports, by = c("dest" = "faa"))
## # A tibble: 7,602 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 544 545 -1 1004 1022
## 2 2013 1 1 615 615 0 1039 1100
## 3 2013 1 1 628 630 -2 1137 1140
## 4 2013 1 1 701 700 1 1123 1154
## 5 2013 1 1 711 715 -4 1151 1206
## 6 2013 1 1 820 820 0 1254 1310
## 7 2013 1 1 820 820 0 1249 1329
## 8 2013 1 1 840 845 -5 1311 1350
## 9 2013 1 1 909 810 59 1331 1315
## 10 2013 1 1 913 918 -5 1346 1416
## # ... with 7,592 more rows, and 11 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Airports whose faa code never appears as a destination in flights.
airports %>%
  anti_join(flights, by = c("faa" = "dest"))
## # A tibble: 1,357 x 8
## faa name lat lon alt tz dst tzone
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A America/New_Yo~
## 2 06A Moton Field Municipal A~ 32.5 -85.7 264 -6 A America/Chicago
## 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A America/Chicago
## 4 06N Randall Airport 41.4 -74.4 523 -5 A America/New_Yo~
## 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A America/New_Yo~
## 6 0A9 Elizabethton Municipal ~ 36.4 -82.2 1593 -5 A America/New_Yo~
## 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A America/New_Yo~
## 8 0G7 Finger Lakes Regional A~ 42.9 -76.8 492 -5 A America/New_Yo~
## 9 0P2 Shoestring Aviation Air~ 39.8 -76.6 1000 -5 U America/New_Yo~
## 10 0S9 Jefferson County Intl 48.1 -123. 108 -8 A America/Los_An~
## # ... with 1,347 more rows
# Flights that have a tail number, grouped by (tailnum, carrier). No summary
# is applied here, so the result is the filtered flights table carrying that
# grouping for whatever verb is applied next.
Tailcarrier <- group_by(
  filter(flights, !is.na(tailnum)),
  tailnum, carrier
)
```