anti_join(flights, airports, by = c("dest" = "faa"))
tell you? What does anti_join(airports, flights, by = c("faa" = "dest"))
tell you?routes <- c("origin", "dest") %>%
map_dfr(~ flights %>%
count(origin, dest) %>%
mutate(faa = .[[.x]]) %>%
left_join(airports, by = c("faa")) %>%
select(origin, dest, lat, lon, n) %>%
mutate(type = .x)) %>%
mutate(id = str_c(origin, dest, sep = "->"))
ggplot(routes, aes(lon, lat)) +
borders("state") +
geom_point(aes(color = type)) +
geom_line(aes(group = id, alpha = n), size = 2) +
guides(color = FALSE) +
coord_quickmap()
airport
との対応がとれなかったdest
一覧
flights %>%
select(dest) %>%
unique %>% anti_join(airports, by = c("dest" = "faa")) %>%
kable()
dest |
---|
BQN |
SJU |
STT |
PSE |
weather$origin
とairports$faa
dest?
special_days <- tribble(
~month, ~day, ~special,
1, 1, "元日",
12, 31, "大晦日",
)
flights %>% left_join(special_days)
flights %>% mutate(key = row_number())
Lahman::Batting
: playerID, yearID, stintbabynames::babynames
: year, name, sexnasaweather::atmos
: lat, long, year, monthfueleconomy::vehicles
: idggplot2::diamonds
: 無いめんどう
flights %>%
group_by(dest) %>%
summarise(ave_delay = mean(arr_delay, na.rm = TRUE)) %>%
left_join(airports, c("dest" = "faa")) %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point(aes(size = ave_delay, color = ave_delay)) +
coord_quickmap()
flights %>%
left_join(airports, c("origin" = "faa")) %>%
left_join(airports, c("dest" = "faa"), suffix = c(".origin", ".dest")) %>%
select(origin, dest, matches("^(lon|lat)"))
## # A tibble: 336,776 x 6
## origin dest lat.origin lon.origin lat.dest lon.dest
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 EWR IAH 40.7 -74.2 30.0 -95.3
## 2 LGA IAH 40.8 -73.9 30.0 -95.3
## 3 JFK MIA 40.6 -73.8 25.8 -80.3
## 4 JFK BQN 40.6 -73.8 NA NA
## 5 LGA ATL 40.8 -73.9 33.6 -84.4
## 6 EWR ORD 40.7 -74.2 42.0 -87.9
## 7 EWR FLL 40.7 -74.2 26.1 -80.2
## 8 LGA IAD 40.8 -73.9 38.9 -77.5
## 9 JFK MCO 40.6 -73.8 28.4 -81.3
## 10 LGA ORD 40.8 -73.9 42.0 -87.9
## # … with 336,766 more rows
飛行機ごとの平均
pf <- flights %>%
group_by(tailnum) %>%
summarise(ave_delay = mean(arr_delay, na.rm = TRUE), n = n()) %>%
left_join(planes, "tailnum") %>%
select(tailnum, year, n, ave_delay)
pf %>% filter(n > 20) %>%
ggplot(aes(x = year, y = ave_delay, size = n)) +
geom_point()
年ごとの平均
flights %>%
left_join(planes, "tailnum", suffix = c(".flight", ".plane")) %>%
group_by(year.plane) %>%
summarise(ave_delay = mean(arr_delay, na.rm = TRUE)) %>%
ggplot(aes(x = year.plane, y = ave_delay)) +
geom_point() +
geom_smooth()
w1 <- weather %>% mutate(id = row_number())
w2 <- flights %>% inner_join(w1) %>%
group_by(id) %>% summarise(ave_delay = mean(arr_delay, na.rm = TRUE), n_flights = n()) %>%
left_join(w1)
w3 <- w2 %>% gather(key = weather_var, value = weather_value, temp:visib) %>%
select(id, n_flights, ave_delay, weather_var, weather_value )
ggplot(w3) +
geom_smooth(aes(x = weather_value, y = ave_delay)) +
facet_wrap(~weather_var, scales = "free")
j13 <- flights %>%
filter(year == 2013, month == 6, day == 13) %>%
group_by(dest) %>%
summarise(ave_delay = mean(arr_delay, na.rm = TRUE), n = n()) %>%
left_join(airports, c("dest" = "faa")) %>%
select(dest, n, ave_delay, lat, lon)
j13 %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point(aes(size = ave_delay, color = ave_delay)) +
coord_quickmap()
https://en.wikipedia.org/wiki/June_12%E2%80%9313,_2013_derecho_series
わからん
flights %>% semi_join(
flights %>% count(tailnum) %>% filter(n >= 100, !is.na(tailnum))
)
## # A tibble: 228,390 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 544 545 -1 1004
## 4 2013 1 1 554 558 -4 740
## 5 2013 1 1 555 600 -5 913
## 6 2013 1 1 557 600 -3 709
## 7 2013 1 1 557 600 -3 838
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 558 600 -2 923
## # … with 228,380 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
fueleconomy::vehicles %>% semi_join(fueleconomy::common)
## # A tibble: 14,531 x 12
## id make model year class trans drive cyl displ fuel hwy cty
## <int> <chr> <chr> <int> <chr> <chr> <chr> <int> <dbl> <chr> <int> <int>
## 1 1833 Acura Integ… 1986 Subc… Auto… Fron… 4 1.6 Regu… 28 22
## 2 1834 Acura Integ… 1986 Subc… Manu… Fron… 4 1.6 Regu… 28 23
## 3 3037 Acura Integ… 1987 Subc… Auto… Fron… 4 1.6 Regu… 28 22
## 4 3038 Acura Integ… 1987 Subc… Manu… Fron… 4 1.6 Regu… 28 23
## 5 4183 Acura Integ… 1988 Subc… Auto… Fron… 4 1.6 Regu… 27 22
## 6 4184 Acura Integ… 1988 Subc… Manu… Fron… 4 1.6 Regu… 28 23
## 7 5303 Acura Integ… 1989 Subc… Auto… Fron… 4 1.6 Regu… 27 22
## 8 5304 Acura Integ… 1989 Subc… Manu… Fron… 4 1.6 Regu… 28 23
## 9 6442 Acura Integ… 1990 Subc… Auto… Fron… 4 1.8 Regu… 24 20
## 10 6443 Acura Integ… 1990 Subc… Manu… Fron… 4 1.8 Regu… 26 21
## # … with 14,521 more rows
library(zoo)
flights %>%
group_by(year, month, day, hour) %>%
summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
ungroup() %>%
mutate(mov_ave = rollapply(delay, 48, mean, na.rm = TRUE, align = "right", fill = NA)) %>%
arrange(mov_ave %>% desc) %>%
select(-delay) %>%
head(10) %>%
kable()
year | month | day | hour | mov_ave |
---|---|---|---|---|
2013 | 7 | 23 | 23 | 54.29261 |
2013 | 7 | 24 | 5 | 54.07572 |
2013 | 6 | 28 | 23 | 53.97791 |
2013 | 7 | 24 | 6 | 53.93745 |
2013 | 6 | 29 | 5 | 53.38781 |
2013 | 7 | 24 | 7 | 53.37115 |
2013 | 7 | 23 | 22 | 53.23787 |
2013 | 7 | 10 | 23 | 53.03761 |
2013 | 7 | 24 | 8 | 52.79645 |
2013 | 6 | 29 | 6 | 52.76755 |
anti_join(flights, airports, by = c("dest" = "faa"))
tell you? What does anti_join(airports, flights, by = c("faa" = "dest"))
tell you?anti_join(flights, airports, by = c("dest" = "faa"))
: airports
テーブルに存在しない飛行場へのフライトanti_join(airports, flights, by = c("faa" = "dest"))
: flights
デーブルに行き先として1度も出てこない飛行場carrier_count <- flights %>% left_join(planes) %>%
group_by(model) %>%
summarise(flown_by = n_distinct(carrier)) %>%
filter(!is.na(model)) %>% arrange(flown_by %>% desc)
kable(carrier_count)
model | flown_by |
---|---|
A321-231 | 2 |
737-8H4 | 1 |
737-924ER | 1 |
737-990ER | 1 |
A320-214 | 1 |
A320-232 | 1 |
A321-211 | 1 |
A330-243 | 1 |
CL-600-2D24 | 1 |
ERJ 190-100 IGW | 1 |
carrier_count %>% filter(flown_by > 1) %>%
inner_join(planes) %>%
inner_join(flights) %>%
select(model, tailnum, carrier) %>% unique() %>%
left_join(airlines) %>%
kable()
model | tailnum | carrier | name |
---|---|---|---|
A321-231 | N567UW | US | US Airways Inc. |
A321-231 | N568UW | US | US Airways Inc. |
A321-231 | N569UW | US | US Airways Inc. |
A321-231 | N570UW | US | US Airways Inc. |
A321-231 | N571UW | US | US Airways Inc. |
A321-231 | N572UW | US | US Airways Inc. |
A321-231 | N903JB | B6 | JetBlue Airways |
A321-231 | N907JB | B6 | JetBlue Airways |
A321-231 | N913JB | B6 | JetBlue Airways |