flights=nycflights13::flights
airports=nycflights13::airports
flights2 <- flights %>%
mutate(flights,tot_delay = arr_delay + dep_delay) %>%
group_by(dest) %>%
summarise(avg_delay = mean(tot_delay, na.rm = T)) %>%
left_join(airports, c("dest"="faa")) %>%
arrange(desc(avg_delay))
head(flights2)
## # A tibble: 6 × 9
## dest avg_delay name lat lon alt tz dst tzone
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 CAE 75.6 Columbia Metropolitan 33.9 -81.1 236 -5 A America/…
## 2 TUL 68.5 Tulsa Intl 36.2 -95.9 677 -6 A America/…
## 3 OKC 59.8 Will Rogers World 35.4 -97.6 1295 -6 A America/…
## 4 JAC 55.6 Jackson Hole Airport 43.6 -111. 6451 -7 A America/…
## 5 TYS 52.5 Mc Ghee Tyson 35.8 -84.0 981 -5 A America/…
## 6 BHM 45.9 Birmingham Intl 33.6 -86.8 644 -6 A America/…
#Vẽ bản đồ cho nước mỹ (US)
library(ggplot2)
states = map_data("state")
g <- ggplot(data=states)
g2 <- g + geom_polygon(mapping=aes(x=long,y=lat,group=group),color="white",fill="grey")
g2 <- g2 + ggtitle("Map of the USA minus Alaska")
g2
flights3 <- filter(flights2, lon > -140)
g3 <- g2 + geom_point(data=flights3, mapping = aes(x = lon, y = lat, color = avg_delay),
position = "jitter",size=2.5) + labs(color = "Average Delay") +
scale_colour_gradient(low = "white",high = "dark red")
print(g3)
flights4 <- flights %>%
mutate(flights,tot_delay = arr_delay + dep_delay) %>%
group_by(dest,month) %>%
summarise(avg_delay = mean(tot_delay, na.rm = T)) %>%
left_join(airports, c("dest"="faa")) %>%
arrange(sort(month))
## `summarise()` has grouped output by 'dest'. You can override using the
## `.groups` argument.
head(flights4)
## # A tibble: 6 × 10
## # Groups: dest [1]
## dest month avg_delay name lat lon alt tz dst tzone
## <chr> <int> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ABQ 4 14.9 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
## 2 ABQ 5 5.13 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
## 3 ABQ 6 18.4 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
## 4 ABQ 7 41.4 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
## 5 ABQ 8 12.4 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
## 6 ABQ 9 18.8 Albuquerque Interna… 35.0 -107. 5355 -7 A Amer…
flights5 <- flights4 %>% filter(lon > -140)
library(ggrepel)
g_plot<-for(m in c(1:12)) {
g4 <- g2 + geom_point(data=flights5 %>% filter(month==m),
mapping = aes(x = lon,
y = lat,
color = avg_delay),
position = "jitter",size=2.5) +
labs(color = "Average Delay",
title = stringr::str_glue("Chỉ số trung bình delay của từng\nsân bay trong tháng {m}")) +
scale_colour_gradient(low = "white",
high = "darkred")+
geom_text_repel(data=head(flights5 %>%
filter(month==m) %>%
arrange(desc(avg_delay))),
mapping=aes(x = lon, y = lat, label = dest), color ="red", nudge_y = -0.5)+
geom_text_repel(data=tail(flights5 %>%
filter(month==m) %>%
arrange(desc(avg_delay)))
, mapping=aes(x = lon, y = lat , label = dest), color ="blue",angle=20,nudge_y=-0.5)
print(g4)
}
library(flextable)
##
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
##
## compose
#Tổng hợp 6 airports tệ nhất trong 12 tháng thành 1 dataframe:
wo_df<-map_dfr(.x=sort(unique(flights5$month)),
.f=~head(flights5 %>%
filter(month==.x) %>%
arrange(desc(avg_delay)))
)
#Tính số lần xuất hiện trong bảng của từng airports và xếp hạng từ cao xuống thấp:
wo_df<-wo_df %>%
count(dest) %>%
arrange(desc(n)) %>%
left_join(airports, by=c("dest" = "faa")) %>%
select(c(dest,name,n))
#Convert dataframe to have a good presentation:
(wo_df<-flextable(wo_df) %>%
autofit())
dest | name | n |
|---|---|---|
CAE | Columbia Metropolitan | 9 |
TUL | Tulsa Intl | 8 |
OKC | Will Rogers World | 7 |
DSM | Des Moines Intl | 6 |
BHM | Birmingham Intl | 4 |
MSN | Dane Co Rgnl Truax Fld | 4 |
TYS | Mc Ghee Tyson | 4 |
GSP | Greenville-Spartanburg International | 3 |
BDL | Bradley Intl | 2 |
BUR | Bob Hope | 2 |
PVD | Theodore Francis Green State | 2 |
SBN | South Bend Rgnl | 2 |
ALB | Albany Intl | 1 |
AVL | Asheville Regional Airport | 1 |
BGR | Bangor Intl | 1 |
BZN | Gallatin Field | 1 |
CAK | Akron Canton Regional Airport | 1 |
CHO | Charlottesville-Albemarle | 1 |
CHS | Charleston Afb Intl | 1 |
DAY | James M Cox Dayton Intl | 1 |
GRR | Gerald R Ford Intl | 1 |
JAC | Jackson Hole Airport | 1 |
MDW | Chicago Midway Intl | 1 |
MHT | Manchester Regional Airport | 1 |
MKE | General Mitchell Intl | 1 |
OMA | Eppley Afld | 1 |
ORF | Norfolk Intl | 1 |
RIC | Richmond Intl | 1 |
SAV | Savannah Hilton Head Intl | 1 |
SMF | Sacramento Intl | 1 |
TVC | Cherry Capital Airport | 1 |
library(flextable)
#Tổng hợp 6 airports tốt nhất trong 12 tháng thành 1 dataframe:
be_df<-map_dfr(.x=sort(unique(flights5$month)),
.f=~tail(flights5 %>%
filter(month==.x) %>%
arrange(desc(avg_delay)))
)
#Tính số lần xuất hiện trong bảng của từng airports và xếp hạng từ cao xuống thấp:
be_df<-be_df %>%
count(dest) %>%
arrange(desc(n)) %>%
left_join(airports, by=c("dest" = "faa"))%>%
select(c(dest,name,n))
#Convert dataframe to have a good presentation:
(be_df<-flextable(be_df) %>%
autofit())
dest | name | n |
|---|---|---|
SNA | John Wayne Arpt Orange Co | 9 |
PSP | Palm Springs Intl | 5 |
LGB | Long Beach | 4 |
OAK | Metropolitan Oakland Intl | 4 |
SJC | Norman Y Mineta San Jose Intl | 4 |
ALB | Albany Intl | 3 |
AVL | Asheville Regional Airport | 3 |
BDL | Bradley Intl | 3 |
MYR | Myrtle Beach Intl | 3 |
BZN | Gallatin Field | 2 |
EYW | Key West Intl | 2 |
MVY | Martha\\'s Vineyard | 2 |
SEA | Seattle Tacoma Intl | 2 |
SRQ | Sarasota Bradenton Intl | 2 |
ABQ | Albuquerque International Sunport | 1 |
ACK | Nantucket Mem | 1 |
AUS | Austin Bergstrom Intl | 1 |
BGR | Bangor Intl | 1 |
BHM | Birmingham Intl | 1 |
BUR | Bob Hope | 1 |
DFW | Dallas Fort Worth Intl | 1 |
EGE | Eagle Co Rgnl | 1 |
HDN | Yampa Valley | 1 |
LAS | Mc Carran Intl | 1 |
LAX | Los Angeles Intl | 1 |
LEX | Blue Grass | 1 |
LGA | La Guardia | 1 |
MIA | Miami Intl | 1 |
MTJ | Montrose Regional Airport | 1 |
OMA | Eppley Afld | 1 |
PHL | Philadelphia Intl | 1 |
PVD | Theodore Francis Green State | 1 |
SBN | South Bend Rgnl | 1 |
SFO | San Francisco Intl | 1 |
SLC | Salt Lake City Intl | 1 |
TVC | Cherry Capital Airport | 1 |
TYS | Mc Ghee Tyson | 1 |
XNA | NW Arkansas Regional | 1 |
usairports <- filter(airports, lat < 48.5)
usairports <- filter(usairports, lon > -130)
usairports <- filter(usairports, faa!="JFK") #filter out jfk
jfk <- filter(airports, faa=="JFK") #separate df for jfk
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
#create basemap
map("world", regions=c("usa"), fill=T, col="grey8", bg="grey15", ylim=c(21.0,50.0), xlim=c(-130.0,-65.0))
#overlay airports
points(usairports$lon,usairports$lat, pch=3, cex=0.1, col="chocolate1")