Nhập dữ liệu:

flights=nycflights13::flights
airports=nycflights13::airports

Gọi packages:

Tính chỉ số delay trung bình ở các airports:

flights2 <- flights %>% 
  mutate(flights,tot_delay = arr_delay + dep_delay) %>% 
  group_by(dest) %>% 
  summarise(avg_delay = mean(tot_delay, na.rm = T)) %>% 
  left_join(airports, c("dest"="faa")) %>%
  arrange(desc(avg_delay))

head(flights2)
## # A tibble: 6 × 9
##   dest  avg_delay name                    lat    lon   alt    tz dst   tzone    
##   <chr>     <dbl> <chr>                 <dbl>  <dbl> <dbl> <dbl> <chr> <chr>    
## 1 CAE        75.6 Columbia Metropolitan  33.9  -81.1   236    -5 A     America/…
## 2 TUL        68.5 Tulsa Intl             36.2  -95.9   677    -6 A     America/…
## 3 OKC        59.8 Will Rogers World      35.4  -97.6  1295    -6 A     America/…
## 4 JAC        55.6 Jackson Hole Airport   43.6 -111.   6451    -7 A     America/…
## 5 TYS        52.5 Mc Ghee Tyson          35.8  -84.0   981    -5 A     America/…
## 6 BHM        45.9 Birmingham Intl        33.6  -86.8   644    -6 A     America/…

Vẽ bản đồ US và gán vào object g2:

#Vẽ bản đồ cho nước mỹ (US)
library(ggplot2)
states = map_data("state")

g <- ggplot(data=states)
g2 <- g + geom_polygon(mapping=aes(x=long,y=lat,group=group),color="white",fill="grey")
g2 <- g2 + ggtitle("Map of the USA minus Alaska")
g2

Vẽ biểu đồ biểu thị độ delay trung bình cả năm ở các airports:

flights3 <- filter(flights2, lon > -140)
g3 <- g2 + geom_point(data=flights3, mapping = aes(x = lon, y = lat, color = avg_delay), 
                position = "jitter",size=2.5) + labs(color = "Average Delay") +
                scale_colour_gradient(low = "white",high = "dark red")
print(g3)

Tính theo từng tháng:

flights4 <- flights %>% 
  mutate(flights,tot_delay = arr_delay + dep_delay) %>% 
  group_by(dest,month) %>% 
  summarise(avg_delay = mean(tot_delay, na.rm = T)) %>% 
  left_join(airports, c("dest"="faa")) %>%
  arrange(sort(month))
## `summarise()` has grouped output by 'dest'. You can override using the
## `.groups` argument.
head(flights4)
## # A tibble: 6 × 10
## # Groups:   dest [1]
##   dest  month avg_delay name                   lat   lon   alt    tz dst   tzone
##   <chr> <int>     <dbl> <chr>                <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ABQ       4     14.9  Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…
## 2 ABQ       5      5.13 Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…
## 3 ABQ       6     18.4  Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…
## 4 ABQ       7     41.4  Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…
## 5 ABQ       8     12.4  Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…
## 6 ABQ       9     18.8  Albuquerque Interna…  35.0 -107.  5355    -7 A     Amer…

Vẽ biểu đồ biểu thị độ delay trung bình theo từng tháng ở các airports:

flights5 <- flights4 %>% filter(lon > -140)
library(ggrepel)
g_plot<-for(m in c(1:12)) {
  g4 <- g2 + geom_point(data=flights5 %>% filter(month==m), 
                      mapping = aes(x = lon, 
                                    y = lat, 
                                    color = avg_delay), 
                      position = "jitter",size=2.5) + 
          labs(color = "Average Delay",
               title = stringr::str_glue("Chỉ số trung bình delay của từng\nsân bay trong tháng {m}")) +
          scale_colour_gradient(low = "white",
                                high = "darkred")+
          geom_text_repel(data=head(flights5 %>% 
                                filter(month==m) %>%
                                arrange(desc(avg_delay))),
                    mapping=aes(x = lon, y = lat, label = dest), color ="red", nudge_y = -0.5)+
    
          geom_text_repel(data=tail(flights5 %>% 
                                filter(month==m) %>%
                                arrange(desc(avg_delay)))
                      , mapping=aes(x = lon, y = lat , label = dest), color ="blue",angle=20,nudge_y=-0.5)
    
print(g4)
}

Tổng kết:

Thống kê các airports có độ delay nhiều nhất:

library(flextable)
## 
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
## 
##     compose
#Tổng hợp 6 airports tệ nhất trong 12 tháng thành 1 dataframe:
wo_df<-map_dfr(.x=sort(unique(flights5$month)),
       .f=~head(flights5 %>% 
                  filter(month==.x) %>% 
                  arrange(desc(avg_delay)))
)

#Tính số lần xuất hiện trong bảng của từng airports và xếp hạng từ cao xuống thấp:
wo_df<-wo_df %>% 
  count(dest) %>% 
  arrange(desc(n)) %>% 
  left_join(airports, by=c("dest" = "faa")) %>% 
  select(c(dest,name,n))

#Convert dataframe to have a good presentation:
(wo_df<-flextable(wo_df) %>% 
                  autofit())

dest

name

n

CAE

Columbia Metropolitan

9

TUL

Tulsa Intl

8

OKC

Will Rogers World

7

DSM

Des Moines Intl

6

BHM

Birmingham Intl

4

MSN

Dane Co Rgnl Truax Fld

4

TYS

Mc Ghee Tyson

4

GSP

Greenville-Spartanburg International

3

BDL

Bradley Intl

2

BUR

Bob Hope

2

PVD

Theodore Francis Green State

2

SBN

South Bend Rgnl

2

ALB

Albany Intl

1

AVL

Asheville Regional Airport

1

BGR

Bangor Intl

1

BZN

Gallatin Field

1

CAK

Akron Canton Regional Airport

1

CHO

Charlottesville-Albemarle

1

CHS

Charleston Afb Intl

1

DAY

James M Cox Dayton Intl

1

GRR

Gerald R Ford Intl

1

JAC

Jackson Hole Airport

1

MDW

Chicago Midway Intl

1

MHT

Manchester Regional Airport

1

MKE

General Mitchell Intl

1

OMA

Eppley Afld

1

ORF

Norfolk Intl

1

RIC

Richmond Intl

1

SAV

Savannah Hilton Head Intl

1

SMF

Sacramento Intl

1

TVC

Cherry Capital Airport

1

Thống kê các airports có độ delay ít nhất:

library(flextable)
#Tổng hợp 6 airports tốt nhất trong 12 tháng thành 1 dataframe:
be_df<-map_dfr(.x=sort(unique(flights5$month)),
       .f=~tail(flights5 %>% 
                  filter(month==.x) %>% 
                  arrange(desc(avg_delay)))

)
#Tính số lần xuất hiện trong bảng của từng airports và xếp hạng từ cao xuống thấp:
be_df<-be_df %>% 
  count(dest) %>% 
  arrange(desc(n)) %>% 
  left_join(airports, by=c("dest" = "faa"))%>% 
  select(c(dest,name,n)) 

#Convert dataframe to have a good presentation:
(be_df<-flextable(be_df) %>% 
                  autofit())

dest

name

n

SNA

John Wayne Arpt Orange Co

9

PSP

Palm Springs Intl

5

LGB

Long Beach

4

OAK

Metropolitan Oakland Intl

4

SJC

Norman Y Mineta San Jose Intl

4

ALB

Albany Intl

3

AVL

Asheville Regional Airport

3

BDL

Bradley Intl

3

MYR

Myrtle Beach Intl

3

BZN

Gallatin Field

2

EYW

Key West Intl

2

MVY

Martha\\'s Vineyard

2

SEA

Seattle Tacoma Intl

2

SRQ

Sarasota Bradenton Intl

2

ABQ

Albuquerque International Sunport

1

ACK

Nantucket Mem

1

AUS

Austin Bergstrom Intl

1

BGR

Bangor Intl

1

BHM

Birmingham Intl

1

BUR

Bob Hope

1

DFW

Dallas Fort Worth Intl

1

EGE

Eagle Co Rgnl

1

HDN

Yampa Valley

1

LAS

Mc Carran Intl

1

LAX

Los Angeles Intl

1

LEX

Blue Grass

1

LGA

La Guardia

1

MIA

Miami Intl

1

MTJ

Montrose Regional Airport

1

OMA

Eppley Afld

1

PHL

Philadelphia Intl

1

PVD

Theodore Francis Green State

1

SBN

South Bend Rgnl

1

SFO

San Francisco Intl

1

SLC

Salt Lake City Intl

1

TVC

Cherry Capital Airport

1

TYS

Mc Ghee Tyson

1

XNA

NW Arkansas Regional

1

Visualize the line trips in map:

usairports <- filter(airports, lat < 48.5)
usairports <- filter(usairports, lon > -130)
usairports <- filter(usairports, faa!="JFK") #filter out jfk
jfk <- filter(airports, faa=="JFK") #separate df for jfk
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
#create basemap
map("world", regions=c("usa"), fill=T, col="grey8", bg="grey15", ylim=c(21.0,50.0), xlim=c(-130.0,-65.0))
#overlay airports
points(usairports$lon,usairports$lat, pch=3, cex=0.1, col="chocolate1")