library(pacman)
p_load(nycflights13, tidyverse)
data(flights)
data(airlines)
data(weather)
data(airports)
data(flights)

Q1. 請利用filter找出1月1號所有航班資料。

filter(flights,month==1,day==1)
## # A tibble: 842 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # … with 832 more rows, and 11 more variables: arr_delay <dbl>, carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Q2. 利用filter找出抵達誤點超過二個小時以上所有航班資料,並將arr_delay顯示在第四欄。

flights %>% filter(arr_delay >= 120) %>%
  select(year:day, arr_delay, everything())
## # A tibble: 10,200 x 19
##     year month   day arr_delay dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>     <dbl>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1       137      811            630       101     1047
##  2  2013     1     1       851      848           1835       853     1001
##  3  2013     1     1       123      957            733       144     1056
##  4  2013     1     1       145     1114            900       134     1447
##  5  2013     1     1       127     1505           1310       115     1638
##  6  2013     1     1       125     1525           1340       105     1831
##  7  2013     1     1       136     1549           1445        64     1912
##  8  2013     1     1       123     1558           1359       119     1718
##  9  2013     1     1       123     1732           1630        62     2028
## 10  2013     1     1       138     1803           1620       103     2008
## # … with 10,190 more rows, and 11 more variables: sched_arr_time <int>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Q3. 利用arrange, slice將抵達誤點時間, 以遞減的順序進行排列,並找出誤點最久的班機資訊(carrier, flight)。

flights %>%
  arrange(desc(arr_delay)) %>% 
  select(year:day, carrier,flight,arr_delay,dep_time,sched_dep_time,dep_delay) %>%
  slice(1)
## # A tibble: 1 x 9
##    year month   day carrier flight arr_delay dep_time sched_dep_time dep_delay
##   <int> <int> <int> <chr>    <int>     <dbl>    <int>          <int>     <dbl>
## 1  2013     1     9 HA          51      1272      641            900      1301

Q4. 那家航空公司誤點(arr_delay)最嚴重?平均時間為多少?

flights %>% 
  group_by(carrier) %>% 
  summarise(arr_delay=mean(arr_delay,na.rm = TRUE)) %>% 
  arrange(desc(arr_delay))
## # A tibble: 16 x 2
##    carrier arr_delay
##    <chr>       <dbl>
##  1 F9         21.9  
##  2 FL         20.1  
##  3 EV         15.8  
##  4 YV         15.6  
##  5 OO         11.9  
##  6 MQ         10.8  
##  7 WN          9.65 
##  8 B6          9.46 
##  9 9E          7.38 
## 10 UA          3.56 
## 11 US          2.13 
## 12 VX          1.76 
## 13 DL          1.64 
## 14 AA          0.364
## 15 HA         -6.92 
## 16 AS         -9.93

Q5. 如果你想要盡可能避開誤點,應該搭乘一天中那個時間的航班?

flights %>% 
  group_by(hour) %>% 
  summarise(arr_delay=mean(arr_delay,na.rm = TRUE)) %>% 
  arrange(desc(arr_delay))
## # A tibble: 20 x 2
##     hour arr_delay
##    <dbl>     <dbl>
##  1    21    18.4  
##  2    20    16.7  
##  3    19    16.7  
##  4    17    16.0  
##  5    22    16.0  
##  6    18    14.8  
##  7    16    12.6  
##  8    15    12.3  
##  9    23    11.8  
## 10    14     9.20 
## 11    13     6.54 
## 12    12     3.49 
## 13    11     1.48 
## 14    10     0.954
## 15     8    -1.11 
## 16     9    -1.45 
## 17     6    -3.38 
## 18     5    -4.80 
## 19     7    -5.30 
## 20     1   NaN

Q6. 請從flights中「依序」選出year:day, hour, origin, dest, tailnum, carrier等欄位,再抽出前100筆記資料,最後和airlines做left_join;換言之,就是把airlines

select(flights,year:day,hour,origin,dest,tailnum,carrier)
## # A tibble: 336,776 x 8
##     year month   day  hour origin dest  tailnum carrier
##    <int> <int> <int> <dbl> <chr>  <chr> <chr>   <chr>  
##  1  2013     1     1     5 EWR    IAH   N14228  UA     
##  2  2013     1     1     5 LGA    IAH   N24211  UA     
##  3  2013     1     1     5 JFK    MIA   N619AA  AA     
##  4  2013     1     1     5 JFK    BQN   N804JB  B6     
##  5  2013     1     1     6 LGA    ATL   N668DN  DL     
##  6  2013     1     1     5 EWR    ORD   N39463  UA     
##  7  2013     1     1     6 EWR    FLL   N516JB  B6     
##  8  2013     1     1     6 LGA    IAD   N829AS  EV     
##  9  2013     1     1     6 JFK    MCO   N593JB  B6     
## 10  2013     1     1     6 LGA    ORD   N3ALAA  AA     
## # … with 336,766 more rows

Q7. 利用left_join將answer06與weather結合,再與airports結合。

Q8. 請計算每個月抵達ALB, BDL, BTV三個機場的班機總次數。

flights %>%
  group_by(year,month) %>%
  mutate()
## # A tibble: 336,776 x 19
## # Groups:   year, month [12]
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # … with 336,766 more rows, and 11 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Q9. 請依始發地、航空公司、及月份(origin, carrier, month) 計算平均起飛延誤(dep_delay)的時間。

flights %>%
  group_by(carrier,month,origin) %>%
  summarise(dep_delay=mean(dep_delay,na.rm = TRUE)) %>%
  arrange(desc(dep_delay))
## # A tibble: 399 x 4
## # Groups:   carrier, month [185]
##    carrier month origin dep_delay
##    <chr>   <int> <chr>      <dbl>
##  1 OO          1 LGA         67  
##  2 OO          8 LGA         64  
##  3 OO          6 EWR         61  
##  4 HA          1 JFK         54.4
##  5 YV          6 LGA         42.8
##  6 FL          7 LGA         41.2
##  7 VX          7 JFK         39.8
##  8 FL          6 LGA         38.8
##  9 WN          6 EWR         36.2
## 10 F9          5 LGA         35.9
## # … with 389 more rows

Q10. 請將weather中temp以直方圖畫出分配 (hist())

hist(weather$temp)