library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nycflights13)
library(dplyr)
library(tidyr)

# Print the airports lookup table to inspect its columns; `faa` is the key
# that the destination join further down matches against `flights$dest`.
airports
## # A tibble: 1,458 x 8
##    faa   name                             lat    lon   alt    tz dst   tzone    
##    <chr> <chr>                          <dbl>  <dbl> <dbl> <dbl> <chr> <chr>    
##  1 04G   Lansdowne Airport               41.1  -80.6  1044    -5 A     America/~
##  2 06A   Moton Field Municipal Airport   32.5  -85.7   264    -6 A     America/~
##  3 06C   Schaumburg Regional             42.0  -88.1   801    -6 A     America/~
##  4 06N   Randall Airport                 41.4  -74.4   523    -5 A     America/~
##  5 09J   Jekyll Island Airport           31.1  -81.4    11    -5 A     America/~
##  6 0A9   Elizabethton Municipal Airport  36.4  -82.2  1593    -5 A     America/~
##  7 0G6   Williams County Airport         41.5  -84.5   730    -5 A     America/~
##  8 0G7   Finger Lakes Regional Airport   42.9  -76.8   492    -5 A     America/~
##  9 0P2   Shoestring Aviation Airfield    39.8  -76.6  1000    -5 U     America/~
## 10 0S9   Jefferson County Intl           48.1 -123.    108    -8 A     America/~
## # ... with 1,448 more rows
# Map of the average arrival delay at every destination airport.
# `airports` supplies lon/lat for each `dest` through its `faa` code. Rows whose
# coordinates or mean delay are missing cannot be drawn and are dropped by
# geom_point(), which is the warning echoed below.
flights %>%
  group_by(dest) %>%
  summarise(mean_delay = mean(arr_delay, na.rm = TRUE)) %>%
  left_join(airports, by = c("dest" = "faa")) %>%
  ggplot(aes(lon, lat)) +
  borders("state") +
  # Only data-driven mappings belong inside aes(). The earlier version also
  # passed colour = "red" (a duplicate of color) and hjust/vjust (not point
  # aesthetics), and mapped a constant alpha; constants are set outside aes().
  geom_point(aes(colour = mean_delay, size = mean_delay), alpha = 1) +
  coord_quickmap()
## Warning: Removed 5 rows containing missing values (geom_point).

# Planes (tail numbers) that were operated by more than one carrier.
# Flights with a missing tail number are excluded first: NA is not an aircraft
# and would otherwise be reported as a single "plane" shared by 7 carriers.
flights %>%
  filter(!is.na(tailnum)) %>%
  distinct(tailnum, carrier) %>% # one row per (plane, carrier) pair
  count(tailnum) %>%             # n = number of distinct carriers per plane
  filter(n > 1)
## # A tibble: 17 x 2
##    tailnum     n
##    <chr>   <int>
##  1 N146PQ      2
##  2 N153PQ      2
##  3 N176PQ      2
##  4 N181PQ      2
##  5 N197PQ      2
##  6 N200PQ      2
##  7 N228PQ      2
##  8 N232PQ      2
##  9 N933AT      2
## 10 N935AT      2
## 11 N977AT      2
## 12 N978AT      2
## 13 N979AT      2
## 14 N981AT      2
## 15 N989AT      2
## 16 N990AT      2
## 17 N994AT      2
# Add `time_hours`, the air time divided by 60, as a new column of flights.
mutate(flights, time_hours = air_time / 60)
## # A tibble: 336,776 x 20
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ... with 336,766 more rows, and 12 more variables: arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## #   time_hours <dbl>
# Histogram of each plane's mean speed (distance / air time in hours).
# Flights without a tail number or without a recorded air time are removed up
# front: a single missing air_time would otherwise turn the whole plane's mean
# into NA and silently drop that plane from the plot.
flights %>%
  filter(!is.na(tailnum), !is.na(air_time)) %>%
  mutate(
    time_hours = air_time / 60,
    avg_speed = distance / time_hours
  ) %>%
  group_by(tailnum) %>%
  summarise(mean_speed = mean(avg_speed)) %>%
  ggplot(aes(x = mean_speed)) +
  # The bin count is stated explicitly rather than left to the stat_bin() default.
  geom_histogram(bins = 30)

library(tidyverse)
library(nycflights13)
library(dplyr)
library(tidyr)

# Count of flights with a recorded arrival time per carrier and destination.
# A2 is the long table (carrier, dest, n). `.groups = "drop_last"` states the
# resulting grouping (by carrier) explicitly instead of relying on the default.
A2 <- flights %>%
  filter(!is.na(arr_time)) %>%
  group_by(carrier, dest) %>%
  summarise(n = n(), .groups = "drop_last")

# A1 is the same data reshaped wide: one row per carrier, one column per
# destination. It is derived from A2 so the counting logic lives in one place;
# carrier/destination pairs with no flights are filled with 0.
A1 <- A2 %>%
  pivot_wider(
    names_from = dest,
    values_from = n,
    values_fill = 0
  )
# Long format: one row per (carrier, dest) pair with its flight count `n`.
A2
## # A tibble: 312 x 3
## # Groups:   carrier [16]
##    carrier dest      n
##    <chr>   <chr> <int>
##  1 9E      ATL      56
##  2 9E      AUS       2
##  3 9E      AVL      10
##  4 9E      BNA     452
##  5 9E      BOS     853
##  6 9E      BTV       2
##  7 9E      BUF     790
##  8 9E      BWI     816
##  9 9E      CAE       3
## 10 9E      CHS     332
## # ... with 302 more rows
# Wide format: one row per carrier and one column per destination; pairs with
# no flights were filled with 0 by pivot_wider().
A1
## # A tibble: 16 x 105
## # Groups:   carrier [16]
##    carrier   ATL   AUS   AVL   BNA   BOS   BTV   BUF   BWI   CAE   CHS   CLE
##    <chr>   <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
##  1 9E         56     2    10   452   853     2   790   816     3   332   322
##  2 AA          0   359     0     0  1430     0     0     0     0     0     0
##  3 AS          0     0     0     0     0     0     0     0     0     0     0
##  4 B6          0   744     0     0  4326  1348  2773     0     0   612     0
##  5 DL      10476   353     0     1   962     0     3     0     0     0     0
##  6 EV       1660     0   253  2059   156  1162  1005   339   103  1825   556
##  7 F9          0     0     0     0     0     0     0     0     0     0     0
##  8 FL       2284     0     0     0     0     0     0     0     0     0     0
##  9 HA          0     0     0     0     0     0     0     0     0     0     0
## 10 MQ       2237     0     0  2306     0     0     0   333     0     0  1636
## 11 OO          0     0     0     0     0     0     0     0     0     0    21
## 12 UA        102   664     0     0  3299     0     0     0     0     1  1864
## 13 US          0     0     0     0  4002     0     0     0     0     0     0
## 14 VX          0     0     0     0     0     0     0     0     0     0     0
## 15 WN         58   295     0  1273     0     0     0   200     0     0     0
## 16 YV          0     0     0     0     0     0     0     0     0     0     0
## # ... with 93 more variables: CLT <int>, CMH <int>, CVG <int>, DAY <int>,
## #   DCA <int>, DFW <int>, DSM <int>, DTW <int>, GRR <int>, GSO <int>,
## #   GSP <int>, IAD <int>, IND <int>, JAX <int>, LEX <int>, MCI <int>,
## #   MEM <int>, MHT <int>, MKE <int>, MSN <int>, MSP <int>, MSY <int>,
## #   MVY <int>, ORD <int>, ORF <int>, PHL <int>, PIT <int>, RDU <int>,
## #   RIC <int>, ROC <int>, RSW <int>, SAT <int>, SDF <int>, SRQ <int>,
## #   SYR <int>, TPA <int>, TYS <int>, EGE <int>, FLL <int>, IAH <int>, ...