library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(nycflights13)
library(dplyr)
library(tidyr)
# Print the airports lookup table to inspect its columns (faa code, name,
# lat/lon coordinates, altitude and time-zone fields).
airports
## # A tibble: 1,458 x 8
## faa name lat lon alt tz dst tzone
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A America/~
## 2 06A Moton Field Municipal Airport 32.5 -85.7 264 -6 A America/~
## 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A America/~
## 4 06N Randall Airport 41.4 -74.4 523 -5 A America/~
## 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A America/~
## 6 0A9 Elizabethton Municipal Airport 36.4 -82.2 1593 -5 A America/~
## 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A America/~
## 8 0G7 Finger Lakes Regional Airport 42.9 -76.8 492 -5 A America/~
## 9 0P2 Shoestring Aviation Airfield 39.8 -76.6 1000 -5 U America/~
## 10 0S9 Jefferson County Intl 48.1 -123. 108 -8 A America/~
## # ... with 1,448 more rows
# Map of destination airports, coloured and sized by mean arrival delay.
# Each aesthetic is mapped exactly once: the original mapped colour twice
# (`color = mean_delay` and `colour = "red"`), passed hjust/vjust, which
# geom_point() does not understand, and put the constant alpha = 1 inside
# aes(), where it is treated as a mapped variable rather than a fixed setting.
flights %>%
  group_by(dest) %>%
  summarise(mean_delay = mean(arr_delay, na.rm = TRUE)) %>%
  # mean() yields NaN when every arrival delay for a destination is missing;
  # drop those explicitly instead of letting ggplot discard them with a warning
  filter(!is.na(mean_delay)) %>%
  # inner_join keeps only destinations that have coordinates in `airports`
  inner_join(airports, by = c("dest" = "faa")) %>%
  ggplot(aes(lon, lat)) +
  borders("state") +
  geom_point(aes(colour = mean_delay, size = mean_delay)) +
  coord_quickmap()

# Tail numbers that appear under more than one carrier.
flights %>%
  # A missing tail number does not identify a single aircraft, so flights
  # without one must not be pooled and reported as a "plane" with many carriers.
  filter(!is.na(tailnum)) %>%
  # one row per (tailnum, carrier) pair that occurs in the data
  distinct(tailnum, carrier) %>%
  # n = number of distinct carriers seen for each tail number
  count(tailnum) %>%
  filter(n > 1)
## # A tibble: 17 x 2
## tailnum n
## <chr> <int>
## 1 N146PQ 2
## 2 N153PQ 2
## 3 N176PQ 2
## 4 N181PQ 2
## 5 N197PQ 2
## 6 N200PQ 2
## 7 N228PQ 2
## 8 N232PQ 2
## 9 N933AT 2
## 10 N935AT 2
## 11 N977AT 2
## 12 N978AT 2
## 13 N979AT 2
## 14 N981AT 2
## 15 N989AT 2
## 16 N990AT 2
## 17 N994AT 2
# Add a time_hours column: air_time divided by 60.
mutate(flights, time_hours = air_time / 60)
## # A tibble: 336,776 x 20
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ... with 336,766 more rows, and 12 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
## # time_hours <dbl>
# Histogram of each aircraft's mean speed (distance per hour of air time).
flights %>%
  # Flights without a recorded air_time have no computable speed. Left in, a
  # single such flight turns the whole per-plane mean into NA and the plane
  # silently disappears from the histogram. The > 0 guard avoids division by
  # zero producing Inf.
  filter(!is.na(air_time), air_time > 0) %>%
  mutate(
    time_hours = air_time / 60,
    avg_speed = distance / time_hours
  ) %>%
  group_by(tailnum) %>%
  summarise(mean_speed = mean(avg_speed)) %>%
  ggplot(aes(x = mean_speed)) +
  # bins stated explicitly (same as the default) so the choice is deliberate
  geom_histogram(bins = 30)

library(tidyverse)
library(nycflights13)
library(dplyr)
library(tidyr)
# A2: long-form count of flights with a recorded arrival time, one row per
# carrier/destination pair (the result stays grouped by carrier).
A2 <- flights %>%
  filter(!is.na(arr_time)) %>%
  group_by(carrier, dest) %>%
  summarise(n = n())
## `summarise()` has grouped output by 'carrier'. You can override using the `.groups` argument.
# A1: the same carrier/destination counts spread wide, one row per carrier and
# one column per destination; pairs with no counted flights are filled with 0.
A1 <- flights %>%
  filter(!is.na(arr_time)) %>%
  group_by(carrier, dest) %>%
  summarise(n = n()) %>%
  pivot_wider(names_from = dest, values_from = n, values_fill = 0)
## `summarise()` has grouped output by 'carrier'. You can override using the `.groups` argument.
# Print the long-form table: one row per carrier/destination pair with its
# flight count n.
A2
## # A tibble: 312 x 3
## # Groups: carrier [16]
## carrier dest n
## <chr> <chr> <int>
## 1 9E ATL 56
## 2 9E AUS 2
## 3 9E AVL 10
## 4 9E BNA 452
## 5 9E BOS 853
## 6 9E BTV 2
## 7 9E BUF 790
## 8 9E BWI 816
## 9 9E CAE 3
## 10 9E CHS 332
## # ... with 302 more rows
# Print the wide-form table: one row per carrier, one count column per
# destination.
A1
## # A tibble: 16 x 105
## # Groups: carrier [16]
## carrier ATL AUS AVL BNA BOS BTV BUF BWI CAE CHS CLE
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 9E 56 2 10 452 853 2 790 816 3 332 322
## 2 AA 0 359 0 0 1430 0 0 0 0 0 0
## 3 AS 0 0 0 0 0 0 0 0 0 0 0
## 4 B6 0 744 0 0 4326 1348 2773 0 0 612 0
## 5 DL 10476 353 0 1 962 0 3 0 0 0 0
## 6 EV 1660 0 253 2059 156 1162 1005 339 103 1825 556
## 7 F9 0 0 0 0 0 0 0 0 0 0 0
## 8 FL 2284 0 0 0 0 0 0 0 0 0 0
## 9 HA 0 0 0 0 0 0 0 0 0 0 0
## 10 MQ 2237 0 0 2306 0 0 0 333 0 0 1636
## 11 OO 0 0 0 0 0 0 0 0 0 0 21
## 12 UA 102 664 0 0 3299 0 0 0 0 1 1864
## 13 US 0 0 0 0 4002 0 0 0 0 0 0
## 14 VX 0 0 0 0 0 0 0 0 0 0 0
## 15 WN 58 295 0 1273 0 0 0 200 0 0 0
## 16 YV 0 0 0 0 0 0 0 0 0 0 0
## # ... with 93 more variables: CLT <int>, CMH <int>, CVG <int>, DAY <int>,
## # DCA <int>, DFW <int>, DSM <int>, DTW <int>, GRR <int>, GSO <int>,
## # GSP <int>, IAD <int>, IND <int>, JAX <int>, LEX <int>, MCI <int>,
## # MEM <int>, MHT <int>, MKE <int>, MSN <int>, MSP <int>, MSY <int>,
## # MVY <int>, ORD <int>, ORF <int>, PHL <int>, PIT <int>, RDU <int>,
## # RIC <int>, ROC <int>, RSW <int>, SAT <int>, SDF <int>, SRQ <int>,
## # SYR <int>, TPA <int>, TYS <int>, EGE <int>, FLL <int>, IAH <int>, ...