Utilizando o mesmo conjunto de dados flights usado na aula, encontre todos os voos que:
Resp:
library("nycflights13")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#names(nycflights13::flights)
# Flights that arrived late by two hours or more.
# `arr_delay` is expressed in minutes (negative values are early arrivals), so
# a two-hour threshold is 120. The previous condition, `arr_delay <= 2.0`,
# kept the early/on-time flights instead of the delayed ones.
# NOTE(review): the exercise item is missing from the prompt above; the
# "two or more hours" threshold is assumed from the variable name -- confirm.
# NOTE(review): the tibble printed right after this chunk was produced by the
# old filter and must be refreshed by re-knitting the document.
voos_atrasados <-
  flights %>%
  filter(arr_delay >= 120)
voos_atrasados
## # A tibble: 204,250 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 544 545 -1 1004 1022
## 2 2013 1 1 554 600 -6 812 837
## 3 2013 1 1 557 600 -3 709 723
## 4 2013 1 1 557 600 -3 838 846
## 5 2013 1 1 558 600 -2 849 851
## 6 2013 1 1 558 600 -2 853 856
## 7 2013 1 1 558 600 -2 923 937
## 8 2013 1 1 559 559 0 702 706
## 9 2013 1 1 559 600 -1 854 902
## 10 2013 1 1 600 600 0 851 858
## # ℹ 204,240 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Resp:
library("nycflights13")
library(dplyr)
#?nycflights13::flights
# Keep only the flights whose destination code is IAH or HOU.
# Rows with a missing `dest` are dropped, exactly as with `==` inside filter().
voos_dest_iah_hou <- filter(flights, dest %in% c("IAH", "HOU"))
voos_dest_iah_hou
## # A tibble: 9,313 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 623 627 -4 933 932
## 4 2013 1 1 728 732 -4 1041 1038
## 5 2013 1 1 739 739 0 1104 1038
## 6 2013 1 1 908 908 0 1228 1219
## 7 2013 1 1 1028 1026 2 1350 1339
## 8 2013 1 1 1044 1045 -1 1352 1351
## 9 2013 1 1 1114 900 134 1447 1222
## 10 2013 1 1 1205 1200 5 1503 1505
## # ℹ 9,303 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Resp:
library("nycflights13")
library(dplyr)
#?nycflights13::flights
#nycflights13::airlines
# Keep only the flights operated by the carriers coded UA, AA or DL
# (the codes are listed in nycflights13::airlines).
voos_comp <- filter(flights, carrier %in% c("UA", "AA", "DL"))
voos_comp
## # A tibble: 139,504 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 554 600 -6 812 837
## 5 2013 1 1 554 558 -4 740 728
## 6 2013 1 1 558 600 -2 753 745
## 7 2013 1 1 558 600 -2 924 917
## 8 2013 1 1 558 600 -2 923 937
## 9 2013 1 1 559 600 -1 941 910
## 10 2013 1 1 559 600 -1 854 902
## # ℹ 139,494 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Resp:
library("nycflights13")
library(dplyr)
#?nycflights13::flights
# Flights that departed in July, August or September.
# The previous filter tested `carrier == "8" | carrier == "9"`, which never
# matches (carrier holds airline codes), so only July was returned. All three
# conditions must test `month`, and `month` is an integer column, so it is
# compared with integers rather than strings.
voos_verao <-
  flights %>%
  filter(month %in% 7:9)
voos_verao
## # A tibble: 86,326 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 7 1 1 2029 212 236 2359
## 2 2013 7 1 2 2359 3 344 344
## 3 2013 7 1 29 2245 104 151 1
## 4 2013 7 1 43 2130 193 322 14
## 5 2013 7 1 44 2150 174 300 100
## 6 2013 7 1 46 2051 235 304 2358
## 7 2013 7 1 48 2001 287 308 2305
## 8 2013 7 1 58 2155 183 335 43
## 9 2013 7 1 100 2146 194 327 30
## 10 2013 7 1 100 2245 135 337 135
## # ℹ 86,316 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Flights that arrived more than two hours late although they did not leave
# late.
# Both delay columns are numeric and expressed in minutes. The previous
# condition compared them with the strings "0" and "2", which makes R compare
# text ("11" > "2" is FALSE, so a flight 11 minutes late was excluded), and it
# joined the two tests with `|` instead of requiring both.
# NOTE(review): the exercise item is missing from the prompt; "more than two
# hours late, but did not leave late" is assumed from the variable name and
# the two columns used -- confirm.
# NOTE(review): the tibble printed right after this chunk was produced by the
# old filter and must be refreshed by re-knitting the document.
voos_atrasados_dest <-
  flights %>%
  filter(dep_delay <= 0 & arr_delay > 120)
voos_atrasados_dest
## # A tibble: 99,113 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 533 529 4 850 830
## 2 2013 1 1 542 540 2 923 850
## 3 2013 1 1 558 600 -2 753 745
## 4 2013 1 1 558 600 -2 924 917
## 5 2013 1 1 559 600 -1 941 910
## 6 2013 1 1 559 559 0 702 706
## 7 2013 1 1 600 600 0 851 858
## 8 2013 1 1 600 600 0 837 825
## 9 2013 1 1 607 607 0 858 915
## 10 2013 1 1 608 600 8 807 735
## # ℹ 99,103 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights$hour)
#min(as.numeric(nycflights13::flights$hour))
# Flights whose `hour` value lies in the closed interval [0, 6].
# NOTE(review): in the printed data `hour`/`minute` follow `sched_dep_time`
# (e.g. 515 -> hour 5, minute 15), not the actual `dep_time`, and hour == 6
# also admits departures scheduled as late as 6:59 -- confirm this is the
# intended period.
voos_periodo <- filter(flights, between(hour, 0, 6))
voos_periodo
## # A tibble: 27,905 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # ℹ 27,895 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Classifique os voos para encontrar os voos mais atrasados.
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Sort every flight by departure delay, largest delay first.
# desc() reverses the sort order (see ?desc); rows with a missing
# `dep_delay` are placed at the end by arrange().
voos_mais_atrasados <- arrange(flights, desc(dep_delay))
voos_mais_atrasados
## # A tibble: 336,776 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 9 641 900 1301 1242 1530
## 2 2013 6 15 1432 1935 1137 1607 2120
## 3 2013 1 10 1121 1635 1126 1239 1810
## 4 2013 9 20 1139 1845 1014 1457 2210
## 5 2013 7 22 845 1600 1005 1044 1815
## 6 2013 4 10 1100 1900 960 1342 2211
## 7 2013 3 17 2321 810 911 135 1020
## 8 2013 6 27 959 1900 899 1236 2226
## 9 2013 7 22 2257 759 898 121 1026
## 10 2013 12 5 756 1700 896 1058 2020
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Classifique os voos de forma a encontrar os mais rápidos (velocidade mais alta).
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Add a `velocidade` column (distance divided by air time, scaled by 60) and
# sort the flights from the fastest to the slowest.
# NOTE(review): the factor 60 presumably converts a per-minute figure into a
# per-hour one, i.e. `air_time` is assumed to be in minutes -- confirm.
voos_mais_rapidos <- arrange(
  mutate(flights, velocidade = distance / air_time * 60),
  desc(velocidade)
)
voos_mais_rapidos
## # A tibble: 336,776 × 20
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 5 25 1709 1700 9 1923 1937
## 2 2013 7 2 1558 1513 45 1745 1719
## 3 2013 5 13 2040 2025 15 2225 2226
## 4 2013 3 23 1914 1910 4 2045 2043
## 5 2013 1 12 1559 1600 -1 1849 1917
## 6 2013 11 17 650 655 -5 1059 1150
## 7 2013 2 21 2355 2358 -3 412 438
## 8 2013 11 17 759 800 -1 1212 1255
## 9 2013 11 16 2003 1925 38 17 36
## 10 2013 11 16 2349 2359 -10 402 440
## # ℹ 336,766 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>, velocidade <dbl>
O que acontece se você incluir o nome de uma variável várias vezes dentro de select()?
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Deliberately name `hour` and `distance` more than once to show how select()
# treats repeated column names: each column appears a single time in the
# result.
voos_select <- select(flights, hour, hour, hour, distance, distance)
voos_select
## # A tibble: 336,776 × 2
## hour distance
## <dbl> <dbl>
## 1 5 1400
## 2 5 1416
## 3 5 1089
## 4 5 1576
## 5 6 762
## 6 5 719
## 7 6 1065
## 8 6 229
## 9 6 944
## 10 6 733
## # ℹ 336,766 more rows
Se o nome de uma variável for incluído várias vezes dentro de select(), o efeito é o mesmo de incluí-lo uma única vez: a coluna aparece apenas uma vez no resultado.
Encontre os 10 voos mais atrasados usando arrange() e a função min_rank(). Leia a documentação de min_rank() para aprender sobre ela.
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
#?min_rank
# Rank every flight by departure delay so that rank 1 is the most delayed one.
# min_rank() ranks in ascending order, so the column is wrapped in desc();
# without it the worst flight received the largest rank (328521) and the rank
# could not be used to pick the top of the list.
voos_atrasados_rankeados <-
  flights %>%
  arrange(desc(dep_delay)) %>%
  mutate(rank = min_rank(desc(dep_delay)))
#head(voos_atrasados_rankeados)
# Use the rank itself to keep the 10 most delayed flights. Rows with a missing
# `dep_delay` have an NA rank and are dropped by filter().
voos_atrasados_top_10 <-
  voos_atrasados_rankeados %>%
  filter(rank <= 10) %>%
  select(dep_delay, rank)
voos_atrasados_top_10
## # A tibble: 10 × 2
## dep_delay rank
## <dbl> <int>
## 1 1301 1
## 2 1137 2
## 3 1126 3
## 4 1014 4
## 5 1005 5
## 6 960 6
## 7 911 7
## 8 899 8
## 9 898 9
## 10 896 10
Usando mutate(), crie uma coluna com a média da variável tempo no ar (air_time). Você vai obter uma nova coluna constante com o valor desejado.
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Append `media_air_time`, the overall mean of `air_time` with missing values
# ignored. mean() returns a single number, so mutate() recycles it and the new
# column holds the same value on every row.
flights_com_air_time <- mutate(
  flights,
  media_air_time = mean(air_time, na.rm = TRUE)
)
head(flights_com_air_time)
## # A tibble: 6 × 20
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>, media_air_time <dbl>
Verifique a coluna com dep_time. Ela não fornece uma variável em tempo contínuo. Converta essa coluna para uma representação mais apropriada de número de minutos a partir da meia-noite.
Resp:
OBS: dep_time: Actual departure time (format HHMM or HMM), local tz.
library("nycflights13")
library(dplyr)
# Distribution of the raw HHMM-encoded departure times; the summary printed
# below shows a maximum of 2400 (midnight) and the number of missing values.
summary(nycflights13::flights$dep_time)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 907 1401 1349 1744 2400 8255
# Split each HHMM clock reading into its hour and minute parts.
departure_time <- nycflights13::flights$dep_time
departure_time_hours <- departure_time %/% 100
departure_time_minutes <- departure_time %% 100
# Minutes elapsed since midnight. The trailing `%% 1440` folds the 2400
# encoding of midnight back to 0 (it would otherwise become 1440, one full day
# after midnight); missing values stay NA.
departure_time_in_minutes <-
  (departure_time_hours * 60 + departure_time_minutes) %% 1440
# transform() is base R and returns a plain data.frame rather than a tibble,
# which is why the result prints without the tibble header.
flights_com_dep_time_apropriado <-
  flights %>%
  transform(dep_time = departure_time_in_minutes)
head(flights_com_dep_time_apropriado)
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## 1 2013 1 1 317 515 2 830 819
## 2 2013 1 1 333 529 4 850 830
## 3 2013 1 1 342 540 2 923 850
## 4 2013 1 1 344 545 -1 1004 1022
## 5 2013 1 1 354 600 -6 812 837
## 6 2013 1 1 354 558 -4 740 728
## arr_delay carrier flight tailnum origin dest air_time distance hour minute
## 1 11 UA 1545 N14228 EWR IAH 227 1400 5 15
## 2 20 UA 1714 N24211 LGA IAH 227 1416 5 29
## 3 33 AA 1141 N619AA JFK MIA 160 1089 5 40
## 4 -18 B6 725 N804JB JFK BQN 183 1576 5 45
## 5 -25 DL 461 N668DN LGA ATL 116 762 6 0
## 6 12 UA 1696 N39463 EWR ORD 150 719 5 58
## time_hour
## 1 2013-01-01 05:00:00
## 2 2013-01-01 05:00:00
## 3 2013-01-01 05:00:00
## 4 2013-01-01 05:00:00
## 5 2013-01-01 06:00:00
## 6 2013-01-01 05:00:00
Verifique qual companhia aérea tem os piores atrasos.
Resp:
library("nycflights13")
library(dplyr)
#head(nycflights13::flights)
# Rank every flight by departure delay so that rank 1 is the most delayed one.
# min_rank() ranks in ascending order, so the column is wrapped in desc();
# without it the worst flight received the largest rank (328521).
voos_atrasados_rankeados <-
  flights %>%
  arrange(desc(dep_delay)) %>%
  mutate(rank = min_rank(desc(dep_delay)))
# Keep only the columns needed to relate each delay to its carrier. The rows
# remain sorted from the most to the least delayed flight.
voos_atrasados_top_3 <-
  voos_atrasados_rankeados %>%
  select(dep_delay, carrier, rank)
head(voos_atrasados_top_3, 10)
## # A tibble: 10 × 3
## dep_delay carrier rank
## <dbl> <chr> <int>
## 1 1301 HA 1
## 2 1137 MQ 2
## 3 1126 MQ 3
## 4 1014 AA 4
## 5 1005 MQ 5
## 6 960 DL 6
## 7 911 DL 7
## 8 899 DL 8
## 9 898 DL 9
## 10 896 AA 10
# unique() keeps the first occurrence of each carrier code, so the vector
# lists the carriers ordered by their single worst departure delay.
# Note that the name is reused here: from this point on it holds a character
# vector of carrier codes, not a tibble.
voos_atrasados_top_3 <- unique(voos_atrasados_top_3$carrier)
voos_atrasados_top_3
## [1] "HA" "MQ" "AA" "DL" "F9" "9E" "VX" "FL" "EV" "B6" "US" "UA" "WN" "YV" "AS"
## [16] "OO"
# Lookup table that maps each carrier code to the airline name.
nycflights13::airlines
## # A tibble: 16 × 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## 7 F9 Frontier Airlines Inc.
## 8 FL AirTran Airways Corporation
## 9 HA Hawaiian Airlines Inc.
## 10 MQ Envoy Air
## 11 OO SkyWest Airlines Inc.
## 12 UA United Air Lines Inc.
## 13 US US Airways Inc.
## 14 VX Virgin America
## 15 WN Southwest Airlines Co.
## 16 YV Mesa Airlines Inc.
Companhias com os piores atrasos (critério: maior atraso individual na partida, dep_delay):
1o) Hawaiian Airlines Inc. (HA)
2o) Envoy Air (MQ)
3o) American Airlines Inc. (AA)