Questão 1

Utilizando o mesmo conjunto de dados para flights usado na aula, encontre todos os vôos que:

  1. Tiveram um atraso na chegada (arrival) de duas horas ou mais.

Resp:

library("nycflights13")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#names(nycflights13::flights)
# Flights that arrived two hours or more late.
# arr_delay is recorded in minutes, so "two hours" is 120, not 2.
# NOTE: the output printed below was produced by the previous filter
# (arr_delay <= 2.0) and must be regenerated by re-knitting.
voos_atrasados <-
  flights %>%
  filter(arr_delay >= 120)

voos_atrasados
## # A tibble: 204,250 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      544            545        -1     1004           1022
##  2  2013     1     1      554            600        -6      812            837
##  3  2013     1     1      557            600        -3      709            723
##  4  2013     1     1      557            600        -3      838            846
##  5  2013     1     1      558            600        -2      849            851
##  6  2013     1     1      558            600        -2      853            856
##  7  2013     1     1      558            600        -2      923            937
##  8  2013     1     1      559            559         0      702            706
##  9  2013     1     1      559            600        -1      854            902
## 10  2013     1     1      600            600         0      851            858
## # ℹ 204,240 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
  2. Voaram com destino a Houston (IAH ou HOU)

Resp:

library("nycflights13")
library(dplyr)
#?nycflights13::flights

# Flights bound for either Houston airport (IAH or HOU).
voos_dest_iah_hou <- filter(flights, dest %in% c("IAH", "HOU"))

voos_dest_iah_hou
## # A tibble: 9,313 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      623            627        -4      933            932
##  4  2013     1     1      728            732        -4     1041           1038
##  5  2013     1     1      739            739         0     1104           1038
##  6  2013     1     1      908            908         0     1228           1219
##  7  2013     1     1     1028           1026         2     1350           1339
##  8  2013     1     1     1044           1045        -1     1352           1351
##  9  2013     1     1     1114            900       134     1447           1222
## 10  2013     1     1     1205           1200         5     1503           1505
## # ℹ 9,303 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
  3. Foram realizados pelas companhias aéreas United, American ou Delta Airlines.

Resp:

library("nycflights13")
library(dplyr)
#?nycflights13::flights
#nycflights13::airlines

# Flights operated by United (UA), American (AA) or Delta (DL).
voos_comp <- filter(flights, carrier %in% c("UA", "AA", "DL"))

voos_comp
## # A tibble: 139,504 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      554            600        -6      812            837
##  5  2013     1     1      554            558        -4      740            728
##  6  2013     1     1      558            600        -2      753            745
##  7  2013     1     1      558            600        -2      924            917
##  8  2013     1     1      558            600        -2      923            937
##  9  2013     1     1      559            600        -1      941            910
## 10  2013     1     1      559            600        -1      854            902
## # ℹ 139,494 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
  4. Partiram no verão (julho, agosto e setembro).

Resp:

library("nycflights13")
library(dplyr)
#?nycflights13::flights


# Flights that departed in July, August or September.
# All three conditions must test `month` (an integer column); the previous
# version compared `carrier` with "8" and "9", which never matches, so only
# July flights were returned.
# NOTE: the output printed below predates this fix and must be regenerated.
voos_verao <-
  flights %>%
  filter(month %in% 7:9)

voos_verao
## # A tibble: 29,425 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     7     1        1           2029       212      236           2359
##  2  2013     7     1        2           2359         3      344            344
##  3  2013     7     1       29           2245       104      151              1
##  4  2013     7     1       43           2130       193      322             14
##  5  2013     7     1       44           2150       174      300            100
##  6  2013     7     1       46           2051       235      304           2358
##  7  2013     7     1       48           2001       287      308           2305
##  8  2013     7     1       58           2155       183      335             43
##  9  2013     7     1      100           2146       194      327             30
## 10  2013     7     1      100           2245       135      337            135
## # ℹ 29,415 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
  5. Não partiram atrasados, porém chegaram com mais de duas horas de atraso ao destino.

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

# Flights that did not leave late (dep_delay <= 0) AND still arrived more
# than two hours (120 minutes) late.
# Both conditions must hold, so they are combined with AND (comma), and the
# delays are compared as numbers: comparing against the strings "0" / "2"
# coerces the column to character and compares lexicographically.
# NOTE: the output printed below predates this fix and must be regenerated.
voos_atrasados_dest <-
  flights %>%
  filter(dep_delay <= 0, arr_delay > 120)

voos_atrasados_dest
## # A tibble: 99,113 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      533            529         4      850            830
##  2  2013     1     1      542            540         2      923            850
##  3  2013     1     1      558            600        -2      753            745
##  4  2013     1     1      558            600        -2      924            917
##  5  2013     1     1      559            600        -1      941            910
##  6  2013     1     1      559            559         0      702            706
##  7  2013     1     1      600            600         0      851            858
##  8  2013     1     1      600            600         0      837            825
##  9  2013     1     1      607            607         0      858            915
## 10  2013     1     1      608            600         8      807            735
## # ℹ 99,103 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>
  6. Partiram entre meia noite e 6 h da manhã (inclusive).

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights$hour)
#min(as.numeric(nycflights13::flights$hour))

# Flights that actually departed between midnight and 06:00, inclusive.
# dep_time is the actual departure time encoded as HHMM, so 06:00 is 600 and
# midnight is stored as 2400. The `hour` column is derived from the
# *scheduled* departure, and `hour <= 6` would also admit 06:01-06:59.
# NOTE: the output printed below predates this fix and must be regenerated.
voos_periodo <-
  flights %>%
  filter(dep_time <= 600 | dep_time == 2400)

voos_periodo
## # A tibble: 27,905 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 27,895 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Questão 2

Classifique os vôos para encontrar os vôos mais atrasados.

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

# Order every flight from the longest to the shortest departure delay.
voos_mais_atrasados <- arrange(flights, desc(dep_delay))

voos_mais_atrasados
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     9      641            900      1301     1242           1530
##  2  2013     6    15     1432           1935      1137     1607           2120
##  3  2013     1    10     1121           1635      1126     1239           1810
##  4  2013     9    20     1139           1845      1014     1457           2210
##  5  2013     7    22      845           1600      1005     1044           1815
##  6  2013     4    10     1100           1900       960     1342           2211
##  7  2013     3    17     2321            810       911      135           1020
##  8  2013     6    27      959           1900       899     1236           2226
##  9  2013     7    22     2257            759       898      121           1026
## 10  2013    12     5      756           1700       896     1058           2020
## # ℹ 336,766 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>

Questão 3

Classifique os vôos de forma a encontrar os mais rápidos (velocidade mais alta).

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

# Order flights by average speed, fastest first.
# air_time is in minutes, so multiplying by 60 yields distance per hour.
voos_mais_rapidos <- arrange(
  mutate(flights, velocidade = distance / air_time * 60),
  desc(velocidade)
)

voos_mais_rapidos
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     5    25     1709           1700         9     1923           1937
##  2  2013     7     2     1558           1513        45     1745           1719
##  3  2013     5    13     2040           2025        15     2225           2226
##  4  2013     3    23     1914           1910         4     2045           2043
##  5  2013     1    12     1559           1600        -1     1849           1917
##  6  2013    11    17      650            655        -5     1059           1150
##  7  2013     2    21     2355           2358        -3      412            438
##  8  2013    11    17      759            800        -1     1212           1255
##  9  2013    11    16     2003           1925        38       17             36
## 10  2013    11    16     2349           2359       -10      402            440
## # ℹ 336,766 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, velocidade <dbl>

Questão 4

O que acontece se você incluir o nome de uma variável várias vezes dentro de select()?

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

# Deliberately repeat column names to show how select() treats duplicates.
voos_select <- select(flights, hour, hour, hour, distance, distance)

voos_select
## # A tibble: 336,776 × 2
##     hour distance
##    <dbl>    <dbl>
##  1     5     1400
##  2     5     1416
##  3     5     1089
##  4     5     1576
##  5     6      762
##  6     5      719
##  7     6     1065
##  8     6      229
##  9     6      944
## 10     6      733
## # ℹ 336,766 more rows

Se o nome de uma variável for incluído várias vezes dentro de select(), o efeito é o mesmo de incluí-lo uma única vez: a coluna aparece apenas uma vez no resultado.

Questão 5

Encontre os 10 vôos mais atrasados usando arrange() e a função min_rank(). Leia a documentação de min_rank() para aprender sobre ela.

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

#?min_rank

# Rank every flight by departure delay so that the most delayed flight gets
# rank 1. min_rank() ranks in ascending order, so the column is wrapped in
# desc(); without it the worst delay received the largest rank (328521).
# NOTE: the output printed below predates this fix and must be regenerated.
voos_atrasados_rankeados <-
  flights %>%
  mutate(rank = min_rank(desc(dep_delay))) %>%
  arrange(rank)

# Keep only the flights ranked in the top 10. min_rank() gives tied delays
# the same rank, so a tie at the cut-off could yield more than 10 rows;
# head() below caps what is displayed.
voos_atrasados_top_10 <-
  voos_atrasados_rankeados %>%
  filter(rank <= 10) %>%
  select(dep_delay, rank)

head(voos_atrasados_top_10, 10)
## # A tibble: 10 × 2
##    dep_delay   rank
##        <dbl>  <int>
##  1      1301 328521
##  2      1137 328520
##  3      1126 328519
##  4      1014 328518
##  5      1005 328517
##  6       960 328516
##  7       911 328515
##  8       899 328514
##  9       898 328513
## 10       896 328512

Questão 6

Usando mutate(), crie uma coluna com a média da variável tempo no ar (air_time). Você vai obter uma nova coluna constante com o valor desejado.

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)

# Add a constant column holding the overall mean air time (NAs ignored).
flights_com_air_time <- mutate(
  flights,
  media_air_time = mean(air_time, na.rm = TRUE)
)

head(flights_com_air_time)
## # A tibble: 6 × 20
##    year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##   <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
## 1  2013     1     1      517            515         2      830            819
## 2  2013     1     1      533            529         4      850            830
## 3  2013     1     1      542            540         2      923            850
## 4  2013     1     1      544            545        -1     1004           1022
## 5  2013     1     1      554            600        -6      812            837
## 6  2013     1     1      554            558        -4      740            728
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, media_air_time <dbl>

Questão 7

Verifique a coluna com dep_time. Ela não fornece uma variável em tempo contínuo. Converta essa coluna para uma representação mais apropriada de número de minutos a partir da meia-noite.

Resp:

OBS: dep_time: Actual departure time (format HHMM or HMM), local tz.

library("nycflights13")
library(dplyr)
# Inspect the range of dep_time (HHMM-encoded) and its number of NAs.
summary(nycflights13::flights$dep_time)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1     907    1401    1349    1744    2400    8255
# Convert dep_time from HHMM clock notation to minutes since midnight.
departure_time <- nycflights13::flights$dep_time

# Integer division / remainder by 100 split HHMM into hours and minutes.
departure_time_hours <- departure_time %/% 100
departure_time_minutes <- departure_time %% 100

# The final %% 1440 maps 2400 (midnight) to 0 instead of 1440, so the
# result is a continuous value in [0, 1439]. NAs are preserved.
departure_time_in_minutes <-
  (departure_time_hours * 60 + departure_time_minutes) %% 1440

# mutate() keeps the result a tibble; base transform() would return a
# plain data.frame.
# NOTE: the output printed below was produced with transform() and must be
# regenerated.
flights_com_dep_time_apropriado <-
  flights %>%
  mutate(dep_time = departure_time_in_minutes)

head(flights_com_dep_time_apropriado)
##   year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## 1 2013     1   1      317            515         2      830            819
## 2 2013     1   1      333            529         4      850            830
## 3 2013     1   1      342            540         2      923            850
## 4 2013     1   1      344            545        -1     1004           1022
## 5 2013     1   1      354            600        -6      812            837
## 6 2013     1   1      354            558        -4      740            728
##   arr_delay carrier flight tailnum origin dest air_time distance hour minute
## 1        11      UA   1545  N14228    EWR  IAH      227     1400    5     15
## 2        20      UA   1714  N24211    LGA  IAH      227     1416    5     29
## 3        33      AA   1141  N619AA    JFK  MIA      160     1089    5     40
## 4       -18      B6    725  N804JB    JFK  BQN      183     1576    5     45
## 5       -25      DL    461  N668DN    LGA  ATL      116      762    6      0
## 6        12      UA   1696  N39463    EWR  ORD      150      719    5     58
##             time_hour
## 1 2013-01-01 05:00:00
## 2 2013-01-01 05:00:00
## 3 2013-01-01 05:00:00
## 4 2013-01-01 05:00:00
## 5 2013-01-01 06:00:00
## 6 2013-01-01 05:00:00

Questão 8

Verifique qual companhia aérea tem os piores atrasos.

Resp:

library("nycflights13")
library(dplyr)
#head(nycflights13::flights)


# Rank every flight by departure delay, longest delay first (rank 1).
# min_rank() ranks ascending, so desc() is required; without it the worst
# delay received the largest rank (328521).
# NOTE(review): this ranks individual flights. A per-carrier summary (e.g.
# mean delay grouped by carrier) may answer "which airline has the worst
# delays" better -- confirm the intended interpretation.
# NOTE: the output printed below predates this fix and must be regenerated.
voos_atrasados_rankeados <-
  flights %>%
  arrange(desc(dep_delay)) %>%
  mutate(rank = min_rank(desc(dep_delay)))

# Despite the name, this still holds every flight; only the columns needed
# to read off the carriers are kept.
voos_atrasados_top_3 <-
  voos_atrasados_rankeados %>%
  select(dep_delay, carrier, rank)

head(voos_atrasados_top_3, 10)
## # A tibble: 10 × 3
##    dep_delay carrier   rank
##        <dbl> <chr>    <int>
##  1      1301 HA      328521
##  2      1137 MQ      328520
##  3      1126 MQ      328519
##  4      1014 AA      328518
##  5      1005 MQ      328517
##  6       960 DL      328516
##  7       911 DL      328515
##  8       899 DL      328514
##  9       898 DL      328513
## 10       896 AA      328512
# Collapse the ranking to distinct carrier codes, in order of first
# appearance (i.e. by each carrier's single most delayed flight).
voos_atrasados_top_3 <- voos_atrasados_top_3 %>%
  pull(carrier) %>%
  unique()

voos_atrasados_top_3
##  [1] "HA" "MQ" "AA" "DL" "F9" "9E" "VX" "FL" "EV" "B6" "US" "UA" "WN" "YV" "AS"
## [16] "OO"
# Print the carrier-code -> airline-name lookup table.
nycflights13::airlines
## # A tibble: 16 × 2
##    carrier name                       
##    <chr>   <chr>                      
##  1 9E      Endeavor Air Inc.          
##  2 AA      American Airlines Inc.     
##  3 AS      Alaska Airlines Inc.       
##  4 B6      JetBlue Airways            
##  5 DL      Delta Air Lines Inc.       
##  6 EV      ExpressJet Airlines Inc.   
##  7 F9      Frontier Airlines Inc.     
##  8 FL      AirTran Airways Corporation
##  9 HA      Hawaiian Airlines Inc.     
## 10 MQ      Envoy Air                  
## 11 OO      SkyWest Airlines Inc.      
## 12 UA      United Air Lines Inc.      
## 13 US      US Airways Inc.            
## 14 VX      Virgin America             
## 15 WN      Southwest Airlines Co.     
## 16 YV      Mesa Airlines Inc.

Companhias com piores atrasos:

1o) Hawaiian Airlines Inc. (HA)

2o) Envoy Air (MQ)

3o) American Airlines Inc. (AA)