Baixando e carregando os pacotes a serem usados no desenvolvimento:

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("nycflights13")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(nycflights13)
#renomeando o banco de dados:
flights <- nycflights13::flights
flights
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
?flights
  1. Utilizando o mesmo conjunto de dados para flights usado na aula, encontre todos os vôos que:
  1. Tiveram um atraso na chegada (arrival) de duas horas ou mais.
#Usando a função filter:
a = filter(flights, arr_delay >= 120)
a
## # A tibble: 10,200 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      811        630     101    1047     830     137 MQ     
##  2  2013     1     1      848       1835     853    1001    1950     851 MQ     
##  3  2013     1     1      957        733     144    1056     853     123 UA     
##  4  2013     1     1     1114        900     134    1447    1222     145 UA     
##  5  2013     1     1     1505       1310     115    1638    1431     127 EV     
##  6  2013     1     1     1525       1340     105    1831    1626     125 B6     
##  7  2013     1     1     1549       1445      64    1912    1656     136 EV     
##  8  2013     1     1     1558       1359     119    1718    1515     123 EV     
##  9  2013     1     1     1732       1630      62    2028    1825     123 EV     
## 10  2013     1     1     1803       1620     103    2008    1750     138 MQ     
## # … with 10,190 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Voaram com destino a Houston (IAH ou HOU)
b = filter(flights, dest == c("IAH", "HOU"))
b
## # A tibble: 4,655 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      623        627      -4     933     932       1 UA     
##  3  2013     1     1     1028       1026       2    1350    1339      11 UA     
##  4  2013     1     1     1114        900     134    1447    1222     145 UA     
##  5  2013     1     1     1208       1158      10    1540    1502      38 B6     
##  6  2013     1     1     1306       1300       6    1622    1610      12 WN     
##  7  2013     1     1     1527       1515      12    1854    1810      44 UA     
##  8  2013     1     1     1620       1620       0    1945    1922      23 UA     
##  9  2013     1     1     1725       1720       5    2045    2021      24 UA     
## 10  2013     1     1     1855       1848       7    2203    2200       3 UA     
## # … with 4,645 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Foram realizados pelas companhias áreas United, American ou Delta Airlines.
#United Airlines = UA, American Arlines = AA, Delta Airlines = DL
c = filter(flights, carrier == c("UA","AA", "DL"))
## Warning in carrier == c("UA", "AA", "DL"): longer object length is not a
## multiple of shorter object length
c
## # A tibble: 46,913 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      558        600      -2     924     917       7 UA     
##  3  2013     1     1      602        610      -8     812     820      -8 DL     
##  4  2013     1     1      606        610      -4     858     910     -12 AA     
##  5  2013     1     1      606        610      -4     837     845      -8 DL     
##  6  2013     1     1      607        607       0     858     915     -17 UA     
##  7  2013     1     1      615        615       0     833     842      -9 DL     
##  8  2013     1     1      623        610      13     920     915       5 AA     
##  9  2013     1     1      643        646      -3     922     940     -18 UA     
## 10  2013     1     1      653        700      -7     936    1009     -33 DL     
## # … with 46,903 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Partiram no verão (julho, agosto e setembro).
d = filter(flights, month == 7 & 8 & 9)
d
## # A tibble: 29,425 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     7     1        1       2029     212     236    2359     157 B6     
##  2  2013     7     1        2       2359       3     344     344       0 B6     
##  3  2013     7     1       29       2245     104     151       1     110 B6     
##  4  2013     7     1       43       2130     193     322      14     188 B6     
##  5  2013     7     1       44       2150     174     300     100     120 AA     
##  6  2013     7     1       46       2051     235     304    2358     186 B6     
##  7  2013     7     1       48       2001     287     308    2305     243 VX     
##  8  2013     7     1       58       2155     183     335      43     172 B6     
##  9  2013     7     1      100       2146     194     327      30     177 B6     
## 10  2013     7     1      100       2245     135     337     135     122 B6     
## # … with 29,415 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Não partiram atrasados, porém chegaram com mais de duas horas de atraso ao destino.
e = filter(flights, dep_delay <= 0 & arr_delay >= 120)
e
## # A tibble: 29 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1    27     1419       1420      -1    1754    1550     124 MQ     
##  2  2013    10     7     1350       1350       0    1736    1526     130 EV     
##  3  2013    10     7     1357       1359      -2    1858    1654     124 AA     
##  4  2013    10    16      657        700      -3    1258    1056     122 B6     
##  5  2013    11     1      658        700      -2    1329    1015     194 VX     
##  6  2013     3    18     1844       1847      -3      39    2219     140 UA     
##  7  2013     4    17     1635       1640      -5    2049    1845     124 MQ     
##  8  2013     4    18      558        600      -2    1149     850     179 AA     
##  9  2013     4    18      655        700      -5    1213     950     143 AA     
## 10  2013     5    22     1827       1830      -3    2217    2010     127 MQ     
## # … with 19 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Partiram entre meia noite e 6 h da manhã (inclusive).
f = filter(flights, dep_time == 0000 : 600)
## Warning in dep_time == 0:600: longer object length is not a multiple of shorter
## object length
f
## # A tibble: 25 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1    30      556        600      -4     709     658      11 US     
##  2  2013     1    30      557        600      -3     711     709       2 B6     
##  3  2013     1    30      559        601      -2     739     725      14 EV     
##  4  2013    12    11      542        545      -3     841     832       9 UA     
##  5  2013    12    11      544        550      -6    1021    1027      -6 B6     
##  6  2013    12    11      557        600      -3     853     846       7 B6     
##  7  2013     2    20      557        600      -3     733     745     -12 AA     
##  8  2013     2    24      556        600      -4     914     909       5 UA     
##  9  2013     3    30      550        600     -10     721     759     -38 DL     
## 10  2013     3    30      555        600      -5     804     829     -25 DL     
## # … with 15 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Classifique os vôos para encontrar os vôos mais atrasados.
delayed = arrange(flights, desc(dep_delay))
            flights %>% arrange(desc(arr_delay))
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     7    22     2257        759     898     121    1026     895 DL     
##  9  2013    12     5      756       1700     896    1058    2020     878 AA     
## 10  2013     5     3     1133       2055     878    1250    2215     875 MQ     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
delayed 
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     6    27      959       1900     899    1236    2226     850 DL     
##  9  2013     7    22     2257        759     898     121    1026     895 DL     
## 10  2013    12     5      756       1700     896    1058    2020     878 AA     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. Classifique os vôos de forma a encontrar os mais rápidos (velocidade mais alta).
fastest = mutate(flights, FlightSpeed = (distance/air_time)*60)
          fastest = arrange(fastest, desc(FlightSpeed))
fastest          
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     5    25     1709       1700       9    1923    1937     -14 DL     
##  2  2013     7     2     1558       1513      45    1745    1719      26 EV     
##  3  2013     5    13     2040       2025      15    2225    2226      -1 EV     
##  4  2013     3    23     1914       1910       4    2045    2043       2 EV     
##  5  2013     1    12     1559       1600      -1    1849    1917     -28 DL     
##  6  2013    11    17      650        655      -5    1059    1150     -51 DL     
##  7  2013     2    21     2355       2358      -3     412     438     -26 B6     
##  8  2013    11    17      759        800      -1    1212    1255     -43 AA     
##  9  2013    11    16     2003       1925      38      17      36     -19 DL     
## 10  2013    11    16     2349       2359     -10     402     440     -38 B6     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, FlightSpeed <dbl>, and abbreviated variable
## #   names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
  1. O que acontece se você incluir o nome de uma variável várias vezes dentro de select()?
# independente de quantas vezes ela for repetida dentro da função select(), a variável apenas aparece uma vez quando rodamos o programa.

o_que_acontece = flights %>% select(origin, origin, origin, origin, origin)
o_que_acontece 
## # A tibble: 336,776 × 1
##    origin
##    <chr> 
##  1 EWR   
##  2 LGA   
##  3 JFK   
##  4 JFK   
##  5 LGA   
##  6 EWR   
##  7 EWR   
##  8 LGA   
##  9 JFK   
## 10 LGA   
## # … with 336,766 more rows
  1. Encontre os 10 vôos mais atrados usando arrange() e a função min rank(). Leia a documentação de min rank() para aprender sobre ele.
#Procurando o signifado da Função min_rank()
?min_rank() 
#Usando arrange e a função min_rank()
TenDelayed = arrange(flights, min_rank(desc(dep_delay)))
TenDelayed = mutate(TenDelayed, Ten_most_delayed = dep_delay)
TenDelayed
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     6    27      959       1900     899    1236    2226     850 DL     
##  9  2013     7    22     2257        759     898     121    1026     895 DL     
## 10  2013    12     5      756       1700     896    1058    2020     878 AA     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, Ten_most_delayed <dbl>, and abbreviated
## #   variable names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time,
## #   ⁵​arr_delay
  1. Usando mutate() crie uma coluna com a média da variável tempo em ar (air time).
#pesquisando como usar a função de média aritmética
?mean()

#calculando a média de tempo de vôo em uma nova coluna
average = mutate(flights, average_air_time = mean(air_time, na.rm = TRUE))
average
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, average_air_time <dbl>, and abbreviated
## #   variable names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time,
## #   ⁵​arr_delay