library('dplyr')
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('nycflights13')
## Warning: package 'nycflights13' was built under R version 4.2.3
# Take a working copy of the nycflights13 flights table. Wrapping the
# assignment in parentheses makes its value visible, so the tibble is
# auto-printed right after being assigned.
(flights <- nycflights13::flights)
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
#EXERCÍCIO 1
# a)
# Flights with an arr_delay of at least 120 (two hours, assuming delays are
# recorded in minutes).
A <- flights %>%
  filter(arr_delay >= 120)
A
## # A tibble: 10,200 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      811        630     101    1047     830     137 MQ     
##  2  2013     1     1      848       1835     853    1001    1950     851 MQ     
##  3  2013     1     1      957        733     144    1056     853     123 UA     
##  4  2013     1     1     1114        900     134    1447    1222     145 UA     
##  5  2013     1     1     1505       1310     115    1638    1431     127 EV     
##  6  2013     1     1     1525       1340     105    1831    1626     125 B6     
##  7  2013     1     1     1549       1445      64    1912    1656     136 EV     
##  8  2013     1     1     1558       1359     119    1718    1515     123 EV     
##  9  2013     1     1     1732       1630      62    2028    1825     123 EV     
## 10  2013     1     1     1803       1620     103    2008    1750     138 MQ     
## # … with 10,190 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# b)
# Flights whose destination is IAH or HOU.
# Use %in% for set membership: `dest == c("IAH", "HOU")` recycles the two
# codes down the column (odd rows are compared with "IAH", even rows with
# "HOU"), silently dropping matching flights. No warning is raised because
# the row count (336,776) is a multiple of 2.
# NOTE: the tibble printed below was produced by the old `==` code; re-run
# the script to refresh it.
B <- filter(flights, dest %in% c("IAH", "HOU"))
B
## # A tibble: 4,655 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      623        627      -4     933     932       1 UA     
##  3  2013     1     1     1028       1026       2    1350    1339      11 UA     
##  4  2013     1     1     1114        900     134    1447    1222     145 UA     
##  5  2013     1     1     1208       1158      10    1540    1502      38 B6     
##  6  2013     1     1     1306       1300       6    1622    1610      12 WN     
##  7  2013     1     1     1527       1515      12    1854    1810      44 UA     
##  8  2013     1     1     1620       1620       0    1945    1922      23 UA     
##  9  2013     1     1     1725       1720       5    2045    2021      24 UA     
## 10  2013     1     1     1855       1848       7    2203    2200       3 UA     
## # … with 4,645 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# c)
# Flights operated by carrier UA, AA or DL.
# Use %in% for set membership: `carrier == c("UA", "AA", "DL")` recycles the
# three codes down the column (row 1 vs "UA", row 2 vs "AA", row 3 vs "DL",
# ...), so most matching flights were dropped. It also raised the warning
# "longer object length is not a multiple of shorter object length", since
# 336,776 rows is not a multiple of 3.
# NOTE: the tibble printed below was produced by the old `==` code; re-run
# the script to refresh it.
C <- filter(flights, carrier %in% c("UA", "AA", "DL"))
C
## # A tibble: 46,913 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      558        600      -2     924     917       7 UA     
##  3  2013     1     1      602        610      -8     812     820      -8 DL     
##  4  2013     1     1      606        610      -4     858     910     -12 AA     
##  5  2013     1     1      606        610      -4     837     845      -8 DL     
##  6  2013     1     1      607        607       0     858     915     -17 UA     
##  7  2013     1     1      615        615       0     833     842      -9 DL     
##  8  2013     1     1      623        610      13     920     915       5 AA     
##  9  2013     1     1      643        646      -3     922     940     -18 UA     
## 10  2013     1     1      653        700      -7     936    1009     -33 DL     
## # … with 46,903 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# d)
# Flights that departed in month 7, 8 or 9.
# `7 & 8 & 9` is a logical AND that evaluates to TRUE, and `month == TRUE`
# is the same as `month == 1`, so the old code returned January flights
# (see the month column in the output below). %in% tests membership in the
# intended set of months instead.
# NOTE: the tibble printed below was produced by the old code; re-run the
# script to refresh it.
D <- filter(flights, month %in% c(7, 8, 9))
D
## # A tibble: 27,004 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 26,994 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# e)
# Flights that did not leave late (dep_delay <= 0) yet have an arr_delay of
# 120 or more. Comma-separated filter() conditions are combined with AND.
E <- flights %>%
  filter(dep_delay <= 0, arr_delay >= 120)
E
## # A tibble: 29 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1    27     1419       1420      -1    1754    1550     124 MQ     
##  2  2013    10     7     1350       1350       0    1736    1526     130 EV     
##  3  2013    10     7     1357       1359      -2    1858    1654     124 AA     
##  4  2013    10    16      657        700      -3    1258    1056     122 B6     
##  5  2013    11     1      658        700      -2    1329    1015     194 VX     
##  6  2013     3    18     1844       1847      -3      39    2219     140 UA     
##  7  2013     4    17     1635       1640      -5    2049    1845     124 MQ     
##  8  2013     4    18      558        600      -2    1149     850     179 AA     
##  9  2013     4    18      655        700      -5    1213     950     143 AA     
## 10  2013     5    22     1827       1830      -3    2217    2010     127 MQ     
## # … with 19 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
#EXERCÍCIO 2
# Order the flights from the largest departure delay down and keep the result.
ex2 <- flights %>%
  arrange(desc(dep_delay))
# Also print (without storing) the ordering by largest arrival delay.
arrange(flights, desc(arr_delay))
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     7    22     2257        759     898     121    1026     895 DL     
##  9  2013    12     5      756       1700     896    1058    2020     878 AA     
## 10  2013     5     3     1133       2055     878    1250    2215     875 MQ     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
# Display ex2: the flights sorted by descending dep_delay.
ex2
## # A tibble: 336,776 × 19
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     6    27      959       1900     899    1236    2226     850 DL     
##  9  2013     7    22     2257        759     898     121    1026     895 DL     
## 10  2013    12     5      756       1700     896    1058    2020     878 AA     
## # … with 336,766 more rows, 9 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
#EXERCÍCIO 3
# Add velocidade = distance / air_time * 60 (distance per hour, assuming
# air_time is in minutes) and list the fastest flights first.
ex3 <- flights %>%
  mutate(velocidade = distance / air_time * 60) %>%
  arrange(desc(velocidade))
ex3
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     5    25     1709       1700       9    1923    1937     -14 DL     
##  2  2013     7     2     1558       1513      45    1745    1719      26 EV     
##  3  2013     5    13     2040       2025      15    2225    2226      -1 EV     
##  4  2013     3    23     1914       1910       4    2045    2043       2 EV     
##  5  2013     1    12     1559       1600      -1    1849    1917     -28 DL     
##  6  2013    11    17      650        655      -5    1059    1150     -51 DL     
##  7  2013     2    21     2355       2358      -3     412     438     -26 B6     
##  8  2013    11    17      759        800      -1    1212    1255     -43 AA     
##  9  2013    11    16     2003       1925      38      17      36     -19 DL     
## 10  2013    11    16     2349       2359     -10     402     440     -38 B6     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, velocidade <dbl>, and abbreviated variable
## #   names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
#EXERCÍCIO 4
# Deliberately name the same column three times to see how select() reacts.
ex4 <- select(flights, hour, hour, hour)
ex4
## # A tibble: 336,776 × 1
##     hour
##    <dbl>
##  1     5
##  2     5
##  3     5
##  4     5
##  5     6
##  6     5
##  7     6
##  8     6
##  9     6
## 10     6
## # … with 336,766 more rows
# Repeating a column name in select() changes nothing: the column is kept only once.
#EXERCÍCIO 5
# Sort the flights by the rank of their departure delay (rank 1 = largest
# delay), then add a copy of dep_delay under the name voos_mais_atrasados.
# NOTE(review): the new column only duplicates dep_delay; if the rank itself
# (or only the top-ranked flights) was intended, confirm and adjust.
ex5 <- flights %>%
  arrange(min_rank(desc(dep_delay))) %>%
  mutate(voos_mais_atrasados = dep_delay)
ex5
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     9      641        900    1301    1242    1530    1272 HA     
##  2  2013     6    15     1432       1935    1137    1607    2120    1127 MQ     
##  3  2013     1    10     1121       1635    1126    1239    1810    1109 MQ     
##  4  2013     9    20     1139       1845    1014    1457    2210    1007 AA     
##  5  2013     7    22      845       1600    1005    1044    1815     989 MQ     
##  6  2013     4    10     1100       1900     960    1342    2211     931 DL     
##  7  2013     3    17     2321        810     911     135    1020     915 DL     
##  8  2013     6    27      959       1900     899    1236    2226     850 DL     
##  9  2013     7    22     2257        759     898     121    1026     895 DL     
## 10  2013    12     5      756       1700     896    1058    2020     878 AA     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, voos_mais_atrasados <dbl>, and abbreviated
## #   variable names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time,
## #   ⁵​arr_delay
#EXERCÍCIO 6
# Attach the overall mean of air_time (missing values ignored) to every row
# as media_air_time.
ex6 <- flights %>%
  mutate(media_air_time = mean(air_time, na.rm = TRUE))
ex6
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, media_air_time <dbl>, and abbreviated
## #   variable names ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time,
## #   ⁵​arr_delay