# Lisset Hernández A01284611
#Santiago Llaguno A01721838
#Evelyn Díaz- A00829373
#A01721951 Jenaro Martínez

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(nycflights13)
# Load the airline lookup table, then add each carrier's `name` column to the
# flight records by matching on the shared `carrier` code. A left join keeps
# every flight row even if its carrier has no match in `airlines`.
data(airlines)
aeronom <- left_join(flights, airlines, by = "carrier")

# Display the joined table.
aeronom
## # A tibble: 336,776 × 20
##     year month   day dep_time sched_de…¹ dep_d…² arr_t…³ sched…⁴ arr_d…⁵ carrier
##    <int> <int> <int>    <int>      <int>   <dbl>   <int>   <int>   <dbl> <chr>  
##  1  2013     1     1      517        515       2     830     819      11 UA     
##  2  2013     1     1      533        529       4     850     830      20 UA     
##  3  2013     1     1      542        540       2     923     850      33 AA     
##  4  2013     1     1      544        545      -1    1004    1022     -18 B6     
##  5  2013     1     1      554        600      -6     812     837     -25 DL     
##  6  2013     1     1      554        558      -4     740     728      12 UA     
##  7  2013     1     1      555        600      -5     913     854      19 B6     
##  8  2013     1     1      557        600      -3     709     723     -14 EV     
##  9  2013     1     1      557        600      -3     838     846      -8 B6     
## 10  2013     1     1      558        600      -2     753     745       8 AA     
## # … with 336,766 more rows, 10 more variables: flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, name <chr>, and abbreviated variable names
## #   ¹​sched_dep_time, ²​dep_delay, ³​arr_time, ⁴​sched_arr_time, ⁵​arr_delay
#2.  Obtén una tabla que indique el número de vuelos por aerolínea.
# Keep only the columns that identify a flight and its airline.
aeronoms <- aeronom %>%
  select(carrier, flight, tailnum, name)

# Tally the flights per airline name into a `count` column, with the airline
# that has the most flights listed first.
df <- aeronoms %>%
  count(name, name = "count", sort = TRUE)
df
## # A tibble: 16 × 2
##    name                        count
##    <chr>                       <int>
##  1 United Air Lines Inc.       58665
##  2 JetBlue Airways             54635
##  3 ExpressJet Airlines Inc.    54173
##  4 Delta Air Lines Inc.        48110
##  5 American Airlines Inc.      32729
##  6 Envoy Air                   26397
##  7 US Airways Inc.             20536
##  8 Endeavor Air Inc.           18460
##  9 Southwest Airlines Co.      12275
## 10 Virgin America               5162
## 11 AirTran Airways Corporation  3260
## 12 Alaska Airlines Inc.          714
## 13 Frontier Airlines Inc.        685
## 14 Mesa Airlines Inc.            601
## 15 Hawaiian Airlines Inc.        342
## 16 SkyWest Airlines Inc.          32
#1.  Elabora una gráfica de barras que refleje el número de vuelos por aerolínea que han salido de NYC en el año 2013.
# Copy of the flights table under the name `flight`; the visible code below
# keeps working from `flights` directly.
flight <- flights

# Restrict to departures from the three NYC airports during 2013.
flights_nyc_2013 <- flights %>%
  filter(origin %in% c("EWR", "JFK", "LGA"), year == 2013)

# One row per carrier code, with the number of flights in column `n`.
flights_per_airline <- flights_nyc_2013 %>%
  count(carrier)

# Bar chart: one bar per carrier, bar height equal to the flight count.
ggplot(flights_per_airline, aes(x = carrier, y = n)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Number of flights per airline from NYC in 2013",
    x = "Airline",
    y = "Number of flights"
  )

#3. Elabora una gráfica de barras que refleje el número de vuelos por aerolínea que han salido de NYC en el año 2013 para cada uno de los tres aeropuertos. (John F. Kennedy, LaGuardia and Newark Liberty)

# Keep only the airline identifiers and the departure airport of each flight.
aeropuertos <- select(aeronom, carrier, name, origin)

# Count flights for every (origin airport, carrier) pair.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `origin` (which silently affects later dplyr verbs applied to
# `df2`) and dplyr prints a "grouped output" message on every run.
df2 <- aeropuertos %>%
  group_by(origin, carrier) %>%
  summarise(count = n(), .groups = "drop")

# One bar-chart panel per origin airport; the x-axis labels are rotated 90
# degrees so the carrier codes do not overlap inside the narrow panels.
ggplot(data = df2, mapping = aes(x = carrier, y = count)) +
  geom_col() +
  facet_wrap(~ origin) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Number of flights per airline from NYC in 2013 divided by airport")