Load Packages

library(dplyr)
library(magrittr)
library(nycflights13)
library(tibble)

Project 1

Tibble of Flights

# Coerce the nycflights13 `flights` data to a tibble and print it.
# `as_tibble()` replaces `tbl_df()`, which is deprecated in dplyr >= 1.0.0.
flights.tbl <- as_tibble(flights)
flights.tbl
# A tibble: 336,776 x 19
    year month   day dep_time sched_dep_time dep_delay arr_time
   <int> <int> <int>    <int>          <int>     <dbl>    <int>
 1  2013     1     1      517            515         2      830
 2  2013     1     1      533            529         4      850
 3  2013     1     1      542            540         2      923
 4  2013     1     1      544            545        -1     1004
 5  2013     1     1      554            600        -6      812
 6  2013     1     1      554            558        -4      740
 7  2013     1     1      555            600        -5      913
 8  2013     1     1      557            600        -3      709
 9  2013     1     1      557            600        -3      838
10  2013     1     1      558            600        -2      753
# ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
#   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
#   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
#   minute <dbl>, time_hour <dttm>

Filtered and Selected to Pull Out Needed Items

# Keep only United Airlines (UA) flights that left JFK in month 3 (March).
new.flights <- flights.tbl %>%
  filter(origin == "JFK", carrier == "UA", month == 3)
new.flights
# A tibble: 378 x 19
    year month   day dep_time sched_dep_time dep_delay arr_time
   <int> <int> <int>    <int>          <int>     <dbl>    <int>
 1  2013     3     1      600            600         0      906
 2  2013     3     1      607            610        -3      832
 3  2013     3     1      655            700        -5      954
 4  2013     3     1      758            800        -2     1106
 5  2013     3     1      836            840        -4     1111
 6  2013     3     1     1103           1106        -3     1400
 7  2013     3     1     1125           1130        -5     1350
 8  2013     3     1     1423           1425        -2     1728
 9  2013     3     1     1621           1530        51     1844
10  2013     3     1     1723           1729        -6     2010
# ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
#   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
#   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
#   minute <dbl>, time_hour <dttm>
# Reduce the filtered flights to the identifying columns plus air_time and
# distance, the two measures summarised afterwards.
newer.flights <- new.flights %>%
  select(origin, carrier, month, air_time, distance)
newer.flights
# A tibble: 378 x 5
   origin carrier month air_time distance
    <chr>   <chr> <int>    <dbl>    <dbl>
 1    JFK      UA     3      342     2586
 2    JFK      UA     3      292     2475
 3    JFK      UA     3      343     2586
 4    JFK      UA     3      342     2586
 5    JFK      UA     3      301     2475
 6    JFK      UA     3      338     2586
 7    JFK      UA     3      307     2475
 8    JFK      UA     3      337     2586
 9    JFK      UA     3      300     2475
10    JFK      UA     3      320     2586
# ... with 368 more rows

The Minimum, Maximum, and Average Flight Time of all United Airlines (UA) Flights departing John F. Kennedy Airport (JFK) during March 2013

Min
# Smallest air_time among the selected flights; missing values are ignored.
min.air_time <- min(newer.flights[["air_time"]], na.rm = TRUE)
min.air_time
[1] 281
Max
# Largest air_time among the selected flights; missing values are ignored.
max.air_time <- max(newer.flights[["air_time"]], na.rm = TRUE)
max.air_time
[1] 394
Average
# Mean air_time with missing values dropped, then rounded to two decimals
# for reporting.
mean.air_time <- mean(newer.flights[["air_time"]], na.rm = TRUE)
round.mean.air_time <- round(mean.air_time, 2)
round.mean.air_time
[1] 342.93

Average Distance Traveled by all United Airlines Flights Departing JFK during March 2013

# Mean distance with missing values dropped, then rounded to two decimals
# for reporting.
mean.distance <- mean(newer.flights[["distance"]], na.rm = TRUE)
round.mean.distance <- round(mean.distance, 2)
round.mean.distance
[1] 2534.32

Project 2

Filtered and Selected Needed Items from Tibbled Flights

# Keep month-6 (June) flights whose departure delay is strictly positive,
# i.e. flights that actually left late.
departure.delays <- flights.tbl %>%
  filter(dep_delay > 0, month == 6)
departure.delays
# A tibble: 12,655 x 19
    year month   day dep_time sched_dep_time dep_delay arr_time
   <int> <int> <int>    <int>          <int>     <dbl>    <int>
 1  2013     6     1        2           2359         3      341
 2  2013     6     1      607            600         7      853
 3  2013     6     1      614            605         9      844
 4  2013     6     1      614            600        14      829
 5  2013     6     1      615            610         5      837
 6  2013     6     1      624            600        24      727
 7  2013     6     1      632            630         2      738
 8  2013     6     1      638            635         3      855
 9  2013     6     1      638            630         8      741
10  2013     6     1      644            642         2      824
# ... with 12,645 more rows, and 12 more variables: sched_arr_time <int>,
#   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
#   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
#   minute <dbl>, time_hour <dttm>
# Keep just the delay, the month and the origin airport for each late flight.
new.departure.delays <- departure.delays %>%
  select(dep_delay, month, origin)
new.departure.delays
# A tibble: 12,655 x 3
   dep_delay month origin
       <dbl> <int>  <chr>
 1         3     6    JFK
 2         7     6    EWR
 3         9     6    EWR
 4        14     6    EWR
 5         5     6    JFK
 6        24     6    EWR
 7         2     6    EWR
 8         3     6    JFK
 9         8     6    EWR
10         2     6    EWR
# ... with 12,645 more rows

Arranged by Airport (Flight Origin) including John F. Kennedy (JFK), LaGuardia Airport (LGA), and Newark Liberty Airport (EWR)

# Order the late departures by origin airport code.
arrange.new.departure.delays <- arrange(new.departure.delays, origin)
arrange.new.departure.delays
# A tibble: 12,655 x 3
   dep_delay month origin
       <dbl> <int>  <chr>
 1         7     6    EWR
 2         9     6    EWR
 3        14     6    EWR
 4        24     6    EWR
 5         2     6    EWR
 6         8     6    EWR
 7         2     6    EWR
 8        21     6    EWR
 9        36     6    EWR
10         1     6    EWR
# ... with 12,645 more rows

Grouped by Airport to find each Airport's Minimum, Maximum, and Average departure delays in minutes for June 2013

# One row per origin airport with the minimum, maximum and mean departure
# delay. No na.rm is passed: the input was already restricted to
# dep_delay > 0 when it was filtered.
summarise.arrange.new.departure.delays <- summarise(
  group_by(arrange.new.departure.delays, origin),
  min.dep_delay = min(dep_delay),
  max.dep_delay = max(dep_delay),
  mean.dep_delay = mean(dep_delay)
)
summarise.arrange.new.departure.delays
# A tibble: 3 x 4
  origin min.dep_delay max.dep_delay mean.dep_delay
   <chr>         <dbl>         <dbl>          <dbl>
1    EWR             1           502       47.92212
2    JFK             1          1137       47.98522
3    LGA             1           803       54.96745

Project 3

Selected and Filtered Needed Items from Tibbled Flights

# Keep the route, carrier and month columns plus distance and air_time,
# the two inputs to the speed calculation.
flights.mph <- flights.tbl %>%
  select(origin, carrier, dest, distance, air_time, month)
flights.mph
# A tibble: 336,776 x 6
   origin carrier  dest distance air_time month
    <chr>   <chr> <chr>    <dbl>    <dbl> <int>
 1    EWR      UA   IAH     1400      227     1
 2    LGA      UA   IAH     1416      227     1
 3    JFK      AA   MIA     1089      160     1
 4    JFK      B6   BQN     1576      183     1
 5    LGA      DL   ATL      762      116     1
 6    EWR      UA   ORD      719      150     1
 7    EWR      B6   FLL     1065      158     1
 8    LGA      EV   IAD      229       53     1
 9    JFK      B6   MCO      944      140     1
10    LGA      AA   ORD      733      138     1
# ... with 336,766 more rows
# Restrict to months 6-8, carriers AA and UA, and destination ORD.
new.flights.mph <- flights.mph %>%
  filter(
    month %in% c(6, 7, 8),
    carrier %in% c("AA", "UA"),
    dest == "ORD"
  )
new.flights.mph
# A tibble: 3,500 x 6
   origin carrier  dest distance air_time month
    <chr>   <chr> <chr>    <dbl>    <dbl> <int>
 1    LGA      AA   ORD      733      108     6
 2    LGA      UA   ORD      733      109     6
 3    LGA      AA   ORD      733      110     6
 4    EWR      UA   ORD      719      108     6
 5    LGA      AA   ORD      733      109     6
 6    LGA      UA   ORD      733      108     6
 7    LGA      AA   ORD      733      112     6
 8    LGA      AA   ORD      733      105     6
 9    EWR      UA   ORD      719      110     6
10    LGA      AA   ORD      733      109     6
# ... with 3,490 more rows

Selected Variables and Created Mutations so Miles per Hour could be found

# Derive speed from the two measures: air_time / 60 gives `hour`, and
# distance / hour gives `mph`. Both columns are built in one mutate() call;
# `mph` uses the `hour` column created just before it.
# NOTE(review): this presumes air_time is in minutes and distance in miles --
# confirm against the dataset documentation.
newer.flights.mph <- new.flights.mph %>%
  select(air_time, distance) %>%
  mutate(
    hour = air_time / 60,
    mph = distance / hour
  )
newer.flights.mph
# A tibble: 3,500 x 4
   air_time distance     hour      mph
      <dbl>    <dbl>    <dbl>    <dbl>
 1      108      733 1.800000 407.2222
 2      109      733 1.816667 403.4862
 3      110      733 1.833333 399.8182
 4      108      719 1.800000 399.4444
 5      109      733 1.816667 403.4862
 6      108      733 1.800000 407.2222
 7      112      733 1.866667 392.6786
 8      105      733 1.750000 418.8571
 9      110      719 1.833333 392.1818
10      109      733 1.816667 403.4862
# ... with 3,490 more rows

The Minimum, Maximum, and Average Miles Traveled per Hour for United Airlines (UA) and American Airlines (AA) flights flying from JFK, LGA, and EWR to O’Hare International Airport (ORD) during June, July, and August 2013.

# Collapse to a single row holding the minimum, maximum and mean mph.
# na.rm = TRUE skips rows where mph is missing.
summarise.newer.flights.mph <- summarise(
  newer.flights.mph,
  min.mph = min(mph, na.rm = TRUE),
  max.mph = max(mph, na.rm = TRUE),
  mean.mph = mean(mph, na.rm = TRUE)
)
summarise.newer.flights.mph
# A tibble: 1 x 3
   min.mph  max.mph mean.mph
     <dbl>    <dbl>    <dbl>
1 231.4737 495.8621 396.5095