Project One

Display the minimum, maximum, and average flight time, and the average distance traveled, for all UA (United Airlines) flights departing from JFK in March 2013.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(nycflights13)
library(tibble)

# Coerce the nycflights13 `flights` data to a tibble.
# `tbl_df()` is deprecated (dplyr 1.0.0); `as_tibble()` is its replacement and
# yields the same object here.
flights.tbl <- as_tibble(flights)
# Print the tibble to inspect its columns and first rows.
flights.tbl
## # A tibble: 336,776 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

pulling out information needed

# Keep only the rows for carrier UA that departed from JFK in month 3 (March).
new.flights <- flights.tbl %>%
  filter(month == 3 & carrier == "UA" & origin == "JFK")
new.flights
## # A tibble: 378 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     3     1      600            600         0      906
##  2  2013     3     1      607            610        -3      832
##  3  2013     3     1      655            700        -5      954
##  4  2013     3     1      758            800        -2     1106
##  5  2013     3     1      836            840        -4     1111
##  6  2013     3     1     1103           1106        -3     1400
##  7  2013     3     1     1125           1130        -5     1350
##  8  2013     3     1     1423           1425        -2     1728
##  9  2013     3     1     1621           1530        51     1844
## 10  2013     3     1     1723           1729        -6     2010
## # ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Narrow the filtered flights to the three filter columns plus the two
# measures summarised in the sections that follow (air_time and distance).
newer.flights <- new.flights %>%
  select(origin, carrier, month, air_time, distance)
newer.flights
## # A tibble: 378 x 5
##    origin carrier month air_time distance
##     <chr>   <chr> <int>    <dbl>    <dbl>
##  1    JFK      UA     3      342     2586
##  2    JFK      UA     3      292     2475
##  3    JFK      UA     3      343     2586
##  4    JFK      UA     3      342     2586
##  5    JFK      UA     3      301     2475
##  6    JFK      UA     3      338     2586
##  7    JFK      UA     3      307     2475
##  8    JFK      UA     3      337     2586
##  9    JFK      UA     3      300     2475
## 10    JFK      UA     3      320     2586
## # ... with 368 more rows

Minimum flight time

# Smallest air_time among the selected flights; missing values are ignored.
min.air_time <- with(newer.flights, min(air_time, na.rm = TRUE))
min.air_time
## [1] 281

Maximum flight time

# Largest air_time among the selected flights; missing values are ignored.
max.air_time <- newer.flights[["air_time"]] %>%
  max(na.rm = TRUE)
max.air_time
## [1] 394

Average flight time

# Mean air_time (missing values ignored), then rounded to two decimals
# for display.
mean.air_time <- newer.flights[["air_time"]] %>%
  mean(na.rm = TRUE)
round.mean <- mean.air_time %>%
  round(digits = 2)
round.mean
## [1] 342.93

Average distance traveled

# Mean of the distance column for the selected flights; missing values are
# ignored.
mean.distance <- with(newer.flights, mean(distance, na.rm = TRUE))
mean.distance
## [1] 2534.317
# Round the mean distance to two decimals for display.
round.mean.distance <- mean.distance %>%
  round(digits = 2)
round.mean.distance
## [1] 2534.32

Project 2

Display the minimum, maximum, and average departure delays (in minutes) for June, grouped by origin airport.

pulling out data

# June rows with a strictly positive departure delay; flights that left
# on time or early (dep_delay <= 0) are excluded.
departure.delays <- flights.tbl %>%
  filter(month == 6, dep_delay > 0)
departure.delays
## # A tibble: 12,655 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     6     1        2           2359         3      341
##  2  2013     6     1      607            600         7      853
##  3  2013     6     1      614            605         9      844
##  4  2013     6     1      614            600        14      829
##  5  2013     6     1      615            610         5      837
##  6  2013     6     1      624            600        24      727
##  7  2013     6     1      632            630         2      738
##  8  2013     6     1      638            635         3      855
##  9  2013     6     1      638            630         8      741
## 10  2013     6     1      644            642         2      824
## # ... with 12,645 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Keep only the three columns used below: month, dep_delay and origin.
new.departure.delays <- departure.delays %>%
  select(month, dep_delay, origin)
new.departure.delays
## # A tibble: 12,655 x 3
##    month dep_delay origin
##    <int>     <dbl>  <chr>
##  1     6         3    JFK
##  2     6         7    EWR
##  3     6         9    EWR
##  4     6        14    EWR
##  5     6         5    JFK
##  6     6        24    EWR
##  7     6         2    EWR
##  8     6         3    JFK
##  9     6         8    EWR
## 10     6         2    EWR
## # ... with 12,645 more rows
# Sort the rows by origin so each airport's delays print together.
newest.departure.delays <- arrange(new.departure.delays, origin)
newest.departure.delays
## # A tibble: 12,655 x 3
##    month dep_delay origin
##    <int>     <dbl>  <chr>
##  1     6         7    EWR
##  2     6         9    EWR
##  3     6        14    EWR
##  4     6        24    EWR
##  5     6         2    EWR
##  6     6         8    EWR
##  7     6         2    EWR
##  8     6        21    EWR
##  9     6        36    EWR
## 10     6         1    EWR
## # ... with 12,645 more rows

final table

# Per-origin summary of the departure delays: smallest, largest and mean
# dep_delay for each airport.
# na.rm = TRUE matches the other summaries in this file, so the result does
# not rely on an earlier filter having already removed missing delays.
summarise.newest.departure.delays <- newest.departure.delays %>%
  group_by(origin) %>%
  summarise(min.dep_delay = min(dep_delay, na.rm = TRUE),
            max.dep_delay = max(dep_delay, na.rm = TRUE),
            mean.dep_delay = mean(dep_delay, na.rm = TRUE))
summarise.newest.departure.delays
## # A tibble: 3 x 4
##   origin min.dep_delay max.dep_delay mean.dep_delay
##    <chr>         <dbl>         <dbl>          <dbl>
## 1    EWR             1           502       47.92212
## 2    JFK             1          1137       47.98522
## 3    LGA             1           803       54.96745

Project 3 Start

Display the minimum, maximum, and average miles traveled per hour for UA and AA flights flying between all three airports and ORD in June, July, and August.

pulling out the data needed

# Keep the two columns needed for the speed calculation (distance, air_time)
# together with the columns the next step filters on.
flights.mph <- flights.tbl %>%
  select(distance, air_time, dest, origin, month, carrier)
flights.mph
## # A tibble: 336,776 x 6
##    distance air_time  dest origin month carrier
##       <dbl>    <dbl> <chr>  <chr> <int>   <chr>
##  1     1400      227   IAH    EWR     1      UA
##  2     1416      227   IAH    LGA     1      UA
##  3     1089      160   MIA    JFK     1      AA
##  4     1576      183   BQN    JFK     1      B6
##  5      762      116   ATL    LGA     1      DL
##  6      719      150   ORD    EWR     1      UA
##  7     1065      158   FLL    EWR     1      B6
##  8      229       53   IAD    LGA     1      EV
##  9      944      140   MCO    JFK     1      B6
## 10      733      138   ORD    LGA     1      AA
## # ... with 336,766 more rows
# AA and UA flights to ORD in months 6 through 8 (June, July, August).
new.flights.mph <- flights.mph %>%
  filter(dest == "ORD",
         carrier %in% c("AA", "UA"),
         month %in% 6:8)
new.flights.mph
## # A tibble: 3,500 x 6
##    distance air_time  dest origin month carrier
##       <dbl>    <dbl> <chr>  <chr> <int>   <chr>
##  1      733      108   ORD    LGA     6      AA
##  2      733      109   ORD    LGA     6      UA
##  3      733      110   ORD    LGA     6      AA
##  4      719      108   ORD    EWR     6      UA
##  5      733      109   ORD    LGA     6      AA
##  6      733      108   ORD    LGA     6      UA
##  7      733      112   ORD    LGA     6      AA
##  8      733      105   ORD    LGA     6      AA
##  9      719      110   ORD    EWR     6      UA
## 10      733      109   ORD    LGA     6      AA
## # ... with 3,490 more rows

mutating

finding air time in hours

creating miles per hour (mph) column

# Derive speed per flight: hour is air_time divided by 60, and mph is distance
# divided by that value. Both columns are built in one mutate() call; mph
# refers to the hour column created just before it.
newer.flights.mph <- new.flights.mph %>%
  select(air_time, distance) %>%
  mutate(hour = air_time / 60,
         mph = distance / hour)
newer.flights.mph
## # A tibble: 3,500 x 4
##    air_time distance     hour      mph
##       <dbl>    <dbl>    <dbl>    <dbl>
##  1      108      733 1.800000 407.2222
##  2      109      733 1.816667 403.4862
##  3      110      733 1.833333 399.8182
##  4      108      719 1.800000 399.4444
##  5      109      733 1.816667 403.4862
##  6      108      733 1.800000 407.2222
##  7      112      733 1.866667 392.6786
##  8      105      733 1.750000 418.8571
##  9      110      719 1.833333 392.1818
## 10      109      733 1.816667 403.4862
## # ... with 3,490 more rows

final table

# One-row summary of the mph column: minimum, maximum and mean, ignoring
# missing values.
summarise.newer.flights.mph <- summarise(
  newer.flights.mph,
  min.mph = min(mph, na.rm = TRUE),
  max.mph = max(mph, na.rm = TRUE),
  mean.mph = mean(mph, na.rm = TRUE)
)
summarise.newer.flights.mph
## # A tibble: 1 x 3
##    min.mph  max.mph mean.mph
##      <dbl>    <dbl>    <dbl>
## 1 231.4737 495.8621 396.5095