PROJECT 1:

Min, Max and Average flight time and average distance for United Airlines flights departing JFK in March of 2013

Project 1, Step 1: First, load the libraries we need, including the nycflights13 dataset, and create a tibble to work with.

library(magrittr)
library(dplyr)
library(nycflights13)
#Load pander to better control tables and rename column headers. Need to upgrade rStudio to latest version for this to work!
library(pander)

#Project 1; First, let's get the data set 'flights'
tbl_flights <- tbl_df(flights)

Project 1, Step 2: Filter the date to just have March for JFK for United. I’ll do this in 2 steps here, but we could do it on one.

#Next, filter out the flight information for March.  Be sure to assign a variable so we can use this later.
tbl_flights_mar <- tbl_flights %>%
  filter(month == 3)
#For JFK
tbl_flights_mar_JFK <- tbl_flights_mar %>%
  filter (origin == "JFK")
#for United
tbl_flights_mar_JFK_United <- tbl_flights_mar_JFK %>%
  filter (carrier == "UA")

Let’s see if we have it.

distinct (tbl_flights_mar_JFK_United, origin, month,carrier)
# A tibble: 1 x 3
  month carrier origin
  <int>   <chr>  <chr>
1     3      UA    JFK

Project 1, Steps 2-6

Find the Min, Max, Avg AirTime and Avg Distance. I’ll do this is separate steps, but will combine them in the next projects.

#Find the Minimum air time
tbl_flights_mar_JFK_United %>%
  summarise(UA_JFK_Mar_Min = min(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
  UA_JFK_Mar_Min
           <dbl>
1            281
#Find the Maximum air time
tbl_flights_mar_JFK_United %>%
  summarise(UA_JFK_Mar_Max = max(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
  UA_JFK_Mar_Max
           <dbl>
1            394
#Find the average fligth time
tbl_flights_mar_JFK_United %>%
  summarise(UA_JFK_Mar_Avg = mean(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
  UA_JFK_Mar_Avg
           <dbl>
1       342.9253
#Average Distance Travelled
tbl_flights_mar_JFK_United %>%
  summarise(UA_JFK_Mar_Max = mean(tbl_flights_mar_JFK_United$distance, na.rm=TRUE))
# A tibble: 1 x 1
  UA_JFK_Mar_Max
           <dbl>
1       2534.317

PROJECT 2: Departure Delays for June 2013

First, filter out to get June

# Project 2: Now, filter out a dataset for just June 2013
tbl_flights_jun <- tbl_flights %>%
  filter(month == 6)

Now, we calculate the delays and diplay them all together

#Min Max Avg depart delays 
#Each Aiport group by
tbl_flights_jun %>%
  group_by(tbl_flights_jun$origin) %>%
  summarise(tbl_flights_jun_min = min(tbl_flights_jun$dep_delay, na.rm=TRUE),
            tbl_flights_jun_max = max(tbl_flights_jun$dep_delay, na.rm=TRUE),
            tbl_flights_jun_avg = mean(tbl_flights_jun$dep_delay, na.rm=TRUE))
# A tibble: 3 x 4
  `tbl_flights_jun$origin` tbl_flights_jun_min tbl_flights_jun_max tbl_flights_jun_avg
                     <chr>               <dbl>               <dbl>               <dbl>
1                      EWR                 -21                1137            20.84633
2                      JFK                 -21                1137            20.84633
3                      LGA                 -21                1137            20.84633

PROJECT 3: Average Miles per Hour travelled

Step 1: Filter out the date we want

#Project 3: Min, Max, Avg Miles traveled per hour 
#for UH and AAin June and July 2013
#First, limit to June and July
jj <- c(6,7)
tbl_flights_jj <- tbl_flights %>%
  filter(month %in% jj)

#Next, filter to United and American
tbl_flights_jj_AAUA <- tbl_flights_jj %>%
  filter(carrier =="AA" | carrier == "UA")

#Then, only include destination of ORD
tbl_flights_jj_AAUA_ORD <- tbl_flights_jj_AAUA %>%
  filter(dest =="ORD")

Step 2: Mutate air time from minutes to hours

#convert air_time to hours using mutate
tbl_flights_mutate <- tbl_flights_jj_AAUA_ORD %>%
  mutate(air_hour = tbl_flights_jj_AAUA_ORD$air_time/60)

Step 3: Finally, calc the values in a table

#Distance travelled per hour

air_DistancePerHour <- tbl_flights_mutate %>%
  mutate(air_DistancePerHour_v =tbl_flights_mutate$distance/tbl_flights_mutate$air_hour)

air_DistancePerHour_min <- air_DistancePerHour %>%
  group_by(carrier) %>%
  summarise(air_distance_min = min(air_DistancePerHour$air_DistancePerHour_v, na.rm=TRUE),
            air_distance_max = max(air_DistancePerHour$air_DistancePerHour_v,na.rm=TRUE),
            air_distance_avg = mean(air_DistancePerHour$air_DistancePerHour_v,na.rm=TRUE)) 

colnames(air_DistancePerHour_min) <- c("Airline","Minimum","Maximum", "Average" )
air_DistancePerHour_min
# A tibble: 2 x 4
  Airline  Minimum Maximum  Average
    <chr>    <dbl>   <dbl>    <dbl>
1      AA 231.4737 463.871 399.1846
2      UA 231.4737 463.871 399.1846