library(magrittr)
library(dplyr)
library(nycflights13)
#Load pander to better control tables and rename column headers. Need to upgrade rStudio to latest version for this to work!
library(pander)
#Project 1; First, let's get the data set 'flights'
tbl_flights <- tbl_df(flights)
#Next, filter out the flight information for March. Be sure to assign a variable so we can use this later.
tbl_flights_mar <- tbl_flights %>%
filter(month == 3)
#For JFK
tbl_flights_mar_JFK <- tbl_flights_mar %>%
filter (origin == "JFK")
#for United
tbl_flights_mar_JFK_United <- tbl_flights_mar_JFK %>%
filter (carrier == "UA")
distinct (tbl_flights_mar_JFK_United, origin, month,carrier)
# A tibble: 1 x 3
month carrier origin
<int> <chr> <chr>
1 3 UA JFK
#Find the Minimum air time
tbl_flights_mar_JFK_United %>%
summarise(UA_JFK_Mar_Min = min(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
UA_JFK_Mar_Min
<dbl>
1 281
#Find the Maximum air time
tbl_flights_mar_JFK_United %>%
summarise(UA_JFK_Mar_Max = max(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
UA_JFK_Mar_Max
<dbl>
1 394
#Find the average fligth time
tbl_flights_mar_JFK_United %>%
summarise(UA_JFK_Mar_Avg = mean(tbl_flights_mar_JFK_United$air_time, na.rm=TRUE))
# A tibble: 1 x 1
UA_JFK_Mar_Avg
<dbl>
1 342.9253
#Average Distance Travelled
tbl_flights_mar_JFK_United %>%
summarise(UA_JFK_Mar_Max = mean(tbl_flights_mar_JFK_United$distance, na.rm=TRUE))
# A tibble: 1 x 1
UA_JFK_Mar_Max
<dbl>
1 2534.317
# Project 2: Now, filter out a dataset for just June 2013
tbl_flights_jun <- tbl_flights %>%
filter(month == 6)
#Min Max Avg depart delays
#Each Aiport group by
tbl_flights_jun %>%
group_by(tbl_flights_jun$origin) %>%
summarise(tbl_flights_jun_min = min(tbl_flights_jun$dep_delay, na.rm=TRUE),
tbl_flights_jun_max = max(tbl_flights_jun$dep_delay, na.rm=TRUE),
tbl_flights_jun_avg = mean(tbl_flights_jun$dep_delay, na.rm=TRUE))
# A tibble: 3 x 4
`tbl_flights_jun$origin` tbl_flights_jun_min tbl_flights_jun_max tbl_flights_jun_avg
<chr> <dbl> <dbl> <dbl>
1 EWR -21 1137 20.84633
2 JFK -21 1137 20.84633
3 LGA -21 1137 20.84633
#Project 3: Min, Max, Avg Miles traveled per hour
#for UH and AAin June and July 2013
#First, limit to June and July
jj <- c(6,7)
tbl_flights_jj <- tbl_flights %>%
filter(month %in% jj)
#Next, filter to United and American
tbl_flights_jj_AAUA <- tbl_flights_jj %>%
filter(carrier =="AA" | carrier == "UA")
#Then, only include destination of ORD
tbl_flights_jj_AAUA_ORD <- tbl_flights_jj_AAUA %>%
filter(dest =="ORD")
#convert air_time to hours using mutate
tbl_flights_mutate <- tbl_flights_jj_AAUA_ORD %>%
mutate(air_hour = tbl_flights_jj_AAUA_ORD$air_time/60)
#Distance travelled per hour
air_DistancePerHour <- tbl_flights_mutate %>%
mutate(air_DistancePerHour_v =tbl_flights_mutate$distance/tbl_flights_mutate$air_hour)
air_DistancePerHour_min <- air_DistancePerHour %>%
group_by(carrier) %>%
summarise(air_distance_min = min(air_DistancePerHour$air_DistancePerHour_v, na.rm=TRUE),
air_distance_max = max(air_DistancePerHour$air_DistancePerHour_v,na.rm=TRUE),
air_distance_avg = mean(air_DistancePerHour$air_DistancePerHour_v,na.rm=TRUE))
colnames(air_DistancePerHour_min) <- c("Airline","Minimum","Maximum", "Average" )
air_DistancePerHour_min
# A tibble: 2 x 4
Airline Minimum Maximum Average
<chr> <dbl> <dbl> <dbl>
1 AA 231.4737 463.871 399.1846
2 UA 231.4737 463.871 399.1846