First, let's reduce the dataset down to the five variables of interest (carrier, origin, distance, air_time, and month).
library(dplyr)
Warning: package 'dplyr' was built under R version 3.4.2
library(nycflights13)
# Keep only the five columns used below. No rows are dropped at this step, so
# despite its name `jfkflight` still holds flights from every origin airport.
jfkflight <- flights %>%
  select(carrier, origin, distance, air_time, month)
jfkflight
# A tibble: 336,776 x 5
carrier origin distance air_time month
<chr> <chr> <dbl> <dbl> <int>
1 UA EWR 1400 227 1
2 UA LGA 1416 227 1
3 AA JFK 1089 160 1
4 B6 JFK 1576 183 1
5 DL LGA 762 116 1
6 UA EWR 719 150 1
7 B6 EWR 1065 158 1
8 EV LGA 229 53 1
9 B6 JFK 944 140 1
10 AA LGA 733 138 1
# ... with 336,766 more rows
Next, filter the dataset to only United Airlines flights out of JFK airport in March.
# United (UA) departures from JFK in March; printed for inspection only.
# Comma-separated conditions in filter() are combined with a logical AND.
jfkflight %>%
  filter(carrier == "UA", origin == "JFK", month == 3)
# A tibble: 378 x 5
carrier origin distance air_time month
<chr> <chr> <dbl> <dbl> <int>
1 UA JFK 2586 342 3
2 UA JFK 2475 292 3
3 UA JFK 2586 343 3
4 UA JFK 2586 342 3
5 UA JFK 2475 301 3
6 UA JFK 2586 338 3
7 UA JFK 2475 307 3
8 UA JFK 2586 337 3
9 UA JFK 2475 300 3
10 UA JFK 2586 320 3
# ... with 368 more rows
Finally, display the minimum, maximum, and average flight time, and the average distance traveled, of all United Airlines flights departing JFK during March 2013.
# Subset to United (UA) flights leaving JFK in March, then report the minimum,
# maximum and mean air time together with the mean distance flown.
uaflight <- jfkflight %>%
  filter(carrier == "UA", origin == "JFK", month == 3)

uaflight %>%
  summarise(
    # na.rm = TRUE skips flights whose air_time is missing.
    min_airtime = min(air_time, na.rm = TRUE),
    max_airtime = max(air_time, na.rm = TRUE),
    avg_airtime = mean(air_time, na.rm = TRUE),
    avg_distance = mean(distance)
  )
# A tibble: 1 x 4
min_airtime max_airtime avg_airtime avg_distance
<dbl> <dbl> <dbl> <dbl>
1 281 394 342.9253 2534.317
# Departure-delay summary for June, broken down by origin airport.
# Step 1: keep only the columns needed for the delay analysis.
dlyflts <- flights %>%
  select(dep_delay, month, origin)

# Step 2: June flights with a non-negative departure delay. Rows where
# dep_delay is NA fail the comparison and are dropped by filter().
junedlys <- dlyflts %>%
  filter(dep_delay >= 0, month == 6)

# Step 3: minimum, maximum and mean delay per origin.
junedlys %>%
  group_by(origin) %>%
  summarise(
    min_delay = min(dep_delay, na.rm = TRUE),
    max_delay = max(dep_delay, na.rm = TRUE),
    avg_delay = mean(dep_delay, na.rm = TRUE)
  )
# A tibble: 3 x 4
origin min_delay max_delay avg_delay
<chr> <dbl> <dbl> <dbl>
1 EWR 0 502 44.10844
2 JFK 0 1137 43.00787
3 LGA 0 803 49.27911
# Speed summary for United (UA) and American (AA) flights to ORD in
# June, July and August.

# Keep only the columns needed for the speed calculation.
fltr_dataset <- flights %>%
  select(carrier, origin, air_time, month, dest, distance)

# Set membership must use %in%. Writing `carrier == c("UA", "AA")` recycles the
# length-2 vector down the column, so it compares odd rows against "UA" and
# even rows against "AA" and silently drops roughly half of the matching
# flights. `month` is an integer column, so it is matched against integers
# rather than character strings.
final <- fltr_dataset %>%
  filter(carrier %in% c("UA", "AA"), dest == "ORD", month %in% 6:8)

# Dividing air_time by 60 converts it to hours before computing speed.
# NOTE(review): assumes air_time is in minutes and distance in miles - confirm
# against the nycflights13 documentation.
mph <- final %>%
  mutate(mph = distance / (air_time / 60))

# NOTE: any summary output recorded with the old `==` carrier filter is stale
# and must be regenerated.
mph %>%
  summarise(
    # na.rm = TRUE skips flights whose air_time (and therefore mph) is missing.
    min_mph = min(mph, na.rm = TRUE),
    max_mph = max(mph, na.rm = TRUE),
    avg_mph = mean(mph, na.rm = TRUE)
  )
# A tibble: 1 x 3
min_mph max_mph avg_mph
<dbl> <dbl> <dbl>
1 231.4737 495.8621 396.5622