# Set knitr's default chunk option `echo = TRUE` for the chunks that follow
# (per the knitr documentation, `echo` controls whether a chunk's source code
# is shown in the rendered document).
knitr::opts_chunk$set(echo = TRUE)
# Project 1: Display the minimum, maximum, and average flight time and the average distance traveled for all United Airlines flights departing JFK during March 2013. First, select the variables needed from the dataset: air_time, carrier, month, origin, and distance.
library(nycflights13)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(magrittr)
# Keep only the columns Project 1 needs. select() alone is enough here:
# wrapping it in filter() with no condition returns every row unchanged.
jfk1 <- select(flights, carrier, origin, distance, air_time, month)
jfk1
## # A tibble: 336,776 x 5
## carrier origin distance air_time month
## <chr> <chr> <dbl> <dbl> <int>
## 1 UA EWR 1400 227 1
## 2 UA LGA 1416 227 1
## 3 AA JFK 1089 160 1
## 4 B6 JFK 1576 183 1
## 5 DL LGA 762 116 1
## 6 UA EWR 719 150 1
## 7 B6 EWR 1065 158 1
## 8 EV LGA 229 53 1
## 9 B6 JFK 944 140 1
## 10 AA LGA 733 138 1
## # ... with 336,766 more rows
# United Airlines flights that departed JFK in March. The subset is computed
# once and stored, so the preview and the summary below use the same rows.
uaflight <- filter(jfk1, carrier == "UA", origin == "JFK", month == 3)
uaflight
## # A tibble: 378 x 5
## carrier origin distance air_time month
## <chr> <chr> <dbl> <dbl> <int>
## 1 UA JFK 2586 342 3
## 2 UA JFK 2475 292 3
## 3 UA JFK 2586 343 3
## 4 UA JFK 2586 342 3
## 5 UA JFK 2475 301 3
## 6 UA JFK 2586 338 3
## 7 UA JFK 2475 307 3
## 8 UA JFK 2586 337 3
## 9 UA JFK 2475 300 3
## 10 UA JFK 2586 320 3
## # ... with 368 more rows
# One-row summary: shortest, longest, and mean air_time plus mean distance.
# na.rm = TRUE makes the air_time statistics ignore missing values; the data
# column is passed first and the option is named, per the usual R convention.
summarise(
  uaflight,
  min_airtime = min(air_time, na.rm = TRUE),
  max_airtime = max(air_time, na.rm = TRUE),
  avg_airtime = mean(air_time, na.rm = TRUE),
  avg_distance = mean(distance)
)
## # A tibble: 1 x 4
## min_airtime max_airtime avg_airtime avg_distance
## <dbl> <dbl> <dbl> <dbl>
## 1 281 394 342.9253 2534.317
# Project 2: Display the minimum, maximum, and average departure delays in minutes for June 2013, grouped by origin airport.
# Columns needed for the departure-delay summary.
delays <- flights %>%
  select(dep_delay, month, origin)
# June flights that actually left late: dep_delay > 0 excludes early and
# on-time departures, so they do not enter the statistics below.
june13_delay <- delays %>%
  filter(dep_delay > 0, month == 6)
# Smallest, largest, and mean departure delay for each origin airport.
june13_delay %>%
  group_by(origin) %>%
  summarise(
    min_delay = min(dep_delay, na.rm = TRUE),
    max_delay = max(dep_delay, na.rm = TRUE),
    avg_delay = mean(dep_delay, na.rm = TRUE)
  )
## # A tibble: 3 x 4
## origin min_delay max_delay avg_delay
## <chr> <dbl> <dbl> <dbl>
## 1 EWR 1 502 47.92212
## 2 JFK 1 1137 47.98522
## 3 LGA 1 803 54.96745
# Project 3: Display the minimum, maximum, and average miles traveled per hour for United Airlines (UA) and American Airlines (AA) flights flying between all three airports and Chicago’s O’Hare International Airport (ORD) in June, July, and August 2013.
# Columns needed to compute average speed for Project 3.
allflightdata <- select(
  flights, carrier, origin, air_time, month, dest, distance
)
# UA and AA flights to ORD in June, July, and August.
# `%in%` tests each row's carrier for membership in the set. The earlier
# `carrier == c("UA", "AA")` recycled the two-element vector down the column
# (row 1 vs "UA", row 2 vs "AA", ...), silently dropping UA/AA flights whose
# row position lined up with the other carrier. Months are compared as
# integers to match the column's type.
results <- filter(
  allflightdata,
  carrier %in% c("UA", "AA"),
  dest == "ORD",
  month %in% 6:8
)
# Miles per hour: distance divided by air_time converted to hours
# (nycflights13 documents distance in miles and air_time in minutes).
mph <- mutate(results, mph = distance / (air_time / 60))
# na.rm = TRUE ignores flights whose mph is missing.
summarise(
  mph,
  min_mph = min(mph, na.rm = TRUE),
  max_mph = max(mph, na.rm = TRUE),
  avg_mph = mean(mph, na.rm = TRUE)
)
## NOTE: output not shown; re-knit to regenerate it. The figures recorded
## previously (min_mph 231.4737, max_mph 495.8621, avg_mph 396.5622) were
## produced by the recycled `==` filter and covered only part of the UA/AA
## flights, so they no longer correspond to this code.
# Personal Reflection: I feel like I’m still struggling with some of the RStudio functions, and I don’t understand why I have to keep reloading libraries and packages even when I haven’t started a new session. Although the assignment is done, it feels sloppy to me, and I’m not yet proficient.