library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 3.4.2
# Load the nycflights13 `flights` table as a tibble and preview it.
# `as_tibble()` is used because `tbl_df()` is deprecated in dplyr.
proflights <- as_tibble(flights)
proflights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
#######################
### Project 1: Display the minimum, maximum, and average flight time and average distance
## traveled of all United Airlines flights departing JFK during March 2013.
# Keep only United Airlines (UA) flights that departed JFK in March.
# `month` is an integer column, so it is compared with an integer literal
# rather than the string "3", which only matched through implicit coercion.
proflights <- flights %>%
  filter(origin == "JFK", carrier == "UA", month == 3L)
proflights
## # A tibble: 378 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 3 1 600 600 0 906
## 2 2013 3 1 607 610 -3 832
## 3 2013 3 1 655 700 -5 954
## 4 2013 3 1 758 800 -2 1106
## 5 2013 3 1 836 840 -4 1111
## 6 2013 3 1 1103 1106 -3 1400
## 7 2013 3 1 1125 1130 -5 1350
## 8 2013 3 1 1423 1425 -2 1728
## 9 2013 3 1 1621 1530 51 1844
## 10 2013 3 1 1723 1729 -6 2010
## # ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Project 1 summary: minimum, maximum and mean air time plus the mean
# distance, computed in one `summarise()` call. The task asks for the average
# flight time, so `mean_air_time` is included alongside the other statistics.
# `na.rm = TRUE` skips rows with a missing air_time or distance.
proflights %>%
  summarise(
    min_air_time = min(air_time, na.rm = TRUE),
    max_air_time = max(air_time, na.rm = TRUE),
    mean_air_time = mean(air_time, na.rm = TRUE),
    mean_distance = mean(distance, na.rm = TRUE)
  )
# Previously recorded values: min_air_time 281, max_air_time 394,
# mean_distance 2534.317. Re-run to obtain mean_air_time.
#########################################
###Project 2 Display the minimum, maximum, and average departure delays in minutes
##for June 2013 grouped by airport.
# Start Project 2 from the full `flights` table as a tibble and preview it.
# `as_tibble()` is used because `tbl_df()` is deprecated in dplyr.
mmaflights <- as_tibble(flights)
mmaflights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Keep only the three columns Project 2 needs: the departure delay, the month
# and the origin airport. No rows are removed at this step, so a `filter()`
# call is not needed here.
mmaflights <- select(flights, dep_delay, month, origin)
mmaflights
## # A tibble: 336,776 x 3
## dep_delay month origin
## <dbl> <int> <chr>
## 1 2 1 EWR
## 2 4 1 LGA
## 3 2 1 JFK
## 4 -1 1 JFK
## 5 -6 1 LGA
## 6 -4 1 EWR
## 7 -5 1 EWR
## 8 -3 1 LGA
## 9 -3 1 JFK
## 10 -2 1 LGA
## # ... with 336,766 more rows
# June flights that left late. `dep_delay > 0` drops on-time and early
# departures, so statistics computed from `jdelay` describe delayed flights
# only.
# NOTE(review): confirm this matches the intended reading of "departure
# delays" in the task; remove the `dep_delay > 0` condition otherwise.
jdelay <- filter(mmaflights, dep_delay > 0, month == 6L)
# Preview the filtered table that was just created (not `mmaflights`, whose
# contents are unchanged by the step above). Re-run to regenerate the output.
jdelay
# One row per origin airport with the minimum, maximum and mean of
# `dep_delay` in `jdelay`; missing delays are ignored via `na.rm = TRUE`.
jdelay %>%
  group_by(origin) %>%
  summarise(
    min_delay = min(dep_delay, na.rm = TRUE),
    max_delay = max(dep_delay, na.rm = TRUE),
    avg_delay = mean(dep_delay, na.rm = TRUE)
  )
## # A tibble: 3 x 4
## origin min_delay max_delay avg_delay
## <chr> <dbl> <dbl> <dbl>
## 1 EWR 1 502 47.92212
## 2 JFK 1 1137 47.98522
## 3 LGA 1 803 54.96745
##########################################
### Project 3 – Display the minimum, maximum, and average miles traveled per hour for United Airlines (UA)
## and American Airlines (AA) flights flying between all three airports and Chicago’s O’Hare International Airport (ORD) in June, July, and August 2013. Note: the variable, air_time, in the flights dataset is recorded in minutes,
## not hours. Looks like you must do some mutation!
# Re-prints the Project 1 subset (UA flights from JFK in March). The Project 3
# steps further down start again from `flights` and do not use this object.
proflights
## # A tibble: 378 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 3 1 600 600 0 906
## 2 2013 3 1 607 610 -3 832
## 3 2013 3 1 655 700 -5 954
## 4 2013 3 1 758 800 -2 1106
## 5 2013 3 1 836 840 -4 1111
## 6 2013 3 1 1103 1106 -3 1400
## 7 2013 3 1 1125 1130 -5 1350
## 8 2013 3 1 1423 1425 -2 1728
## 9 2013 3 1 1621 1530 51 1844
## 10 2013 3 1 1723 1729 -6 2010
## # ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Project 3: speed in miles per hour for UA and AA flights to ORD in
# June, July and August.

# Keep only the columns needed for the speed calculation.
fltr_dataset <- select(
  flights,
  carrier, origin, air_time, month, dest, distance
)

# `%in%` tests set membership row by row. Writing
# `carrier == c("UA", "AA")` instead recycles the two-element vector down the
# column (row 1 vs "UA", row 2 vs "AA", ...) and silently drops matching rows.
# `month` is an integer column, so it is matched against the integers 6:8.
final <- filter(
  fltr_dataset,
  carrier %in% c("UA", "AA"),
  dest == "ORD",
  month %in% 6:8
)

# air_time is recorded in minutes, so divide by 60 to convert it to hours.
mph <- mutate(final, mph = distance / (air_time / 60))

# Rows with a missing air_time give a missing speed; `na.rm = TRUE` skips them.
summarise(
  mph,
  min_mph = min(mph, na.rm = TRUE),
  max_mph = max(mph, na.rm = TRUE),
  avg_mph = mean(mph, na.rm = TRUE)
)
# The earlier recorded output (min 231.4737, max 495.8621, avg 396.5622) was
# produced with the recycled `==` comparison and therefore covered only part
# of the matching flights; re-run to regenerate the figures.