library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 3.4.2
# Work on a tibble copy of the nycflights13 `flights` table and print a preview.
# `as_tibble()` replaces `tbl_df()`, which is deprecated in current dplyr.
proflights <- as_tibble(flights)
proflights
## # A tibble: 336,776 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
#######################
### Project 1: Display the minimum, maximum, and average flight time and the
## average distance traveled of all United Airlines (UA) flights departing JFK
## during March 2013.


# Subset `flights` to United Airlines (UA) departures from JFK in March.
# `month` is an integer column, so it is compared against the integer 3L;
# comparing against the string "3" only matched through implicit coercion of
# the whole column to character.
proflights <- flights %>%
  filter(origin == "JFK", carrier == "UA", month == 3L)
proflights
## # A tibble: 378 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     3     1      600            600         0      906
##  2  2013     3     1      607            610        -3      832
##  3  2013     3     1      655            700        -5      954
##  4  2013     3     1      758            800        -2     1106
##  5  2013     3     1      836            840        -4     1111
##  6  2013     3     1     1103           1106        -3     1400
##  7  2013     3     1     1125           1130        -5     1350
##  8  2013     3     1     1423           1425        -2     1728
##  9  2013     3     1     1621           1530        51     1844
## 10  2013     3     1     1723           1729        -6     2010
## # ... with 368 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Project 1 answer: one summary row over the filtered `proflights` rows with
# the minimum, maximum and average of `air_time` plus the average `distance`.
# The average air time is part of the task statement and was previously not
# computed. `na.rm = TRUE` skips rows where the value is missing.
proflights %>%
  summarise(
    min_air_time = min(air_time, na.rm = TRUE),
    max_air_time = max(air_time, na.rm = TRUE),
    avg_air_time = mean(air_time, na.rm = TRUE),
    avg_distance = mean(distance, na.rm = TRUE)
  )
## Previously recorded values (from separate summarise() calls on this data):
##   min air_time = 281, max air_time = 394, mean distance = 2534.317
## avg_air_time had not been computed before; re-run to print the full row.
#########################################
### Project 2: Display the minimum, maximum, and average departure delays in
## minutes for June 2013, grouped by airport.

# Keep only the three columns the delay summary needs. `flights` is used
# directly: the earlier tibble copy was overwritten before being used, and the
# predicate-less filter() around select() kept every row.
mmaflights <- flights %>%
  select(dep_delay, month, origin)
mmaflights
## # A tibble: 336,776 x 3
##    dep_delay month origin
##        <dbl> <int>  <chr>
##  1         2     1    EWR
##  2         4     1    LGA
##  3         2     1    JFK
##  4        -1     1    JFK
##  5        -6     1    LGA
##  6        -4     1    EWR
##  7        -5     1    EWR
##  8        -3     1    LGA
##  9        -3     1    JFK
## 10        -2     1    LGA
## # ... with 336,766 more rows

# June rows with a strictly positive departure delay. Early and on-time
# departures (dep_delay <= 0) and rows with a missing delay are excluded, which
# is why every min_delay below is 1.
# NOTE(review): if the summary is meant to cover all June departures, drop the
# `dep_delay > 0` condition.
jdelay <- mmaflights %>%
  filter(month == 6L, dep_delay > 0)

# Minimum, maximum and average departure delay per origin airport.
jdelay %>%
  group_by(origin) %>%
  summarise(
    min_delay = min(dep_delay, na.rm = TRUE),
    max_delay = max(dep_delay, na.rm = TRUE),
    avg_delay = mean(dep_delay, na.rm = TRUE)
  )
## # A tibble: 3 x 4
##   origin min_delay max_delay avg_delay
##    <chr>     <dbl>     <dbl>     <dbl>
## 1    EWR         1       502  47.92212
## 2    JFK         1      1137  47.98522
## 3    LGA         1       803  54.96745
##########################################
### Project 3 – Display the minimum, maximum, and average miles traveled per
## hour for United Airlines (UA) and American Airlines (AA) flights flying
## between all three airports and Chicago’s O’Hare International Airport (ORD)
## in June, July, and August 2013. Note: the variable air_time in the flights
## dataset is recorded in minutes, not hours. Looks like you must do some
## mutation!

# Columns needed to compute speed for the selected carriers and destination.
fltr_dataset <- flights %>%
  select(carrier, origin, air_time, month, dest, distance)

# UA and AA flights to ORD in June, July and August.
# `%in%` tests each row's carrier for membership in the set. The former
# `carrier == c("UA", "AA")` recycled the two-element vector down the rows
# (row 1 vs "UA", row 2 vs "AA", ...), so a UA flight on an even row or an AA
# flight on an odd row was silently dropped.
# `month` is an integer column, so it is matched against integers.
final <- fltr_dataset %>%
  filter(carrier %in% c("UA", "AA"), dest == "ORD", month %in% 6:8)

# air_time is recorded in minutes; divide by 60 to get hours before computing
# distance per hour.
mph <- final %>%
  mutate(mph = distance / (air_time / 60))

# Inside summarise() the name `mph` refers to the column created above, not to
# the data frame of the same name.
mph %>%
  summarise(
    min_mph = min(mph, na.rm = TRUE),
    max_mph = max(mph, na.rm = TRUE),
    avg_mph = mean(mph, na.rm = TRUE)
  )
## Output not reproduced here; re-run to regenerate.
## The values previously recorded (min 231.4737, max 495.8621, avg 396.5622)
## came from the recycled `==` carrier comparison and therefore covered only
## part of the matching flights.