# Setup ----
# Install each package only when it is missing (so re-running the script does
# not reinstall every time), then attach it.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)

if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# First look at the flights data.
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(flights)
}
glimpse(flights)
summary(flights)
# Sort flights by departure delay, largest first.
sortf <- arrange(flights, desc(dep_delay))

# Show the sorted data with carrier, flight and tailnum moved to the front;
# everything() keeps all remaining columns after them.
select(sortf, carrier, flight, tailnum, everything())
# Largest departure delay in the data, ignoring missing values.
maxdep <- max(flights$dep_delay, na.rm = TRUE)

# Row index (or indices, if tied) of the flight(s) with that delay.
# which() drops the NA comparisons produced by rows with a missing dep_delay.
maxdep_id <- which(flights$dep_delay == maxdep)

# Show columns 10 to 12 for those rows.
# NOTE(review): presumably carrier, flight and tailnum (the columns selected
# in the sorted view) -- confirm against the column order of `flights`.
flights[maxdep_id, 10:12]
# Keep only the rows that have a recorded departure delay; rows with a
# missing dep_delay are dropped (treated as cancelled, per the variable name).
not_cancelled <- filter(flights, !is.na(dep_delay))
# Mean departure delay for each (year, month, day), computed on the rows that
# have a recorded dep_delay, so no na.rm is needed here.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(
    mean = mean(dep_delay)
  )
# Same daily mean departure delay, but taken from the full flights table;
# missing dep_delay values are skipped inside mean() via na.rm = TRUE.
flights %>%
  group_by(year, month, day) %>%
  summarise(
    mean = mean(dep_delay, na.rm = TRUE)
  )
# Average arrival delay per tail number, sorted from lowest to highest.
# not_cancelled only guarantees a non-missing dep_delay, so arr_delay can
# still be NA; na.rm = TRUE keeps one missing arrival from turning a tail
# number's whole average into NA.
avg_delay_per_tailnum <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(avg_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(avg_arr_delay)

# The first row after the ascending sort is the tail number with the lowest
# average arrival delay.
lowest_delay_tailnum <- head(avg_delay_per_tailnum, 1)
print(lowest_delay_tailnum)
# Smallest and largest dep_time value for each (year, month, day), computed
# on the rows that have a recorded dep_delay.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(
    first = min(dep_time),
    last = max(dep_time)
  )
# Proportion of flights per month whose dep_delay exceeds 60, highest first.
# The mean of a logical vector is the share of TRUE values; rows with a
# missing dep_delay compare to NA and are dropped by na.rm = TRUE.
flights %>%
  group_by(month) %>%
  summarise(prop = mean(dep_delay > 60, na.rm = TRUE)) %>%
  arrange(desc(prop))
# Number of distinct carriers for each destination, with the destinations
# that have the most carriers listed first.
flights %>%
  group_by(dest) %>%
  summarise(
    carriers = n_distinct(carrier)
  ) %>%
  arrange(desc(carriers))
# Per-destination summary: number of flights, mean distance and mean arrival
# delay (missing values skipped). Only destinations with more than 20 flights
# are kept, and "HNL" is excluded.
delays <- flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, dest != "HNL")