# Setup ----
# Install each required package only when it is missing, so re-running the
# script does not trigger a fresh download/installation every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

library(nycflights13)
library(dplyr)

# View() opens a spreadsheet-style data viewer, so it is only called when the
# script runs in an interactive session.
if (interactive()) {
  View(flights)
}
# Quick look at the flights table ----
# Column names, types and the first few values of each column.
flights %>% glimpse()
# Per-column summary statistics.
flights %>% summary()
# Number of rows and columns.
flights %>% dim()

# Mean of dep_delay across all flights, ignoring missing values.
flights %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
# Flights ordered from the largest dep_delay down to the smallest.
sortf <- flights %>%
  arrange(desc(dep_delay))

# Show the sorted table with carrier, flight and tailnum moved to the front;
# everything() keeps all remaining columns after them.
sortf %>%
  select(carrier, flight, tailnum, everything())
# Keep only the columns whose names start with "dep".
# The prefix must be written with plain ASCII double quotes: typographic
# (curly) quotes are not string delimiters in R and stop the script parsing.
flights %>%
  select(starts_with("dep"))
# Largest dep_delay in the data, ignoring missing values.
maxdep <- max(flights$dep_delay, na.rm = TRUE)

# Row position(s) of every flight tied for that maximum. which() drops the NA
# results that the comparison produces for rows with a missing dep_delay.
maxdep_id <- which(flights$dep_delay == maxdep)

# Identify the flight(s) by carrier, flight number and tail number. The
# columns are selected by name rather than by position (previously 10:12) so
# the result does not depend on the column order of the table.
flights[maxdep_id, c("carrier", "flight", "tailnum")]
# Mean dep_delay for each (year, month, day), skipping missing values
# inside every group.
flights %>%
  group_by(year, month, day) %>%
  summarise(
    mean = mean(dep_delay, na.rm = TRUE)
  )
# Flights that have a recorded dep_delay. Rows where dep_delay is NA are
# dropped (the variable name treats those as cancelled flights).
not_cancelled <- flights %>%
  filter(!is.na(dep_delay))

# Daily mean dep_delay again; na.rm is not needed here because the missing
# dep_delay values were already removed above.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(
    mean = mean(dep_delay)
  )
# Average arr_delay per aircraft (tailnum), sorted from lowest to highest.
# not_cancelled only guarantees that dep_delay is present; arr_delay can still
# be NA, and a single NA would turn that aircraft's mean into NA. na.rm = TRUE
# averages over the recorded arrival delays instead.
avg_delay_per_tailnum <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(avg_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(avg_arr_delay)

# First row after the ascending sort, i.e. the aircraft with the lowest
# average arrival delay.
lowest_delay_tailnum <- head(avg_delay_per_tailnum, 1)
print(lowest_delay_tailnum)
# Smallest and largest dep_time within each (year, month, day) among the
# flights kept in not_cancelled.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(
    first = min(dep_time),
    last = max(dep_time)
  )
# Share of each month's flights whose dep_delay exceeds 60, sorted with the
# highest share first.
monthly_delay_proportion <- flights %>%
  group_by(month) %>%
  summarise(
    # Every row of the month is counted, including rows with NA dep_delay.
    total_flights = n(),
    # NA comparisons are dropped, so rows with NA dep_delay never count as
    # delayed.
    delayed_over_hour = sum(dep_delay > 60, na.rm = TRUE),
    proportion = delayed_over_hour / total_flights
  ) %>%
  arrange(desc(proportion))
# Number of distinct carriers serving each destination, sorted with the
# destination served by the most carriers first.
carrier_count <- flights %>%
  group_by(dest) %>%
  summarise(
    num_carriers = n_distinct(carrier)
  ) %>%
  arrange(desc(num_carriers))
# Per-destination summary: number of flights, mean distance and mean
# arr_delay (missing values ignored in both means).
delays <- flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  # Keep destinations with more than 20 flights and exclude "HNL".
  # NOTE(review): HNL is presumably dropped as a distance outlier - confirm.
  # The code must be quoted with plain ASCII double quotes; typographic
  # (curly) quotes are not valid string delimiters in R.
  filter(count > 20, dest != "HNL")