# Setup ----
# Install the required packages only when they are missing, so re-running
# the script does not reinstall them every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

library(nycflights13)
library(dplyr)

# First look at the data ----
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(flights)
}
glimpse(flights)
summary(flights)
dim(flights)

# Overall mean departure delay, ignoring missing values.
flights %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))

# Flights ordered from the largest departure delay downwards.
sortf <- flights %>%
  arrange(desc(dep_delay))

# Same rows, with carrier, flight and tailnum moved to the front and every
# remaining column kept after them.
sortf %>%
  select(carrier, flight, tailnum, everything())

# Keep only the columns whose names begin with "dep".
# The prefix must be written with plain ASCII double quotes; typographic
# (curly) quotes are not valid string delimiters in R.
flights %>%
  select(starts_with("dep"))

# Largest departure delay in the data, skipping missing values.
maxdep <- max(flights[["dep_delay"]], na.rm = TRUE)

# Row position(s) whose departure delay equals that maximum; which() drops
# the NA comparisons, so rows with a missing delay are never selected.
maxdep_id <- which(flights[["dep_delay"]] == maxdep)

# Show columns 10 to 12 for those rows.
# NOTE(review): positional selection; presumably carrier, flight and tailnum
# in nycflights13::flights. Confirm against the installed package version.
flights[maxdep_id, 10:12]

# Mean departure delay for each calendar day; missing delays are dropped
# inside mean() rather than filtered out beforehand.
flights %>%
  group_by(year, month, day) %>%
  summarise(mean = mean(dep_delay, na.rm = TRUE))

# Flights that have a recorded departure delay. Only dep_delay is checked
# here, so other columns (e.g. arr_delay) may still contain NA.
not_cancelled <- filter(flights, !is.na(dep_delay))

# Mean departure delay per calendar day; no na.rm is needed because rows
# with a missing dep_delay were removed above.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(mean = mean(dep_delay))

# Average arrival delay per aircraft (tailnum), sorted from the lowest
# average upwards.
# not_cancelled only guarantees a non-missing dep_delay, so arr_delay can
# still be NA; na.rm = TRUE stops a single missing arrival delay from turning
# an aircraft's whole average into NA and pushing it out of the ranking.
avg_delay_per_tailnum <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(avg_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  arrange(avg_arr_delay)

# The aircraft with the lowest average arrival delay (first row after the
# ascending sort).
lowest_delay_tailnum <- head(avg_delay_per_tailnum, 1)
print(lowest_delay_tailnum)

# Smallest and largest dep_time value for each calendar day, computed over
# the flights that have a recorded departure delay.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(
    first = min(dep_time),
    last = max(dep_time)
  )

# Per month: the share of flights whose departure delay exceeded 60, sorted
# from the highest share down.
# total_flights counts every row in the month (n()), including rows whose
# dep_delay is missing; those rows are simply not counted as delayed.
monthly_delay_proportion <- flights %>%
  group_by(month) %>%
  summarise(
    total_flights = n(),
    delayed_over_hour = sum(dep_delay > 60, na.rm = TRUE),
    proportion = delayed_over_hour / total_flights
  ) %>%
  arrange(desc(proportion))

# Number of distinct carriers serving each destination, with the
# destinations served by the most carriers listed first.
carrier_count <- flights %>%
  group_by(dest) %>%
  summarise(num_carriers = n_distinct(carrier)) %>%
  arrange(desc(num_carriers))

# Per-destination summary: number of flights, mean distance and mean arrival
# delay (missing values ignored in both means).
# Destinations with 20 or fewer flights are dropped, as is the destination
# coded "HNL" (presumably excluded as an outlier; confirm the intent).
# The "HNL" literal must use plain ASCII double quotes; typographic quotes
# are a parse error in R.
delays <- flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, dest != "HNL")