install.packages("nycflights13")
install.packages("dplyr")
install.packages("ggplot2")
library(nycflights13)
library(dplyr)
library(ggplot2)
# Inspect the data ----
# Print the flights table, open its help page, and show its structure.
flights
?flights
str(flights)

# The dplyr verbs demonstrated in this script:
#   filter(), arrange(), select(), mutate(), summarise(), group_by()

# filter() ----
# Keep only the rows where both conditions hold (flights on January 1st).
filter(flights, month == 1, day == 1)
jan1 <- filter(flights, month == 1, day == 1)
# Wrapping an assignment in parentheses stores the result and prints it.
(dec25 <- filter(flights, month == 12, day == 25))

# Comparison operators: >, >=, <, <=, !=, ==

# Exercise: find all the flights that: ...
# arrange() ----
# Order rows by year, then month, then day.
arrange(flights, year, month, day)
# Order rows by arrival delay, in descending order.
arrange(flights, desc(arr_delay))

# select() ----
# Select year, month and day.
select(flights, year, month, day)
# Select all columns from year through day.
select(flights, year:day)
# Select all columns except those from year through day.
select(flights, -(year:day))

# Helper functions that can be used inside select():
#   starts_with("abc"), ends_with("xyz"), contains("ijk")

# mutate() ----
# Narrower table used by the mutate() examples: the date columns, every
# column whose name ends in "delay", plus distance and air_time.
flights_sml <- select(
  flights,
  year:day,
  ends_with("delay"),
  distance,
  air_time
)
# Add new columns computed from existing ones.
mutate(
  flights_sml,
  gain = arr_delay - dep_delay,
  speed = distance / air_time * 60
)
# Columns created earlier in the same call (gain, hours) can be reused
# by later ones (gain_per_hour).
mutate(
  flights_sml,
  gain = arr_delay - dep_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)

# transmute() ----
# Same computation as above, applied to the full flights table.
transmute(
  flights,
  gain = arr_delay - dep_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)

# summarise() and group_by() ----
# Mean departure delay over all flights, ignoring missing values.
summarize(flights, delay = mean(dep_delay, na.rm = TRUE))
# Group by date so that later summaries are computed per day.
by_day <- group_by(flights, year, month, day)
# Mean departure delay per day, ignoring missing values.
summarize(by_day, delay = mean(dep_delay, na.rm = TRUE))

# %>% The pipe ----
# Step-by-step version: every intermediate result gets its own name.
by_dest <- group_by(flights, dest)
# Per destination: number of flights, mean distance, mean arrival delay.
delay <- summarize(
  by_dest,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE)
)
# Keep destinations with more than 20 flights, excluding "HNL".
delay <- filter(delay, count > 20, dest != "HNL")
# Plot mean arrival delay against mean distance for the step-by-step
# result; point size encodes the number of flights per destination.
ggplot(data = delay, mapping = aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1 / 3) +
  geom_smooth(se = FALSE)

# %>% The pipe ----
# Same computation written as a single chain, with no named intermediates.
delays <- flights %>%
  group_by(dest) %>%
  summarize(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, dest != "HNL")

# Plot the piped result. The data argument is `delays` (the object built
# just above), while `y = delay` refers to the column of that name.
ggplot(data = delays, mapping = aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1 / 3) +
  geom_smooth(se = FALSE)