library(tidyverse)
# Scatterplots: engine displacement vs. highway mileage ----
# Every plot shares the same x/y mapping; the later ones additionally map
# `class` onto one point aesthetic each.

# Baseline: position aesthetics only.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

# `class` encoded as point colour.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class))

# `class` encoded as point size.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(size = class))

# `class` encoded as point shape.
# NOTE(review): ggplot2's default shape palette is documented to cover at most
# six levels -- confirm `class` fits, otherwise some points are not drawn.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(shape = class))
# Faceted scatterplots ----

# One panel per `class`, wrapped onto two rows.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(vars(class), nrow = 2)

# Panel grid: `drv` defines the rows, `cyl` defines the columns.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(rows = vars(drv), cols = vars(cyl))
# Base-graphics histograms ----

# Cylinder counts, filled with a 25%-opaque blue.
hist(mpg$cyl, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.25))

# City mileage on fixed axes, then highway mileage drawn on top of the same
# plot (`add = TRUE`) in half-transparent red so both remain visible.
hist(mpg$cty, col = "skyblue", xlim = c(0, 50), ylim = c(0, 100))
hist(mpg$hwy, col = scales::alpha("red", alpha = 0.5), add = TRUE)
# Histogram of city mileage, filled by drive type, using ten bins and
# half-transparent fills.
# The bar height is the computed bin count; `after_stat(count)` replaces the
# deprecated `..count..` dot-dot notation.
ggplot(mpg, aes(cty, fill = drv)) +
  geom_histogram(aes(y = after_stat(count)), alpha = 0.5, bins = 10)
# Bar chart: number of rows in `mpg` for each `class`.
ggplot(mpg, aes(x = class)) +
  geom_bar()
# Bar chart of proportions instead of counts.
# `after_stat(prop)` replaces the deprecated `..prop..` dot-dot notation.
# `group = 1` places every bar in a single group, so the proportion is taken
# over the whole data set rather than within each bar.
ggplot(data = mpg) +
  geom_bar(mapping = aes(x = class, y = after_stat(prop), group = 1))
# Bar charts of `class` filled by `drv`, one per position adjustment ----

# Default position adjustment.
ggplot(mpg, aes(x = class, fill = drv)) +
  geom_bar()

# position = "fill"
ggplot(mpg, aes(x = class, fill = drv)) +
  geom_bar(position = "fill")

# position = "dodge"
ggplot(mpg, aes(x = class, fill = drv)) +
  geom_bar(position = "dodge")
# Boxplots of highway mileage ----

# Highway mileage by vehicle class.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot()

# Same plot with the coordinate system flipped.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Boxes additionally filled by drive type.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(aes(fill = drv))

# Jittered points layered over the boxes.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  geom_jitter()

# Highway mileage by drive type, one panel per class on three rows.
ggplot(mpg, aes(x = drv, y = hwy)) +
  geom_boxplot() +
  facet_wrap(vars(class), nrow = 3)
library(nycflights13)
# Row filtering with filter() ----

# Flights on January 1st.
flights %>%
  filter(month == 1 & day == 1)

# Flights in November or December.
flights %>%
  filter(month %in% c(11, 12))

# Flights delayed by no more than 120 minutes on both arrival and departure
# (De Morgan form of "not (arrival late or departure late)").
flights %>%
  filter(arr_delay <= 120 & dep_delay <= 120)

# Flights with a missing departure time.
flights %>%
  filter(is.na(dep_time))

# Flights whose distance lies between 700 and 1000, bounds included.
flights %>%
  filter(distance >= 700 & distance <= 1000)
# Row ordering with arrange() ----

# Ascending by year, then month, then day.
flights %>%
  arrange(year, month, day)

# Descending by departure delay.
flights %>%
  arrange(desc(dep_delay))

# Descending by departure delay, ties broken by ascending arrival delay.
flights %>%
  arrange(desc(dep_delay), arr_delay)

# Rows with a missing departure time sorted to the top.
flights %>%
  arrange(desc(is.na(dep_time)))
# Column names of `flights`; the `##` lines below record the printed output.
names(flights)
## [1] "year" "month" "day" "dep_time"
## [5] "sched_dep_time" "dep_delay" "arr_time" "sched_arr_time"
## [9] "arr_delay" "carrier" "flight" "tailnum"
## [13] "origin" "dest" "air_time" "distance"
## [17] "hour" "minute" "time_hour"
# Column selection with select() ----

# By name: three named columns, a name range, and exclusion by name.
flights %>%
  select(year, month, day)
flights %>%
  select(year:day)
flights %>%
  select(-c(year, day))

# By position: single positions, a range, exclusion, and two ranges combined.
flights %>%
  select(1, 3)
flights %>%
  select(1:3)
flights %>%
  select(-c(1, 3))
flights %>%
  select(1:3, 5:7)

# By pattern: every column whose name contains "time".
flights %>%
  select(contains("time"))
# Some helper functions for select(): starts_with(), ends_with(), contains(), matches(), num_range().
# Derived columns ----

# mutate(): append `gain` and `speed` to the existing columns.
flights %>%
  mutate(
    gain = dep_delay - arr_delay,
    speed = distance / air_time * 60
  )

# transmute(): keep only the newly computed columns. `gain_per_hour` reuses
# `gain` and `hours` defined earlier in the same call.
flights %>%
  transmute(
    gain = dep_delay - arr_delay,
    hours = air_time / 60,
    gain_per_hour = gain / hours
  )
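# Functions for creating new variables inside mutate(): arithmetic operators, %/% and %%, log(), lead()/lag(), cumulative functions such as cumsum(), and ranking functions such as min_rank().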
# Grouped summaries per destination ----

# Mean distance for each destination, ignoring missing values.
flights %>%
  group_by(dest) %>%
  summarise(dist = mean(distance, na.rm = TRUE))

# Per destination: number of flights, mean distance and mean arrival delay
# (missing values ignored in both means).
flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  )
# Mean departure delay for each calendar day, ignoring missing values.
# `.groups = "drop_last"` spells out what summarise() does by default with
# several grouping keys (the result stays grouped by year and month), so the
# grouping of the output is explicit rather than implied.
flights %>%
  group_by(year, month, day) %>%
  summarise(
    mean = mean(dep_delay, na.rm = TRUE),
    .groups = "drop_last"
  )
# Some helper functions can be used: