This file covers basic plotting functions with ggplot2. Material covered includes:

- Plotting by category using color, shape, etc.
- Plotting multiple geoms
- Plotting and highlighting subsets of data
- Transforming data with stat_count() to determine the best-fitting plot type
- Aesthetic adjustments
- Plot template at the end
Keyboard shortcuts:

- Ctrl + Alt + I: create a new chunk
- Ctrl + Enter: run the selected line
- Ctrl + Shift + Enter: run the whole chunk
When knitting: the more `#` characters a heading starts with, the smaller its text in the printed document.
# install.packages("tidyverse")
# install.packages("knitr")
# remove.packages()
library("tidyverse")
library("knitr")
# Print the mpg data set (a tibble) to inspect its columns before plotting.
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 224 more rows
# Basic scatter plot: geom_point() draws the points and aes() selects the
# x and y variables.
# displ is engine size, hwy is fuel efficiency.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy)
  )

# General template for a plot:
# ggplot(data = <DATA>) +
#   <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
# Map the class variable to different aesthetics, one plot per aesthetic.

# Color by class.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = class)
  )

# Point size by class.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, size = class)
  )

# Transparency by class.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, alpha = class)
  )

# Point shape by class.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, shape = class)
  )

# Setting an aesthetic manually: color sits outside aes(), so every point
# is drawn in the same fixed color rather than being mapped to a variable.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    color = "blue"
  )

# Mapping an expression: points are colored by whether displ < 5 is
# TRUE or FALSE.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = displ < 5)
  )
# Intentionally broken example: the `+` starts the second line, so R treats
# `ggplot(data = mpg)` as a complete statement and then fails on the lone `+`
# (see the error printed below). The `+` has to end the first line instead.
ggplot(data = mpg)
+ geom_point(mapping = aes(x = displ, y = hwy))
## Error:
## ! Cannot use `+` with a single argument.
## ℹ Did you accidentally put `+` on a new line?
# Facets: split one plot into panels by a categorical variable.

# facet_wrap(): `~ class` is a formula naming the faceting variable;
# nrow sets the number of panel rows.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

# facet_grid(): panel rows by drv, panel columns by cyl.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

# A `.` on the left of the formula means no row variable: columns by cyl only.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)
# geom_smooth(): draw a smoothed line through the same x/y data.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy)
  )

# Same plot with the point layer left commented out for comparison
# (author's note: points look horrible here, the smooth line reads better).
ggplot(data = mpg) +
  # geom_point(mapping = aes(x = displ, y = hwy))
  geom_smooth(
    mapping = aes(x = displ, y = hwy)
  )

# One line per drv value, each with its own line type.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, linetype = drv)
  )

# One line per drv value via the group aesthetic.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, group = drv)
  )
# One smooth line per drv value, distinguished by color, with the legend
# hidden via show.legend = FALSE.
# Note: there is deliberately no comma after the last argument. A dangling
# comma passes an empty argument into `...`, which is an error in many R
# calls and is not part of the documented geom_smooth() interface.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, color = drv),
    show.legend = FALSE
  )
# Multiple geoms in one plot.

# Local mappings: the same aes() is repeated in every layer.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy))

# Global mappings: aes() is given once to ggplot() and used by both layers.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point() +
  geom_smooth()

# Same call as the previous plot, repeated.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point() +
  geom_smooth()

# Data and mappings supplied separately to each layer; ggplot() is empty.
ggplot() +
  geom_point(
    data = mpg,
    mapping = aes(x = displ, y = hwy)
  ) +
  geom_smooth(
    data = mpg,
    mapping = aes(x = displ, y = hwy)
  )
# Per-layer overrides on top of global mappings.

# color = class applies to the point layer only; the smooth layer uses
# just the global x/y mappings.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth()

# Highlighting a subset: the smooth layer gets its own data (subcompact
# rows only), and se = FALSE is passed to geom_smooth().
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "subcompact"),
    se = FALSE
  )

# The same x/y data drawn as a line chart.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_line()
# Bar chart of the diamonds data: only x is mapped, the bar heights are
# the counts per cut.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut)
  )

# The same x mapping passed to stat_count() directly instead of geom_bar().
ggplot(data = diamonds) +
  stat_count(
    mapping = aes(x = cut)
  )
# Small hand-written table of cut frequencies, built row by row with
# tribble(); cut is a character column and freq is numeric.
demo <- tribble(
  ~cut,        ~freq,
  "Fair",       1610,
  "Good",       4906,
  "Very Good", 12082,
  "Premium",   13791,
  "Ideal",     21551
)

# Print the table to check its contents.
demo
## # A tibble: 5 × 2
## cut freq
## <chr> <dbl>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# Bar heights taken directly from the freq column: stat = "identity" uses
# the y values as given instead of counting rows.
ggplot(data = demo) +
  geom_bar(
    mapping = aes(x = cut, y = freq),
    stat = "identity"
  )
# Aesthetic adjustments on bar charts.

# Fill each bar by its own cut.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut)
  )

# Fill by a second variable: bars are split into segments by clarity.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = clarity)
  )

# position = "identity" places each bar exactly where it falls, so the
# clarity bars overlap instead of stacking; alpha makes them see-through.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(alpha = 1 / 5, position = "identity")

# Same plot, less transparent.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, fill = clarity)
) +
  geom_bar(alpha = 3 / 5, position = "identity")

# Same overlap, shown with colored outlines and no fill.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, color = clarity)
) +
  geom_bar(fill = NA, position = "identity")

# position = "fill": stacked bars scaled to the same height.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = clarity),
    position = "fill"
  )

# position = "dodge": clarity bars placed side by side.
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = clarity),
    position = "dodge"
  )
# position = "jitter" on a scatter plot, so overlapping points are spread
# apart.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    position = "jitter"
  )

# Full plot template:
# ggplot(data = <DATA>) +
#   <GEOM_FUNCTION>(
#     mapping = aes(<MAPPINGS>),
#     stat = <STAT>,
#     position = <POSITION>
#   ) +
#   <FACET_FUNCTION>