#2.1 A worked example
#load package
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# load data
data(CPS85 , package = "mosaicData")
# specify dataset and mapping
library(ggplot2)
ggplot(data = CPS85,
mapping = aes(x = exper, y = wage))
data frame containing the data to be plotted
the mapping of the variables to visual properties of the graph. The mappings are placed within the aes function (where aes stands for aesthetics).
# add points
ggplot(data = CPS85,
mapping = aes(x = exper, y = wage)) +
geom_point()
# delete outlier
library(dplyr)
plotdata <- filter(CPS85, wage < 40)
# redraw scatterplot
ggplot(data = plotdata,
mapping = aes(x = exper, y = wage)) +
geom_point()
# make points blue, larger, and semi-transparent
ggplot(data = plotdata,
mapping = aes(x = exper, y = wage)) +
geom_point(color = "cornflowerblue",
alpha = .7,
size = 3)
# add a line of best fit.
ggplot(data = plotdata,
mapping = aes(x = exper, y = wage)) +
geom_point(color = "cornflowerblue",
alpha = .7,
size = 3) +
geom_smooth(method = "lm")
They are added using functions that start with geom_. In this example, we’ll add points using the geom_point function, creating a scatterplot
# indicate sex using color
ggplot(data = plotdata,
mapping = aes(x = exper,
y = wage,
color = sex)) +
geom_point(alpha = .7,
size = 3) +
geom_smooth(method = "lm",
se = FALSE,
size = 1.5)
In addition to mapping variables to the x and y axes, variables can be mapped to the color, shape, size, transparency, and other visual characteristics of geometric objects. This allows groups of observations to be superimposed in a single graph.
# indicate sex using color
ggplot(data = plotdata,
mapping = aes(x = exper,
y = wage,
color = sex)) +
geom_point(alpha = .7,
size = 3) +
geom_smooth(method = "lm",
se = FALSE,
size = 1.5)
Scales control how variables are mapped to the visual characteristics of the plot. Scale functions (which start with scale_) allow you to modify this mapping. In the next plot, we’ll change the x and y axis scaling, and the colors employed.
# reproduce plot for each level of job sector
ggplot(data = plotdata,
mapping = aes(x = exper,
y = wage,
color = sex)) +
geom_point(alpha = .7) +
geom_smooth(method = "lm",
se = FALSE) +
scale_x_continuous(breaks = seq(0, 60, 10)) +
scale_y_continuous(breaks = seq(0, 30, 5),
label = scales::dollar) +
scale_color_manual(values = c("indianred3",
"cornflowerblue")) +
facet_wrap(~sector)
Facets reproduce a graph for each level a given variable (or combination of variables). Facets are created using functions that start with facet_. Here, facets will be defined by the eight levels of the sector variable.
# add informative labels
ggplot(data = plotdata,
mapping = aes(x = exper,
y = wage,
color = sex)) +
geom_point(alpha = .7) +
geom_smooth(method = "lm",
se = FALSE) +
scale_x_continuous(breaks = seq(0, 60, 10)) +
scale_y_continuous(breaks = seq(0, 30, 5),
label = scales::dollar) +
scale_color_manual(values = c("indianred3",
"cornflowerblue")) +
facet_wrap(~sector) +
labs(title = "Relationship between wages and experience",
subtitle = "Current Population Survey",
caption = "source: http://mosaic-web.org/",
x = " Years of Experience",
y = "Hourly Wage",
color = "Gender")
Graphs should be easy to interpret and informative labels are a key element in achieving this goal. The labs function provides customized labels for the axes and legends. Additionally, a custom title, subtitle, and caption can be added.
# use a minimalist theme
ggplot(data = plotdata,
mapping = aes(x = exper,
y = wage,
color = sex)) +
geom_point(alpha = .6) +
geom_smooth(method = "lm",
se = FALSE) +
scale_x_continuous(breaks = seq(0, 60, 10)) +
scale_y_continuous(breaks = seq(0, 30, 5),
label = scales::dollar) +
scale_color_manual(values = c("indianred3",
"cornflowerblue")) +
facet_wrap(~sector) +
labs(title = "Relationship between wages and experience",
subtitle = "Current Population Survey",
caption = "source: http://mosaic-web.org/",
x = " Years of Experience",
y = "Hourly Wage",
color = "Gender") +
theme_minimal()
Finally, we can fine tune the appearance of the graph using themes. Theme functions (which start with theme_) control background colors, fonts, grid-lines, legend placement, and other non-data related features of the graph. Let’s use a cleaner theme.