Libraries
library(tidyverse)
library(forcats)
library(lubridate)
library(ggthemes)
library(quantreg)
library(openair)
library(lattice)
library(RColorBrewer)
library(rvest)
Data
# Load the example datasets used throughout these notes.
data("iris", "mtcars", "diamonds", "economics")

# Add categorical copies of cyl and am, and keep the row names (car models)
# as an ordinary column.
mtcars[["fcyl"]] <- as.factor(mtcars[["cyl"]])
mtcars[["fam"]] <- as.factor(mtcars[["am"]])
mtcars[["car"]] <- rownames(mtcars)

# Restore the objects saved in the local fish workspace file.
load("~/R Data/fish.RData")

# Vocab is read from a remote CSV; gm2007 from a CSV in the working directory.
Vocab <- read.csv(
  file = "https://raw.githubusercontent.com/sigmasigmaiota/vocab/master/vocab.csv"
)
gm2007 <- read.csv(file = "gm2007.csv")

# Preview the first rows of Vocab.
head(Vocab)
## X.1 X year sex education vocabulary
## 1 1 19740001 1974 Male 14 9
## 2 2 19740002 1974 Male 16 9
## 3 3 19740003 1974 Female 10 9
## 4 4 19740004 1974 Female 10 5
## 5 5 19740005 1974 Female 12 8
## 6 6 19740006 1974 Male 16 8
The ability to produce meaningful and beautiful data visualizations is an essential part of your skill set as a data scientist. This course, the first R data visualization tutorial in the series, introduces you to the principles of good visualizations and the grammar of graphics plotting concepts implemented in the ggplot2 package.
In this chapter we’ll get you into the right frame of mind for developing meaningful visualizations with R. You’ll understand that as a communications tool, visualizations require you to think about your audience first. You’ll also be introduced to the basics of ggplot2 - the 7 different grammatical elements (layers) and aesthetic mappings.
# Explore the mtcars data frame with str()
# (prints the dimensions plus each column's name, type and first values)
str(mtcars)
## 'data.frame': 32 obs. of 14 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
## $ fcyl: Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
## $ fam : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
## $ car : chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
# Scatter plot of mpg against cyl, with cyl left as a number
ggplot(data = mtcars, mapping = aes(x = cyl, y = mpg)) +
  geom_point()

# Same plot with cyl wrapped in factor(), so it is treated as categorical
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_point()
Here we focus on the data, aesthetics, and geom layers. We’ll get to making pretty plots in the last chapter with the themes layer.
# mpg against wt, with displacement shown through point color
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = disp)) +
  geom_point()

# Same data, with displacement shown through point size instead
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = disp)) +
  geom_point()
The diamonds dataset loaded above from ggplot2 contains details of almost 54,000 diamonds. Among the variables included are carat (a measurement of the diamond’s size) and price.
# Price against carat as a scatter plot
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

# Overlay a smoothed trend on top of the points
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Color by clarity; the mapping sits in ggplot(), so both layers inherit it
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = clarity)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Same plot with the points drawn at 40% opacity
ggplot(data = diamonds, mapping = aes(x = carat, y = price, color = clarity)) +
  geom_point(alpha = 0.4) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Base plot object: data and x/y mappings only, no layers yet
plt_price_vs_carat <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
)

# Draw the base plot with a point layer
plt_price_vs_carat + geom_point()

# Store a variant whose points are 20% opaque, then display it
plt_price_vs_carat_transparent <- plt_price_vs_carat +
  geom_point(alpha = 0.2)
plt_price_vs_carat_transparent

# Store a variant whose point layer maps color to clarity, then display it
plt_price_vs_carat_by_clarity <- plt_price_vs_carat +
  geom_point(mapping = aes(color = clarity))
plt_price_vs_carat_by_clarity
Aesthetic mappings are the cornerstone of the grammar of graphics plotting concept. This is where the magic happens - converting continuous and categorical data into visual scales that provide access to a large amount of information in a very short time. In this chapter you’ll understand how to choose the best aesthetic mappings for your data.
These are the aesthetics you can consider within aes() in this chapter: x, y, color, fill, size, alpha, label and shape.
In the following exercise the fcyl column is categorical. It is cyl transformed into a factor.
# fcyl is cyl converted to a factor
mtcars[["fcyl"]] <- as.factor(mtcars[["cyl"]])

# mpg on x, fcyl on y
ggplot(data = mtcars, mapping = aes(x = mpg, y = fcyl)) +
  geom_point()

# Axes swapped: fcyl on x, mpg on y
ggplot(data = mtcars, mapping = aes(x = fcyl, y = mpg)) +
  geom_point()

# wt on x, mpg on y, points colored by fcyl
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = fcyl)) +
  geom_point()

# Same mapping, drawn with shape 1 at size 4
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, color = fcyl)) +
  geom_point(size = 4, shape = 1)

# fcyl mapped to fill instead of color, same shape and size
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, fill = fcyl)) +
  geom_point(size = 4, shape = 1)
# Map fill to the factor fcyl (not the numeric cyl, which would produce a
# continuous gradient); shape 21 with size 4 and alpha 0.6
ggplot(mtcars, aes(wt, mpg, fill = fcyl)) +
  geom_point(shape = 21, size = 4, alpha = 0.6)

# Additionally map the outline color to the factor fam (not the numeric am)
ggplot(mtcars, aes(wt, mpg, fill = fcyl, color = fam)) +
  geom_point(shape = 21, size = 4, alpha = 0.6)
# Shared base: mpg against wt, no layers yet
plt_mpg_vs_wt <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))

# fcyl shown through point size
plt_mpg_vs_wt +
  geom_point(mapping = aes(size = fcyl))
## Warning: Using size for a discrete variable is not advised.

# fcyl shown through transparency
plt_mpg_vs_wt +
  geom_point(mapping = aes(alpha = fcyl))
## Warning: Using alpha for a discrete variable is not advised.

# fcyl shown through point shape
plt_mpg_vs_wt +
  geom_point(mapping = aes(shape = fcyl))

# fcyl drawn as a text label at each observation
plt_mpg_vs_wt +
  geom_text(mapping = aes(label = fcyl))
This time you’ll use these arguments to set attributes of the plot, not map variables onto aesthetics.
You can specify colors in R using hex codes: a hash followed by two hexadecimal numbers each for red, green, and blue (“#RRGGBB”). Hexadecimal is base-16 counting. You have 0 to 9, and A representing 10 up to F representing 15. Pairs of hexadecimal numbers give you a range from 0 to 255. “#000000” is “black” (no color), “#FFFFFF” means “white”, and “#00FFFF” is cyan (mixed green and blue).
# A hexadecimal color
my_blue <- "#4ABEFF"

# Set (not map) the point color and alpha
ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = my_blue, alpha = 0.6)

# Map fill to the factor fcyl, then set point color, size and shape
ggplot(mtcars, aes(wt, mpg, fill = fcyl)) +
  geom_point(color = my_blue, size = 10, shape = 1)

# Color mapped to fcyl; point layer with alpha 0.5
ggplot(mtcars, aes(wt, mpg, color = fcyl)) +
  geom_point(alpha = 0.5)

# Text layer labelled with the row names; color is set to red in the layer
ggplot(mtcars, aes(wt, mpg, color = fcyl)) +
  geom_text(label = rownames(mtcars), color = "red")

# Points layer with shape 24; color is set to yellow in the layer
ggplot(mtcars, aes(wt, mpg, color = fcyl)) +
  geom_point(color = "yellow", shape = 24)
# Columns are referenced by bare name inside aes(); writing mtcars$fcyl there
# triggers ggplot2's "Use of `mtcars$fcyl` is discouraged" warning.

# 3 aesthetics: qsec vs. mpg, colored by fcyl
ggplot(mtcars, aes(mpg, qsec, color = fcyl)) +
  geom_point()

# 4 aesthetics: add a mapping of shape to fam
ggplot(mtcars, aes(mpg, qsec, color = fcyl, shape = fam)) +
  geom_point()

# 5 aesthetics: add a mapping of size to hp / wt
ggplot(mtcars, aes(mpg, qsec, color = fcyl, shape = fam, size = hp / wt)) +
  geom_point()
In this exercise, you’ll modify some aesthetics to make a bar plot of the number of cylinders for cars with different types of transmission.
You’ll also make use of some functions for improving the appearance of the plot.
# Bars count the cars per cylinder class, filled by transmission (am);
# labs() replaces the default axis titles
ggplot(
  data = mtcars,
  mapping = aes(x = as.factor(cyl), fill = as.factor(am))
) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count")
# Named fill colors, one per transmission type
palette <- c(automatic = "#377EB8", manual = "#E41A1C")

# The fill factor is labelled "automatic"/"manual" (am: 0 = automatic,
# 1 = manual) so that its levels match the names in `palette`; a plain
# as.factor(am) has levels "0"/"1", which the named values do not match.
ggplot(
  mtcars,
  aes(
    as.factor(cyl),
    fill = factor(am, levels = c(0, 1), labels = c("automatic", "manual"))
  )
) +
  geom_bar() +
  labs(x = "Number of Cylinders", y = "Count") +
  # Set the fill color scale
  scale_fill_manual("Transmission", values = palette)
# Named fill colors, one per transmission type
palette <- c(automatic = "#377EB8", manual = "#E41A1C")

# Dodged bars with the manual fill scale.
# Every layer must be chained with `+`: without it after labs(), the scale is
# evaluated as a separate statement and printed instead of being applied.
# The fill factor is labelled "automatic"/"manual" (am: 0 = automatic,
# 1 = manual) so that its levels match the names in `palette`.
ggplot(
  mtcars,
  aes(
    as.factor(cyl),
    fill = factor(am, levels = c(0, 1), labels = c("automatic", "manual"))
  )
) +
  # Set the position
  geom_bar(position = "dodge") +
  labs(x = "Number of Cylinders", y = "Count") +
  scale_fill_manual("Transmission", values = palette)
In the last chapter you saw that all the visible aesthetics can serve as attributes and aesthetics, but I very conveniently left out x and y. That’s because although you can make univariate plots (such as histograms, which you’ll get to in the next chapter), a y-axis will always be provided, even if you didn’t ask for it.
When setting y-axis limits, you can specify the limits as separate arguments, or as a single numeric vector. That is, ylim(lo, hi) or ylim(c(lo, hi)).
# One-dimensional view of mpg: y is the constant 0, and the points are
# jittered through the position argument
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_point(position = "jitter")

# Same idea with the jitter geom, plus explicit y-axis limits of -2 to 2
ggplot(data = mtcars, mapping = aes(x = mpg, y = 0)) +
  geom_jitter() +
  ylim(-2, 2)
A plot’s geometry dictates what visual elements will be used. In this chapter, we’ll familiarize you with the geometries used in the three most common plot types you’ll encounter - scatter plots, bar charts and line plots. We’ll look at a variety of different ways to construct these plots.
Scatter plots (using geom_point()) are intuitive, easily understood, and very common, but we must always consider overplotting, particularly in the following four situations:
Typically, alpha blending (i.e. adding transparency) is recommended when using solid shapes. Alternatively, you can use opaque, hollow shapes.
Small points are suitable for large datasets with regions of high density (lots of overlapping).
# Base plot: price against carat, colored by clarity
plt_price_vs_carat_by_clarity <- ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, color = clarity)
)

# Tiny "." points at 50% opacity
plt_price_vs_carat_by_clarity +
  geom_point(shape = ".", size = 1, alpha = 0.5)

# Shape 16 points at 50% opacity
plt_price_vs_carat_by_clarity +
  geom_point(shape = 16, alpha = 0.5)

# Base plot: mpg against fcyl, colored by fam
plt_mpg_vs_fcyl_by_fam <- ggplot(
  data = mtcars,
  mapping = aes(x = fcyl, y = mpg, color = fam)
)

# Default point positions, for comparison
plt_mpg_vs_fcyl_by_fam + geom_point()

# Default point positions, for comparison
plt_mpg_vs_fcyl_by_fam + geom_point()

# Jittered positions, jitter width 0.3
plt_mpg_vs_fcyl_by_fam +
  geom_point(position = position_jitter(width = 0.3))

# Default point positions, for comparison
plt_mpg_vs_fcyl_by_fam + geom_point()

# Jittered and dodged positions, both with width 0.3
plt_mpg_vs_fcyl_by_fam +
  geom_point(
    position = position_jitterdodge(dodge.width = 0.3, jitter.width = 0.3)
  )
You already saw how to deal with overplotting when using geom_point() in two cases:
# Three ways to jitter the iris sepal measurements at 50% opacity

# 1. The jitter geom, with width 0.1
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_jitter(alpha = 0.5, width = 0.1)

# 2. A point layer whose position is the string "jitter"
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(alpha = 0.5, position = "jitter")

# 3. A point layer with the position function form, width 0.1
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(alpha = 0.5, position = position_jitter(width = 0.1))

# Vocabulary against education as plain points
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_point()

# Jittered points
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter()

# Jittered points at 20% opacity
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2)

# Jittered points at 20% opacity, drawn with shape 1
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(shape = 1, alpha = 0.2)
Notice how jittering and alpha blending serve as a great solution to
the overplotting problem here. Setting the shape to 1 didn’t really
help, but it was useful in the previous exercises when you had less
data. You need to consider each plot individually. You’ll encounter this
dataset again when you look at bar plots.
Recall that histograms cut up a continuous variable into discrete bins and, by default, map the internally calculated count variable (the number of observations in each bin) onto the y aesthetic. An internal variable called density can be accessed by using the .. notation, i.e. `..density..`. Plotting this variable puts density on the y-axis, so that the height times the width of each bin gives its relative frequency.
# Histogram of mpg counts with bins one unit wide
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 1)

# Same bins, with the computed density on the y-axis instead of the count
ggplot(data = mtcars, mapping = aes(x = mpg, y = ..density..)) +
  geom_histogram(binwidth = 1)

# Density histogram with the bars filled in a fixed light blue
datacamp_light_blue <- "#51A8C9"
ggplot(data = mtcars, mapping = aes(x = mpg, y = ..density..)) +
  geom_histogram(fill = datacamp_light_blue, binwidth = 1)
Here, we’ll examine the various ways of applying positions to histograms. geom_histogram(), a special case of geom_bar(), has a position argument that can take on the following values:
# mpg histogram filled by fam, default position
ggplot(data = mtcars, mapping = aes(x = mpg, fill = fam)) +
  geom_histogram(binwidth = 1)

# position "dodge"
ggplot(data = mtcars, mapping = aes(x = mpg, fill = fam)) +
  geom_histogram(binwidth = 1, position = "dodge")

# position "fill"
ggplot(data = mtcars, mapping = aes(x = mpg, fill = fam)) +
  geom_histogram(position = "fill", binwidth = 1)
## Warning: Removed 16 rows containing missing values (geom_bar).

# position "identity", with alpha 0.4
ggplot(data = mtcars, mapping = aes(x = mpg, fill = fam)) +
  geom_histogram(alpha = 0.4, position = "identity", binwidth = 1)
Let’s see how the position argument changes geom_bar().
We have three position options:
While we will be using geom_bar() here, note that the function geom_col() is just geom_bar() where both the position and stat arguments are set to “identity”. It is used when we want the heights of the bars to represent the exact values in the data.
# Count of cars per fcyl level, filled by fam, default position
ggplot(data = mtcars, mapping = aes(x = fcyl, fill = fam)) +
  geom_bar()

# position "fill"
ggplot(data = mtcars, mapping = aes(x = fcyl, fill = fam)) +
  geom_bar(position = "fill")

# position "dodge"
ggplot(data = mtcars, mapping = aes(x = fcyl, fill = fam)) +
  geom_bar(position = "dodge")
You can customize bar plots further by adjusting the dodging so that your bars partially overlap each other. Instead of using position = “dodge”, you’re going to use position_dodge(), like you did with position_jitter() in the previous exercises. Here, you’ll save this as an object, posn_d, so that you can easily reuse it.
Remember, the reason you want to use position_dodge() (and position_jitter()) is to specify how much dodging (or jittering) you want.
# x uses the factor fcyl, like the other bar plots in this section, so the
# axis is discrete rather than a continuous cyl scale.

# Change position to use the functional form, with width 0.2
ggplot(mtcars, aes(fcyl, fill = fam)) +
  geom_bar(position = position_dodge(width = 0.2))

# Set the transparency to 0.6
ggplot(mtcars, aes(fcyl, fill = fam)) +
  geom_bar(position = position_dodge(width = 0.2), alpha = 0.6)
Bar plots: sequential color palette
In this bar plot, we’ll fill each segment according to an ordinal variable. The best way to do that is with a sequential color palette.
Here’s an example of using a sequential color palette with the mtcars dataset:
# Bar plot of fcyl filled by fam, using the brewer palette "Set1"
ggplot(data = mtcars, mapping = aes(x = fcyl, fill = fam)) +
  geom_bar() +
  scale_fill_brewer(palette = "Set1")

# Treat both Vocab variables as categorical from here on
Vocab[["education"]] <- as.factor(Vocab[["education"]])
Vocab[["vocabulary"]] <- as.factor(Vocab[["vocabulary"]])

# Vocabulary score within each education level, drawn with position "fill"
ggplot(data = Vocab, mapping = aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill")

# Same plot with the default brewer fill scale
ggplot(data = Vocab, mapping = aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill") +
  scale_fill_brewer()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Blues is 9
## Returning the palette you asked for with that many colors

# The nine "Blues" colors from RColorBrewer
blues <- brewer.pal(n = 9, name = "Blues")

# colorRampPalette() returns a function that interpolates between them
blue_range <- colorRampPalette(colors = blues)

# Same plot with a manual fill scale of 11 interpolated blues
ggplot(data = Vocab, mapping = aes(x = education, fill = vocabulary)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = blue_range(11))
# Print the head of economics
# (the first six rows, as shown in the output below)
head(economics)
## # A tibble: 6 × 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
# Line plot of unemploy over date
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Same plot with unemploy divided by pop on the y-axis
ggplot(data = economics, mapping = aes(x = date, y = unemploy / pop)) +
  geom_line()
We already saw how the form of your data affects how you can plot it. Let’s explore that further with multiple time series. Here, it’s important that all lines are on the same scale, and if possible, on the same plot.
# Single series from fish.species: the Rainbow column over Year
ggplot(data = fish.species, mapping = aes(x = Year, y = Rainbow)) +
  geom_line()

# Single series from fish.species: the Pink column over Year
ggplot(data = fish.species, mapping = aes(x = Year, y = Pink)) +
  geom_line()

# All series from fish.tidy: one line per Species via the group aesthetic
ggplot(data = fish.tidy, mapping = aes(x = Year, y = Capture)) +
  geom_line(mapping = aes(group = Species))

# All series from fish.tidy: one colored line per Species
ggplot(
  data = fish.tidy,
  mapping = aes(x = Year, y = Capture, color = Species)
) +
  geom_line()
In this chapter, we’ll explore how understanding the structure of your data makes data visualization much easier. Plus, it’s time to make our plots pretty. This is the last step in the data viz process. The Themes layer will enable you to make publication quality plots directly in R.
To change stylistic elements of a plot, call theme() and set plot properties to a new value. For example, the following changes the legend position. p + theme(legend.position = new_value)
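Here, the new value can be “none” (no legend), the name of a side of the plot such as “bottom”, or a two-element numeric vector such as c(0.6, 0.1) that positions the legend inside the plot.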
# economics with an added column holding unemploy / pop
prop_unemployed_over_time <- mutate(
  economics,
  prop_unemployed_over_time = unemploy / pop
)

# Line plot of that proportion over date, stored for the theme examples
plt_prop_unemployed_over_time <- ggplot(
  data = prop_unemployed_over_time,
  mapping = aes(x = date, y = prop_unemployed_over_time)
) +
  geom_line()

# legend.position = "none"
plt_prop_unemployed_over_time +
  theme(legend.position = "none")

# legend.position = "bottom"
plt_prop_unemployed_over_time +
  theme(legend.position = "bottom")

# legend.position given as the coordinates (0.6, 0.1)
plt_prop_unemployed_over_time +
  theme(legend.position = c(0.6, 0.1))
Many plot elements have multiple properties that can be set. For example, line elements in the plot such as axes and gridlines have a color, a thickness (size), and a line type (solid line, dashed, or dotted). To set the style of a line, you use element_line(). For example, to make the axis lines into red, dashed lines, you would use p + theme(axis.line = element_line(color = "red", linetype = "dashed")).
Similarly, element_rect() changes rectangles and element_text() changes text. You can remove a plot element using element_blank().
# Step 1: grey92 fill for all rectangles; no outline on the legend key
plt_prop_unemployed_over_time +
  theme(
    rect = element_rect(fill = "grey92"),
    legend.key = element_rect(color = NA)
  )

# Step 2: also blank out the axis ticks and the panel grid
plt_prop_unemployed_over_time +
  theme(
    rect = element_rect(fill = "grey92"),
    legend.key = element_rect(color = NA),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

# Step 3: bring back the major y grid lines as white, dotted, size 0.5 lines
plt_prop_unemployed_over_time +
  theme(
    rect = element_rect(fill = "grey92"),
    legend.key = element_rect(color = NA),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(
      color = "white",
      size = 0.5,
      linetype = "dotted"
    )
  )

# Step 4: grey25 axis text; italic plot title at size 16
plt_prop_unemployed_over_time +
  theme(
    rect = element_rect(fill = "grey92"),
    legend.key = element_rect(color = NA),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(
      color = "white",
      size = 0.5,
      linetype = "dotted"
    ),
    axis.text = element_text(color = "grey25"),
    plot.title = element_text(face = "italic", size = 16)
  )
Whitespace means all the non-visible margins and spacing in the plot.
To set a single whitespace value, use unit(x, unit), where x is the amount and unit is the unit of measure.
Borders require you to set 4 positions, so use margin(top, right, bottom, left, unit). To remember the margin order, think TRouBLe.
The default unit is “pt” (points), which scales well with text. Other options include “cm”, “in” (inches) and “lines” (of text).
# mpg against wt, points colored by factor(cyl), with custom axis titles
plt_mpg_vs_wt_by_cyl <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(color = factor(cyl))) +
  labs(x = "weight(1000/lbs)", y = "Miles Per Gallon")
plt_mpg_vs_wt_by_cyl

# Axis tick length of 2 lines
plt_mpg_vs_wt_by_cyl +
  theme(axis.ticks.length = unit(2, "lines"))

# Legend key size of 3 centimeters
plt_mpg_vs_wt_by_cyl +
  theme(legend.key.size = unit(3, "cm"))

# Legend margin of (20, 30, 40, 50) points
plt_mpg_vs_wt_by_cyl +
  theme(legend.margin = margin(20, 30, 40, 50, "pt"))

# Plot margin of (10, 30, 50, 70) millimeters
plt_mpg_vs_wt_by_cyl +
  theme(plot.margin = margin(10, 30, 50, 70, "mm"))
In addition to making your own themes, there are several out-of-the-box solutions that may save you lots of time.
library(ggthemes)
# Complete themes applied to the unemployment line plot

# Black and white theme
plt_prop_unemployed_over_time +
  theme_bw()

# Classic theme
plt_prop_unemployed_over_time +
  theme_classic()

# Void theme
plt_prop_unemployed_over_time +
  theme_void()

# fivethirtyeight theme
plt_prop_unemployed_over_time +
  theme_fivethirtyeight()

# Tufte's theme
plt_prop_unemployed_over_time +
  theme_tufte()

# Wall Street Journal theme
plt_prop_unemployed_over_time + theme_wsj()
Outside of ggplot2, another source of built-in themes is the ggthemes package. The workspace already contains the plt_prop_unemployed_over_time, the line plot from before. Let’s explore some of the ready-made ggthemes themes.
# The unemployment line plot under three ready-made themes

# fivethirtyeight theme
plt_prop_unemployed_over_time + theme_fivethirtyeight()

# Tufte's theme
plt_prop_unemployed_over_time + theme_tufte()

# Wall Street Journal theme
plt_prop_unemployed_over_time + theme_wsj()
Reusing a theme across many plots helps to provide a consistent style. You have several options for this.
# Theme layer saved as an object, theme_recession, so it can be reused.
# It is defined once; the identical second definition was redundant.
theme_recession <- theme(
  rect = element_rect(fill = "grey92"),
  legend.key = element_rect(color = NA),
  axis.ticks = element_blank(),
  panel.grid = element_blank(),
  panel.grid.major.y = element_line(
    color = "white",
    size = 0.5,
    linetype = "dotted"
  ),
  axis.text = element_text(color = "grey25"),
  plot.title = element_text(face = "italic", size = 16),
  legend.position = c(0.6, 0.1)
)

# Combine the Tufte theme with theme_recession
theme_tufte_recession <- theme_tufte() + theme_recession

# Option 1: add the combined theme to a plot explicitly
plt_prop_unemployed_over_time + theme_tufte_recession

# Option 2: set theme_tufte_recession as the default theme ...
theme_set(theme_tufte_recession)

# ... and draw the plot without explicitly adding a theme
plt_prop_unemployed_over_time
We’ve seen many examples of beautiful, publication-quality plots. Let’s take a final look and put all the pieces together.
# Step 1: Tufte's theme on its own
plt_prop_unemployed_over_time +
  theme_tufte()

# Step 2: also turn off the legend and the axis ticks
plt_prop_unemployed_over_time +
  theme_tufte() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank()
  )

# Step 3: also color the axis titles and axis text grey60
plt_prop_unemployed_over_time +
  theme_tufte() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title = element_text(color = "grey60"),
    axis.text = element_text(color = "grey60")
  )

# Step 4: also draw major y grid lines as grey60, dotted, size 0.25 lines
plt_prop_unemployed_over_time +
  theme_tufte() +
  theme(
    legend.position = "none",
    axis.ticks = element_blank(),
    axis.title = element_text(color = "grey60"),
    axis.text = element_text(color = "grey60"),
    panel.grid.major.y = element_line(
      color = "grey60",
      size = 0.25,
      linetype = "dotted"
    )
  )
Let’s focus on producing beautiful and effective explanatory plots. In the next couple of exercises, you’ll create a plot that is similar to the one shown in the video using gm2007, a filtered subset of the gapminder dataset.
This type of plot will be in an info-viz style, meaning that it would be similar to something you’d see in a magazine or website for a mostly lay audience.
# Points plus a segment from each point back to x = 30; x-axis drawn on top
ggplot(
  data = gm2007,
  mapping = aes(x = lifeExp, y = country, color = lifeExp)
) +
  geom_point(size = 4) +
  geom_segment(mapping = aes(xend = 30, yend = country), size = 2) +
  scale_x_continuous(position = "top")

# Same plot with the lifeExp value written in white on each point
ggplot(
  data = gm2007,
  mapping = aes(x = lifeExp, y = country, color = lifeExp)
) +
  geom_point(size = 4) +
  geom_segment(mapping = aes(xend = 30, yend = country), size = 2) +
  geom_text(mapping = aes(label = lifeExp), color = "white", size = 1.5) +
  scale_x_continuous(position = "top")

# Keep only the first and last of the five "RdYlBu" colors
palette <- brewer.pal(n = 5, name = "RdYlBu")[c(1, 5)]

# Stored version: rounded labels, untitled top x-axis fixed to 30-90 with no
# expansion, and a color gradient built from the palette
plt_country_vs_lifeExp <- ggplot(
  data = gm2007,
  mapping = aes(x = lifeExp, y = country, color = lifeExp)
) +
  geom_point(size = 4) +
  geom_segment(mapping = aes(xend = 30, yend = country), size = 2) +
  geom_text(
    mapping = aes(label = round(lifeExp, 1)),
    color = "white",
    size = 1.5
  ) +
  scale_x_continuous(
    "",
    limits = c(30, 90),
    expand = c(0, 0),
    position = "top"
  ) +
  scale_color_gradientn(colors = palette)
plt_country_vs_lifeExp

# Same plot drawn again with a title and a caption
ggplot(
  data = gm2007,
  mapping = aes(x = lifeExp, y = country, color = lifeExp)
) +
  geom_point(size = 4) +
  geom_segment(mapping = aes(xend = 30, yend = country), size = 2) +
  geom_text(
    mapping = aes(label = round(lifeExp, 1)),
    color = "white",
    size = 1.5
  ) +
  scale_x_continuous(
    "",
    limits = c(30, 90),
    expand = c(0, 0),
    position = "top"
  ) +
  scale_color_gradientn(colors = palette) +
  labs(
    title = "Highest and lowest life expectancies, 2007",
    caption = "Source: gapminder"
  )
In the previous exercise, we completed our basic plot. Now let’s polish it by playing with the theme and adding annotations. In this exercise, you’ll use annotate() to add text and a curve to the plot.
# Mean of lifeExp over the rows of gm2007
global_mean <- mean(gm2007[["lifeExp"]])

# Start and end coordinates shared by the text label and the curve
x_start <- global_mean + 4
x_end <- global_mean
y_start <- 5.5
y_end <- 7.5

# Classic theme without the y-axis line, y ticks, axis titles or legend,
# and with black axis text
step_1_themes <- theme_classic() +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_text(color = "black"),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )

# Three-line grey text label placed at the start coordinates
step_3_annotation <- annotate(
  "text",
  x = x_start,
  y = y_start,
  label = "The\nglobal\naverage",
  color = "grey40",
  size = 3,
  vjust = 1
)
# plt_country_vs_lifeExp already carries its x scale (top position, limits
# 30-90, no expansion). Adding another scale_x_continuous() here would replace
# that scale and drop the limits, so none is added. The theme settings all
# come from step_1_themes, so they are not repeated either.

# Define the theme
plt_country_vs_lifeExp +
  step_1_themes

# Add a vertical line
plt_country_vs_lifeExp +
  step_1_themes +
  geom_vline(xintercept = global_mean, color = "grey40", linetype = 3)

# Add text
plt_country_vs_lifeExp +
  step_1_themes +
  geom_vline(xintercept = global_mean, color = "grey40", linetype = 3) +
  annotate(
    "text",
    x = x_start, y = y_start,
    label = "The\nglobal\naverage",
    vjust = 1, size = 3, color = "grey40"
  )

# Add a curve
plt_country_vs_lifeExp +
  step_1_themes +
  geom_vline(xintercept = global_mean, color = "grey40", linetype = 3) +
  step_3_annotation +
  annotate(
    "curve",
    x = x_start, y = y_start,
    xend = x_end, yend = y_end,
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    color = "grey40"
  )
- My Favorite Team -