1 Packages and data

# packages used in this tutorial
# tidyverse is listed as well because it is loaded with library(tidyverse) below
tutorial.packages <- c("tidyverse", "ggplot2", "ggridges", "fishualize",
                       "patchwork")

# find out which of these packages are already installed
# system.file() returns an empty string for a package that is not installed
already.installed <- vapply(
  tutorial.packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)

# install only the missing packages by running install.packages()
# this avoids re-downloading everything each time the script is run
# the https CRAN mirror is used so the download is encrypted
to.install <- tutorial.packages[!already.installed]
if (length(to.install) > 0) {
  install.packages(to.install, repos = "https://cloud.r-project.org")
}
# attach each package to the current R session, in this order:
#   tidyverse  - data wrangling (pipe, filter, summarize, drop_na)
#   ggplot2    - plotting
#   ggridges   - ridgeline density curves
#   fishualize - fish-based color palettes
#   patchwork  - combining several plots into one figure
for (pkg in c("tidyverse", "ggplot2", "ggridges", "fishualize", "patchwork")) {
  # character.only = TRUE tells library() that pkg holds the package name
  library(pkg, character.only = TRUE)
}

Read in datasets to use with ggplot exploration

# trait measurements for coral reef herbivores
herbivore.traits <- read.csv("data/coralreefherbivores.csv")
# list of described reef fish species
reef.fish <- read.csv("data/reef_fishes.csv")

2 Basics with ggplot

2.1 Scatterplot

Give ggplot tidy data and specify aesthetics

# preview the first six rows of the herbivore.traits dataset
head(herbivore.traits, n = 6)
##         family      genus        species                   gen.spe       sl
## 1 Acanthuridae Acanthurus       achilles       Acanthurus.achilles 163.6667
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 212.7300
## 3 Acanthuridae Acanthurus   auranticavus   Acanthurus.auranticavus 216.0000
## 4 Acanthuridae Acanthurus        blochii        Acanthurus.blochii  82.9000
## 5 Acanthuridae Acanthurus     dussumieri     Acanthurus.dussumieri 193.7033
## 6 Acanthuridae Acanthurus        fowleri        Acanthurus.fowleri 266.0000
##   bodydepth snoutlength eyediameter size    schooling
## 1 0.5543625   0.4877797   0.3507191    S     Solitary
## 2 0.4405350   0.4402623   0.2560593    M  SmallGroups
## 3 0.4726556   0.5386490   0.2451253    M MediumGroups
## 4 0.5586486   0.4782217   0.3196155    M  SmallGroups
## 5 0.5457248   0.5661867   0.2807218    L     Solitary
## 6 0.4669521   0.5950563   0.2217376    M     Solitary
# ggplot() needs two things to get started: a dataset and axis aesthetics
#   dataset: herbivore.traits
#   x-axis:  bodydepth
#   y-axis:  eyediameter
p1 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
)
p1

Add points on the graph

# add a geom_point layer to turn the empty canvas into a scatterplot
p1 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
) +
  geom_point()
p1

Make basic changes

# fixed appearance settings are passed directly to the geom, outside of aes()
#   color - the color of the point
#   shape - the shape (plotting symbol) of the point
#   size  - the size of the point
p2 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
) +
  geom_point(color = "blue", shape = 18, size = 5)
p2

2.2 Map groups onto geoms

Display groups with colors, shapes, or sizes

# groups are mapped onto geoms inside aes()
# here the point color is assigned by family
# the fixed color argument used in p2 is dropped from geom_point so that it
# does not override the family colors
p3 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = family)
) +
  geom_point(shape = 18, size = 5)
p3

Multiple group aesthetics

# two group aesthetics at once: color by family, shape by the size column
# the fixed shape argument used in p3 is dropped from geom_point so that it
# does not override the shapes assigned by size
p4 <- ggplot(
  data = herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size
  )
) +
  geom_point(size = 2)
p4

Continuous group aesthetics

# a continuous variable can be mapped to color or size (but not to shape)
# here the point size is assigned by snout length
# the result is a very busy plot:
# packing more information into one plot is not always better
p5 <- ggplot(
  data = herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size,
    size = snoutlength
  )
) +
  geom_point()
p5

2.3 Facets

Create multiple plots with facet

# facet_wrap with a formula on family draws one panel (facet) per family
# alternative: facet_grid lets you choose whether panels stack as rows or columns
# this is a much clearer visualization than p5
p6 <- ggplot(
  data = herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size
  )
) +
  geom_point() +
  facet_wrap(~ family)
p6


3 Plot distributions

3.1 Boxplots

The faceted plot p6 suggests that there is a difference in eye-diameter among three families: Acanthuridae, Labridae, and Siganidae.



Summarizing data

# summarize eye diameter across families with geom_point
# the x-axis now holds a factor (family) rather than a continuous variable
# drawbacks: the single Kyphosidae point is not helpful, and points overlap
p7 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_point()
p7

De-clutter points

# keep every family except Kyphosidae
herbivore.traits.filtered <- filter(herbivore.traits, family != "Kyphosidae")

# separate overlapping points with geom_jitter
# width = 0.1 adds a random horizontal offset of up to 0.1 units
# height = 0 switches off the default vertical jitter, so every point is
# drawn at its measured eyediameter value
p8 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_jitter(width = 0.1, height = 0)
p8

Summary data with geom_boxplot

# geom_boxplot summarizes the distribution within each family
# each box shows the median, quartiles, and range of the data;
# outliers are drawn as dots
p9 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_boxplot()
p9

Fill the boxes with color

# map family to fill (instead of color) so the boxes are filled with color
# notch = TRUE draws a notch at the median of each box
p10 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_boxplot(notch = TRUE)
p10

3.2 Violin plots

Provide more holistic view of data with geom_violin

# geom_violin draws the shape of the distribution for each family
# draw_quantiles adds horizontal lines at the 5%, 50%, and 95% quantiles
p11 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95))
p11

Further investigate distribution by overlaying the raw data points (a sina-style plot)

# overlay the raw data points on the violins
# width = 0.1 spreads the points horizontally so they do not overlap
# height = 0 switches off the default vertical jitter, so every point is
# drawn at its measured eyediameter value
p12 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(width = 0.1, height = 0)
p12

3.3 Ridgeline density curves

Use package ggridges to create density curves

# ridgeline plots put the distribution on the x-axis and categories on the y-axis
# geom_density_ridges draws one density curve per family
# alpha = 0.5 makes the curves semi-transparent
# geom_jitter adds the raw data points along each baseline:
#   height = 0.1 spreads the points vertically so they do not overlap
#   width = 0 switches off the default horizontal jitter, so every point is
#   drawn at its measured eyediameter value
p13 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = eyediameter, y = family, fill = family)
) +
  geom_density_ridges(alpha = 0.50) +
  geom_jitter(width = 0, height = 0.1)
p13
## Picking joint bandwidth of 0.0174

Other ways to plot data distributions include histograms, halfeyes, or kernel densities


4 Plot means and uncertainty

4.1 Barplots

Use barplots to visualize means and uncertainty

# use tidyverse for data wrangling
# summarize to compute, for each family:
#   mean.eye - mean eye diameter
#   sd.eye   - standard deviation of eye diameter
#   n        - sample size
# mutate to calculate:
#   se.eye             - standard error of the mean = sd / sqrt(n)
#   lower.ci, upper.ci - 95% confidence interval = mean -/+ t * se,
#                        where t is the 97.5% quantile of the t-distribution
#                        with n - 1 degrees of freedom
# NOTE: an earlier version computed se.eye as sqrt(sd.eye) / n, which is not
# the standard error; re-run this chunk to refresh any saved output that
# shows se.eye, lower.ci, or upper.ci
herbivore.trait.means <- herbivore.traits.filtered %>%
  group_by(family) %>%
  summarize(mean.eye = mean(eyediameter),
            sd.eye = sd(eyediameter),
            n = n()) %>%
  mutate(se.eye = sd.eye / sqrt(n),
         lower.ci = mean.eye - qt(1 - (0.05 / 2), n - 1) * se.eye,
         upper.ci = mean.eye + qt(1 - (0.05 / 2), n - 1) * se.eye)
herbivore.trait.means
## # A tibble: 3 × 7
##   family       mean.eye sd.eye     n  se.eye lower.ci upper.ci
##   <chr>           <dbl>  <dbl> <int>   <dbl>    <dbl>    <dbl>
## 1 Acanthuridae    0.292 0.0437    45 0.00465    0.283    0.302
## 2 Labridae        0.195 0.0298    33 0.00523    0.185    0.206
## 3 Siganidae       0.329 0.0536    17 0.0136     0.300    0.358
# draw one bar per family, with bar height equal to the mean eye diameter
# stat = "identity" tells geom_bar to use the y values as given
p14 <- ggplot(
  data = herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, fill = family)
) +
  geom_bar(stat = "identity")
p14

# geom_errorbar adds the 95% confidence intervals on top of the bars
# ymin and ymax are the lower and upper limits of each interval
p15 <- ggplot(
  data = herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, fill = family)
) +
  geom_bar(stat = "identity") +
  geom_errorbar(mapping = aes(ymin = lower.ci, ymax = upper.ci))
p15

4.2 Caterpillar plots

Use caterpillar plots to visualize means and uncertainty

# geom_pointrange creates a caterpillar plot:
# a point at the mean with a line spanning the confidence interval
p16 <- ggplot(
  data = herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, color = family)
) +
  geom_pointrange(mapping = aes(ymin = lower.ci, ymax = upper.ci))
p16


5 Stitch plots together

Use package patchwork to compare data visualizations

# patchwork combines plots by adding them together with +
# plot_layout(ncol = x) sets the number of columns in the combined figure
all.ps <- p8 + p9 +
  p12 + p13 +
  p14 + p16 +
  plot_layout(ncol = 2)
all.ps
## Picking joint bandwidth of 0.0174


6 Colors and shapes

6.1 Changing colors and shapes

Specify colors by name

# scale_fill_manual lets you pick your own fill colors through values
# the points are drawn in black with alpha (opacity) set to 0.5
# width = 0.1 spreads the points horizontally; height = 0 switches off the
# default vertical jitter, so every point keeps its measured eyediameter value
p17 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(width = 0.1, height = 0, color = "black", alpha = 0.5) +
  scale_fill_manual(values = c("steelblue", "yellowgreen", "darkorchid"))
p17

Specify point shape

# choose a point shape that can be filled with color (shape = 23)
# set alpha (opacity) of the violins to 0.5
# raise alpha of the points to 0.8 so they stand out
# color = "black" gives each point a black outline
# width = 0.1 is the horizontal jitter (not the outline width)
# height = 0 switches off the default vertical jitter, so every point keeps
# its measured eyediameter value
p18 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95), alpha = 0.5) +
  geom_jitter(
    shape = 23,
    width = 0.1,
    height = 0,
    color = "black",
    alpha = 0.8
  ) +
  scale_fill_manual(values = c("steelblue", "yellowgreen", "darkorchid"))
p18

6.2 Color palettes

Thousands of R color palettes are available

The R package fishualize provides discrete and continuous color palettes based on fish colors: https://nschiett.github.io/fishualize/articles/overview_colors.html



Discrete color palette

# scale_fill_fish_d adds a discrete fill palette from fishualize
# option names the fish species whose colors are used
# width = 0.1 spreads the points horizontally; height = 0 switches off the
# default vertical jitter, so every point keeps its measured eyediameter value
p19 <- ggplot(
  data = herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(width = 0.1, height = 0, color = "black", alpha = 0.5) +
  scale_fill_fish_d(option = "Centropyge_loricula")
p19

Continuous color palette

# scale_color_fish adds a continuous color palette from fishualize
# as before, option names the fish species whose colors are used
# here the point color follows snoutlength
p20 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = snoutlength)
) +
  geom_point() +
  scale_color_fish(option = "Centropyge_loricula")
p20

6.3 Change color style

Use theme wrapper to modify the color of the background

Some options that you can try

  • theme_bw() - dark on light background, good for presentations
  • theme_classic() - includes x and y axis lines, but no gridlines
  • theme_minimal() - no background annotations
# theme_classic removes the gray background color and the gridlines
p21 <- ggplot(
  data = herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = snoutlength)
) +
  geom_point() +
  scale_color_fish(option = "Centropyge_loricula") +
  theme_classic()
p21


7 Scaling and labeling

7.1 Axis scales

The reef.fish dataset includes a list of all described reef fish species, along with their year of description, some information about their depth range and size, and whether they’re considered a cryptobenthic or mobile reef fish.



Explore the dataset

# preview the first six rows of the reef.fish dataset
head(reef.fish, n = 6)
##         family                  genspe        genus      species year
## 1 Acanthuridae      Acanthurus.tristis   Acanthurus      tristis 1993
## 2 Acanthuridae      Acanthurus.blochii   Acanthurus      blochii 1835
## 3 Acanthuridae Acanthurus.xanthopterus   Acanthurus xanthopterus 1835
## 4 Acanthuridae    Acanthurus.chirurgus   Acanthurus    chirurgus 1787
## 5 Acanthuridae  Ctenochaetus.truncatus Ctenochaetus    truncatus 2001
## 6 Acanthuridae   Acanthurus.dussumieri   Acanthurus   dussumieri 1835
##           habitat depth length  category
## 1 reef-associated    30     25 No_crypto
## 2 reef-associated    15     45 No_crypto
## 3 reef-associated   100     70 No_crypto
## 4 reef-associated    25     39 No_crypto
## 5 reef-associated    21     16 No_crypto
## 6 reef-associated   131     54 No_crypto
# scatterplot of species size (length) against year of description
# rows with NA values in the dataset are dropped by ggplot with a warning
p22 <- ggplot(data = reef.fish, mapping = aes(x = length, y = year)) +
  geom_point()
p22
## Warning: Removed 561 rows containing missing values (`geom_point()`).

Non-normal axis scale

# drop the rows that have no length recorded
reef.fish2 <- drop_na(reef.fish, length)

# fish length is not normally distributed; a log transformation fixes this
# scale_x_log10 puts the x-axis on a log10 scale
p23 <- ggplot(data = reef.fish2, mapping = aes(x = length, y = year)) +
  geom_point() +
  scale_x_log10()
p23

Axis breaks and limits

# scale_y_continuous sets the y-axis limits (1755 to 2015)
# and places a break every 20 years
# annotation_logticks adds tickmarks that follow the log10 scale
# sides = "b" restricts those tickmarks to the bottom axis
p24 <- ggplot(data = reef.fish2, mapping = aes(x = length, y = year)) +
  geom_point() +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b")
p24

7.2 Labeling

Label axes

# give the axes readable labels:
# xlab sets the x-axis label and ylab sets the y-axis label
p25 <- ggplot(data = reef.fish2, mapping = aes(x = length, y = year)) +
  geom_point() +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description")
p25

Add legend

# color the points by category (cryptobenthic or mobile)
# mapping a variable to color adds a legend automatically
# scale_color_fish_d supplies discrete colors from fishualize
p26 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year, color = category)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description") +
  scale_color_fish_d(option = "Trimma_lantana")
p26

Relabel legend

# make the legend more informative
# the legend belongs to the color scale, so its title (name) and its
# category labels are set inside the color scale function
# NOTE(review): labels are matched to categories by position; this assumes
# the cryptobenthic category sorts before "No_crypto" - confirm against the data
p27 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year, color = category)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description") +
  scale_color_fish_d(
    option = "Trimma_lantana",
    name = "Fish category",
    labels = c("Cryptobenthic", "Mobile")
  )
p27