# install only the packages that are not already available, so re-running the
# script does not re-download and reinstall everything each time
# tidyverse is listed because it is loaded with library(tidyverse) below
required.packages <- c("tidyverse", "ggplot2", "ggridges", "fishualize",
                       "patchwork")
missing.packages <- setdiff(required.packages, rownames(installed.packages()))
if (length(missing.packages) > 0) {
  # use an https CRAN mirror rather than plain http
  install.packages(missing.packages, repos = "https://cloud.r-project.org")
}
# load the tidyverse package in your current R session
library(tidyverse)
# load the ggplot2 package in your current R session
library(ggplot2)
# load the ggridges package in your current R session
library(ggridges)
# load the fishualize package in your current R session
library(fishualize)
# load the patchwork package in your current R session
library(patchwork)
Read in datasets to use with ggplot exploration
# import the two CSV files from the data/ folder:
# herbivore.traits (coral reef herbivore traits) and reef.fish (reef fishes)
herbivore.traits <- read.csv("data/coralreefherbivores.csv")
reef.fish <- read.csv("data/reef_fishes.csv")
Give ggplot tidy data and specify aesthetics
# preview the first six rows of herbivore.traits
head(herbivore.traits, n = 6)
## family genus species gen.spe sl
## 1 Acanthuridae Acanthurus achilles Acanthurus.achilles 163.6667
## 2 Acanthuridae Acanthurus albipectoralis Acanthurus.albipectoralis 212.7300
## 3 Acanthuridae Acanthurus auranticavus Acanthurus.auranticavus 216.0000
## 4 Acanthuridae Acanthurus blochii Acanthurus.blochii 82.9000
## 5 Acanthuridae Acanthurus dussumieri Acanthurus.dussumieri 193.7033
## 6 Acanthuridae Acanthurus fowleri Acanthurus.fowleri 266.0000
## bodydepth snoutlength eyediameter size schooling
## 1 0.5543625 0.4877797 0.3507191 S Solitary
## 2 0.4405350 0.4402623 0.2560593 M SmallGroups
## 3 0.4726556 0.5386490 0.2451253 M MediumGroups
## 4 0.5586486 0.4782217 0.3196155 M SmallGroups
## 5 0.5457248 0.5661867 0.2807218 L Solitary
## 6 0.4669521 0.5950563 0.2217376 M Solitary
# a ggplot starts from a dataset plus an aesthetic mapping for the axes
# dataset: herbivore.traits
# mapping: bodydepth on the x-axis, eyediameter on the y-axis
# no geom has been added yet
p1 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
)
p1
Add points on the graph
# adding a geom_point layer turns the axes into a scatterplot
p1 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
) +
  geom_point()
p1
Make basic changes
# constant (non-mapped) point settings are passed straight to geom_point:
#   size  - how large each point is drawn
#   shape - which symbol is used for each point
#   color - the color of each point
p2 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter)
) +
  geom_point(color = "blue", shape = 18, size = 5)
p2
Display groups with colors, shapes, or sizes
# groups are displayed by mapping a variable onto the geom inside aes()
# here point color is mapped to family
# the constant color used in p2 is dropped, because it would override the
# family-based color mapping
p3 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = family)
) +
  geom_point(shape = 18, size = 5)
p3
Multiple group aesthetics
# two grouping aesthetics at once: color by family, shape by size class
# the constant shape used in p3 is dropped, because it would override the
# size-based shape mapping
p4 <- ggplot(
  herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size
  )
) +
  geom_point(size = 2)
p4
Continuous group aesthetics
# continuous variables can be mapped to color or size (but not to shape)
# here point size is additionally mapped to snout length
# the result is very busy: packing more information into a single plot
# is not always an improvement
p5 <- ggplot(
  herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size,
    size = snoutlength
  )
) +
  geom_point()
p5
Create multiple plots with facet
# facet_wrap(. ~ family) draws one panel per family
# facet_grid is the alternative when panels should be stacked explicitly as
# rows or columns
# this is much easier to read than the overloaded p5
p6 <- ggplot(
  herbivore.traits,
  mapping = aes(
    x = bodydepth,
    y = eyediameter,
    color = family,
    shape = size
  )
) +
  geom_point() +
  facet_wrap(. ~ family)
p6
The faceted plot p6 suggests that there is a difference in eye-diameter among three families: Acanthuridae, Labridae, and Siganidae.
Summarizing data
# summarize eye diameter across families with geom_point
# the x-axis now carries a factor (family) rather than a continuous variable
# drawbacks: Kyphosidae contributes a single, uninformative point, and the
# points within each family overlap
p7 <- ggplot(
  herbivore.traits,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_point()
p7
De-clutter points
# keep every row whose family is not Kyphosidae
herbivore.traits.filtered <- filter(herbivore.traits, family != "Kyphosidae")
# geom_jitter spreads the points apart; width = 0.1 allows a random
# horizontal offset of up to 0.1 units
p8 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_jitter(width = 0.1)
p8
Summary data with geom_boxplot
# geom_boxplot summarizes each family's distribution: median, quartiles,
# range of the data, and outliers drawn as dots
p9 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, color = family)
) +
  geom_boxplot()
p9
Fill the boxes with color
# mapping fill (rather than color) fills the boxes with the family color
# notch = TRUE draws a notch at the median of each box
p10 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_boxplot(notch = TRUE)
p10
Provide more holistic view of data with geom_violin
# geom_violin shows the shape of each family's distribution
# draw_quantiles adds lines at the 5%, 50%, and 95% quantiles
p11 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95))
p11
Further investigate distribution with a sina plot
# overlay the raw observations, jittered horizontally, on top of the violins
p12 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(width = 0.1)
p12
Use package ggridges to create density curves
# ridgeline plot: the measured variable goes on the x-axis and the
# categories (family) on the y-axis
# geom_density_ridges draws one density curve per family;
# alpha = 0.50 makes the curves semi-transparent
# the raw points are added with a vertical jitter of 0.1
p13 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = eyediameter, y = family, fill = family)
) +
  geom_density_ridges(alpha = 0.50) +
  geom_jitter(height = 0.1)
p13
## Picking joint bandwidth of 0.0174
Other ways to plot data distributions include histograms, halfeyes, or kernel densities
Use barplots to visualize means and uncertainty
# use tidyverse for data wrangling
# summarize: per-family mean (mean.eye), standard deviation (sd.eye), and
#   sample size (n) of eyediameter
# mutate: standard error of the mean (se.eye) and the bounds of the 95%
#   confidence interval (lower.ci, upper.ci) based on the t distribution
#   with n - 1 degrees of freedom
# the standard error is sd / sqrt(n); the earlier sqrt(sd) / n formula
# understated it and produced confidence intervals that were too narrow
# NOTE: any table or plot rendered with the earlier formula must be
# regenerated, since se.eye, lower.ci, and upper.ci all change
herbivore.trait.means <- herbivore.traits.filtered %>%
  group_by(family) %>%
  summarize(mean.eye = mean(eyediameter),
            sd.eye = sd(eyediameter),
            n = n()) %>%
  mutate(se.eye = sd.eye / sqrt(n),
         lower.ci = mean.eye - qt(1 - (0.05 / 2), df = n - 1) * se.eye,
         upper.ci = mean.eye + qt(1 - (0.05 / 2), df = n - 1) * se.eye)
herbivore.trait.means
## # A tibble: 3 × 7
## family mean.eye sd.eye n se.eye lower.ci upper.ci
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Acanthuridae 0.292 0.0437 45 0.00465 0.283 0.302
## 2 Labridae 0.195 0.0298 33 0.00523 0.185 0.206
## 3 Siganidae 0.329 0.0536 17 0.0136 0.300 0.358
# bar plot of the per-family means; bar heights are taken directly from
# the mean.eye column
p14 <- ggplot(
  herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, fill = family)
) +
  geom_col()
p14
# same bar plot, with geom_errorbar spanning lower.ci to upper.ci
# (the 95% confidence interval) on each bar
p15 <- ggplot(
  herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, fill = family)
) +
  geom_col() +
  geom_errorbar(mapping = aes(ymin = lower.ci, ymax = upper.ci))
p15
Use caterpillar plots to visualize means and uncertainty
# caterpillar plot: geom_pointrange draws the mean as a point and the
# interval from lower.ci to upper.ci as a line
p16 <- ggplot(
  herbivore.trait.means,
  mapping = aes(x = family, y = mean.eye, color = family)
) +
  geom_pointrange(mapping = aes(ymin = lower.ci, ymax = upper.ci))
p16
Use package patchwork to compare data visualizations
# patchwork combines plots by adding them together
# plot_layout(ncol = x) sets the number of columns in the combined figure
all.ps <- p8 + p9 +
  p12 + p13 +
  p14 + p16 +
  plot_layout(ncol = 2)
all.ps
## Picking joint bandwidth of 0.0174
Specify colors by name
# scale_fill_manual applies the fill colors supplied in values
# the jittered points are drawn in black with alpha (opacity) 0.5
p17 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(alpha = 0.5, color = "black", width = 0.1) +
  scale_fill_manual(
    values = c("steelblue", "yellowgreen", "darkorchid")
  )
p17
Specify point shape
# shape 23 is a point shape that can take a fill color
# violins are made semi-transparent with alpha = 0.5
# points are drawn more opaque, with alpha = 0.8
# color = "black" gives the points a black outline
# width = 0.1 is the horizontal jitter applied to the points
p18 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(alpha = 0.5, draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(alpha = 0.8, color = "black", shape = 23, width = 0.1) +
  scale_fill_manual(
    values = c("steelblue", "yellowgreen", "darkorchid")
  )
p18
Thousands of R color palettes are available
The R package fishualize provides discrete and continuous color palettes based on fish colors: https://nschiett.github.io/fishualize/articles/overview_colors.html
Discrete color palette
# scale_fill_fish_d applies a discrete fishualize palette to the fill
# the option argument names the fish species the palette is based on
p19 <- ggplot(
  herbivore.traits.filtered,
  mapping = aes(x = family, y = eyediameter, fill = family)
) +
  geom_violin(draw_quantiles = c(0.05, 0.5, 0.95)) +
  geom_jitter(alpha = 0.5, color = "black", width = 0.1) +
  scale_fill_fish_d(option = "Centropyge_loricula")
p19
Continuous color palette
# scale_color_fish allows you to add a continuous fishualize palette
# as with the discrete version, option names the fish species to use
p20 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = snoutlength)
) +
  geom_point() +
  scale_color_fish(option = "Centropyge_loricula")
p20
Use theme wrapper to modify the color of the background
Some options that you can try
# theme_classic removes the gray background color and the gridlines
p21 <- ggplot(
  herbivore.traits,
  mapping = aes(x = bodydepth, y = eyediameter, color = snoutlength)
) +
  geom_point() +
  scale_color_fish(option = "Centropyge_loricula") +
  theme_classic()
p21
The reef.fish dataset includes a list of all described reef fish species, along with their year of description, some information about their depth range and size, and whether they’re considered a cryptobenthic or mobile reef fish.
Explore the dataset
# preview the first six rows of reef.fish
head(reef.fish, n = 6)
## family genspe genus species year
## 1 Acanthuridae Acanthurus.tristis Acanthurus tristis 1993
## 2 Acanthuridae Acanthurus.blochii Acanthurus blochii 1835
## 3 Acanthuridae Acanthurus.xanthopterus Acanthurus xanthopterus 1835
## 4 Acanthuridae Acanthurus.chirurgus Acanthurus chirurgus 1787
## 5 Acanthuridae Ctenochaetus.truncatus Ctenochaetus truncatus 2001
## 6 Acanthuridae Acanthurus.dussumieri Acanthurus dussumieri 1835
## habitat depth length category
## 1 reef-associated 30 25 No_crypto
## 2 reef-associated 15 45 No_crypto
## 3 reef-associated 100 70 No_crypto
## 4 reef-associated 25 39 No_crypto
## 5 reef-associated 21 16 No_crypto
## 6 reef-associated 131 54 No_crypto
# scatterplot of species length (x) against year of description (y)
# rows with NA values are dropped by ggplot, which triggers a warning
p22 <- ggplot(
  data = reef.fish,
  mapping = aes(x = length, y = year)
) +
  geom_point()
p22
## Warning: Removed 561 rows containing missing values (`geom_point()`).
Non-normal axis scale
# drop the rows where length is NA
reef.fish2 <- drop_na(reef.fish, length)
# fish length is not normally distributed; a log transformation addresses
# this, and scale_x_log10 puts the x-axis on a log10 scale
p23 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year)
) +
  geom_point() +
  scale_x_log10()
p23
Axis breaks and limits
# scale_y_continuous sets the y-axis limits (1755 to 2015) and places a
# break every 20 years
# annotation_logticks draws tick marks for the log10 scale;
# sides = "b" restricts them to the bottom axis
p24 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b")
p24
Label axes
# replace the default axis titles: xlab for the x-axis, ylab for the y-axis
p25 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description")
p25
Add legend
# color the points by category (cryptobenthic or mobile), which also adds
# a legend
# scale_color_fish_d supplies a discrete fishualize palette for the colors
p26 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year, color = category)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description") +
  scale_color_fish_d(option = "Trimma_lantana")
p26
Relabel legend
# give the legend a more informative title and entries
# the legend belongs to the color aesthetic, so the title (name) and the
# entry labels are set inside the color scale function
# NOTE(review): labels are applied by position, so this assumes the
# cryptobenthic level of category sorts before the mobile one - confirm
p27 <- ggplot(
  data = reef.fish2,
  mapping = aes(x = length, y = year, color = category)
) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  scale_y_continuous(
    limits = c(1755, 2015),
    breaks = seq(from = 1755, to = 2015, by = 20)
  ) +
  annotation_logticks(sides = "b") +
  xlab("Body length (cm)") +
  ylab("Year of description") +
  scale_color_fish_d(
    option = "Trimma_lantana",
    name = "Fish category",
    labels = c("Cryptobenthic", "Mobile")
  )
p27