Plot Improvements - demo

From Ewen Harrison - Riinu Pius - The University of Edinburgh

{https://media.ed.ac.uk/media/HealthyR+demoA+Plot+Improvements/1_obw1zgc0}

Day 06 of HealthyR demo

Plot Improvements

Take A Sad Plot and Make it Better

Load libraries

library(tidyverse)
library(scales)
library(palmerpenguins)
library(ggbeeswarm)
library(colorspace)

Dataset

Meet the penguins

The ‘palmerpenguins’ dataset contains size measurements for three species of penguins observed on three islands in the Palmer Archipelago, Antarctica.

These data were collected between 2007 and 2009 by Dr. Kristen Gorman with the Palmer Station Long-Term Ecological Research Program, part of the US Long-Term Ecological Research Network. The data were imported directly from the Environmental Data Initiative (EDI) Data Portal and are available for use under a CC0 (“No Rights Reserved”) license in accordance with the Palmer Station Data Policy.


Conoce a los pingüinos

Los palmerpenguins contienen medidas de tamaño de tres especies de pingüinos observadas en tres islas del archipiélago Palmer, en la Antártida.

Estos datos fueron recolectados entre 2007 y 2009 por la Dra. Kristen Gorman con el Programa de Investigación Ecológica a Largo Plazo de la Estación Palmer, parte de la Red de Investigación Ecológica a Largo Plazo de EE. UU. Los datos se importaron directamente del Portal de datos de la Iniciativa de datos ambientales (EDI) y están disponibles para su uso con licencia CC0 (“Sin derechos reservados”) de acuerdo con la Política de datos de Palmer Station.

# Summarise the type and missingness of every variable before filtering.
finalfit::missing_glimpse(penguins)
##                               label var_type   n missing_n missing_percent
## species                     species    <fct> 344         0             0.0
## island                       island    <fct> 344         0             0.0
## bill_length_mm       bill_length_mm    <dbl> 342         2             0.6
## bill_depth_mm         bill_depth_mm    <dbl> 342         2             0.6
## flipper_length_mm flipper_length_mm    <int> 342         2             0.6
## body_mass_g             body_mass_g    <int> 342         2             0.6
## sex                             sex    <fct> 333        11             3.2
## year                           year    <int> 344         0             0.0
# Keep only the rows where species, sex and body mass are all recorded.
penguins <- penguins %>%
  drop_na(species, sex, body_mass_g)

Sad Plot

# Baseline plot: body mass per species, one facet column per island.
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g)) +
  geom_boxplot() +
  facet_grid(cols = vars(island))

Sad Plot + raw data

# Baseline plot with the raw observations overlaid. The box plot's own outlier
# markers are hidden because every observation is already drawn by the jitter
# layer.
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2) +
  facet_grid(cols = vars(island))

# Check out geom_boxplot2
## https://stackoverflow.com/questions/5677885/ignore-outliers-in-ggplot2-boxplot

Sad Plot + raw data + remove grey background

# Same plot as before, with theme_bw() applied to drop the grey panel
# background.
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2) +
  facet_grid(cols = vars(island)) +
  theme_bw()

Sad Plot + raw data + remove grey background + axis labels

library(ggtext)
# Adds readable axis titles. body_mass_g is recorded in grams, so the unit is
# plain "g". The y-axis title is drawn with element_markdown(), so markdown or
# HTML markup (e.g. <sup>...</sup>) can be used in the label when needed.
penguins %>% 
  ggplot(aes(x = species, y = body_mass_g)) + 
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2) +
  labs(x = "Species", y = "Body mass (g)") +
  facet_grid(. ~ island) +
  theme_bw() +
  theme(axis.title.y = element_markdown())

Sad Plot + raw data + remove grey background + axis labels + remove empty factor levels

forcats::fct_drop
## function (f, only = NULL) 
## {
##     f <- check_factor(f)
##     check_character(only, allow_null = TRUE)
##     levels <- levels(f)
##     count <- table(f)
##     to_drop <- levels[count == 0]
##     if (!is.null(only)) {
##         to_drop <- intersect(to_drop, only)
##     }
##     refactor(f, new_levels = setdiff(levels, to_drop))
## }
## <bytecode: 0x000001d938639f00>
## <environment: namespace:forcats>
# scales = "free_x" lets each island facet show only the species observed
# there. body_mass_g is recorded in grams, so the y-axis unit is plain "g".
penguins %>% 
  ggplot(aes(x = species, y = body_mass_g)) + 
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2) +
  labs(x = "Species", y = "Body mass (g)") +
  facet_grid(. ~ island, scales = "free_x") +
  theme_bw() +
  theme(axis.title.y = element_markdown())

Sad plot + change whisker length

# Baseline plot with the whisker length multiplier (coef) set to 2.
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g)) +
  geom_boxplot(coef = 2) +
  facet_grid(cols = vars(island))

Hints

  • Add the raw data - colour per species - change colour palette
  • Remove the outliers
  • Remove the grey background
  • Fix the axes labels - remove legend
  • Add the mean
# Improved plot: unfilled box plots without outlier markers, raw points
# coloured by species, the per-species mean as a black dot, and free x axes so
# each island facet only shows the species present on it.
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g)) +
  # Remove outliers (the raw data is drawn by the jitter layer)
  geom_boxplot(fill = NA, outlier.shape = NA, alpha = 0.2) +
  # Jittered raw data
  geom_jitter(aes(colour = species), alpha = 0.5, width = 0.1, size = 3) +
  # Add mean
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 20,
    size = 5,
    color = "black",
    fill = "black"
  ) +
  # Free x axis
  facet_grid(cols = vars(island), scales = "free_x", space = "free_x") +
  # Change colour scheme
  scale_colour_viridis_d() +
  # Axis titles and caption
  labs(
    x = "Penguin Species",
    y = "Body Mass (g)",
    caption = "Source: Palmerpenguins"
  ) +
  # Remove grey background
  theme_bw() +
  # Remove legend - note: theme() tweaks must come after the complete theme,
  # otherwise theme_bw() would overwrite them
  theme(legend.position = "none")

My version with the help of Cedric Scherer

# Scatterplot example 1: penguin flipper length versus body mass,
# with colour and point shape both mapped to species.
penguins %>%
  ggplot(mapping = aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(mapping = aes(color = species, shape = species), size = 2) +
  scale_color_manual(values = c("darkorange", "darkorchid", "cyan4")) +
  theme_minimal()

# Scatterplot example 2: penguin bill length versus bill depth,
# with colour and point shape both mapped to species.
penguins %>%
  ggplot(mapping = aes(x = bill_length_mm, y = bill_depth_mm)) +
  geom_point(mapping = aes(color = species, shape = species), size = 2) +
  scale_color_manual(values = c("darkorange", "darkorchid", "cyan4")) +
  theme_minimal()

# Colour palette used for the three species in the final plot.
pal <- c("#FF8C00", "#A034F0", "#159090")

# Load my images: read the penguin illustration from GitHub and wrap it in a
# raster grob so it can be placed on a plot with annotation_custom().
url <- "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/man/figures/lter_penguins.png"
img <- magick::image_read(url)
pic <- grid::rasterGrob(img, interpolate = TRUE)

# Summary helper for stat_summary(fun.data = ...): returns a named vector with
# `y`, a position just above the largest value of `x`, and `label`, the number
# of values in `x`.
add_sample <- function(x) {
  n_obs <- length(x)
  label_pos <- max(x) + .025
  c(y = label_pos, label = n_obs)
}

# Raincloud plot of the bill ratio (bill length / bill depth) per species:
# a half-eye density, a narrow box plot, jittered raw points, the median value
# and the sample size, with the penguin illustration placed on the panel.
#
# Relies on `pal`, `pic` and `add_sample()` defined earlier in this file.
#
# Colours that are mapped to species and then adjusted after scaling use
# stage(species, after_scale = ...) rather than repeating `color` twice inside
# aes(), which makes ggplot2 warn about duplicated aesthetics. Computed
# statistics are referenced with after_stat() instead of the deprecated
# `..var..` notation.
penguins %>% 
  # The ratio is computed row by row, so no grouping is required.
  mutate(bill_ratio = bill_length_mm / bill_depth_mm) %>% 
  filter(!is.na(bill_ratio)) %>% 
  ggplot(aes(x = fct_rev(species), y = bill_ratio)) + 
  # Density slab only: the interval (.width = 0) and point are suppressed.
  ggdist::stat_halfeye(
    aes(color = species,
        fill = after_scale(lighten(color, .5))),
    adjust = .5, 
    width = .75, 
    .width = 0,
    justification = -.4, 
    point_color = NA) + 
  # Box plot without outlier markers; the raw points are drawn below.
  geom_boxplot(
    aes(color = stage(species,
                      after_scale = darken(color, .1, space = "HLS")),
        fill = after_scale(desaturate(lighten(color, .8), .4))),
    width = .42, 
    outlier.shape = NA
  ) +
  # Raw points, layer 1: white-filled circles with a darkened outline.
  geom_point(
    aes(color = stage(species,
                      after_scale = darken(color, .1, space = "HLS"))),
    fill = "white",
    shape = 21,
    stroke = .4,
    size = 2,
    position = position_jitter(seed = 1, width = .12)
  ) + 
  # Raw points, layer 2: translucent species-coloured fill. The same jitter
  # seed keeps these exactly on top of the white points.
  geom_point(
    aes(fill = species),
    color = "transparent",
    shape = 21,
    stroke = .4,
    size = 2,
    alpha = .3,
    position = position_jitter(seed = 1, width = .12)
  ) + 
  # Median value printed as text.
  stat_summary(
    geom = "text",
    fun = "median",
    aes(label = round(after_stat(y), 2),
        color = stage(species,
                      after_scale = darken(color, .1, space = "HLS"))),
    family = "Roboto Mono",
    fontface = "bold",
    size = 4.5,
    vjust = -3.5
  ) +
  # Sample size printed just beyond the largest observation (see add_sample()).
  stat_summary(
    geom = "text",
    fun.data = add_sample,
    aes(label = paste("n =", after_stat(label)),
        color = stage(species,
                      after_scale = darken(color, .1, space = "HLS"))),
    family = "Roboto Condensed",
    size = 4,
    hjust = 0
  ) +
  # clip = "off" allows drawing outside the panel area.
  coord_flip(xlim = c(1.2, NA), clip = "off") +
  annotation_custom(pic, ymin = 2.9, ymax = 3.85, xmin = 2.7, xmax = 4.7) +
  scale_y_continuous(
    limits = c(1.57, 3.8),
    breaks = seq(1.6, 3.8, by = .2),
    expand = c(.001, .001)
  ) +
  scale_color_manual(values = pal, guide = "none") +
  scale_fill_manual(values = pal, guide = "none") +
  labs(
    x = NULL,
    y = "Bill ratio",
    title = "Bill Ratios of Brush–Tailed Penguins (*Pygoscelis* spec.)",
    subtitle = "Distribution of bill ratios, estimated as bill length divided by bill depth.",
    caption = "Gorman, Williams & Fraser (2014) *PLoS ONE* DOI: 10.1371/journal.pone.0090081<br>Visualization: Cédric Scherer  &bull; Illustration: Allison Horst"
  ) +
  theme_minimal(base_family = "Zilla Slab", base_size = 15) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_text(family = "Roboto Mono"),
    # One colour per species label; reversed to follow fct_rev(species).
    axis.text.y = element_text(
      color = rev(darken(pal, .1, space = "HLS")), 
      size = 12
    ),
    axis.title.x = element_text(margin = margin(t = 10),
                                size = 16),
    plot.title = element_markdown(face = "bold", size = 16),
    plot.subtitle = element_text(
      color = "grey40", hjust = 0,
      margin = margin(0, 0, 20, 0)
    ),
    plot.title.position = "plot",
    plot.caption = element_markdown(
      color = "grey40", lineheight = 1.2,
      margin = margin(20, 0, 0, 0)),
    plot.margin = margin(15, 15, 10, 15)
  )