library(tidyverse)
library(ggplot2)
library(ggthemes)
library(plotly)
library(ggdist)
library(MASS)
Simple examples
Overlaid histogram
First up is package imports (that’s just like adding whatever tools we’ll need later).
Next we have to import the data into a data frame.1
# Load the trial-level data. `Participant` is pinned to integer explicitly;
# every other column is left to readr's automatic type guessing.
df <- read_csv(
  "data.csv",
  col_types = cols(Participant = col_integer())
)
We explicitly set the ‘Participant’ column to be an integer for the sake of efficiency – the rest are automatically assigned types.
What does `df` actually look like?
The top few rows of the data frame look like this:
# Preview the first rows of the imported data.
df %>% head()
# A tibble: 6 × 4
Participant Training Test Error
<int> <chr> <chr> <dbl>
1 1 Physical Pre-test 18.1
2 1 Physical Mid-test 12.8
3 1 Physical Post-test 16.4
4 1 Physical Carryover 16.5
5 2 Physical Pre-test 37.9
6 2 Physical Mid-test 21.6
Next, we’ll filter the data so it’s just the pre- and post-tests for visual training (and order them correctly).2
# Keep only the visual-training rows from the pre- and post-tests, and turn
# `Test` into a factor with explicit levels so the stages are ordered
# Pre-test -> Post-test rather than alphabetically.
vis_prepost <- df %>%
  filter(
    Training == "Visual",
    Test %in% c("Pre-test", "Post-test")
  ) %>%
  mutate(Test = factor(Test, levels = c("Pre-test", "Post-test")))
Now we can make the plot itself.
1. Define the data.
2. Reduce the graph’s opacity (so you can actually see when they overlap).
3. Overlay the graphs (instead of stacking them).
4. Set the number of bins to 20.
Finally, we’ll add some labels for the axes and a basic theme.
Code
# Add a title, subtitle, axis/legend labels and a clean theme to the
# histogram `p`, then draw it.
# NOTE(review): `p` must already hold the base histogram built in the
# previous step; its definition is not visible in this listing -- confirm.
p <- p +
  labs(
    title = "Distribution of error scores (visual training)",
    subtitle = "Performance in pre- vs post-tests",
    x = "Error (cm)",
    y = "Count",
    fill = "Test stage"
  ) +
  theme_clean()

print(p)
Interactive
We can also make it interactive (like below) or animated or literally whatever you want.
# Convert the static ggplot histogram into an interactive plotly widget.
p %>% ggplotly()
Smoothed
A smoothed PDF approximation can also convey the info pretty well.
Code
# Kernel density estimate of the error scores, one semi-transparent curve
# per test stage so the overlap between the two remains visible.
p_density <- vis_prepost %>%
  ggplot(aes(x = Error, fill = Test)) +
  geom_density(alpha = 0.5) +
  # Colours are matched to the levels of `Test` in order.
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  labs(
    title = "Smoothed distribution of error scores",
    subtitle = "Performance in pre- vs post-tests (kernel density estimate)",
    x = "Error (cm)",
    # A density over centimetres has units of 1/cm; plotmath renders the
    # exponent as a superscript.
    y = expression(Density~(cm^{-1})),
    fill = "Test stage"
  ) +
  theme_clean()

print(p_density)
Making it interactive is still only one line, just like Figure 1.
# Convert the static density plot into an interactive plotly widget.
p_density %>% ggplotly()
Faceted histogram
For a faceted histogram, you just add `facet_wrap()`.
# Side-by-side version of the histogram: one panel per test stage.
# `scales = "fixed"` keeps identical axes in both panels so they can be
# compared directly; the fill legend is dropped because the panel strips
# already name each stage.
p_sxs <- p +
  facet_wrap(~ Test, ncol = 2, scales = "fixed") +
  guides(fill = "none")

print(p_sxs)
The same can be applied to the probability density function in Figure 2 (but it doesn’t look half as cool).
Code
# Faceted version of the density plot: one panel per test stage, with the
# fill legend removed because the panel strips already name each stage.
p_density_faceted <- p_density +
  facet_wrap(~ Test, ncol = 2) +
  guides(fill = "none")

print(p_density_faceted)
Box plot
Making a box plot is just as simple.
# Shared base for the per-stage distribution plots: test stage on the
# x-axis, error on the y-axis, common labels and theme. It has no geom of
# its own; a geom layer is added on top of it to produce an actual plot.
p_base_dist <- vis_prepost %>%
  ggplot(aes(x = Test, y = Error, fill = Test)) +
  labs(
    title = "Distribution of error scores",
    subtitle = "Performance in pre- vs post-tests",
    x = "Test stage",
    y = "Error (cm)"
  ) +
  theme_clean() +
  # The x-axis already identifies each stage, so the fill legend is redundant.
  guides(fill = "none")

# Box plot: the shared base plus a box-plot layer.
p_box <- p_base_dist + geom_boxplot()
print(p_box)
Violin plot
A violin plot can be made with some minor adjustments to a box plot.
# Violin plot: the shared base plus a violin layer. `trim = FALSE` lets the
# tails extend past the observed range instead of cutting them off at the
# minimum and maximum.
p_violin <- p_base_dist + geom_violin(trim = FALSE)
print(p_violin)
More complex examples
Individual trajectories
It’s super easy to add other factors. Here’s a graph with an extra one, training group:
Code
# Individual trajectories across all four test stages, coloured by training
# group, with each group's mean trajectory drawn on top.
p_trajectories <- df %>%
  # Explicit levels put the stages in chronological order on the x-axis.
  mutate(
    Test = factor(
      Test,
      levels = c("Pre-test", "Mid-test", "Post-test", "Carryover")
    )
  ) %>%
  ggplot(aes(
    x = Test,
    y = Error,
    # One line per participant within each training group.
    group = interaction(Participant, Training),
    colour = Training
  )) +
  # Faint individual lines so the group means stand out.
  geom_line(alpha = 0.2) +
  # Group mean per stage, joined by a thick line. The group is overridden to
  # `Training` so the mean is taken across participants.
  stat_summary(
    aes(group = Training),
    fun = mean,
    geom = "line",
    linewidth = 1.3
  ) +
  # Group mean per stage as a hollow marker: shape 21 takes its outline from
  # `colour` and its interior from the constant white `fill`. No `fill`
  # mapping is supplied because a constant `fill` would override it anyway.
  stat_summary(
    aes(group = Training),
    fun = mean,
    geom = "point",
    shape = 21,
    fill = "white",
    size = 2.5,
    stroke = 1.2
  ) +
  labs(
    title = "Individual participant trajectories",
    subtitle = "By training type, over time",
    x = "Test stage",
    y = "Error (cm)",
    colour = "Training group"
  ) +
  theme_clean()

print(p_trajectories)
The same applies for e.g. dominant vs non-dominant, or against different throwing lengths or whatever else.
Three-dimensional plots
Three-dimensional plots are also easy to make, say if we want to plot every participant without over-plotting the hell out of it.
Code
# Reshape to one row per participant/training combination, with the
# pre-test and post-test errors side by side in columns named `Pre-test`
# and `Post-test`.
df_wide <- df %>%
  filter(Test %in% c("Pre-test", "Post-test")) %>%
  tidyr::pivot_wider(names_from = Test, values_from = Error)
#' Two-dimensional kernel density estimate of pre- vs post-test error
#'
#' @param d A data frame with numeric columns `Pre-test` and `Post-test`.
#' @param n Number of grid points in each direction (passed to
#'   `MASS::kde2d()`).
#' @param pad Margin added on every side of the observed data range when
#'   setting the grid limits, so the surface is not cut off at the outermost
#'   points.
#' @return The list returned by `MASS::kde2d()`: grid coordinates `x`
#'   (pre-test) and `y` (post-test) and the density matrix `z`, whose rows
#'   correspond to `x` and columns to `y`.
kde_surface <- function(d, n = 120, pad = 0.5) {
  x <- d$`Pre-test`
  y <- d$`Post-test`
  # Grid limits in the order kde2d expects: x-min, x-max, y-min, y-max.
  lims <- c(min(x) - pad, max(x) + pad, min(y) - pad, max(y) + pad)
  MASS::kde2d(x, y, n = n, lims = lims)
}
# One density surface per training group, as a list named by group.
# `split()` names each piece after its own `Training` value, so a name can
# never be attached to the wrong group's data. (Naming the pieces from a
# separately computed vector of group labels only works when both happen to
# come out in the same order.)
surfaces <- split(df_wide, df_wide$Training) %>%
  lapply(kde_surface)
# Interactive 3D density surface for the visual-training group.
p_3d_visual <- plot_ly() |>
  add_surface(
    x = surfaces$Visual$x,
    y = surfaces$Visual$y,
    # kde2d returns z with rows indexed by x and columns by y, whereas a
    # plotly surface reads rows of z as y and columns as x. Transpose so the
    # density lines up with the labelled axes.
    z = t(surfaces$Visual$z),
    colorscale = "Viridis",
    showscale = TRUE
  ) |>
  layout(
    title = "Visual training — 3D density surface",
    scene = list(
      xaxis = list(title = "Pre-test (cm)"),
      yaxis = list(title = "Post-test (cm)"),
      zaxis = list(title = "Density")
    )
  )

p_3d_visual
Code
# Interactive 3D density surface for the physical-training group.
p_3d_physical <- plot_ly() |>
  add_surface(
    x = surfaces$Physical$x,
    y = surfaces$Physical$y,
    # kde2d returns z with rows indexed by x and columns by y, whereas a
    # plotly surface reads rows of z as y and columns as x. Transpose so the
    # density lines up with the labelled axes.
    z = t(surfaces$Physical$z),
    colorscale = "Viridis",
    showscale = TRUE
  ) |>
  layout(
    title = "Physical training — 3D density surface",
    scene = list(
      xaxis = list(title = "Pre-test (cm)"),
      yaxis = list(title = "Post-test (cm)"),
      zaxis = list(title = "Density")
    )
  )

p_3d_physical
Heatmap
Of course, a heatmap may be better in this case:
# 2D density heatmap of pre- vs post-test error for the visual-training
# group.
p_heat_visual2 <- df_wide %>%
  filter(Training == "Visual") %>%
  ggplot(aes(x = `Pre-test`, y = `Post-test`)) +
  # Fill each raster cell by the estimated density rather than drawing
  # contour lines.
  stat_density_2d(
    aes(fill = after_stat(density)),
    geom = "raster",
    contour = FALSE
  ) +
  # Both axes are in centimetres, so keep a 1:1 aspect ratio.
  coord_equal() +
  scale_fill_viridis_c(name = "Density") +
  labs(
    title = "Visual training — 2D density heatmap",
    x = "Pre-test (cm)",
    y = "Post-test (cm)"
  ) +
  theme_minimal()

p_heat_visual2