The data set and code used in this tutorial, together with an interactive version that includes activity solutions, additional resources, and advanced plotting options, are available at https://osf.io/bj83f/
Remember that you need to install the packages before you can load them - but never save the install code in your Markdown.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(patchwork)
# Import the data set from ldt_data.csv (path is relative to the working directory)
dat <- read_csv("ldt_data.csv")
## Rows: 100 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): id
## dbl (6): age, language, rt_word, rt_nonword, acc_word, acc_nonword
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary statistics as a quick check of the imported values
summary(object = dat)
## id age language rt_word
## Length:100 Min. :18.00 Min. :1.00 Min. :256.3
## Class :character 1st Qu.:24.00 1st Qu.:1.00 1st Qu.:322.6
## Mode :character Median :28.50 Median :1.00 Median :353.8
## Mean :29.75 Mean :1.45 Mean :353.6
## 3rd Qu.:33.25 3rd Qu.:2.00 3rd Qu.:379.5
## Max. :58.00 Max. :2.00 Max. :479.6
## rt_nonword acc_word acc_nonword
## Min. :327.3 Min. : 89.00 Min. :76.00
## 1st Qu.:438.8 1st Qu.: 94.00 1st Qu.:82.75
## Median :510.6 Median : 95.00 Median :85.00
## Mean :515.8 Mean : 95.01 Mean :84.90
## 3rd Qu.:582.9 3rd Qu.: 96.25 3rd Qu.:88.00
## Max. :706.2 Max. :100.00 Max. :93.00
# Compact display of each column's type and leading values
str(object = dat)
## spec_tbl_df [100 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : chr [1:100] "S001" "S002" "S003" "S004" ...
## $ age : num [1:100] 22 33 23 28 26 29 20 30 26 22 ...
## $ language : num [1:100] 1 1 1 1 1 1 1 1 1 1 ...
## $ rt_word : num [1:100] 379 312 405 298 316 ...
## $ rt_nonword : num [1:100] 517 435 459 336 401 ...
## $ acc_word : num [1:100] 99 94 96 92 91 96 95 91 94 94 ...
## $ acc_nonword: num [1:100] 90 82 87 76 83 78 86 80 86 88 ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_character(),
## .. age = col_double(),
## .. language = col_double(),
## .. rt_word = col_double(),
## .. rt_nonword = col_double(),
## .. acc_word = col_double(),
## .. acc_nonword = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Recode the numeric language codes as a labelled factor:
# 1 -> "monolingual", 2 -> "bilingual" (level order follows `levels`)
dat <- dat %>%
  mutate(
    language = factor(
      language,
      levels = c(1, 2),
      labels = c("monolingual", "bilingual")
    )
  )
# Number of rows in each language group; count() with a column name
# groups, tallies, and returns an ungrouped tibble in one step
dat %>%
  count(language)
## # A tibble: 2 × 2
## language n
## <fct> <int>
## 1 monolingual 55
## 2 bilingual 45
# Total number of rows in the data set
count(dat)
## # A tibble: 1 × 1
## n
## <int>
## 1 100
# Overall mean and standard deviation of age, plus the number of rows used
summarise(
  dat,
  mean_age = mean(age),
  sd_age = sd(age),
  n_values = n()
)
## # A tibble: 1 × 3
## mean_age sd_age n_values
## <dbl> <dbl> <int>
## 1 29.8 8.28 100
# Keep the overall age summary as an object instead of only printing it
age_stats <- summarise(
  dat,
  mean_age = mean(age),
  sd_age = sd(age),
  n_values = n()
)

# The same summary computed separately for each language group;
# `.groups = "drop"` returns an ungrouped tibble
dat %>%
  group_by(language) %>%
  summarise(
    mean_age = mean(age),
    sd_age = sd(age),
    n_values = n(),
    .groups = "drop"
  )
## # A tibble: 2 × 4
## language mean_age sd_age n_values
## <fct> <dbl> <dbl> <int>
## 1 monolingual 28.0 6.78 55
## 2 bilingual 31.9 9.44 45
# Bar chart of language group; geom_bar() counts the rows per group itself
ggplot(dat, aes(x = language)) +
  geom_bar()
Bar chart of counts.
# Pre-compute the percentage of rows that fall in each language group
dat_percent <- dat %>%
  count(language) %>%
  mutate(percent = (n / sum(n) * 100))

# Bar heights come straight from `percent`, so no counting is needed;
# geom_col() is the shorthand for geom_bar(stat = "identity")
ggplot(data = dat_percent, mapping = aes(x = language, y = percent)) +
  geom_col()
Bar chart of pre-calculated percentages.
# Histogram of age with the default binning (30 bins, hence the message)
ggplot(data = dat, mapping = aes(x = age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Histogram of ages.
# Histogram of age where each bin spans 5 units of `age`
ggplot(data = dat, mapping = aes(x = age)) +
  geom_histogram(binwidth = 5)
Histogram of ages where each bin covers five years.
# One-unit bins; `fill` sets the bar interior, `colour` the bar outline
ggplot(data = dat, mapping = aes(x = age)) +
  geom_histogram(
    binwidth = 1,
    fill = "white",
    colour = "black"
  )
Histogram with custom colors for bar fill and line colors.
# Bar chart with custom axis titles, tick labels, and y-axis breaks
ggplot(data = dat, mapping = aes(x = language)) +
  geom_bar() +
  scale_x_discrete(
    name = "Language group",
    labels = c("Monolingual", "Bilingual")
  ) +
  scale_y_continuous(
    name = "Number of participants",
    breaks = seq(from = 0, to = 50, by = 10)
  )
Bar chart with custom axis labels.
# `language` is a factor, so the x axis needs a discrete scale.
# scale_x_continuous() stops with "Discrete value supplied to continuous
# scale" here. Labels are named by factor level so that they cannot be
# attached to the wrong group.
ggplot(dat, aes(x = language)) +
  geom_bar() +
  scale_x_discrete(
    name = "Language group",
    labels = c(monolingual = "Monolingual", bilingual = "Bilingual")
  )
# Age histogram with a custom axis title and the minimal theme,
# applied to this plot only
ggplot(data = dat, mapping = aes(x = age)) +
  geom_histogram(
    binwidth = 1,
    fill = "wheat",
    colour = "black"
  ) +
  scale_x_continuous(name = "Participant age (years)") +
  theme_minimal()
Histogram with a custom theme.
# Make theme_minimal() the default theme for the plots that follow
theme_set(new = theme_minimal())
Step 1
# Stack the four measurement columns into one value column (`dv`),
# with the original column name kept in `dv_condition`
long <- dat %>%
  pivot_longer(
    cols = rt_word:acc_nonword,
    names_to = "dv_condition",
    values_to = "dv"
  )
Step 2
# As above, but split each column name at "_" into the measure
# (`dv_type`: rt / acc) and the condition (word / nonword)
long2 <- dat %>%
  pivot_longer(
    cols = rt_word:acc_nonword,
    names_to = c("dv_type", "condition"),
    names_sep = "_",
    values_to = "dv"
  )
Step 3
# Spread `dv_type` back out so rt and acc each get their own column
dat_long <- long2 %>%
  pivot_wider(
    names_from = "dv_type",
    values_from = "dv"
  )
Combined steps
# Steps 2 and 3 in a single pipeline: lengthen the four measurement
# columns, then widen by measure so `rt` and `acc` are separate columns
dat_long <- dat %>%
  pivot_longer(
    cols = rt_word:acc_nonword,
    names_to = c("dv_type", "condition"),
    names_sep = "_",
    values_to = "dv"
  ) %>%
  pivot_wider(
    names_from = "dv_type",
    values_from = "dv"
  )
# Histogram of reaction times, pooled over conditions
ggplot(data = dat_long, mapping = aes(x = rt)) +
  geom_histogram(
    binwidth = 10,
    fill = "white",
    colour = "black"
  ) +
  scale_x_continuous(name = "Reaction time (ms)")

# Histogram of accuracy scores, pooled over conditions
ggplot(data = dat_long, mapping = aes(x = acc)) +
  geom_histogram(
    binwidth = 1,
    fill = "white",
    colour = "black"
  ) +
  scale_x_continuous(name = "Accuracy (0-100)")

# Density curve of reaction times, pooled over conditions
ggplot(data = dat_long, mapping = aes(x = rt)) +
  geom_density() +
  scale_x_continuous(name = "Reaction time (ms)")
# Overlaid reaction-time densities, one per condition.
# `condition` is a character column, so its levels are ordered
# alphabetically ("nonword" before "word"). Positional labels
# c("Word", "Non-word") would therefore swap the legend entries;
# naming each label by its data value keeps them matched.
ggplot(dat_long, aes(x = rt, fill = condition)) +
  geom_density(alpha = 0.75) +
  scale_x_continuous(name = "Reaction time (ms)") +
  scale_fill_discrete(
    name = "Condition",
    labels = c(word = "Word", nonword = "Non-word")
  )
# Scatterplot of age (y) against reaction time (x)
ggplot(data = dat_long, mapping = aes(x = rt, y = age)) +
  geom_point()
With line of best fit
# Same scatterplot with a fitted linear-model line on top
ggplot(data = dat_long, mapping = aes(x = rt, y = age)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Scatterplot with a separate colour and fitted line per condition.
# `condition` is character, so its levels sort as "nonword", "word";
# the labels are named by data value so the legend cannot be swapped.
ggplot(dat_long, aes(x = rt, y = age, colour = condition)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_colour_discrete(
    name = "Condition",
    labels = c(word = "Word", nonword = "Non-word")
  )
## `geom_smooth()` using formula 'y ~ x'
# Same plot using the ColorBrewer "Dark2" palette.
# Labels are named by data value: `condition` sorts as "nonword", "word",
# so positional labels c("Word", "Non-word") would mislabel the legend.
ggplot(dat_long, aes(x = rt, y = age, colour = condition)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_colour_brewer(
    palette = "Dark2",
    name = "Condition",
    labels = c(word = "Word", nonword = "Non-word")
  )
## `geom_smooth()` using formula 'y ~ x'
# Boxplot of accuracy for each condition
ggplot(data = dat_long, mapping = aes(x = condition, y = acc)) +
  geom_boxplot()
# Accuracy boxplots by condition, filled by language group.
# Both label sets are named by data value so they cannot be swapped:
# - `language` has levels monolingual, bilingual, so the positional
#   labels c("Bilingual", "Monolingual") were reversed;
# - `condition` is character and sorts as "nonword", "word", so the
#   positional labels c("Word", "Non-word") were reversed too.
ggplot(dat_long, aes(x = condition, y = acc, fill = language)) +
  geom_boxplot() +
  scale_fill_brewer(
    palette = "Dark2",
    name = "Group",
    labels = c(monolingual = "Monolingual", bilingual = "Bilingual")
  ) +
  theme_classic() +
  scale_x_discrete(
    name = "Condition",
    labels = c(word = "Word", nonword = "Non-word")
  ) +
  scale_y_continuous(name = "Accuracy")
# Accuracy violins by condition, filled by language group.
# Both label sets are named by data value so they cannot be swapped:
# - `language` has levels monolingual, bilingual, so the positional
#   labels c("Bilingual", "Monolingual") were reversed;
# - `condition` is character and sorts as "nonword", "word", so the
#   positional labels c("Word", "Non-word") were reversed too.
ggplot(dat_long, aes(x = condition, y = acc, fill = language)) +
  geom_violin() +
  scale_fill_brewer(
    palette = "Dark2",
    name = "Group",
    labels = c(monolingual = "Monolingual", bilingual = "Bilingual")
  ) +
  theme_classic() +
  scale_x_discrete(
    name = "Condition",
    labels = c(word = "Word", nonword = "Non-word")
  ) +
  scale_y_continuous(name = "Accuracy")
# Bar height = mean reaction time per condition
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  stat_summary(geom = "bar", fun = "mean")

# Same bars with error bars computed by mean_se added on top
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .2)
# Violin-boxplot: distribution, quartile box, mean point, and error bar.
# Layers draw in the order listed, so the violin sits at the back.
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  geom_violin() +
  # fatten = NULL removes the median line from the box
  geom_boxplot(fatten = NULL, width = .2) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1)
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Messy layers
# Same geoms with the boxplot added first, so the violin is drawn
# over it, and with the box at its default width and median line
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  geom_boxplot() +
  geom_violin() +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1)
# Violin-boxplot split by language group through the fill aesthetic;
# every layer is left at its own default position
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, fill = language)
) +
  geom_violin() +
  geom_boxplot(fatten = NULL, width = .2) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  scale_fill_brewer(palette = "Dark2")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Fixed positions
# One shared dodge so every layer is offset by the same amount
pos <- position_dodge(0.9)

# Violin-boxplot by language group with all four layers dodged alike
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, fill = language)
) +
  geom_violin(position = pos) +
  geom_boxplot(fatten = NULL, width = .2, position = pos) +
  stat_summary(geom = "point", fun = "mean", position = pos) +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_se",
    width = .1,
    position = pos
  ) +
  scale_fill_brewer(palette = "Dark2")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Hard to see colours
# Explicit grouping by condition x language, with semi-transparent
# violins (alpha 0.25) and boxes (alpha 0.75)
ggplot(
  data = dat_long,
  mapping = aes(
    x = condition,
    y = rt,
    fill = language,
    group = paste(condition, language)
  )
) +
  geom_violin(position = pos, alpha = 0.25) +
  geom_boxplot(
    fatten = NULL,
    width = .2,
    alpha = 0.75,
    position = pos
  ) +
  stat_summary(geom = "point", fun = "mean", position = pos) +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_se",
    width = .1,
    position = pos
  ) +
  scale_fill_brewer(palette = "Dark2")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Adjusted geom colours
# Coloured violins with white boxes. The constant fill on the boxplot
# layer overrides the mapped fill, so that layer is given an explicit
# condition x language group.
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, fill = language)
) +
  geom_violin(position = pos) +
  geom_boxplot(
    mapping = aes(group = interaction(condition, language)),
    fatten = NULL,
    width = .2,
    fill = "white",
    position = pos
  ) +
  stat_summary(geom = "point", fun = "mean", position = pos) +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_se",
    width = .1,
    position = pos
  ) +
  scale_fill_brewer(palette = "Dark2")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Write any notes you have here.
Give yourself a high five here.
# Interaction plot: mean reaction time per condition, with one line,
# point shape, and colour per language group
ggplot(
  data = dat_long,
  mapping = aes(
    x = condition,
    y = rt,
    group = language,
    colour = language,
    shape = language
  )
) +
  stat_summary(geom = "point", fun = "mean", size = 3) +
  stat_summary(geom = "line", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .2) +
  scale_colour_manual(values = c("blue", "darkorange")) +
  theme_classic()
# Interaction plot with the raw data behind it: faint points and one
# faint line per `id`, overlaid with black group means and error bars
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, group = language, shape = language)
) +
  geom_point(mapping = aes(colour = language), alpha = .2) +
  geom_line(mapping = aes(group = id, colour = language), alpha = .2) +
  stat_summary(geom = "point", fun = "mean", size = 2, colour = "black") +
  stat_summary(geom = "line", fun = "mean", colour = "black") +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_se",
    width = .2,
    colour = "black"
  ) +
  theme_minimal()
Faceted scatterplots
# Scatterplot of age against reaction time, one panel per condition.
# No colour aesthetic is mapped in this plot, so a colour scale has no
# effect; the readable condition names are applied to the facet strips.
# Labels are named by data value so each strip gets the right one.
ggplot(dat_long, aes(x = rt, y = age)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(
    ~condition,
    labeller = labeller(
      condition = c(word = "Word", nonword = "Non-word")
    )
  )
## `geom_smooth()` using formula 'y ~ x'
Faceted violin-boxplots
# Violin-boxplot of reaction time by condition, one panel per language group
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  geom_violin() +
  geom_boxplot(fatten = NULL, width = .2) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  facet_wrap(~language) +
  theme_minimal()
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Adjusted the facet labels
# Same faceted plot with the strip text replaced through a labeller;
# the named vector maps each factor level to its display label
ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  geom_violin() +
  geom_boxplot(fatten = NULL, width = .2) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  facet_wrap(
    ~language,
    labeller = labeller(
      language = c(
        monolingual = "Monolingual participants",
        bilingual = "Bilingual participants"
      )
    )
  ) +
  theme_minimal()
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
# Histograms of reaction time and accuracy, stored as objects for reuse
p1 <- ggplot(data = dat_long, mapping = aes(x = rt)) +
  geom_histogram(binwidth = 10, colour = "black")
p2 <- ggplot(data = dat_long, mapping = aes(x = acc)) +
  geom_histogram(binwidth = 1, colour = "black")

# Stored plots can be extended later: p1 plus the minimal theme
p3 <- p1 + theme_minimal()

# No `plot` argument, so ggsave() writes the last displayed plot
ggsave("my_plot.png")
## Saving 7 x 5 in image
# Name the plot object explicitly to save p3 rather than the last plot shown
ggsave("my_plot.png", plot = p3)
## Saving 7 x 5 in image
Combining two plots
# patchwork operators: `+` places plots next to each other
p1 + p2 # side-by-side
# ... and `/` places the first plot above the second
p1 / p2 # stacked
Combining three or more plots
# p5: interaction plot with faint raw data and black group means
p5 <- ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, group = language, shape = language)
) +
  geom_point(mapping = aes(colour = language), alpha = .2) +
  geom_line(mapping = aes(group = id, colour = language), alpha = .2) +
  stat_summary(geom = "point", fun = "mean", size = 2, colour = "black") +
  stat_summary(geom = "line", fun = "mean", colour = "black") +
  stat_summary(
    geom = "errorbar",
    fun.data = "mean_se",
    width = .2,
    colour = "black"
  ) +
  theme_minimal()

# p6: violin-boxplot faceted by language group with relabelled strips
p6 <- ggplot(data = dat_long, mapping = aes(x = condition, y = rt)) +
  geom_violin() +
  geom_boxplot(fatten = NULL, width = .2) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  facet_wrap(
    ~language,
    labeller = labeller(
      language = c(
        monolingual = "Monolingual participants",
        bilingual = "Bilingual participants"
      )
    )
  ) +
  theme_minimal()

# Three plots stacked vertically
p1 / p5 / p6
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
# p1 and p6 share the top row; p5 sits underneath them
(p1 + p6) / p5
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
# p6 on the left, p1 stacked over p5 on the right.
# `/` binds more tightly than `|`; the parentheses make that explicit.
p6 | (p1 / p5)
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
# Add a title, subtitle, and axis titles to the stored plot in one labs() call
p5 +
  labs(
    title = "Language group by word type interaction plot",
    subtitle = "Reaction time data",
    x = "Type of word",
    y = "Reaction time (ms)"
  )
With redundant legend
# Faceted violin-boxplot filled by language group. The facet variable
# is relabelled inline by rebuilding the factor with display labels.
# The fill legend repeats what the facet strips already show.
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, fill = language)
) +
  geom_violin(alpha = .4) +
  geom_boxplot(fatten = NULL, width = .2, alpha = .6) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  facet_wrap(
    ~factor(
      language,
      levels = c("monolingual", "bilingual"),
      labels = c("Monolingual participants", "Bilingual participants")
    )
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
Without redundant legend
# Same plot with the fill legend switched off via guides(fill = "none"),
# since the facet strips already identify the language groups
ggplot(
  data = dat_long,
  mapping = aes(x = condition, y = rt, fill = language)
) +
  geom_violin(alpha = .4) +
  geom_boxplot(fatten = NULL, width = .2, alpha = .6) +
  stat_summary(geom = "point", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = "mean_se", width = .1) +
  facet_wrap(
    ~factor(
      language,
      levels = c("monolingual", "bilingual"),
      labels = c("Monolingual participants", "Bilingual participants")
    )
  ) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = "none")
## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).