workbook

The data set and code used in this tutorial and an interactive version with activity solutions, additional resources, and advanced plotting options are available at https:// osf.io/bj83f/

Chapter 2

Loading packages

Remember that you need to install the packages before you can load them - but never save the install code in your Markdown.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(patchwork)

Loading data

dat <- read_csv(file = "ldt_data.csv")

## Rows: 100 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): id
## dbl (6): age, language, rt_word, rt_nonword, acc_word, acc_nonword
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Handling numeric factors

summary(dat)

##       id                 age           language       rt_word     
##  Length:100         Min.   :18.00   Min.   :1.00   Min.   :256.3  
##  Class :character   1st Qu.:24.00   1st Qu.:1.00   1st Qu.:322.6  
##  Mode  :character   Median :28.50   Median :1.00   Median :353.8  
##                     Mean   :29.75   Mean   :1.45   Mean   :353.6  
##                     3rd Qu.:33.25   3rd Qu.:2.00   3rd Qu.:379.5  
##                     Max.   :58.00   Max.   :2.00   Max.   :479.6  
##    rt_nonword       acc_word       acc_nonword   
##  Min.   :327.3   Min.   : 89.00   Min.   :76.00  
##  1st Qu.:438.8   1st Qu.: 94.00   1st Qu.:82.75  
##  Median :510.6   Median : 95.00   Median :85.00  
##  Mean   :515.8   Mean   : 95.01   Mean   :84.90  
##  3rd Qu.:582.9   3rd Qu.: 96.25   3rd Qu.:88.00  
##  Max.   :706.2   Max.   :100.00   Max.   :93.00

str(dat)

## spec_tbl_df [100 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id         : chr [1:100] "S001" "S002" "S003" "S004" ...
##  $ age        : num [1:100] 22 33 23 28 26 29 20 30 26 22 ...
##  $ language   : num [1:100] 1 1 1 1 1 1 1 1 1 1 ...
##  $ rt_word    : num [1:100] 379 312 405 298 316 ...
##  $ rt_nonword : num [1:100] 517 435 459 336 401 ...
##  $ acc_word   : num [1:100] 99 94 96 92 91 96 95 91 94 94 ...
##  $ acc_nonword: num [1:100] 90 82 87 76 83 78 86 80 86 88 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_character(),
##   ..   age = col_double(),
##   ..   language = col_double(),
##   ..   rt_word = col_double(),
##   ..   rt_nonword = col_double(),
##   ..   acc_word = col_double(),
##   ..   acc_nonword = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

dat <- mutate(dat, language = factor(
    x = language, # column to translate
    levels = c(1, 2), # values of the original data in preferred order
    labels = c("monolingual", "bilingual") # labels for display
  ))

Demographic information

dat %>%
  group_by(language) %>%
  count() %>%
  ungroup()

## # A tibble: 2 × 2
##   language        n
##   <fct>       <int>
## 1 monolingual    55
## 2 bilingual      45

dat %>%
  count()

## # A tibble: 1 × 1
##       n
##   <int>
## 1   100

dat %>%
  summarise(mean_age = mean(age),
            sd_age = sd(age),
            n_values = n())

## # A tibble: 1 × 3
##   mean_age sd_age n_values
##      <dbl>  <dbl>    <int>
## 1     29.8   8.28      100

age_stats <- dat %>%
  summarise(mean_age = mean(age),
            sd_age = sd(age),
            n_values = n())

dat %>%
  group_by(language) %>%
  summarise(mean_age = mean(age),
            sd_age = sd(age),
            n_values = n()) %>%
  ungroup()

## # A tibble: 2 × 4
##   language    mean_age sd_age n_values
##   <fct>          <dbl>  <dbl>    <int>
## 1 monolingual     28.0   6.78       55
## 2 bilingual       31.9   9.44       45

Bar chart of counts

ggplot(data = dat, mapping = aes(x = language)) +
  geom_bar()

Bar chart of counts.

Plotting existing aggregates and percent

dat_percent <- dat %>%
  group_by(language) %>%
  count() %>%
  ungroup() %>%
  mutate(percent = (n/sum(n)*100))

ggplot(dat_percent, aes(x = language, y = percent)) +
  geom_bar(stat="identity")

Bar chart of pre-calculated counts.

Histogram

ggplot(dat, aes(x = age)) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Histogram of ages.

ggplot(dat, aes(x = age)) +
  geom_histogram(binwidth = 5)

Histogram of ages where each bin covers one year.

Customisation 1

Changing colours

ggplot(dat, aes(age)) +
  geom_histogram(binwidth = 1, 
                 fill = "white", 
                 colour = "black")

Histogram with custom colors for bar fill and line colors.

Editing axis names and labels

ggplot(dat, aes(language)) +
  geom_bar() +
  scale_x_discrete(name = "Language group", 
                   labels = c("Monolingual", "Bilingual")) +
  scale_y_continuous(name = "Number of participants",
                     breaks = c(0,10,20,30,40,50))

Bar chart with custom axis labels.

Discrete vs continuous errors

ggplot(dat, aes(language)) +
  geom_bar() +
  scale_x_continuous(name = "Language group", 
                   labels = c("Monolingual", "Bilingual"))

Adding a theme

ggplot(dat, aes(age)) +
  geom_histogram(binwidth = 1, fill = "wheat", color = "black") +
  scale_x_continuous(name = "Participant age (years)") +
  theme_minimal()

Histogram with a custom theme.

theme_set(theme_minimal())

Activities 1

Chapter 3

Transforming data

Step 1

long <- pivot_longer(data = dat, 
                     cols = rt_word:acc_nonword, 
                     names_to = c("dv_condition"),
                     values_to = "dv")

Step 2

long2 <- pivot_longer(data = dat, 
                     cols = rt_word:acc_nonword, 
                     names_sep = "_", 
                     names_to = c("dv_type", "condition"),
                     values_to = "dv")

Step 3

dat_long <- pivot_wider(long2, 
                        names_from = "dv_type", 
                        values_from = "dv")

Combined steps

dat_long <- pivot_longer(data = dat, 
                     cols = rt_word:acc_nonword, 
                     names_sep = "_", 
                     names_to = c("dv_type", "condition"),
                     values_to = "dv") %>%
  pivot_wider(names_from = "dv_type", 
              values_from = "dv")

Histogram 2

ggplot(dat_long, aes(x = rt)) +
  geom_histogram(binwidth = 10, fill = "white", colour = "black") +
  scale_x_continuous(name = "Reaction time (ms)")

ggplot(dat_long, aes(x = acc)) +
  geom_histogram(binwidth = 1, fill = "white", colour = "black") +
  scale_x_continuous(name = "Accuracy (0-100)")

Density plots

ggplot(dat_long, aes(x = rt)) +
  geom_density()+
  scale_x_continuous(name = "Reaction time (ms)")

Grouped density plots

ggplot(dat_long, aes(x = rt, fill = condition)) +
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Reaction time (ms)") +
  scale_fill_discrete(name = "Condition",
                      labels = c("Word", "Non-word"))

Scatterplots

ggplot(dat_long, aes(x = rt, y = age)) +
  geom_point()

With line of best fit

ggplot(dat_long, aes(x = rt, y = age)) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula 'y ~ x'

Grouped scatterplots

ggplot(dat_long, aes(x = rt, y = age, colour = condition)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_colour_discrete(name = "Condition",
                      labels = c("Word", "Non-word"))

## `geom_smooth()` using formula 'y ~ x'

Customisation 2

Accessible colour schemes

ggplot(dat_long, aes(x = rt, y = age, colour = condition)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_brewer(palette = "Dark2",
                     name = "Condition",
                     labels = c("Word", "Non-word"))

## `geom_smooth()` using formula 'y ~ x'

Activities 2

Chapter 4

Boxplots

ggplot(dat_long, aes(x = condition, y = acc)) +
  geom_boxplot()

Grouped boxplots

ggplot(dat_long, aes(x = condition, y = acc, fill = language)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Dark2",
                    name = "Group",
                    labels = c("Bilingual", "Monolingual")) +
  theme_classic() +
  scale_x_discrete(name = "Condition",
                   labels = c("Word", "Non-word")) +
  scale_y_continuous(name = "Accuracy")

Violin plots

ggplot(dat_long, aes(x = condition, y = acc, fill = language)) +
  geom_violin() +
  scale_fill_brewer(palette = "Dark2",
                    name = "Group",
                    labels = c("Bilingual", "Monolingual")) +
  theme_classic() +
  scale_x_discrete(name = "Condition",
                   labels = c("Word", "Non-word")) +
  scale_y_continuous(name = "Accuracy")

Bar chart of means

ggplot(dat_long, aes(x = condition, y = rt)) +
  stat_summary(fun = "mean", geom = "bar")

ggplot(dat_long, aes(x = condition, y = rt)) +
  stat_summary(fun = "mean", geom = "bar") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2)

Violin-boxplot

ggplot(dat_long, aes(x = condition, y= rt)) +
  geom_violin() +
  # remove the median line with fatten = NULL
  geom_boxplot(width = .2, fatten = NULL) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1)

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Messy layers

ggplot(dat_long, aes(x = condition, y= rt)) +
  geom_boxplot() +  
  geom_violin() +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1)

Grouped violin-boxplots

ggplot(dat_long, aes(x = condition, y= rt, fill = language)) +
  geom_violin() +
  geom_boxplot(width = .2, 
               fatten = NULL) +
  stat_summary(fun = "mean",  geom = "point") +
  stat_summary(fun.data = "mean_se", 
               geom = "errorbar", 
               width = .1) +
  scale_fill_brewer(palette = "Dark2")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Fixed positions

# set the offset position of the geoms
pos <- position_dodge(0.9)

ggplot(dat_long, aes(x = condition, y= rt, fill = language)) +
  geom_violin(position = pos) +
  geom_boxplot(width = .2, 
               fatten = NULL, 
               position = pos) +
  stat_summary(fun = "mean", 
               geom = "point", 
               position = pos) +
  stat_summary(fun.data = "mean_se", 
               geom = "errorbar", 
               width = .1,
               position = pos) +
  scale_fill_brewer(palette = "Dark2")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Customisation part 3

Colours

Hard to see colours

ggplot(dat_long, aes(x = condition, y= rt, fill = language, 
                     group = paste(condition, language))) +
  geom_violin(alpha = 0.25, position = pos) +
  geom_boxplot(width = .2, 
               fatten = NULL, 
               alpha = 0.75,
               position = pos) +
  stat_summary(fun = "mean", 
               geom = "point", 
               position = pos) +
  stat_summary(fun.data = "mean_se", 
               geom = "errorbar", 
               width = .1,
               position = pos) +
  scale_fill_brewer(palette = "Dark2")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Adjusted geom colours

ggplot(dat_long, aes(x = condition, y= rt, fill = language)) +
  geom_violin(position = pos) +
  geom_boxplot(width = .2, fatten = NULL, 
               mapping = aes(group = interaction(condition, language)),
               fill = "white",
               position = pos) +
  stat_summary(fun = "mean", 
               geom = "point", 
               position = pos) +
  stat_summary(fun.data = "mean_se", 
               geom = "errorbar", 
               width = .1,
               position = pos) +
  scale_fill_brewer(palette = "Dark2")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Activities 3

Write any notes you have here.
Give yourself a high five here.

Chapter 5

Interaction plots

ggplot(dat_long, aes(x = condition, y = rt, 
                     shape = language,
                     group = language,
                     color = language)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_color_manual(values = c("blue", "darkorange")) +
  theme_classic()

Combined interaction plots

ggplot(dat_long, aes(x = condition, y = rt, group = language, shape = language)) +
  geom_point(aes(colour = language),alpha = .2) +
  geom_line(aes(group = id, colour = language), alpha = .2) +
   stat_summary(fun = "mean", geom = "point", size = 2, colour = "black") +
  stat_summary(fun = "mean", geom = "line", colour = "black") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2, colour = "black") +
  theme_minimal()

Facets

Faceted scatterplots

ggplot(dat_long, aes(x = rt, y = age)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~condition) +
  scale_color_discrete(name = "Condition",
                      labels = c("Word", "Non-word"))

## `geom_smooth()` using formula 'y ~ x'

Faceted violin-boxplots

ggplot(dat_long, aes(x = condition, y= rt)) +
  geom_violin() +
  geom_boxplot(width = .2, fatten = NULL) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1) +
  facet_wrap(~language) +
  theme_minimal()

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Adjusted the facet labels

ggplot(dat_long, aes(x = condition, y= rt)) +
  geom_violin() +
  geom_boxplot(width = .2, fatten = NULL) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1) +
  facet_wrap(~language, 
             labeller = labeller(
               language = c(monolingual = "Monolingual participants",
                             bilingual = "Bilingual participants"))) +
  theme_minimal()

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Saving plots

p1 <- ggplot(dat_long, aes(x = rt)) +
  geom_histogram(binwidth = 10, color = "black")

p2 <- ggplot(dat_long, aes(x = acc)) +
  geom_histogram(binwidth = 1, color = "black")

p3 <- p1 + theme_minimal()

Exporting plots

ggsave(filename = "my_plot.png") # save last displayed plot

## Saving 7 x 5 in image

ggsave(filename = "my_plot.png", plot = p3) # save plot p3

## Saving 7 x 5 in image

Mulitple plots

Combining two plots

p1 + p2 # side-by-side

p1 / p2 # stacked

Combining three or more plots

p5 <- ggplot(dat_long, aes(x = condition, y = rt, group = language, shape = language)) +
  geom_point(aes(colour = language),alpha = .2) +
  geom_line(aes(group = id, colour = language), alpha = .2) +
  stat_summary(fun = "mean", geom = "point", size = 2, colour = "black") +
  stat_summary(fun = "mean", geom = "line", colour = "black") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2, colour = "black") +
  theme_minimal()

p6 <- ggplot(dat_long, aes(x = condition, y= rt)) +
  geom_violin() +
  geom_boxplot(width = .2, fatten = NULL) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1) +
  facet_wrap(~language, 
             labeller = labeller(
               language = (c(monolingual = "Monolingual participants", 
                             bilingual = "Bilingual participants")))) +
  theme_minimal()

p1 /p5 / p6

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

(p1 + p6) / p5

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

p6 | p1 / p5

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Customisation 4

Axis labels

p5 + labs(x = "Type of word",
          y = "Reaction time (ms)",
          title = "Language group by word type interaction plot",
          subtitle = "Reaction time data")

Non-meanginful colours

With redundant legend

ggplot(dat_long, aes(x = condition, y= rt, fill = language)) +
  geom_violin(alpha = .4) +
  geom_boxplot(width = .2, fatten = NULL, alpha = .6) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1) +
  facet_wrap(~factor(language, 
                     levels = c("monolingual", "bilingual"),
                     labels = c("Monolingual participants", 
                                "Bilingual participants"))) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).

Without redundant legend

ggplot(dat_long, aes(x = condition, y= rt, fill = language)) +
  geom_violin(alpha = .4) +
  geom_boxplot(width = .2, fatten = NULL, alpha = .6) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .1) +
  facet_wrap(~factor(language, 
                     levels = c("monolingual", "bilingual"),
                     labels = c("Monolingual participants", 
                                "Bilingual participants"))) +
  theme_minimal() +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = "none")

## Warning: Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_segment).