Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
library(ggthemes)

Read in

# Load the two input CSV files from the current working directory.
# NOTE(review): presumably the first file holds one row per participant and the
# second holds per-song metadata -- confirm against the files themselves.
data_raw <- read.csv(file = "NAIV_Exp1.csv")
songs_raw <- read.csv(file = "NHSDiscography_Metadata.csv")

Clean

Songs.

# Song lookup table with two columns:
#   correct_function - the lower-cased `type` of the song
#   song_number      - numeric id parsed from the `song` column
songs <- songs_raw %>%
  clean_names() %>%
  transmute(
    # "lullaby" is relabelled "baby".
    # NOTE(review): presumably so it lines up with the `possible_function`
    # values parsed from the response column names -- confirm.
    correct_function = recode(tolower(type), "lullaby" = "baby"),
    # The id is the run of digits at the very end of the `song` string.
    song_number = as.numeric(str_extract(song, "\\d+$"))
  )

Data.

# Reshape the raw responses to long format: one row per
# participant x question, where each question column name (letters followed by
# digits) is split into `possible_function` and `song_number`.
data <- data_raw %>%
  clean_names() %>%
  # Participants have no id column here; use the row position as the id.
  mutate(participant = row_number()) %>%
  select(participant, age, uhoh1:story118) %>%
  pivot_longer(
    cols = uhoh1:story118,
    names_to = c("possible_function", "song_number"),
    # First capture group = letters (the function), second = digits (the song).
    names_pattern = "([a-zA-Z]+)([0-9]+)",
    values_to = "response"
  ) %>%
  # The digits come out of the column name as text; make them numeric so they
  # can be joined against the song table.
  mutate(song_number = as.numeric(song_number))

# Keep, for every participant x song, only the rating given to the song's
# true function. The result has one `song_accuracy` value per retained
# participant x song.
data_cleaned <- data %>%
  # Keep a participant x song only if its "uhoh" item was answered 0.
  # NOTE(review): presumably "uhoh" flags a problem with the trial -- confirm.
  group_by(participant, song_number) %>%
  filter(any(possible_function == "uhoh" & response == 0)) %>%
  ungroup() %>%
  # The "uhoh" item itself is not a rating, so drop it.
  filter(possible_function != "uhoh") %>%
  # Drop rows with any missing value (this includes rows with a missing `age`).
  drop_na() %>%
  # An inner join keeps only rated songs that exist in the song table. The
  # previous full join also added never-rated songs, which were then thrown
  # away again by na.omit(), so the resulting rows are unchanged.
  inner_join(songs, by = "song_number") %>%
  # Keep only the rating for the function that matches the song's true type.
  filter(possible_function == correct_function) %>%
  mutate(song_accuracy = response)

# Per-participant summary: number of retained songs, mean rating given to the
# correct function, and the participant's age.
avg_data <- data_cleaned %>%
  group_by(participant) %>%
  summarise(
    n_songs = n(),
    overall_accuracy = mean(song_accuracy),
    # `age` is taken as the mean over the participant's rows.
    age = mean(age)
  )

# Per-participant, per-song-type summary: number of retained songs, mean rating
# given to the correct function, and the participant's age.
avg_data_function <- data_cleaned %>%
  group_by(participant, correct_function) %>%
  summarise(
    n_songs = n(),
    overall_accuracy = mean(song_accuracy),
    age = mean(age),
    # Return an ungrouped table. Without this the result stays grouped by
    # `participant` and summarise() prints a message about it.
    .groups = "drop"
  )

Analyze

Correlation between age and overall accuracy.

# Two-sided Pearson correlation between participant age and mean accuracy.
cor.test(x = avg_data$age, y = avg_data$overall_accuracy, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  avg_data$age and avg_data$overall_accuracy
## t = 2.4072, df = 748, p-value = 0.01632
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.01619125 0.15827350
## sample estimates:
##        cor 
## 0.08767828

Correlation between age and accuracy, computed separately for each song type.

# One age x accuracy correlation test per song type, plus a ready-made text
# label ("r = ..., p = ...") for annotating the faceted plot.
cor_table <- avg_data_function %>%
  group_by(correct_function) %>%
  summarise(
    # Keep the full test object so r and p can both be pulled out of it.
    cor_test = list(cor.test(overall_accuracy, age)),
    .groups = "drop"
  ) %>%
  mutate(
    r = map_dbl(cor_test, function(test) test$estimate),
    p_value = map_dbl(cor_test, function(test) test$p.value),
    # sprintf() keeps trailing zeros ("0.180" rather than "0.18"), and very
    # small p-values read "p < .001" rather than "p = < .001".
    label = paste0(
      "r = ", sprintf("%.2f", r), ", ",
      if_else(
        p_value < 0.001,
        "p < .001",
        paste0("p = ", sprintf("%.3f", p_value))
      )
    )
  )

cor_table
## # A tibble: 4 × 5
##   correct_function cor_test      r p_value label              
##   <chr>            <list>    <dbl>   <dbl> <chr>              
## 1 baby             <htest>  0.0312  0.394  r = 0.03, p = 0.394
## 2 dance            <htest>  0.0840  0.0214 r = 0.08, p = 0.021
## 3 healing          <htest>  0.0490  0.180  r = 0.05, p = 0.180
## 4 love             <htest>  0.0903  0.0134 r = 0.09, p = 0.013

Plot

Average rating for correct song.

# Scatter plot of mean accuracy against age with a linear fit, annotated with
# the age x accuracy correlation.
# The annotation text is computed from the data rather than typed in by hand,
# so it cannot drift out of sync if the data or the cleaning steps change.
overall_cor <- cor.test(avg_data$age, avg_data$overall_accuracy)
overall_label <- sprintf(
  "r = %.3f, %s",
  overall_cor$estimate,
  if (overall_cor$p.value < 0.001) {
    "p < .001"
  } else {
    # Three decimals without the leading zero, e.g. ".016".
    paste0("p = ", sub("^0", "", sprintf("%.3f", overall_cor$p.value)))
  }
)

ggplot(avg_data, aes(x = age, y = overall_accuracy)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_few() +
  ylab("Mean accuracy (1-6)") +
  xlab("Participant age") +
  ylim(1, 6) +
  annotate(
    "text",
    x = 20, y = 5.5,
    label = overall_label,
    hjust = 0,
    size = 5
  )
## `geom_smooth()` using formula = 'y ~ x'

Average rating for correct song by function.

# Same scatter plot and linear fit as the overall figure, with one panel per
# song type and that type's correlation label (from `cor_table`) in each panel.
ggplot(avg_data_function, aes(x = age, y = overall_accuracy)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_few() +
  ylab("Mean accuracy (1-6)") +
  xlab("Participant age") +
  # Accuracy is numeric, so use a continuous scale with ticks at 1..6. A
  # discrete scale here triggers a "Continuous limits supplied to discrete
  # scale" warning.
  scale_y_continuous(breaks = 1:6) +
  # Zoom without dropping any data, leaving headroom above 6 for the labels.
  coord_cartesian(ylim = c(1, 7)) +
  facet_grid(~ correct_function) +
  geom_text(
    data = cor_table,
    aes(x = 20, y = 6.5, label = label),  # adjust position as needed
    inherit.aes = FALSE,
    hjust = 0
  )
## `geom_smooth()` using formula = 'y ~ x'