library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
library(ggthemes)
# Read the two input CSVs; both paths are relative to the working directory.
data_raw <- read.csv(file = "NAIV_Exp1.csv")
songs_raw <- read.csv(file = "NHSDiscography_Metadata.csv")
Songs.
# Lookup table: one row per song with its number and its true function.
#   * song_number      - the trailing digits of `song`, as numeric
#   * correct_function - `type` lower-cased, with "lullaby" relabelled "baby"
#     (presumably to match the response-column prefixes; confirm against the
#     survey export)
songs <- songs_raw %>%
  clean_names() %>%
  mutate(
    correct_function = recode(tolower(type), "lullaby" = "baby"),
    song_number = as.numeric(str_extract(song, "\\d+$"))
  ) %>%
  select(correct_function, song_number)
Data.
# Reshape the raw responses to long format: one row per participant x question,
# with the question name split into its function prefix and song number.
data <- data_raw %>%
  clean_names() %>%
  # The participant id is the row position in the raw file.
  mutate(participant = row_number()) %>%
  select(participant, age, uhoh1:story118) %>%
  pivot_longer(
    cols = -c(participant, age),
    names_to = "question",
    values_to = "response"
  ) %>%
  # Split e.g. "uhoh1" into its letters ("uhoh") and its digits ("1").
  extract(
    col = question,
    into = c("possible_function", "song_number"),
    regex = "([a-zA-Z]+)([0-9]+)"
  ) %>%
  mutate(song_number = as.numeric(song_number))
# Keep the usable trials and score each one.
#   1. Keep a participant x song only if its "uhoh" item was answered 0
#      (presumably 0 = no problem reported; confirm against the survey).
#   2. Drop the "uhoh" rows themselves and any row with a missing value.
#   3. Attach each song's true function and keep only the rating given to that
#      function; that rating is the trial's `song_accuracy`.
# An inner join plus an explicit filter replaces the previous
# full_join() / NA-sentinel / na.omit() sequence: the resulting rows and
# columns are the same, but no phantom song-only rows are created along the
# way and the selection criterion is stated directly.
data_cleaned <- data %>%
  group_by(participant, song_number) %>%
  filter(any(possible_function == "uhoh" & response == 0)) %>%
  ungroup() %>%
  filter(possible_function != "uhoh") %>%
  drop_na() %>%
  inner_join(songs, by = "song_number") %>%
  filter(possible_function == correct_function) %>%
  mutate(song_accuracy = response)
# Per-participant summary: number of scored songs, mean rating given to the
# correct function, and age (averaged over the participant's rows).
avg_data <- data_cleaned %>%
  group_by(participant) %>%
  summarise(
    n_songs = n(),
    overall_accuracy = mean(song_accuracy),
    age = mean(age),
    .groups = "drop"
  )
# Per-participant, per-song-type summary: number of scored songs, mean rating
# given to the correct function, and age (averaged over the rows in the cell).
# `.groups = "drop"` returns an ungrouped tibble, so later verbs do not
# silently inherit the leftover `participant` grouping (and the summarise()
# regrouping message is no longer emitted).
avg_data_function <- data_cleaned %>%
  group_by(participant, correct_function) %>%
  summarise(
    n_songs = n(),
    overall_accuracy = mean(song_accuracy),
    age = mean(age),
    .groups = "drop"
  )
Correlation between age and overall accuracy.
# Pearson correlation between participant age and overall mean accuracy.
cor.test(x = avg_data$age, y = avg_data$overall_accuracy)
##
## Pearson's product-moment correlation
##
## data: avg_data$age and avg_data$overall_accuracy
## t = 2.4072, df = 748, p-value = 0.01632
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.01619125 0.15827350
## sample estimates:
## cor
## 0.08767828
Correlation between age and overall accuracy, computed separately for each song type.
# Pearson correlation between age and mean accuracy within each song type,
# plus a ready-to-plot text label per type.
cor_table <- avg_data_function %>%
  group_by(correct_function) %>%
  summarise(
    cor_test = list(cor.test(overall_accuracy, age)),
    .groups = "drop"
  ) %>%
  mutate(
    r = map_dbl(cor_test, ~ .x$estimate),
    p_value = map_dbl(cor_test, ~ .x$p.value),
    # sprintf() keeps trailing zeros (0.180 rather than 0.18), and very small
    # p-values are reported as "p < .001" instead of "p = < .001".
    label = paste0(
      "r = ", sprintf("%.2f", r), ", ",
      ifelse(
        p_value < 0.001,
        "p < .001",
        paste0("p = ", sprintf("%.3f", p_value))
      )
    )
  )
cor_table
## # A tibble: 4 × 5
## correct_function cor_test r p_value label
## <chr> <list> <dbl> <dbl> <chr>
## 1 baby <htest> 0.0312 0.394 r = 0.03, p = 0.394
## 2 dance <htest> 0.0840 0.0214 r = 0.08, p = 0.021
## 3 healing <htest> 0.0490 0.180 r = 0.05, p = 0.180
## 4 love <htest> 0.0903 0.0134 r = 0.09, p = 0.013
Average rating given to the correct song function, plotted against participant age.
# Scatter plot of each participant's mean accuracy against age, with a linear
# fit. The annotation is computed from the same data as the plot, so it cannot
# drift from the reported correlation if the inputs or the cleaning change.
age_accuracy_cor <- cor.test(avg_data$age, avg_data$overall_accuracy)
age_accuracy_p <- age_accuracy_cor$p.value
age_accuracy_label <- paste0(
  "r = ", sprintf("%.3f", age_accuracy_cor$estimate), ", ",
  if (age_accuracy_p < 0.001) {
    "p < .001"
  } else {
    # Drop the leading zero, e.g. "0.016" -> ".016".
    paste0("p = ", sub("^0", "", sprintf("%.3f", age_accuracy_p)))
  }
)

ggplot(avg_data, aes(x = age, y = overall_accuracy)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_few() +
  ylab("Mean accuracy (1-6)") +
  xlab("Participant age") +
  ylim(1, 6) +
  annotate(
    "text",
    x = 20, y = 5.5,
    label = age_accuracy_label,
    hjust = 0,
    size = 5
  )
## `geom_smooth()` using formula = 'y ~ x'
Average rating for correct song by function.
# Scatter plot of mean accuracy against age, one facet per song type, with a
# linear fit and the per-type correlation label from `cor_table`.
ggplot(avg_data_function, aes(x = age, y = overall_accuracy)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_few() +
  ylab("Mean accuracy (1-6)") +
  xlab("Participant age") +
  # y is continuous, so use a continuous scale with ticks at 1..6; the
  # previous discrete scale triggered "Continuous limits supplied to discrete
  # scale".
  scale_y_continuous(breaks = 1:6) +
  # Zoom with the coordinate system instead of scale limits so no data are
  # dropped and the label row at y = 6.5 stays inside the panel.
  coord_cartesian(ylim = c(1, 6.75)) +
  facet_grid(~ correct_function) +
  geom_text(
    data = cor_table,
    aes(x = 20, y = 6.5, label = label), # adjust position as needed
    inherit.aes = FALSE,
    hjust = 0
  )
## `geom_smooth()` using formula = 'y ~ x'