Bahrain_Analyses

library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("psycho")
library("emmeans")
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'
library("lme4")
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'lme4'

The following object is masked from 'package:psycho':

    golden
library("lmerTest")

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
library("jsonlite")

Attaching package: 'jsonlite'

The following object is masked from 'package:purrr':

    flatten
library("parameters")
library("TOSTER")

# Session-wide settings for the rendered report.
# NOTE(review): warn = -1 silences every warning for the rest of the session,
# including any raised while fitting the models further down -- confirm that
# this is intended.
options(
  warn = -1,
  dplyr.summarise.inform = FALSE,
  readr.show_col_types = FALSE
)

# classic ggplot theme with an 18 pt base font for every figure
theme_set(theme_classic(base_size = 18))

Load data and do sanity checks

#main JATOS export; rows with an empty participant field and rows whose
#participant field is the literal string "participant" are dropped
df.dat_all <- read.csv("/Users/ckaicher/Library/Mobile Documents/com~apple~CloudDocs/Documents/Projects/CSPAN/RegisteredReport/Round 2/data/jatos_results_data_20251117214247.txt") %>% 
  filter(participant != '') %>% 
  filter(participant != "participant") 

#there are 4 participants whose prolific IDs were not recorded, so here I am reading in their individual files, assigning them dummy IDs, and adding them to the df with everyone's data
folder_path <- "data/no_prolific_id"

#the pattern is a regular expression: escape the dot and anchor it so that only
#files ending in ".txt" are picked up
files <- list.files(path = folder_path,
                    pattern = "\\.txt$",
                    all.files = FALSE,
                    full.names = FALSE)

#seq_along() makes the loop a no-op when the folder is empty (1:length(files)
#would iterate over 1 and 0)
for (i in seq_along(files)) {
  participant_id <- paste0("S00", i)
  df.dat <- read_csv(file.path(folder_path, files[i]))
  df.dat$participant <- participant_id
  #each file is prepended to the combined data, as before
  df.dat_all <- rbind(df.dat, df.dat_all)
}

#check that there are 86 participants in each condition
df.dat_all %>% 
  filter(condition == '1') %>% 
  pull(participant) %>% 
  n_distinct() %>% 
  paste0("Linguistic: ", ., " participants")
[1] "Linguistic: 86 participants"
df.dat_all %>% 
  filter(condition == '2') %>% 
  pull(participant) %>% 
  n_distinct() %>% 
  paste0("Nonlinguistic: ", ., " participants")
[1] "Nonlinguistic: 86 participants"
#make sure everyone consented (lists every distinct value recorded in consent_given)
df.dat_all %>% 
  distinct(consent_given) %>% 
  pull(consent_given)
[1] "TRUE" "true"

Clean, tidy, and check for exclusions

Functions

#there were some files where the column names got shifted -- this function deals with that and then cleans the data
#
# dat: raw rows for a single participant
# returns: a data frame with the columns participant, condition, trial_index,
#          stimulus, response, phase and correct (numeric 1/0/NA), restricted
#          to rows that have a stimulus
clean_data <- function(dat) {
  #isTRUE() keeps the checks from erroring when the first value is NA or the column is absent
  if (isTRUE(dat$sensitive_data_yes[1] == 'Female')) {
    print("colname error -- sex in sensitive data column")
    #move every column name from position 14 onwards one position to the left
    shifted_names <- colnames(dat)
    n_cols <- length(shifted_names)
    shifted_names[13:(n_cols - 1)] <- shifted_names[14:n_cols]
    colnames(dat) <- shifted_names
    names(dat)[30] <- "dummy_col"
    dat <- mutate(dat, sensitive_data_yes = NA)
  }
  if (isTRUE(dat$sex[1] == 'true')) {
    print("colname error - true/false in sex column")
    #add a spare column, then move every column name from position 14 onwards
    #one position to the right; position 14 becomes "dummy2" and the name of the
    #spare column falls off the end
    dat <- mutate(dat, dummy1 = NA)
    og_colnames <- colnames(dat)
    n_cols <- length(og_colnames)
    new_colnames <- og_colnames
    #the replacement has exactly as many names as positions being replaced, so
    #R no longer has to truncate it (which raised a length-mismatch warning)
    new_colnames[14:n_cols] <- c("dummy2", og_colnames[14:(n_cols - 1)])
    colnames(dat) <- new_colnames
  }
  dat_clean <- dat %>%
    select(participant,
           condition,
           trial_index,
           stimulus,
           response,
           phase,
           accuracy,
           correct) %>% 
    filter(!is.na(stimulus)) %>%
    mutate(accuracy = as.numeric(accuracy)) %>% 
    #correct is recorded as TRUE/FALSE in either letter case; anything else becomes NA
    mutate(correct = case_when(
      correct %in% c("TRUE", "true") ~ 1,
      correct %in% c("FALSE", "false") ~ 0,
      .default = NA)) %>%
    #fall back on the accuracy column wherever correct could not be decoded
    mutate(correct = coalesce(correct, accuracy)) %>%
    select(-accuracy) %>% 
    #"ax" rows with a trial_index above 1000 are relabelled "ax2"
    #NOTE(review): presumably the post-training discrimination task -- confirm the 1000 cut-off
    mutate(phase = ifelse(test = (phase == "ax" & as.numeric(trial_index) > 1000),
                          yes = "ax2",
                          no = phase))
  return (dat_clean)
}

#gives percentage of alternating responses during task (for exclusion criteria)
#
# dat: data frame with a `response` column, rows in presentation order
# returns: percentage (0-100) of consecutive response pairs that differ; 0 when
#          there are fewer than two responses
#
#rows without a key press are recorded as the string "null" (not NA), so they
#have to be dropped along with NA; otherwise the "null" rows sit between the
#key presses and the percentage barely depends on what was pressed
check_alternating <- function(dat) {
  responses <- dat$response
  responses <- responses[!is.na(responses) & responses != "null"]
  if (length(responses) < 2) {
    return(0)
  }
  #compare every response with the one before it
  changed <- responses[-1] != responses[-length(responses)]
  mean(changed) * 100
}

#checks for the exclusion criteria in the registered report
#
# dat: cleaned data for a single participant (output of clean_data)
# returns: "include", or a string describing the first exclusion criterion met
#          (checked in the order practice, ax, train, ax2)
exclusion_check <- function(dat) {
  #number of rows in a phase with the given response (NA responses are never counted)
  n_responses <- function(phase_dat, value) {
    sum(phase_dat$response == value, na.rm = TRUE)
  }
  
  #criteria shared by the two discrimination phases (120 trials each);
  #returns the exclusion message, or NULL when no criterion is met
  check_ax_phase <- function(phase_dat, label) {
    if (n_responses(phase_dat, 'arrowleft')/120 > 0.9 ||
        n_responses(phase_dat, 'arrowright')/120 > 0.9) {
      return (paste0("exclude for pressing the same key for more than 90% of ", label, " trials"))
    }
    if (n_responses(phase_dat, "null") > 250) { #250 because there are already 240 null response rows just with the way the data was output
      return (paste0("exclude for not responding to more than 10 ", label, " trials"))
    }
    if (check_alternating(phase_dat) > 90) {
      return (paste0("exclude for alternating button presses for more than 90% of ", label, " trials"))
    }
    NULL
  }
  
  practice <- dat %>% 
    filter(phase == 'ax_practice')
  if (mean(as.numeric(practice$correct), na.rm = TRUE) < 0.8) {
    return ("exclude for not passing practice")
  }
  
  ax1_status <- check_ax_phase(filter(dat, phase == 'ax'), "ax1")
  if (!is.null(ax1_status)) {
    return (ax1_status)
  }
  
  #the key-to-category assignment depends on whether the stimulus name contains
  #"mapping1" or "mapping2"
  train <- dat %>% 
    filter(phase == 'train') %>% 
    mutate(category_chosen = case_when(
      grepl("mapping1", stimulus) & response == "arrowleft" ~ 1,
      grepl("mapping1", stimulus) & response == "arrowright" ~ 2,
      grepl("mapping2", stimulus) & response == "arrowleft" ~ 2,
      grepl("mapping2", stimulus) & response == "arrowright" ~ 1,
      .default = NA))
  if (sum(train$category_chosen == 1, na.rm = TRUE)/400 > 0.9 ||
      sum(train$category_chosen == 2, na.rm = TRUE)/400 > 0.9) {
    return ("exclude for choosing the same category in more than 90% of training trials")
  }
  if (n_responses(train, "null") > 410) { #410 because there are already 400 null response rows just with the way the data was output
    return ("exclude for not responding to more than 10 training trials")
  }
  #both keys are counted on the response column; the right-key count was
  #previously taken from category_chosen, which never equals "arrowright"
  if (n_responses(train, "arrowleft")/400 > 0.9 ||
      n_responses(train, "arrowright")/400 > 0.9) {
    return ("exclude for pressing the same key in more than 90% of training trials")
  }
  
  ax2_status <- check_ax_phase(filter(dat, phase == 'ax2'), "ax2")
  if (!is.null(ax2_status)) {
    return (ax2_status)
  }
  
  return("include")
}

#makes separate df with data ready to calculate d-prime for the pre and post discrimination tasks
#
# dat: cleaned data for a single participant (output of clean_data)
# returns: one row per discrimination trial (phases "ax" and "ax2") with the
#          columns participant, condition, trial_index, response, phase,
#          correct, stim1, stim2 and same (1 when stim1 and stim2 are identical)
#
#each trial is expected to occupy three consecutive rows (first sound, second
#sound, response prompt), with the response and its correctness on the third row
dprime_prep <- function(dat) {
  rows_per_trial <- 3
  n_trials <- 120
  
  #collapses the three rows of every trial in one phase into a single row
  widen_phase <- function(phase_dat, label) {
    n_rows <- nrow(phase_dat)
    if (n_rows != rows_per_trial * n_trials) {
      stop("expected ", rows_per_trial * n_trials, " rows in phase '", label,
           "' but found ", n_rows, call. = FALSE)
    }
    #index of the prompt row of the trial each row belongs to: 3, 3, 3, 6, 6, 6, ...
    prompt_row <- rep(seq(rows_per_trial, n_rows, by = rows_per_trial),
                      each = rows_per_trial)
    #copy the response and its correctness onto all three rows of the trial so
    #that they are constant within a trial when the rows are pivoted
    phase_dat$response <- phase_dat$response[prompt_row]
    phase_dat$correct <- phase_dat$correct[prompt_row]
    
    phase_dat %>% 
      mutate(trial_index = rep(seq_len(n_trials), each = rows_per_trial)) %>% 
      #missing responses are recorded as the string 'null'
      mutate(response = ifelse(test = response != 'null',
                               yes = response,
                               no = NA)) %>%
      mutate(stim_type = rep(x = c('stim1', 'stim2', 'prompt'),
                             times = n_trials)) %>% 
      pivot_wider(names_from = stim_type,
                  values_from = stimulus)
  }
  
  ax1 <- widen_phase(filter(dat, phase == 'ax'), 'ax')
  ax2 <- widen_phase(filter(dat, phase == 'ax2'), 'ax2')
  
  df.ax <- rbind(ax1, ax2) %>% 
    mutate(same = ifelse(test = stim1 == stim2, 
                         yes = 1,
                         no = 0)) %>% 
    select(-prompt)
  
  return (df.ax)
}

#calculates d-prime for pre and post discrimination tasks
#
# dat: trial-level discrimination data (output of dprime_prep)
# returns: one row per participant with the columns participant, condition,
#          ax (pre d-prime), ax2 (post d-prime) and dprime_diff (ax2 - ax)
#
#"arrowright" is counted as the signal response: on same-sound trials it is a
#false alarm and "arrowleft" is a correct rejection
#NOTE(review): this assumes arrowright means "different" -- confirm against the task
calc_dprime <- function(dat) {
  
  df.dprime <- dat %>% 
    filter(!is.na(response),
           phase %in% c("ax", "ax2")) %>% 
    group_by(participant,
             phase,
             condition) %>% 
    #all four counts are computed inside the grouping so that they stay specific
    #to one participant and phase even when several participants are passed in
    summarize(n_hit = sum(correct[response == "arrowright"], na.rm = TRUE),
              n_fa = sum(same[response == "arrowright"], na.rm = TRUE),
              n_miss = length(which(correct[response == "arrowleft"] == 0)),
              n_cr = sum(same[response == "arrowleft"], na.rm = TRUE),
              dprime = psycho::dprime(n_hit, n_fa, n_miss, n_cr)$dprime) %>% 
    select(-n_hit, -n_fa, -n_miss, -n_cr) %>% 
    pivot_wider(names_from = phase,
                values_from = dprime) %>% 
    mutate(dprime_diff = ax2 - ax)

  return (df.dprime)
}

#makes summary df: mean of `correct` (ignoring NA) for each participant, condition and phase;
#rows whose phase is NA are left out
phase_summarize <- function(dat) {
  grouped <- group_by(dat, participant, condition, phase)
  per_phase <- summarize(grouped, mean_correct = mean(correct, na.rm = TRUE))
  filter(per_phase, !is.na(phase))
}

Data processing

#get list of participant IDs
participants <- unique(df.dat_all$participant)

#one list slot per participant for each kind of result; the pieces are combined
#once after the loop instead of growing a data frame on every iteration
n_participants <- length(participants)
clean_list <- vector("list", n_participants)   #cleaned data
ax_list <- vector("list", n_participants)      #just discrimination trials
summary_list <- vector("list", n_participants) #phase summary info
dprime_list <- vector("list", n_participants)  #d-prime summary info
status_list <- vector("list", n_participants)  #whether the participant should be excluded

#for each participant, clean data and check if they should be excluded, then summarize and calculate d-prime
for (i in seq_along(participants)) {
  df.dat <- filter(df.dat_all, participant == participants[i])
  df.dat_clean <- clean_data(df.dat)
  participant_status <- exclusion_check(df.dat_clean)
  status_list[[i]] <- tibble(participant = df.dat_clean$participant[1],
                             status = participant_status)
  clean_list[[i]] <- df.dat_clean
  df.byPhase <- phase_summarize(df.dat_clean)
  summary_list[[i]] <- df.byPhase
  df.ax <- dprime_prep(df.dat_clean)
  ax_list[[i]] <- df.ax
  df.dprime <- calc_dprime(df.ax)
  dprime_list[[i]] <- df.dprime
}

#statuses are kept in processing order; the other tables put the most recently
#processed participant first, which is the row order the previous
#prepend-on-every-iteration code produced
df.status <- bind_rows(status_list)
df.dat_clean_all <- bind_rows(rev(clean_list))
df.summary <- bind_rows(rev(summary_list))
df.ax_all <- bind_rows(rev(ax_list))
df.dprime_summary <- bind_rows(rev(dprime_list))
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error -- sex in sensitive data column"
[1] "colname error -- sex in sensitive data column"
[1] "colname error - true/false in sex column"
[1] "colname error -- sex in sensitive data column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error -- sex in sensitive data column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"
[1] "colname error - true/false in sex column"

Exclude participants and see final numbers

#participants whose exclusion check came back as "include"
to_include <- df.status %>% 
  filter(status == "include") %>% 
  pull(participant)

df.dat_clean_include <- filter(df.dat_clean_all, participant %in% to_include)

df.dat_clean_include %>% 
  filter(condition == 1) %>% 
  pull(participant) %>% 
  n_distinct() %>% 
  paste0("Linguistic: ", ., " participants")
[1] "Linguistic: 84 participants"
df.dat_clean_include %>% 
  filter(condition == 2) %>% 
  pull(participant) %>% 
  n_distinct() %>% 
  paste0("Nonlinguistic: ", ., " participants")
[1] "Nonlinguistic: 83 participants"
#restrict the remaining tables to the included participants
df.summary_include <- filter(df.summary, participant %in% to_include)

df.dprime_summary_include <- filter(df.dprime_summary, participant %in% to_include)

df.ax_all_include <- filter(df.ax_all, participant %in% to_include)

Analyses

1) Does learning speech sound categories with linguistic labels increase categorical perception of the speech sounds compared to learning with nonlinguistic labels? (LME on ax accuracy data with 3-way interaction of phase, condition, and comparison[within/between])

#need to add column for whether each trial was a within or between category distinction
#every pair is stored as one "stim1 stim2" key (the format paste(stim1, stim2) produces below)

#within-category pairs: every pair of two different sounds taken from the same group of five
#(previously built with nested c() calls, which flattened the pairs into 40 separate file names)
within_pairs <- c(combn(c("short_contin_000.wav",
                          "short_contin_011.wav",
                          "short_contin_022.wav",
                          "short_contin_033.wav",
                          "short_contin_044.wav"),
                        2, paste, collapse = " "),
                  combn(c("short_contin_056.wav",
                          "short_contin_067.wav",
                          "short_contin_078.wav",
                          "short_contin_089.wav",
                          "short_contin_100.wav"),
                        2, paste, collapse = " "))

between_pairs <- c(paste("short_contin_011.wav","short_contin_056.wav"),
                   paste("short_contin_022.wav", "short_contin_056.wav"),
                   paste("short_contin_022.wav", "short_contin_067.wav"),
                   paste("short_contin_033.wav", "short_contin_056.wav"),
                   paste("short_contin_033.wav", "short_contin_067.wav"),
                   paste("short_contin_033.wav", "short_contin_078.wav"),
                   paste("short_contin_044.wav", "short_contin_056.wav"),
                   paste("short_contin_044.wav", "short_contin_067.wav"),
                   paste("short_contin_044.wav", "short_contin_078.wav"),
                   paste("short_contin_044.wav", "short_contin_089.wav"))

#a trial is 'between' when its pair (in either presentation order) is in between_pairs;
#every other trial, including trials where both sounds are the same, is labelled 'within'
df.ax_all_include <- df.ax_all_include %>% 
  mutate(comparison = ifelse(test = (paste(stim1, stim2) %in% between_pairs | paste(stim2, stim1) %in% between_pairs),
                              yes = 'between',
                              no = 'within')) %>% 
  mutate(item = paste(stim1, stim2)) #for random effects

#only want to use trials where the sounds were different
df.ax_nosame <- filter(df.ax_all_include, same == 0)

#visualize data: mean accuracy (bars) with standard errors (line ranges) by
#comparison type and phase, one panel per condition
df.ax_nosame %>% 
  ggplot(aes(x = comparison, y = correct, fill = phase)) +
  stat_summary(fun = "mean", geom = "bar",
               position = position_dodge(width = 0.9)) +
  stat_summary(fun.data = mean_se, geom = "linerange",
               position = position_dodge(width = 0.9)) +
  facet_grid(cols = vars(condition))

#full model as described in the registered report -- did not converge (we acknowledged that the model might not converge with the random slopes)
#kept commented out for reference; it adds random slopes by participant (phase, comparison) and by item (phase, condition)

# disc.lme <- glmer(correct ~ phase * condition * comparison + (1|participant) + (1|item) + (0 + phase | participant) + (0 + comparison | participant) + (0 + phase | item) + (0 + condition | item),
#                  family = 'binomial',
#                  data = df.ax_nosame)
# disc.lme

#model without random slopes
#binomial (logistic) mixed model of trial accuracy on different-sound trials:
#fixed effects are phase, condition, comparison and all of their interactions;
#random intercepts for participant and for item (the "stim1 stim2" pair)
disc.lme <- glmer(correct ~ phase * condition * comparison + (1|participant) + (1|item),
                 family = 'binomial',
                 data = df.ax_nosame)
summary(disc.lme)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: correct ~ phase * condition * comparison + (1 | participant) +  
    (1 | item)
   Data: df.ax_nosame

     AIC      BIC   logLik deviance df.resid 
 30379.2  30461.2 -15179.6  30359.2    26710 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4479 -0.6892 -0.3313  0.7129  5.6305 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.3281   0.5728  
 item        (Intercept) 0.7951   0.8917  
Number of obs: 26720, groups:  participant, 167; item, 60

Fixed effects:
                                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)                           0.31081    0.21242   1.463   0.1434    
phaseax2                              0.28470    0.05448   5.226 1.73e-07 ***
condition2                           -0.07488    0.10385  -0.721   0.4709    
comparisonwithin                     -1.21513    0.25066  -4.848 1.25e-06 ***
phaseax2:condition2                   0.03764    0.07732   0.487   0.6264    
phaseax2:comparisonwithin            -0.37970    0.08004  -4.744 2.10e-06 ***
condition2:comparisonwithin           0.20078    0.07945   2.527   0.0115 *  
phaseax2:condition2:comparisonwithin -0.01520    0.11303  -0.134   0.8930    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) phasx2 cndtn2 cmprsn phs2:2 phsx2: cndt2:
phaseax2    -0.125                                          
condition2  -0.243  0.256                                   
comprsnwthn -0.774  0.106  0.056                            
phsx2:cndt2  0.088 -0.704 -0.364 -0.074                     
phsx2:cmprs  0.085 -0.681 -0.174 -0.156  0.479              
cndtn2:cmpr  0.086 -0.334 -0.355 -0.159  0.474  0.494       
phsx2:cnd2: -0.060  0.481  0.249  0.111 -0.684 -0.708 -0.699
#a priori comparisons -- trial accuracy from pretest to posttest for within- and between-category discriminations in the Linguistic and Nonlinguistic label conditions

#estimated marginal means for each phase, computed separately for every
#combination of condition and comparison
emm <- emmeans(disc.lme,
               specs = ~ phase,
               by = c("condition","comparison"))

#pairwise phase contrasts within each condition x comparison combination
#NOTE(review): phase has two levels, so each combination yields a single
#contrast and the Tukey adjustment presumably has nothing to correct within it --
#confirm whether a correction across the four combinations was intended
contrasts_pairwise <- contrast(emm,
                               method = "pairwise",
                               adjust = "tukey")

summary(contrasts_pairwise)
condition = 1, comparison = between:
 contrast estimate     SE  df z.ratio p.value
 ax - ax2  -0.2847 0.0545 Inf  -5.226  <.0001

condition = 2, comparison = between:
 contrast estimate     SE  df z.ratio p.value
 ax - ax2  -0.3223 0.0549 Inf  -5.868  <.0001

condition = 1, comparison = within:
 contrast estimate     SE  df z.ratio p.value
 ax - ax2   0.0950 0.0586 Inf   1.620  0.1052

condition = 2, comparison = within:
 contrast estimate     SE  df z.ratio p.value
 ax - ax2   0.0726 0.0580 Inf   1.252  0.2106

Results are given on the log odds ratio (not the response) scale. 
#equivalence testing
#run on the fixed effects of the model without random slopes; no equivalence
#range is passed, so the bounds are whatever the function chooses by default
#NOTE(review): confirm that the default range matches the smallest effect size of interest in the registered report
equivalence_test(disc.lme,
                 effects = 'fixed')
# TOST-test for Practical Equivalence

  ROPE: [-0.18 0.18]

Parameter                                           |         90% CI |   SGPV
-----------------------------------------------------------------------------
(Intercept)                                         | [-0.04,  0.66] | 0.231 
phase [ax2]                                         | [ 0.20,  0.37] | 0.012 
condition [2]                                       | [-0.25,  0.10] | 0.887 
comparison [within]                                 | [-1.63, -0.80] | < .001
phase [ax2] × condition [2]                         | [-0.09,  0.16] | 0.986 
phase [ax2] × comparison [within]                   | [-0.51, -0.25] | 0.002 
condition [2] × comparison [within]                 | [ 0.07,  0.33] | 0.386 
(phase [ax2] × condition [2]) × comparison [within] | [-0.20,  0.17] | 0.941 

Parameter                                           | Equivalence |      p
--------------------------------------------------------------------------
(Intercept)                                         |   Undecided | 0.739 
phase [ax2]                                         |    Rejected | 0.971 
condition [2]                                       |   Undecided | 0.159 
comparison [within]                                 |    Rejected | > .999
phase [ax2] × condition [2]                         |    Accepted | 0.034 
phase [ax2] × comparison [within]                   |    Rejected | 0.993 
condition [2] × comparison [within]                 |    Rejected | 0.596 
(phase [ax2] × condition [2]) × comparison [within] |   Undecided | 0.112 

2) Does learning speech sound categories with linguistic labels result in better overall discrimination of the speech sounds compared to learning with nonlinguistic labels? (Compare change in d′ scores from pretest to posttest across linguistic and nonlinguistic groups)

#visualize data: mean change in d-prime per condition, with standard errors
df.dprime_summary_include %>% 
  ggplot(aes(x = condition, y = dprime_diff)) +
  stat_summary(fun = "mean", geom = "bar") +
  stat_summary(fun.data = mean_se, geom = "linerange")

#Welch t-test on the change in d-prime, condition 1 versus condition 2
t.test(
  df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 1],
  df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 2]
)

    Welch Two Sample t-test

data:  df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 1] and df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 2]
t = 0.65078, df = 162.75, p-value = 0.5161
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1048354  0.2079036
sample estimates:
 mean of x  mean of y 
0.11623141 0.06469731 
# TOST equivalence test
#NOTE(review): results printed by an earlier run used raw-unit bounds and must be regenerated
dprime_diff_cond1 <- df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 1]
dprime_diff_cond2 <- df.dprime_summary_include$dprime_diff[df.dprime_summary_include$condition == 2]

#group sizes are counted from the values that enter the means and SDs, so they
#stay correct if the exclusions change or a value is missing
tsum_TOST(m1 = mean(dprime_diff_cond1, na.rm = TRUE),
          sd1 = sd(dprime_diff_cond1, na.rm = TRUE),
          n1 = sum(!is.na(dprime_diff_cond1)),
          m2 = mean(dprime_diff_cond2, na.rm = TRUE),
          sd2 = sd(dprime_diff_cond2, na.rm = TRUE),
          n2 = sum(!is.na(dprime_diff_cond2)),
          eqb = 0.2, #using classic "small" cohen's d for smallest effect size of interest
          eqbound_type = "SMD") #eqb is read in raw units unless the bound type is set to standardized

Welch Two Sample t-test

The equivalence test was significant, t(162.75) = -1.875, p = 3.13e-02
The null hypothesis test was non-significant, t(162.75) = 0.651, p = 5.16e-01
NHST: don't reject null significance hypothesis that the effect is equal to zero 
TOST: reject null equivalence hypothesis

TOST Results 
                 t    df p.value
t-test      0.6508 162.7   0.516
TOST Lower  3.1764 162.7 < 0.001
TOST Upper -1.8748 162.7   0.031

Effect Sizes 
               Estimate      SE              C.I. Conf. Level
Raw             0.05153 0.07919 [-0.0795, 0.1825]         0.9
Hedges's g(av)  0.10029 0.15585 [-0.1535, 0.3538]         0.9
Note: SMD confidence intervals are an approximation. See vignette("SMD_calcs").

3) Does learning speech sound categories with linguistic labels assist training accuracy of the speech sounds compared to learning with nonlinguistic labels? (t-test comparing training accuracy between linguistic and nonlinguistic groups)

#visualize data
#only the training phase is plotted: df.summary_include holds one row per
#participant and phase, so without the filter the bars would average over every phase
df.summary_include %>% 
  filter(phase == 'train') %>% 
  drop_na(mean_correct) %>% 
  ggplot(mapping = aes(x = condition, y = mean_correct)) +
  stat_summary(fun = "mean",
               geom = "bar") +
  stat_summary(fun.data = mean_se,
               geom = "linerange")

#Welch t-test on mean training accuracy, condition 1 versus condition 2
t.test(df.summary_include$mean_correct[df.summary_include$phase == 'train' & df.summary_include$condition == 1], 
       df.summary_include$mean_correct[df.summary_include$phase == 'train' & df.summary_include$condition == 2])

    Welch Two Sample t-test

data:  df.summary_include$mean_correct[df.summary_include$phase == "train" & df.summary_include$condition == 1] and df.summary_include$mean_correct[df.summary_include$phase == "train" & df.summary_include$condition == 2]
t = 1.0582, df = 135.03, p-value = 0.2919
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.02100114  0.06933710
sample estimates:
mean of x mean of y 
0.7971474 0.7729795 
#TOST equivalence test
#NOTE(review): results printed by an earlier run used fixed group sizes and raw-unit bounds and must be regenerated
train_acc_cond1 <- df.summary_include$mean_correct[df.summary_include$phase == 'train' & df.summary_include$condition == 1]
train_acc_cond2 <- df.summary_include$mean_correct[df.summary_include$phase == 'train' & df.summary_include$condition == 2]

#missing accuracies are dropped from the means and SDs, so the group sizes must
#count only the values that remain; fixed sizes of 84 and 83 overstate the
#sample whenever a value is missing and make the standard error too small
tsum_TOST(m1 = mean(train_acc_cond1, na.rm = TRUE),
          sd1 = sd(train_acc_cond1, na.rm = TRUE),
          n1 = sum(!is.na(train_acc_cond1)),
          m2 = mean(train_acc_cond2, na.rm = TRUE),
          sd2 = sd(train_acc_cond2, na.rm = TRUE),
          n2 = sum(!is.na(train_acc_cond2)),
          eqb = 0.2, #using classic "small" cohen's d for smallest effect size of interest
          eqbound_type = "SMD") #eqb is read in raw units unless the bound type is set to standardized

Welch Two Sample t-test

The equivalence test was significant, t(153.88) = -8.129, p = 6.67e-14
The null hypothesis test was non-significant, t(153.88) = 1.117, p = 2.66e-01
NHST: don't reject null significance hypothesis that the effect is equal to zero 
TOST: reject null equivalence hypothesis

TOST Results 
                t    df p.value
t-test      1.117 153.9   0.266
TOST Lower 10.363 153.9 < 0.001
TOST Upper -8.129 153.9 < 0.001

Effect Sizes 
               Estimate      SE             C.I. Conf. Level
Raw             0.02417 0.02163  [-0.0116, 0.06]         0.9
Hedges's g(av)  0.17221 0.15613 [-0.0821, 0.426]         0.9
Note: SMD confidence intervals are an approximation. See vignette("SMD_calcs").