Credit:

Many code chunks are adapted from the recitation notebook created for CMU 85732

Set up the mean and SD as measured from the plot:

The original plot

N <- 24
deviant_simple_mean <- (85.9 / 31.59) * 500
deviant_unusual_mean <- (181.44 / 31.59) * 500
deviant_mixed_mean <- (98.88 / 31.59) * 500

deviant_simple_sem <- (9.92 / 31.59) * 500
deviant_unusual_sem <- (26.1 / 31.59) * 500
deviant_mixed_sem <- (15.6 / 31.59) * 500

deviant_simple_sd <- deviant_simple_sem * sqrt(N)
deviant_unusual_sd <- deviant_unusual_sem * sqrt(N)
deviant_mixed_sd <- deviant_mixed_sem * sqrt(N)

bkgd_simple_mean <- (48.02 / 31.59) * 500
bkgd_unusual_mean <- (53.6 / 31.59) * 500
bkgd_mixed_mean <- (58.53 / 31.59) * 500

bkgd_simple_sem <- (10.52 / 31.59) * 500
bkgd_unusual_sem <- (4.27 / 31.59) * 500
bkgd_mixed_sem <- (16.94 / 31.59) * 500

bkgd_simple_sd <- bkgd_simple_sem * sqrt(N)
bkgd_unusual_sd <- bkgd_unusual_sem * sqrt(N)
bkgd_mixed_sd <- bkgd_mixed_sem * sqrt(N)

Generate artificial data

Assuming normal distribution of the looking time data will lead to some “negative” looking time data. Also, because the plot only provides data aggregated across participant levels, the generated data is not going to be perfectly fit for lmer analysis where trial-level data is needed.

simulate_data <- function(sample_size, 
                          deviant_simple_mu, deviant_simple_sd,
                          deviant_unusual_mu, deviant_unusual_sd,
                          deviant_mixed_mu, deviant_mixed_sd,
                          bkgd_simple_mu, bkgd_simple_sd,
                          bkgd_unusual_mu, bkgd_unusual_sd,
                          bkgd_mixed_mu, bkgd_mixed_sd){
  
  deviant_simple <- rnorm(sample_size, deviant_simple_mu, deviant_simple_sd)
  deviant_unusual <- rnorm(sample_size,deviant_unusual_mu, deviant_unusual_sd)
  deviant_mixed <- rnorm(sample_size,deviant_mixed_mu, deviant_mixed_sd)
  
  bkgd_simple <- rnorm(sample_size,bkgd_simple_mu, bkgd_simple_sd)
  bkgd_unusual <- rnorm(sample_size,bkgd_unusual_mu, bkgd_unusual_sd)
  bkgd_mixed <- rnorm(sample_size,bkgd_mixed_mu, bkgd_mixed_sd)
  
  raw_data <- data.frame(
    deviant_simple = deviant_simple,
    deviant_unusual = deviant_unusual,
    deviant_mixed = deviant_mixed,
    bkgd_simple = bkgd_simple, 
    bkgd_unusual = bkgd_unusual, 
    bkgd_mixed = bkgd_mixed
  )
  
  raw_data$subject <- seq_len(nrow(raw_data))
 tidy_data <- raw_data %>% 
  pivot_longer(cols = deviant_simple:bkgd_mixed,
               names_to = "trial_info",
               values_to = "trial_looking_time") %>% 
  mutate(
    block = case_when(
      grepl("simple", trial_info) ~ "simple",
      grepl("unusual", trial_info) ~ "unusual", 
      grepl("mixed", trial_info) ~ "mixed"
    ),
    trial_stimulus_type = case_when(
      grepl("deviant", trial_info) ~ "deviant", 
      grepl("bkgd", trial_info) ~ "background"
    )
  ) %>% 
  select(-trial_info)
 
 return(tidy_data)

}




tidy_data <- simulate_data(24,
              deviant_simple_mean, deviant_simple_sd,
              deviant_unusual_mean, deviant_unusual_sd,
              deviant_mixed_mean, deviant_mixed_sd,
              bkgd_simple_mean, bkgd_simple_sd,
              bkgd_unusual_mean, bkgd_unusual_sd,
              bkgd_mixed_mean, bkgd_mixed_sd)

tidy_data %>% datatable()

Analysis functions

Function to run analysis once

One to run anova, extracting p value. The other to fit lmer model, exctracting BIC.

# get the interaction effect p value
run_anova <- function(data){
  
  anova_res <- anova_test(data = data, 
           formula = trial_looking_time ~ trial_stimulus_type * block,
           within = c(trial_stimulus_type,block),
           dv = trial_looking_time)
  # get the p val for the interaction term 
  return(anova_res$p[nrow(anova_res)])
  
}

run_lme <- function(data){
  
  null_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type + block  + (1|subject), 
                   data = data)
  lm_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type * block + (1|subject), 
     data = data)
  
  comparison <- anova(lm_res, null_res)
  stats <- comparison %>% rownames_to_column() %>% filter(rowname == "lm_res") %>% select("Pr(>Chisq)") %>% pull() %>% as.numeric() 
  return(stats)
  
}

run_lme_with_null <- function(data){
  
  null_res <- lmer(log(trial_looking_time) ~ 1  + (1|subject), 
                   data = data)
  lm_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type * block + (1|subject), 
     data = data)
  
  m_bic <- BIC(null_res, lm_res)$BIC
  stats <- diff(m_bic)
  return(stats)
  
}

Function to run analysis repeatedly

analysis_fun is one of the two functions listed above analysis_type is either “anova” or "lmer

repeat_analysis <- function(n_simulations, alpha, analysis_fun, analysis_type,
                          n_subjects, 
                          deviant_simple_mu, deviant_simple_sd,
                          deviant_unusual_mu, deviant_unusual_sd,
                          deviant_mixed_mu, deviant_mixed_sd,
                          bkgd_simple_mu, bkgd_simple_sd,
                          bkgd_unusual_mu, bkgd_unusual_sd,
                          bkgd_mixed_mu, bkgd_mixed_sd) {
    stat_results <- c() # empty vector to store delta BICs from each simulation
    # loop for repeating the simulation
    for (i in 1:n_simulations) {
        data <- simulate_data(n_subjects, 
                          deviant_simple_mu, deviant_simple_sd,
                          deviant_unusual_mu, deviant_unusual_sd,
                          deviant_mixed_mu, deviant_mixed_sd,
                          bkgd_simple_mu, bkgd_simple_sd,
                          bkgd_unusual_mu, bkgd_unusual_sd,
                          bkgd_mixed_mu, bkgd_mixed_sd)
        stat_result <- analysis_fun(data)
        stat_results <- c(stat_results,stat_result) 
    }
    
    if (analysis_type == "lmer"){
    # calculate how many of the simulations had significant results
    power <- mean(stat_results <= alpha) #strong evidence, see Raftery & Kass 1995
    return(list(power = power, stat_results = stat_results))
    } else if(analysis_type == "anova"){
    power <- mean(stat_results <= alpha)
    return(list(power = power, stat_results = stat_results))
    }
}

Simulation results:

The current RPub run 1000 simulations.

Anova test

anova_data <- expand.grid(sample_size = c(10,20,30,40), alpha = c(0.05,0.01,0.001)) 
anova_data$id <- 1:nrow(anova_data) 

anova_results <- anova_data  %>%  
    nest(-id, .key = 'parameters')  %>% 
    mutate(power = map(parameters, ~repeat_analysis(SIMULATION_NUM, .$alpha, run_anova, "anova",
                                                     
                                                     .$sample_size,
                                                     deviant_simple_mean, deviant_simple_sd,
                                                     deviant_unusual_mean, deviant_unusual_sd,
                                                      deviant_mixed_mean, deviant_mixed_sd,
                                                      bkgd_simple_mean, bkgd_simple_sd,
                                                      bkgd_unusual_mean, bkgd_unusual_sd,
                                                      bkgd_mixed_mean, bkgd_mixed_sd)$power)) %>% 
    unnest(parameters, power)

anova_results %>% 
  ggplot(aes(sample_size, power, color = as.factor(alpha), group = alpha)) +
    geom_point() +
    geom_line() +
    geom_hline(yintercept = 0.8) + 
    geom_hline(yintercept = 0.95) +
    scale_color_discrete('Alpha level') +
    scale_x_continuous('Sample size') +
    theme_classic()

Linear mixed effect model results

compares m1+m2 model

dat <- expand.grid(sample_size = c(10,20,30,40))
dat$id <- 1:nrow(dat)
lmer_results <- dat  %>% 
    nest(-id, .key = 'parameters')  %>% 
    mutate(power = map(parameters, ~ repeat_analysis(SIMULATION_NUM, 0.05, run_lme, "lmer",
                                                     .$sample_size,
                                                     deviant_simple_mean, deviant_simple_sd,
                                                     deviant_unusual_mean, deviant_unusual_sd,
                                                      deviant_mixed_mean, deviant_mixed_sd,
                                                      bkgd_simple_mean, bkgd_simple_sd,
                                                      bkgd_unusual_mean, bkgd_unusual_sd,
                                                      bkgd_mixed_mean, bkgd_mixed_sd)$power))  %>% 
    unnest(parameters, power)

lmer_results %>% 
  ggplot(aes(sample_size, power)) +
    geom_point() +
    geom_line() +
    geom_hline(yintercept = 0.8) + 
    geom_hline(yintercept = 0.95) +
    scale_color_discrete('Alpha level') +
    scale_x_continuous('Sample size') +
    theme_classic()

Power analysis through simulation for Daffner et al (2000)

Anjie Cao (anjiecao@stanford.edu)

October 19, 2020