Many code chunks are adapted from the recitation notebook created for CMU 85732
The original plot
N <- 24
deviant_simple_mean <- (85.9 / 31.59) * 500
deviant_unusual_mean <- (181.44 / 31.59) * 500
deviant_mixed_mean <- (98.88 / 31.59) * 500
deviant_simple_sem <- (9.92 / 31.59) * 500
deviant_unusual_sem <- (26.1 / 31.59) * 500
deviant_mixed_sem <- (15.6 / 31.59) * 500
deviant_simple_sd <- deviant_simple_sem * sqrt(N)
deviant_unusual_sd <- deviant_unusual_sem * sqrt(N)
deviant_mixed_sd <- deviant_mixed_sem * sqrt(N)
bkgd_simple_mean <- (48.02 / 31.59) * 500
bkgd_unusual_mean <- (53.6 / 31.59) * 500
bkgd_mixed_mean <- (58.53 / 31.59) * 500
bkgd_simple_sem <- (10.52 / 31.59) * 500
bkgd_unusual_sem <- (4.27 / 31.59) * 500
bkgd_mixed_sem <- (16.94 / 31.59) * 500
bkgd_simple_sd <- bkgd_simple_sem * sqrt(N)
bkgd_unusual_sd <- bkgd_unusual_sem * sqrt(N)
bkgd_mixed_sd <- bkgd_mixed_sem * sqrt(N)
Assuming normal distribution of the looking time data will lead to some “negative” looking time data. Also, because the plot only provides data aggregated across participant levels, the generated data is not going to be perfectly fit for lmer analysis where trial-level data is needed.
simulate_data <- function(sample_size,
deviant_simple_mu, deviant_simple_sd,
deviant_unusual_mu, deviant_unusual_sd,
deviant_mixed_mu, deviant_mixed_sd,
bkgd_simple_mu, bkgd_simple_sd,
bkgd_unusual_mu, bkgd_unusual_sd,
bkgd_mixed_mu, bkgd_mixed_sd){
deviant_simple <- rnorm(sample_size, deviant_simple_mu, deviant_simple_sd)
deviant_unusual <- rnorm(sample_size,deviant_unusual_mu, deviant_unusual_sd)
deviant_mixed <- rnorm(sample_size,deviant_mixed_mu, deviant_mixed_sd)
bkgd_simple <- rnorm(sample_size,bkgd_simple_mu, bkgd_simple_sd)
bkgd_unusual <- rnorm(sample_size,bkgd_unusual_mu, bkgd_unusual_sd)
bkgd_mixed <- rnorm(sample_size,bkgd_mixed_mu, bkgd_mixed_sd)
raw_data <- data.frame(
deviant_simple = deviant_simple,
deviant_unusual = deviant_unusual,
deviant_mixed = deviant_mixed,
bkgd_simple = bkgd_simple,
bkgd_unusual = bkgd_unusual,
bkgd_mixed = bkgd_mixed
)
raw_data$subject <- seq_len(nrow(raw_data))
tidy_data <- raw_data %>%
pivot_longer(cols = deviant_simple:bkgd_mixed,
names_to = "trial_info",
values_to = "trial_looking_time") %>%
mutate(
block = case_when(
grepl("simple", trial_info) ~ "simple",
grepl("unusual", trial_info) ~ "unusual",
grepl("mixed", trial_info) ~ "mixed"
),
trial_stimulus_type = case_when(
grepl("deviant", trial_info) ~ "deviant",
grepl("bkgd", trial_info) ~ "background"
)
) %>%
select(-trial_info)
return(tidy_data)
}
tidy_data <- simulate_data(24,
deviant_simple_mean, deviant_simple_sd,
deviant_unusual_mean, deviant_unusual_sd,
deviant_mixed_mean, deviant_mixed_sd,
bkgd_simple_mean, bkgd_simple_sd,
bkgd_unusual_mean, bkgd_unusual_sd,
bkgd_mixed_mean, bkgd_mixed_sd)
tidy_data %>% datatable()
One to run anova, extracting p value. The other to fit lmer model, exctracting BIC.
# get the interaction effect p value
run_anova <- function(data){
anova_res <- anova_test(data = data,
formula = trial_looking_time ~ trial_stimulus_type * block,
within = c(trial_stimulus_type,block),
dv = trial_looking_time)
# get the p val for the interaction term
return(anova_res$p[nrow(anova_res)])
}
run_lme <- function(data){
null_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type + block + (1|subject),
data = data)
lm_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type * block + (1|subject),
data = data)
comparison <- anova(lm_res, null_res)
stats <- comparison %>% rownames_to_column() %>% filter(rowname == "lm_res") %>% select("Pr(>Chisq)") %>% pull() %>% as.numeric()
return(stats)
}
run_lme_with_null <- function(data){
null_res <- lmer(log(trial_looking_time) ~ 1 + (1|subject),
data = data)
lm_res <- lmer(log(trial_looking_time) ~ trial_stimulus_type * block + (1|subject),
data = data)
m_bic <- BIC(null_res, lm_res)$BIC
stats <- diff(m_bic)
return(stats)
}
analysis_fun is one of the two functions listed above analysis_type is either “anova” or "lmer
repeat_analysis <- function(n_simulations, alpha, analysis_fun, analysis_type,
n_subjects,
deviant_simple_mu, deviant_simple_sd,
deviant_unusual_mu, deviant_unusual_sd,
deviant_mixed_mu, deviant_mixed_sd,
bkgd_simple_mu, bkgd_simple_sd,
bkgd_unusual_mu, bkgd_unusual_sd,
bkgd_mixed_mu, bkgd_mixed_sd) {
stat_results <- c() # empty vector to store delta BICs from each simulation
# loop for repeating the simulation
for (i in 1:n_simulations) {
data <- simulate_data(n_subjects,
deviant_simple_mu, deviant_simple_sd,
deviant_unusual_mu, deviant_unusual_sd,
deviant_mixed_mu, deviant_mixed_sd,
bkgd_simple_mu, bkgd_simple_sd,
bkgd_unusual_mu, bkgd_unusual_sd,
bkgd_mixed_mu, bkgd_mixed_sd)
stat_result <- analysis_fun(data)
stat_results <- c(stat_results,stat_result)
}
if (analysis_type == "lmer"){
# calculate how many of the simulations had significant results
power <- mean(stat_results <= alpha) #strong evidence, see Raftery & Kass 1995
return(list(power = power, stat_results = stat_results))
} else if(analysis_type == "anova"){
power <- mean(stat_results <= alpha)
return(list(power = power, stat_results = stat_results))
}
}
The current RPub run 1000 simulations.
anova_data <- expand.grid(sample_size = c(10,20,30,40), alpha = c(0.05,0.01,0.001))
anova_data$id <- 1:nrow(anova_data)
anova_results <- anova_data %>%
nest(-id, .key = 'parameters') %>%
mutate(power = map(parameters, ~repeat_analysis(SIMULATION_NUM, .$alpha, run_anova, "anova",
.$sample_size,
deviant_simple_mean, deviant_simple_sd,
deviant_unusual_mean, deviant_unusual_sd,
deviant_mixed_mean, deviant_mixed_sd,
bkgd_simple_mean, bkgd_simple_sd,
bkgd_unusual_mean, bkgd_unusual_sd,
bkgd_mixed_mean, bkgd_mixed_sd)$power)) %>%
unnest(parameters, power)
anova_results %>%
ggplot(aes(sample_size, power, color = as.factor(alpha), group = alpha)) +
geom_point() +
geom_line() +
geom_hline(yintercept = 0.8) +
geom_hline(yintercept = 0.95) +
scale_color_discrete('Alpha level') +
scale_x_continuous('Sample size') +
theme_classic()
compares m1+m2 model
dat <- expand.grid(sample_size = c(10,20,30,40))
dat$id <- 1:nrow(dat)
lmer_results <- dat %>%
nest(-id, .key = 'parameters') %>%
mutate(power = map(parameters, ~ repeat_analysis(SIMULATION_NUM, 0.05, run_lme, "lmer",
.$sample_size,
deviant_simple_mean, deviant_simple_sd,
deviant_unusual_mean, deviant_unusual_sd,
deviant_mixed_mean, deviant_mixed_sd,
bkgd_simple_mean, bkgd_simple_sd,
bkgd_unusual_mean, bkgd_unusual_sd,
bkgd_mixed_mean, bkgd_mixed_sd)$power)) %>%
unnest(parameters, power)
lmer_results %>%
ggplot(aes(sample_size, power)) +
geom_point() +
geom_line() +
geom_hline(yintercept = 0.8) +
geom_hline(yintercept = 0.95) +
scale_color_discrete('Alpha level') +
scale_x_continuous('Sample size') +
theme_classic()