This R notebook illustrates some robust alternatives to commonly used effect sizes.
options(digits = 3)
library(pacman)
p_load(kirkegaard, sn)
#settings
group_n = 100000
group_2 = rep(LETTERS[1:2], each = group_n)
theme_set(theme_bw())
Simulate data with roughly constant gap size, but varying variances. Then apply normal Cohen’s d (based on means and standard deviations) vs. robust Cohen’s d (based on medians and median absolute deviations).
The four distributions each with about 15 point gap between groups: 1. Normal distribution 2. Normal distribution mixed with another normal distribution with larger variance 3. t distribution with low degree of freedom (naturally thick tails) 4. Skewed normal distribution
#large
normal_large = tibble(
group = group_2,
#normal
norm = c(rnorm(group_n, mean = 100, sd = 15),
rnorm(group_n, mean = 115, sd = 15)),
#mixed normal
norm_mixed = c(rnorm(group_n*.9, mean = 100, sd = 15), rnorm(group_n*.1, mean = 100, sd = 50),
rnorm(group_n*.9, mean = 115, sd = 15), rnorm(group_n*.1, mean = 115, sd = 50)),
#t dist
t = c(rt(group_n, df = 2) * 15 + 100,
rt(group_n, df = 2) * 15 + 115),
#skewed normal
snorm = c(rsn(group_n, alpha = 5) %>% {.*30 + 80},
rsn(group_n, alpha = 5) %>% {.*30 + 95})
) %>% mutate(i = 1:n())
#plot by facet
normal_large %>%
gather(key = key, value = value, norm:snorm) %>%
ggplot(aes(value, fill = group)) +
geom_density(alpha = .3, n = 2^15) +
coord_cartesian(xlim = c(0, 200)) +
facet_wrap(vars(key))
#numerics
describeBy(normal_large %>% select(norm:snorm),
group = normal_large$group, mat = T) %>%
rownames_to_column() %>%
select(-min, -max, -range, -se) %>%
arrange(group1, vars)
#cohen d vs. robust version
describeBy(normal_large %>% select(norm:snorm),
group = normal_large$group) %>%
{
tibble(
dist = rownames(.$A),
mean_d = (.$B$mean - .$A$mean),
median_d = (.$B$median - .$A$median),
cohen_d = (.$B$mean - .$A$mean) / rowMeans(cbind(.$B$sd, .$A$sd)),
cohen_d_robust = (.$B$median - .$A$median) / rowMeans(cbind(.$B$mad, .$A$mad))
)
}