Main simulation function
https://en.wikipedia.org/wiki/Log-normal_distribution
simulate_paired_t <- function (df) {
# draw data
if (df$base_distribution == "lognormal") {
lt_1 <- rlnorm(n = df$n_subs,
meanlog = log((M_NORMAL^2) / sqrt(SD_NORMAL^2 + M_NORMAL^2)),
sdlog = sqrt(log(1 + (SD_NORMAL^2 / M_NORMAL^2))))
lt_2 <- rlnorm(n = df$n_subs,
meanlog = log(((M_NORMAL + df$true_effect)^2) / sqrt(SD_NORMAL^2 + (M_NORMAL + df$true_effect)^2)),
sdlog = sqrt(log(1 + (SD_NORMAL^2 / (M_NORMAL + df$true_effect)^2))))
} else if (df$base_distribution == "lognormal_outlier") {
lt_1 <- rlnorm(n = df$n_subs,
meanlog = log((M_NORMAL^2) / sqrt(SD_NORMAL^2 + M_NORMAL^2)),
sdlog = sqrt(log(1 + (SD_NORMAL^2 / M_NORMAL^2))))
outlier <- sample(1:df$n_subs, df$n_subs*0.1, replace=FALSE)
lt_1[outlier] <- (abs(lt_1[outlier]) + 3*sqrt(log(1 + (SD_NORMAL^2 / M_NORMAL^2))))*sign(lt_1[outlier])
lt_2 <- rlnorm(n = df$n_subs,
meanlog = log(((M_NORMAL + df$true_effect)^2) / sqrt(SD_NORMAL^2 + (M_NORMAL + df$true_effect)^2)),
sdlog = sqrt(log(1 + (SD_NORMAL^2 / (M_NORMAL + df$true_effect)^2))))
outlier2 <- sample(1:df$n_subs, df$n_subs*0.1, replace=FALSE)
lt_2[outlier2] <- (abs(lt_2[outlier2]) + 3*sqrt(log(1 + (SD_NORMAL^2 / (M_NORMAL + df$true_effect)^2)))) * sign(lt_2[outlier2])
} else if (df$base_distribution == "normal") {
lt_1 <- rnorm(n = df$n_subs,
mean = M_NORMAL, sd = SD_NORMAL)
lt_2 <- rnorm(n = df$n_subs,
mean = M_NORMAL + df$true_effect, sd = SD_NORMAL)
} else if (df$base_distribution == "normal_outlier") {
lt_1 <- rnorm(n = df$n_subs,
mean = M_NORMAL, sd = SD_NORMAL)
outlier <- sample(1:df$n_subs, df$n_subs*0.1, replace=FALSE)
lt_1[outlier] <- (abs(lt_1[outlier]) + 3*SD_NORMAL) * sign(lt_1[outlier])
lt_2 <- rnorm(n = df$n_subs,
mean = M_NORMAL + df$true_effect, sd = SD_NORMAL)
outlier2 <- sample(1:df$n_subs, df$n_subs*0.1, replace=FALSE)
lt_2[outlier2] <- (abs(lt_2[outlier2]) + 3*SD_NORMAL) * sign(lt_2[outlier2])
# 90% of distribution is rnorm as above
# 10% of distribtion is outliers - 4x the standard deviation, mean of zero (i.e, 2 outliers)
}
# transform
# trim outliers
if (df$trim_before_transform == "trim first") {
if (df$trim_outliers == "trimmed") {
m_lt <- mean(c(lt_1, lt_2))
sd_lt <- sd(c(lt_1, lt_2))
lt_1 <- lt_1[abs(lt_1) < m_lt + OUTLIER_PARAMETER * sd_lt]
lt_2 <- lt_2[abs(lt_2) < m_lt + OUTLIER_PARAMETER * sd_lt]
}
if (df$log_transform == "log transform") {
lt_1 <- log(lt_1)
lt_2 <- log(lt_2)
}
if (df$log_transform == "untransformed") {
lt_1 <- lt_1
lt_2 <- lt_2
}
} else {
if (df$trim_before_transform == "transform first") {
if (df$log_transform == "log transform") {
lt_1 <- log(lt_1)
lt_2 <- log(lt_2)
}
if (df$trim_outliers == "trimmed") {
m_lt <- mean(c(lt_1, lt_2))
sd_lt <- sd(c(lt_1, lt_2))
lt_1 <- lt_1[abs(lt_1) < m_lt + OUTLIER_PARAMETER * sd_lt]
lt_2 <- lt_2[abs(lt_2) < m_lt + OUTLIER_PARAMETER * sd_lt]
}
} else if (df$trim_before_transform == "untransformed") {
if (df$trim_outlier == "trimmed") {
m_lt <- mean(c(lt_1, lt_2))
sd_lt <- sd(c(lt_1, lt_2))
lt_1 <- lt_1[abs(lt_1) < m_lt + OUTLIER_PARAMETER * sd_lt]
lt_2 <- lt_2[abs(lt_2) < m_lt + OUTLIER_PARAMETER * sd_lt]
}
}
}
t_test <- t.test(lt_1, lt_2, paired = FALSE)
df$n_included <- length(c(lt_1, lt_2))
df$p_val <- t_test$p.value
return(df)
}