Monte Carlo simulation of the effect of an unbalanced dichotomous confounder on the Type I error rate

This simulation compares the Type I (false positive) error rate for when the treatment effect is confounded by an unbalanced dichotomous factor under three situations:

Code

assign2 <- function(N, p0, p1) {
    # Generate an N × 2 matrix of dichotomous tmt group and gender assignments
    #   N: sample size (ASSSUMED to be even)
    #   p0: proportion of tmt group 0 to be female
    #   p1: proportion of tmt group 1 to be female
    group.size <- N/2
    n.F0 <- round(p0*group.size)
    n.M0 <- group.size - n.F0
    n.F1 <- round(p1*group.size)
    n.M1 <- group.size - n.F1
    F0 <- matrix(c(0,0), n.F0, 2, byrow=TRUE)
    M0 <- matrix(c(0,1), n.M0, 2, byrow=TRUE)
    F1 <- matrix(c(1,0), n.F1, 2, byrow=TRUE)
    M1 <- matrix(c(1,1), n.M1, 2, byrow=TRUE)
    rbind(F0, M0, F1, M1)
}

gen.sim.data <- function(n.sims, N, p0, p1, delta, sd.delta=.1) {
    # Generate n.sims simulated experiments each comprising N subjects.
    # Subjects are assigned to 2 equal-sized treatment groups, labeled 0 and 1.
    # Proportion p0 of group 0 and p1 of group 1 are designated female.
    # Subjects are initially assigned a random outcome y from a Normal(0, 1)
    # distribution. Then, a random amount from a normal distribution with mean
    # delta and SD sd.delta is added to each "male" subject.
    if (N %% 2 != 0) stop("N must be even")
    # Generate assignments to two treatment groups and two "genders"
    X12 <- assign2(N, p0, p1)
    replicate(n.sims, {
        # Generate y
        y <- rnorm(N) 
        colnames(X12) <- c("group", "sex")
        # Add a random amount with mean=delta SD=sd.delta to the males
        male <- X12[, "sex"] == 1
        y[male] <- y[male] + rnorm(length(y[male]), delta, sd.delta)
        # Return the data as an array
        cbind(y, X12)
    })
}

# Function to compute p-value of treatment effect without adjustemdnt for sex
lm.sex.0.fun <- function(x) summary(lm(y ~ group, data=as.data.frame(x)))$coefficients["group", "Pr(>|t|)"]

# Function to compute p-value of treatment effect after adjustemdnt for sex
lm.sex.1.fun <- function(x) summary(lm(y ~ sex + group, data=as.data.frame(x)))$coefficients["group", "Pr(>|t|)"]

set.seed(1123)
sim.data <- gen.sim.data(1e4, 100, .4, .6, 1)

# Apply the above functions to each simulated data set
pvals.sex.0 <- apply(sim.data, 3, lm.sex.0.fun)
pvals.sex.1 <- apply(sim.data, 3, lm.sex.1.fun)

# Compute proportion of treatment effect p-values < .05 without adjustment for sex
mean(pvals.sex.0 < .05)

## [1] 0.1123

# Compute proportion of treatment effect p-values < .05 after djustment for sex
mean(pvals.sex.1 < .05)

## [1] 0.0467

# Put the p-values generated above into the columns of a matrix
pvals.sex <- cbind(pvals.sex.0, pvals.sex.1)

# For each simulated experiment, select the minimum of the p-values from the
# results adjusted for 0 and 1 covariate
p.vals.sex.01 <- apply(pvals.sex, 1, min)

# Compute proportion of treatment effect p-values < .05 when the analyst can
# choose the smallest of the 0- and 1-covariate-adjusted analyses
mean(p.vals.sex.01 < .05)

## [1] 0.1367