Manual simulating data for linear mix model

Introduction

[Reference] (https://stats.stackexchange.com/questions/11233/how-to-simulate-data-based-on-a-linear-mixed-model-fit-object-in-r)

Manual simulating data for linear mix model with the random intercepts

library(nlme)
fm2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1) 

simfun <- function(n) {
  # n is the number of subjects, total rows will be 4*n
  # n=10
  # sig.b0 is the st. dev. of the random intercepts
  # it might be easier to just copy from the output
  sig.b0 <- exp(unlist(fm2$modelStruct$reStruct))*fm2$sigma
  
  b0 <- rnorm(n, 0, sig.b0)  #random intercepts
  
  sex <- rbinom(n, 1, 0.5)  # assign sex at random
  
  fe <- fixef(fm2)  #fixed coefficients
  
  my.df <- data.frame( Subject=as.factor(rep(1:n, each=4)), 
                       int = fe[1] + rep(b0, each=4), 
                       Sex=rep(sex,each=4), age=rep( c(8,10,12,14), n ) )       #create sex and age
  
  my.df$distance <- my.df$int + fe[2] * my.df$age + 
    fe[3]*my.df$Sex + rnorm(n*4, 0, fm2$sigma) #generate distance
  
  my.df$int <- NULL
  my.df$Sex <- factor( my.df$Sex, levels=0:1,
                       labels=c('Male','Female') ) #label sex
  my.df
}

Orthodont2 <- simfun(10)

Compare simulated data and original data

summary(lme(distance ~ age +  (Sex), data = Orthodont, random = ~ 1|Subject))

## Linear mixed-effects model fit by REML
##   Data: Orthodont 
##        AIC      BIC    logLik
##   447.5125 460.7823 -218.7563
## 
## Random effects:
##  Formula: ~1 | Subject
##         (Intercept) Residual
## StdDev:    1.807425 1.431592
## 
## Fixed effects:  distance ~ age + (Sex) 
##                 Value Std.Error DF   t-value p-value
## (Intercept) 17.706713 0.8339225 80 21.233044  0.0000
## age          0.660185 0.0616059 80 10.716263  0.0000
## SexFemale   -2.321023 0.7614168 25 -3.048294  0.0054
##  Correlation: 
##           (Intr) age   
## age       -0.813       
## SexFemale -0.372  0.000
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.74889609 -0.55034466 -0.02516628  0.45341781  3.65746539 
## 
## Number of Observations: 108
## Number of Groups: 27

summary(lme(distance ~ age +  (Sex), data = Orthodont2, random = ~ 1|Subject ))

## Linear mixed-effects model fit by REML
##   Data: Orthodont2 
##        AIC      BIC   logLik
##   163.5406 171.5952 -76.7703
## 
## Random effects:
##  Formula: ~1 | Subject
##         (Intercept) Residual
## StdDev:    1.616049 1.346149
## 
## Fixed effects:  distance ~ age + (Sex) 
##                 Value Std.Error DF   t-value p-value
## (Intercept) 16.935791 1.3647314 29 12.409615  0.0000
## age          0.703477 0.0951871 29  7.390467  0.0000
## SexFemale   -2.097518 1.1300153  8 -1.856185  0.1005
##  Correlation: 
##           (Intr) age   
## age       -0.767       
## SexFemale -0.497  0.000
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.6184715 -0.6142311 -0.1655648  0.3404100  1.7717551 
## 
## Number of Observations: 40
## Number of Groups: 10

Manual bootstrap sampling

###################

# set the number of participants in your big simulated study

nits=300 
# get the unique participants in the small study

sub=unique(Orthodont$Subject)
# sample the unique participants randomly with replacement

subs=sample(sub,nits,rep=T)
# make an empty data frame

df=Orthodont[-(1:dim(Orthodont)[1]),]  #empty data table
# loop through the sample size and bind it together.

for( i in 1:nits) {  
  df=rbind(df,Orthodont[which(Orthodont$Subject==subs[i]),])
}

#  nrow(df)
# [1] 1200

Using simulate function to sample automatically (and aggregate the statistics )

# simple simulation based on multi variates distribution 
# general linear
x <- 1:5
mod1 <- lm(c(1:3, 7, 6) ~ x)
S2 <- simulate(mod1, nsim = 200, seed = 123)
rowMeans(S2)

##         1         2         3         4         5 
## 0.9084844 2.2925333 3.7416661 5.3075672 6.8462921

fitted(mod1)

##   1   2   3   4   5 
## 0.8 2.3 3.8 5.3 6.8

# generized linear
x2 <- sort(runif(100))
yb2 <- rbinom(100, prob = plogis(2*(x2-1)), size = 1)
yb2 <- factor(1 + yb2, labels = c("failure", "success"))
modb2 <- glm(yb2 ~ x2, family = binomial)
S4 <- simulate(modb2, nsim = 2)

Manual simulating data for linear mix model

2023-05-12

Introduction