[Reference] (https://stats.stackexchange.com/questions/11233/how-to-simulate-data-based-on-a-linear-mixed-model-fit-object-in-r)
Manual simulating data for linear mix model with the random intercepts
library(nlme)
fm2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1)
simfun <- function(n) {
# n is the number of subjects, total rows will be 4*n
# n=10
# sig.b0 is the st. dev. of the random intercepts
# it might be easier to just copy from the output
sig.b0 <- exp(unlist(fm2$modelStruct$reStruct))*fm2$sigma
b0 <- rnorm(n, 0, sig.b0) #random intercepts
sex <- rbinom(n, 1, 0.5) # assign sex at random
fe <- fixef(fm2) #fixed coefficients
my.df <- data.frame( Subject=as.factor(rep(1:n, each=4)),
int = fe[1] + rep(b0, each=4),
Sex=rep(sex,each=4), age=rep( c(8,10,12,14), n ) ) #create sex and age
my.df$distance <- my.df$int + fe[2] * my.df$age +
fe[3]*my.df$Sex + rnorm(n*4, 0, fm2$sigma) #generate distance
my.df$int <- NULL
my.df$Sex <- factor( my.df$Sex, levels=0:1,
labels=c('Male','Female') ) #label sex
my.df
}
Orthodont2 <- simfun(10)
Compare simulated data and original data
summary(lme(distance ~ age + (Sex), data = Orthodont, random = ~ 1|Subject))
## Linear mixed-effects model fit by REML
## Data: Orthodont
## AIC BIC logLik
## 447.5125 460.7823 -218.7563
##
## Random effects:
## Formula: ~1 | Subject
## (Intercept) Residual
## StdDev: 1.807425 1.431592
##
## Fixed effects: distance ~ age + (Sex)
## Value Std.Error DF t-value p-value
## (Intercept) 17.706713 0.8339225 80 21.233044 0.0000
## age 0.660185 0.0616059 80 10.716263 0.0000
## SexFemale -2.321023 0.7614168 25 -3.048294 0.0054
## Correlation:
## (Intr) age
## age -0.813
## SexFemale -0.372 0.000
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.74889609 -0.55034466 -0.02516628 0.45341781 3.65746539
##
## Number of Observations: 108
## Number of Groups: 27
summary(lme(distance ~ age + (Sex), data = Orthodont2, random = ~ 1|Subject ))
## Linear mixed-effects model fit by REML
## Data: Orthodont2
## AIC BIC logLik
## 163.5406 171.5952 -76.7703
##
## Random effects:
## Formula: ~1 | Subject
## (Intercept) Residual
## StdDev: 1.616049 1.346149
##
## Fixed effects: distance ~ age + (Sex)
## Value Std.Error DF t-value p-value
## (Intercept) 16.935791 1.3647314 29 12.409615 0.0000
## age 0.703477 0.0951871 29 7.390467 0.0000
## SexFemale -2.097518 1.1300153 8 -1.856185 0.1005
## Correlation:
## (Intr) age
## age -0.767
## SexFemale -0.497 0.000
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.6184715 -0.6142311 -0.1655648 0.3404100 1.7717551
##
## Number of Observations: 40
## Number of Groups: 10
Manual bootstrap sampling
###################
# set the number of participants in your big simulated study
nits=300
# get the unique participants in the small study
sub=unique(Orthodont$Subject)
# sample the unique participants randomly with replacement
subs=sample(sub,nits,rep=T)
# make an empty data frame
df=Orthodont[-(1:dim(Orthodont)[1]),] #empty data table
# loop through the sample size and bind it together.
for( i in 1:nits) {
df=rbind(df,Orthodont[which(Orthodont$Subject==subs[i]),])
}
# nrow(df)
# [1] 1200
Using simulate function to sample automatically (and aggregate the statistics )
# simple simulation based on multi variates distribution
# general linear
x <- 1:5
mod1 <- lm(c(1:3, 7, 6) ~ x)
S2 <- simulate(mod1, nsim = 200, seed = 123)
rowMeans(S2)
## 1 2 3 4 5
## 0.9084844 2.2925333 3.7416661 5.3075672 6.8462921
fitted(mod1)
## 1 2 3 4 5
## 0.8 2.3 3.8 5.3 6.8
# generized linear
x2 <- sort(runif(100))
yb2 <- rbinom(100, prob = plogis(2*(x2-1)), size = 1)
yb2 <- factor(1 + yb2, labels = c("failure", "success"))
modb2 <- glm(yb2 ~ x2, family = binomial)
S4 <- simulate(modb2, nsim = 2)