library(MASS)
# Define the simulation function
glm_count <- function(n, beta, iters){
beta_est = matrix(-1,iters,4)
for (t in 1:iters)
{
# generate data
x1 <- runif(n,0,1)
cov_mat <- matrix(c(1,0.3,0.3,1),nrow = 2,ncol = 2)
x23 <- mvrnorm(n,rep(0,2),cov_mat)
x2 <- x23[,1]
x3 <- x23[,2]
x <- rbind(rep(1,n),x1,x2,x3)
mu <- exp(beta %*% x)
y <- rep(-1,n)
for (i in 1:n)
{
y[i] <- rpois(1, mu[i])
}
# Store the results
beta_est[t,] <- glm(y ~ x1 + x2 + x3, family = poisson)$coefficients
}
# return
return(beta_est)
}
# Initiate the setting
n <- 500
beta <- c(-1,1,1,1)
# Start
beta_est <- glm_count(n, beta, iters = 10)
The Bias and RMSE of the parameter estimates are as follows:
# Calculate the Bias and RMSE
Bias <- colMeans(beta_est) - beta
dist <- beta_est - matrix(beta, nrow(beta_est), ncol = length(beta), byrow = T)
RMSE <- apply(dist,2,norm,'2')
# Print
cat("Bias:\n", Bias, '\n')
Bias:
-0.05306205 0.004322885 -0.006351053 0.03050441
cat("RMSE:\n", RMSE, '\n')
RMSE:
0.3258033 0.3204383 0.1323827 0.174261
library(nnet)
library(extraDistr)
# Define the simulation function
glm_nominal <- function(n, beta, iters){
beta_est = matrix(-1,iters,(nrow(beta)-1)*ncol(beta))
for (i in 1:iters){
# generate the data
x1 <- runif(n,0,1)
x2 <- rnorm(n,0,1)
x <- rbind(rep(1,n),x1,x2)
pi <- apply(exp(beta %*% x),2,function(x){x / sum(x)})
y <- apply(pi,2, function(x){rcat(1, prob = x)})
y <- factor(as.character(y),
levels = c('4','1','2','3'),
labels = c('category4','category1','category2','category3'))
# Store the results
beta_est[i,] <- summary(multinom(y ~ x1 + x2))$coefficients
}
# return
return(beta_est)
}
# Initiate the setting
beta1 <- c(-1,-1,1)
beta2 <- c(-1,-1,-1)
beta3 <- c(1,-1,1)
beta4 <- rep(0,3)
beta <- rbind(beta1, beta2, beta3, beta4)
# Start
beta_est <- glm_nominal(n = 500, beta, iters = 10)
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 491.266116
final value 490.213443
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 499.961662
final value 499.701329
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 504.874699
final value 503.915752
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 477.547388
final value 476.550571
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 498.945864
final value 498.709079
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 451.280932
final value 450.759075
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 491.938176
final value 490.150183
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 536.929615
final value 536.616682
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 480.642659
final value 479.724027
converged
# weights: 16 (9 variable)
initial value 693.147181
iter 10 value 487.815272
final value 485.508866
converged
The Bias and RMSE of the parameter estimates are as follows:
# Calculate the Bias and RMSE
beta_vec <- as.vector(beta[1:3,])
Bias <- colMeans(beta_est) - beta_vec
dist <- beta_est - matrix(beta_vec, nrow(beta_est), length(beta_vec), byrow = T)
RMSE <- as.vector(apply(dist,2,norm,'2'))
# print
cat("\014")
cat("Bias:\n")
Bias:
print(data.frame(beta1 = Bias[1:3], beta2 = Bias[4:6], beta3 = Bias[7:9]))
beta1 beta2 beta3
1 -0.11584195 -0.02726638 0.010007544
2 -0.02267826 0.08881272 0.048615939
3 0.05862205 -0.09126364 0.009007726
cat("\n")
cat("RMSE:\n")
RMSE:
print(data.frame(beta1 = RMSE[1:3], beta2 = RMSE[4:6], beta3 = RMSE[7:9]))
beta1 beta2 beta3
1 1.3264619 3.1356427 0.6980622
2 1.2548290 2.0378704 0.6572526
3 0.5514944 0.9489469 0.4309279
Considering keeping the structure of panel data, here I apply a three-dimensional array to store the generated data.
library(glmm)
# Define the simulation function
glmm_bernoulli <- function(n, T, beta, sig2, iters){
params_est <- matrix(0,iters,4)
for (i in 1:iters){
# Generate the data
x1 <- matrix(rbinom(n*T,1,prob = 0.4),T)
x2 <- matrix(rnorm(n*T,0,1),T)
x0 <- matrix(1,T,n)
u <- rnorm(n,0,sig2)
x <- array(c(x0,x1,x2), dim=c(T,n,3))
logit_pi <- apply(x, c(1,2), function(i){beta %*% i}) + array(matrix(u,n,T), dim = c(T,n))
pi <- exp(logit_pi) / (1 + exp(logit_pi))
y <- apply(pi,c(1,2), function(i){rbinom(1, 1, prob = i)})
sub <- as.factor(col(y))
data <- data.frame(y = as.vector(y), x1 = as.vector(x1), x2 = as.vector(x2), subject = sub)
# Fit GLMM model and estimate the parameters
# ptm <- proc.time()# record the time for each estimation
model_fit <- glmm(y ~ x1 + x2, random = list(y ~ 0 + subject),
varcomps.names = c("subject"), data = data,
family.glmm = bernoulli.glmm, m = 100, doPQL = TRUE)
# proc.time() - ptm
# Store the results
params_est[i, 1:3] <- coefficients(model_fit)
params_est[i, 4] <- varcomps(model_fit)
}
return(params_est)
}
# Initiate the setting
n <- 500
T <- 4
sig2 <- 1
beta <- c(-1,0.5,-0.5)
# Start
params_est <- glmm_bernoulli(n, T, beta, sig2, iters = 10)
The Bias and RMSE of the parameter estimates are as follows:
# Calculate the Bias and RMSE
params <- append(beta, sig2)
Bias <- colMeans(params_est) - params
dist <- params_est - matrix(params, nrow(params_est), length(params), byrow = T)
RMSE <- apply(dist,2,norm,'2')
# Print
cat("Bias:\n", Bias, '\n')
Bias:
0.1708627 -0.1025265 0.07979331 -0.7529994
cat("RMSE:\n", RMSE, '\n')
RMSE:
0.5719067 0.417621 0.3233055 2.423903
According to the given instructions, the insomnia data set of Example 3.3 can be downloaded online. Here, the value of response (1,2,3,4) represents different lengths of time to falling asleep of a subject (1 - “<20”, 2 - “20-30”, 3 - “30-60”, 4 - “>60”).
insomnia <- read.table("insomnia.txt", header = T)
insomnia
clm and clmm functionsHere I use the clm and clmm function of
ordinal package for replication of the results.
# import libraries
library(ordinal)
# estimation with the random intercept (clmm function)
model_clmm <- clmm(factor(response) ~ (1|case) + treat*occasion, data = insomnia, link = "logit")
# print
cat('Cumulative Logit Model with the Random Intercept\nbeta:',model_clmm$beta,'\n\n')
Cumulative Logit Model with the Random Intercept
beta: -0.04830713 -1.571161 -1.057957
# estimation without the random intercept (clm function)
model_clm <- clm(factor(response) ~ treat*occasion, data = insomnia, link = "logit")
# print
cat('Cumulative Logit Model without the Random Intercept\nbeta:',model_clm$beta,'\n\n')
Cumulative Logit Model without the Random Intercept
beta: -0.03361002 -1.038076 -0.7077589
Here are the detailed summaries of these two models. The results showed that including the random effect can improve the performance. However, according to the std, the random effect is not significantly different from 0 at the 0.05 level.
# Summary #1
summary(model_clmm)
Cumulative Link Mixed Model fitted with the Laplace approximation
formula: factor(response) ~ (1 | case) + treat * occasion
data: insomnia
Random effects:
Groups Name Variance Std.Dev.
case (Intercept) 3.043 1.744
Number of groups: case 239
Coefficients:
Estimate Std. Error z value Pr(>|z|)
treat -0.04831 0.35018 -0.138 0.89028
occasion -1.57116 0.28475 -5.518 3.43e-08 ***
treat:occasion -1.05796 0.38006 -2.784 0.00537 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Threshold coefficients:
Estimate Std. Error z value
1|2 -3.3986 0.3517 -9.664
2|3 -1.4451 0.2799 -5.163
3|4 0.5457 0.2587 2.109
# Summary #2
summary(model_clm)
formula: factor(response) ~ treat * occasion
data: insomnia
Coefficients:
Estimate Std. Error z value Pr(>|z|)
treat -0.03361 0.23775 -0.141 0.8876
occasion -1.03808 0.24099 -4.308 1.65e-05 ***
treat:occasion -0.70776 0.33394 -2.119 0.0341 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Threshold coefficients:
Estimate Std. Error z value
1|2 -2.2671 0.2048 -11.071
2|3 -0.9515 0.1812 -5.251
3|4 0.3517 0.1746 2.014