#### 模拟数据:假设因变量y服从伽马分布
set.seed(2389)
n = 50  #观察值的次数
x1 = rgamma(n, 2, 3)   
x2 = rgamma(n, 5, 3)    
x3 = rbinom(n, 1, 0.2)  
x4 = rbinom(n, 1, 0.4) 
b0 = 7
b1 = 0.8
b2 = -0.6
b3 = 0.5
b4 = -0.2
b5 = -0.35  #真实参数值
eta = b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x4 + b5 * x2 * x3  #线性预测项
mu = exp(eta)  #因变量的均值
y = gamlss.dist::rGA(n, mu = mu, sigma = 0.5)  #因变量的模拟值
dt = data.frame(y, x1, x2, x3, x4)  #模拟的数据集
options(digits = 4)
mod1 = glm(y ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian)
mod2 = glm(log(y) ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian)
mod3 = glm(y ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian(link = log))
par(mfrow = c(1, 3))
plot(y, fitted(mod1), ylab = "mod1的拟合值")
abline(0, 1)
plot(log(y), fitted(mod2), ylab = "mod2的拟合值")
abline(0, 1)
plot(log(y), log(fitted(mod3)), ylab = "log(mod3的拟合值)")
abline(0, 1)

comp=data.frame(cbind(estimate=c(cbind(summary(mod2)$coef[,1])),real=c(b0,b1,b2,b3,b4,b5)))
pander::pander(comp)
estimate real
6.999 7
0.6565 0.8
-0.6213 -0.6
0.473 0.5
-0.318 -0.2
-0.3109 -0.35

模型mod2的参数估计值如下:

  Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.999 0.2458 28.48 5.523e-30
x1 0.6565 0.1617 4.06 0.0001982
x2 -0.6213 0.1045 -5.946 4.049e-07
x3 0.473 0.5001 0.9457 0.3495
x4 -0.318 0.1777 -1.789 0.08048
x2:x3 -0.3109 0.2842 -1.094 0.2798

真实值\(b_0\)=7,\(b_1\)=0.8,\(b_2\)=-0.6,\(b_3\)=0.5 ,\(b_4\)=-0.2,\(b_5\)=-0.35
拟合值\(\hat{b}_0\)=6.9995,\(\hat{b}_1\)=0.6565 ,\(\hat{b}_2\)=-0.6213 ,\(\hat{b}_3\)=0.473 ,\(\hat{b}_4\)=-0.318 ,\(\hat{b}_5\)=-0.3109