#### 模拟数据:假设因变量y服从伽马分布
set.seed(2389)
n = 50 #观察值的次数
x1 = rgamma(n, 2, 3)
x2 = rgamma(n, 5, 3)
x3 = rbinom(n, 1, 0.2)
x4 = rbinom(n, 1, 0.4)
b0 = 7
b1 = 0.8
b2 = -0.6
b3 = 0.5
b4 = -0.2
b5 = -0.35 #真实参数值
eta = b0 + b1 * x1 + b2 * x2 + b3 * x3 + b4 * x4 + b5 * x2 * x3 #线性预测项
mu = exp(eta) #因变量的均值
y = gamlss.dist::rGA(n, mu = mu, sigma = 0.5) #因变量的模拟值
dt = data.frame(y, x1, x2, x3, x4) #模拟的数据集
options(digits = 4)
mod1 = glm(y ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian)
mod2 = glm(log(y) ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian)
mod3 = glm(y ~ x1 + x2 + x3 + x4 + x2 * x3, data = dt, family = gaussian(link = log))
par(mfrow = c(1, 3))
plot(y, fitted(mod1), ylab = "mod1的拟合值")
abline(0, 1)
plot(log(y), fitted(mod2), ylab = "mod2的拟合值")
abline(0, 1)
plot(log(y), log(fitted(mod3)), ylab = "log(mod3的拟合值)")
abline(0, 1)
comp=data.frame(cbind(estimate=c(cbind(summary(mod2)$coef[,1])),real=c(b0,b1,b2,b3,b4,b5)))
pander::pander(comp)
| estimate | real |
|---|---|
| 6.999 | 7 |
| 0.6565 | 0.8 |
| -0.6213 | -0.6 |
| 0.473 | 0.5 |
| -0.318 | -0.2 |
| -0.3109 | -0.35 |
模型mod2的参数估计值如下:
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 6.999 | 0.2458 | 28.48 | 5.523e-30 |
| x1 | 0.6565 | 0.1617 | 4.06 | 0.0001982 |
| x2 | -0.6213 | 0.1045 | -5.946 | 4.049e-07 |
| x3 | 0.473 | 0.5001 | 0.9457 | 0.3495 |
| x4 | -0.318 | 0.1777 | -1.789 | 0.08048 |
| x2:x3 | -0.3109 | 0.2842 | -1.094 | 0.2798 |
真实值\(b_0\)=7,\(b_1\)=0.8,\(b_2\)=-0.6,\(b_3\)=0.5 ,\(b_4\)=-0.2,\(b_5\)=-0.35
拟合值\(\hat{b}_0\)=6.9995,\(\hat{b}_1\)=0.6565 ,\(\hat{b}_2\)=-0.6213 ,\(\hat{b}_3\)=0.473 ,\(\hat{b}_4\)=-0.318 ,\(\hat{b}_5\)=-0.3109