本章ではデータとして測定しなかった値(個体差)を組み込んだGLM一般化線形混合モデル(GLMM)を扱う
head(d_07)
## # A tibble: 6 x 4
## N y x id
## <dbl> <dbl> <dbl> <dbl>
## 1 8 0 2 1
## 2 8 1 2 2
## 3 8 2 2 3
## 4 8 4 2 4
## 5 8 1 2 5
## 6 8 0 2 6
summary(d_07)
## N y x id
## Min. :8 Min. :0.00 Min. :2 Min. : 1.00
## 1st Qu.:8 1st Qu.:1.00 1st Qu.:3 1st Qu.: 25.75
## Median :8 Median :3.00 Median :4 Median : 50.50
## Mean :8 Mean :3.81 Mean :4 Mean : 50.50
## 3rd Qu.:8 3rd Qu.:7.00 3rd Qu.:5 3rd Qu.: 75.25
## Max. :8 Max. :8.00 Max. :6 Max. :100.00
#データ点を少しずらして表示
plot(jitter(d_07$x, 0.5), d_07$y,
xlab = "leaves", ylab = "seed_alive")
fit.glm <- glm(cbind(y, N - y) ~ x, data = d_07, family = binomial)
beta.glm <- fit.glm$coefficients
summary(fit.glm)
##
## Call:
## glm(formula = cbind(y, N - y) ~ x, family = binomial, data = d_07)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.4736 -2.1182 -0.5505 2.3097 4.0966
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1487 0.2372 -9.057 <2e-16 ***
## x 0.5104 0.0556 9.179 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 607.42 on 99 degrees of freedom
## Residual deviance: 513.84 on 98 degrees of freedom
## AIC: 649.61
##
## Number of Fisher Scoring iterations: 4
v.x <- 0:8
xx <- seq(1, 7, 0.1)
logistic <- function(z) 1 / (1 + exp(-z))
plot.d <- function(
d, w = 0.1, col = "#199a60",
xlab = "x", ylab = "y",
axes = TRUE,
ylim = range(d_07$y),
...) {
plot(
d_07$x, d_07$y,
type = "n", # no plot
xlab = xlab, ylab = ylab,
axes = axes,
ylim = ylim
)
for (x in sort(unique(d_07$x))) {
dsub <- d[d_07$x == x, ]
for (ns in sort(unique(dsub$y))) {
n <- sum(dsub$y == ns)
sx <- 1:n
sdx <- ifelse(n == 1, 1, sd(sx))
points(
x + (sx - mean(sx)) / sdx * w,
rep(ns, n),
col = col,
...
)
}
}
}
par.glmm <- function() par(mar = c(1.5, 1.5, 0.1, 0.1), mgp = c(1.5, 0.5, 0), cex = 1.0)
col.true = "#398ac2"
line.true <- function(col = col.true) lines(xx, logistic(-4 + 1 * xx) * max(v.x), lwd = 2, lty = 4, col = col)
par.glmm()
plot.d(d_07)
line.true()
#葉数と生存種子数の関係
col.glm <- "#de4545"
par.glmm()
plot.d(d_07)
line.true()
lines(xx, logistic(beta.glm[1] + beta.glm[2] * xx) * max(v.x), lwd = 3, col = col.glm)
#x=4だけ抽出
d4_07 <- subset(d_07, x == 4)
table(d4_07$y)
##
## 0 1 2 3 4 5 6 7 8
## 3 1 4 2 1 1 2 3 3
d4_07 %>%
ggplot() +
aes(x = y) +
geom_bar(alpha = 0, col = "blue")
mean(d4_07$y)
## [1] 4.05
var(d4_07$y)
## [1] 8.365789
#それぞれsd=1.0, 1.5, 3.0
ggplot(data = NULL) +
aes(xmin = -6, xmax = 6) +
stat_function(
fun = dnorm,
args = list(sd = 1.0)) +
stat_function(
fun = dnorm,
args = list(sd = 1.5)) +
stat_function(
fun = dnorm,
args = list(sd = 3.0))
各個体分説明するにはフルモデルになってしまうから
個体ごとの尤度L_iの式の中で, r_iを積分する
こうするとr_iが消えるL_i = \int_{-∞}^{∞}p(y_i|\beta_1, \beta_2, r_i)p(r_i|s)dr_i
いろいろなr_iの値における尤度を評価し, その期待値を算出
二項分布p(y_i|\beta_1, \beta_2, r_i)と正規分布p(r_i|s)をかけてr_iで積分する操作はこの2種類の分布を混ぜていることになる
無限個の二項分布を混ぜることで, 過分散な確率分布を生み出す
#パッケージはなければインストール
library(glmmML)
#clusterによってrが個体ごとに独立なパラメータであることを指定(ここではidを使う)
fit.glmm <- glmmML(cbind(y, N - y) ~ x, data = d_07, family = binomial,
cluster = id)
summary(fit.glmm)
##
## Call: glmmML(formula = cbind(y, N - y) ~ x, family = binomial, data = d_07, cluster = id)
##
##
## coef se(coef) z Pr(>|z|)
## (Intercept) -4.190 0.8777 -4.774 1.81e-06
## x 1.005 0.2075 4.843 1.28e-06
##
## Scale parameter in mixing distribution: 2.408 gaussian
## Std. Error: 0.2202
##
## LR p-value for H_0: sigma = 0: 2.136e-55
##
## Residual deviance: 269.4 on 97 degrees of freedom AIC: 275.4
#予測結果の図示
beta.glmm <- fit.glmm$coefficients
col.glm <- "#de4545"
par.glmm()
plot.d(d_07)
line.true()
lines(xx, logistic(beta.glmm[1] + beta.glmm[2] * xx) * max(v.x), lwd = 3, col = col.glm)
-二項分布以外にもポアソン分布や負の二項分布, 正規分布, ガンマ分布などでも使える
#ポアソン分布の場合
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
fit.lme <- glmer(cbind(y, N - y) ~ x + (1 | id), family = binomial, data = d_07)
summary(fit.lme)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(y, N - y) ~ x + (1 | id)
## Data: d_07
##
## AIC BIC logLik deviance df.resid
## 407.2 415.0 -200.6 401.2 97
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.88743 -0.35799 0.01639 0.29664 0.82522
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 5.798 2.408
## Number of obs: 100, groups: id, 100
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.1893 0.8977 -4.667 3.06e-06 ***
## x 1.0047 0.2124 4.729 2.25e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## x -0.951