#fを因子に直す
d2$f <- factor(d2$f)
head(d2)
## # A tibble: 6 x 4
## N y x f
## <dbl> <dbl> <dbl> <fct>
## 1 8 1 9.76 C
## 2 8 6 10.5 C
## 3 8 5 10.8 C
## 4 8 6 10.9 C
## 5 8 1 9.37 C
## 6 8 1 8.81 C
summary(d2)
## N y x f
## Min. :8 Min. :0.00 Min. : 7.660 C:50
## 1st Qu.:8 1st Qu.:3.00 1st Qu.: 9.338 T:50
## Median :8 Median :6.00 Median : 9.965
## Mean :8 Mean :5.08 Mean : 9.967
## 3rd Qu.:8 3rd Qu.:8.00 3rd Qu.:10.770
## Max. :8 Max. :8.00 Max. :12.440
#まず枠だけ描く
plot(d2$x, d2$y, type = "n",
xlab = "body_size", ylab = "seed_alive")
#施肥効果ごとに追加
#control
points(d2$x[d2$f == "C"],
d2$y[d2$f == "C"],
pch = 21)
#treatment
points(d2$x[d2$f == "T"],
d2$y[d2$f == "T"],
pch = 19)
#凡例
legend("topleft", legend = c("C", "T"), pch=c(21, 19))
#ロジスティック関数の描画
logistic <- function(z){
1 / (1 + exp(-z))
}
z <- seq(-6, 6, 0.1)
plot(z, logistic(z), type = "l")
#cbind(生存した数, 死亡した数)で2列100行のデータフレームを作る
#1列目が生存数, 2列目が死亡数
#0, 1のデータならcbindする必要はない
fit.xf <- glm(cbind(y, N - y) ~ x + f, data = d2, family = binomial)
#結果の確認
stargazer(fit.xf, type = "text", style = "all", ci = TRUE,
star.cutoffs = NA, omit.table.layout = 'n',
align = TRUE)
##
## =====================================
## Dependent variable:
## -------------------
## cbind(y, N - y)
## -------------------------------------
## x 1.952
## (1.680, 2.225)
## t = 14.059
## p = 0.000
## fT 2.022
## (1.568, 2.475)
## t = 8.740
## p = 0.000
## Constant -19.536
## (-22.307, -16.765)
## t = -13.818
## p = 0.000
## -------------------------------------
## Observations 100
## Log Likelihood -133.106
## Akaike Inf. Crit. 272.211
## Residual Deviance 123.034 (df = 97)
## Null Deviance 499.232 (df = 99)
## =====================================
#作図領域の定義
plot.data <- function(...) {
plot(
d2$x, d2$y,
pch = c(21, 16)[d2$f],
xlab = "x", ylab = "y",
...
)
}
#予測曲線の描画
xx <- seq(min(d2$x), max(d2$x), length = 50)
# control
# cのほうだけ描画, tはNAにして描かない
plot.data(col = c("#000000", NA)[d2$f])
ff <- factor("C", levels = c("C", "T"))
p <- predict(fit.xf, newdata = data.frame(x = xx, f = ff), type = "response")
lines(xx, p * 8, lwd = 3, col = "gray")
#treatment
#tのほうだけプロット, cはNA
plot.data(col = c(NA, "#000000")[d2$f])
ff <- factor("T", levels = c("C", "T"))
p <- predict(fit.xf, newdata = data.frame(x = xx, f = ff), type = "response")
lines(xx, p * 8, lwd = 3, col = "black")
library(MASS)
stepAIC(fit.xf)
## Start: AIC=272.21
## cbind(y, N - y) ~ x + f
##
## Df Deviance AIC
## <none> 123.03 272.21
## - f 1 217.17 364.35
## - x 1 490.58 637.76
##
## Call: glm(formula = cbind(y, N - y) ~ x + f, family = binomial, data = d2)
##
## Coefficients:
## (Intercept) x fT
## -19.536 1.952 2.022
##
## Degrees of Freedom: 99 Total (i.e. Null); 97 Residual
## Null Deviance: 499.2
## Residual Deviance: 123 AIC: 272.2
fit.xf.int <- glm(cbind(y, N - y) ~ x * f, family = binomial, data = d2)
stargazer(fit.xf, fit.xf.int, type = "text", style = "all", ci = TRUE,
star.cutoffs = NA, omit.table.layout = 'n',
align = TRUE)
##
## =============================================================
## Dependent variable:
## -------------------------------------
## cbind(y, N - y)
## (1) (2)
## -------------------------------------------------------------
## x 1.952 1.853
## (1.680, 2.225) (1.489, 2.216)
## t = 14.059 t = 9.983
## p = 0.000 p = 0.000
## fT 2.022 -0.064
## (1.568, 2.475) (-5.363, 5.235)
## t = 8.740 t = -0.024
## p = 0.000 p = 0.982
## x:fT 0.216
## (-0.333, 0.765)
## t = 0.772
## p = 0.440
## Constant -19.536 -18.523
## (-22.307, -16.765) (-22.220, -14.827)
## t = -13.818 t = -9.821
## p = 0.000 p = 0.000
## -------------------------------------------------------------
## Observations 100 100
## Log Likelihood -133.106 -132.805
## Akaike Inf. Crit. 272.211 273.611
## Residual Deviance 123.034 (df = 97) 122.433 (df = 96)
## Null Deviance (df = 99) 499.232 499.232
## =============================================================
xx <- seq(min(d2$x), max(d2$x), length = 50)
# control
# cのほうだけ描画, tはNAにして描かない
plot.data(col = c("#000000", NA)[d2$f])
ff <- factor("C", levels = c("C", "T"))
p <- predict(fit.xf.int, newdata = data.frame(x = xx, f = ff), type = "response")
lines(xx, p * 8, lwd = 3, col = "gray")
#tのほうだけプロット, cはNA
plot.data(col = c(NA, "#000000")[d2$f])
ff <- factor("T", levels = c("C", "T"))
p <- predict(fit.xf.int, newdata = data.frame(x = xx, f = ff), type = "response")
lines(xx, p * 8, lwd = 3, col = "black")
head(d3)
## # A tibble: 6 x 3
## y x A
## <dbl> <dbl> <dbl>
## 1 57 0.68 10.3
## 2 64 0.27 15.6
## 3 49 0.46 10
## 4 64 0.45 14.9
## 5 82 0.74 14
## 6 29 0.15 9.6
plot(d3$A, d3$y, xlab = "A", ylab = "Y")
密度\(= \frac{個体数(\lambda_i)}{面積(A_i)}\)
\(\lambda_i = A_i × 人口密度= A_iexp(\beta_1 + \beta_2x_i)\)
\(\lambda_i = exp(\beta_1 + \beta_2x_i+logA_i\)
これが線形予測子となる.
fit.offset <- glm(y ~ x, offset = log(A), family = poisson, data = d3)
summary(fit.offset)
##
## Call:
## glm(formula = y ~ x, family = poisson, data = d3, offset = log(A))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2566 -0.6532 0.0538 0.5717 2.0575
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.97308 0.04505 21.60 <2e-16 ***
## x 1.03827 0.07769 13.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 261.457 on 99 degrees of freedom
## Residual deviance: 81.608 on 98 degrees of freedom
## AIC: 650.34
##
## Number of Fisher Scoring iterations: 4
#係数の図示
library(coefplot)
## Loading required package: ggplot2
coefplot(fit.offset)
#標準正規分布
y <- seq(-5, 5, 0.1)
#パラメータは平均と標準偏差
plot(y, dnorm(y, mean = 0, sd = 1), type = "l")
#確率密度
pnorm(1.96, 0, 1) - pnorm(-1.96, 0, 1)
## [1] 0.9500042
#r=s=1
curve(dgamma(x, shape = 1, rate = 1), from = 0, to = 4)
#r=s=5
curve(dgamma(x, 5, 5), from = 0, to = 4)
#r=s=0.1
curve(dgamma(x, 0.1, 0.1), from = 0, to = 4)
head(d)
## x y
## 1 0.00100000 0.0008873584
## 2 0.01730612 0.0234652087
## 3 0.03361224 0.0698755633
## 4 0.04991837 0.0343402528
## 5 0.06622449 0.0265204047
## 6 0.08253061 0.1592148027
ggplot(d) +
aes(x = x, y = ..density..) +
geom_histogram() +
geom_density(fill = "lightblue", alpha = 0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
fit.gamma <- glm(y ~ log(x), family = Gamma(link = "log"), data = d)
summary(fit.gamma)
##
## Call:
## glm(formula = y ~ log(x), family = Gamma(link = "log"), data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4470 -0.4909 -0.1515 0.2585 1.0679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.04032 0.11877 -8.759 1.61e-11 ***
## log(x) 0.68325 0.06838 9.992 2.60e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.3250858)
##
## Null deviance: 35.368 on 49 degrees of freedom
## Residual deviance: 17.251 on 48 degrees of freedom
## AIC: -110.87
##
## Number of Fisher Scoring iterations: 6
#推定結果の確認
d %>%
mutate(predict = predict(fit.gamma,
type = "response",
newdata = .)) %>%
ggplot() +
aes(x = predict, y = y) +
geom_point() +
geom_abline(slope = 1,
intercept = 0,
linetype = "dashed")