# http://hosho.ees.hokudai.ac.jp/~kubo/ce/IwanamiBook.html
d<-read.csv("~/Desktop/data_modeling_tutorial/chap3/data3a.csv")
str(d)
## 'data.frame': 100 obs. of 3 variables:
## $ y: int 6 6 6 12 10 4 9 9 9 11 ...
## $ x: num 8.31 9.44 9.5 9.07 10.16 ...
## $ f: Factor w/ 2 levels "C","T": 1 1 1 1 1 1 1 1 1 1 ...
summary(d)
## y x f
## Min. : 2.00 Min. : 7.19 C:50
## 1st Qu.: 6.00 1st Qu.: 9.43 T:50
## Median : 8.00 Median :10.15
## Mean : 7.83 Mean :10.09
## 3rd Qu.:10.00 3rd Qu.:10.69
## Max. :15.00 Max. :12.40
# データ全体を眺める
plot(d[,c(2,1)],pch=c(21,19)[d$f])
legend("topleft",legend=c("C","T"),pch=c(21,19))
# ggplot2ライブラリでの表示
# library('ggplot2')
# qplot(data=d,y=y,x=x,col=f)
# 施肥処理でグループ分けした箱ひげ図
plot(d[,c(3,1)])
# 施肥処理でグループ分けした箱ひげ図(ggplot2)
# qplot(data=d,y=y,x=f,col=f,geom='boxplot')
# ポアソン回帰 説明変数 数量型
fit.x <- glm(y~x,data=d,family=poisson)
print(fit.x) #最尤推定値 概要の表示
##
## Call: glm(formula = y ~ x, family = poisson, data = d)
##
## Coefficients:
## (Intercept) x
## 1.2917 0.0757
##
## Degrees of Freedom: 99 Total (i.e. Null); 98 Residual
## Null Deviance: 89.5
## Residual Deviance: 85 AIC: 475
summary(fit.x) # 詳細の表示
##
## Call:
## glm(formula = y ~ x, family = poisson, data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.368 -0.735 -0.177 0.699 2.376
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2917 0.3637 3.55 0.00038 ***
## x 0.0757 0.0356 2.13 0.03358 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 89.507 on 99 degrees of freedom
## Residual deviance: 84.993 on 98 degrees of freedom
## AIC: 474.8
##
## Number of Fisher Scoring iterations: 4
logLik(fit.x) # 最大対数尤度 あてはまりの良さ 大きい値であるほうが良い。
## 'log Lik.' -235.4 (df=2)
# ポアソン回帰モデルと実測値の図示
plot(d[,c(2,1)],pch=c(21,19)[d$f])
legend("topleft",legend=c("C","T"),pch=c(21,19))
xx <- seq(min(d$x),max(d$x),length=100)
lines(xx,exp(1.29+0.0757*xx),lwd=2)
# ポアソン回帰モデルと実測値の図示(predict関数)
yy <- predict(fit.x,newdata=data.frame(x=xx),type='response')
plot(d[,c(2,1)],pch=c(21,19)[d$f])
legend("topleft",legend=c("C","T"),pch=c(21,19))
lines(xx,yy,lwd=2)
# ポアソン回帰 説明変数 因子型
fit.f <- glm(y~f,data=d,family=poisson)
summary(fit.f)
##
## Call:
## glm(formula = y ~ f, family = poisson, data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4751 -0.6994 0.0426 0.7247 2.2520
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.0516 0.0507 40.46 <2e-16 ***
## fT 0.0128 0.0715 0.18 0.86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 89.507 on 99 degrees of freedom
## Residual deviance: 89.475 on 98 degrees of freedom
## AIC: 479.3
##
## Number of Fisher Scoring iterations: 4
logLik(fit.f) # 最大対数尤度
## 'log Lik.' -237.6 (df=2)
# ポアソン回帰 説明変数 数量型+因子型
fit.all <- glm(y~x+f,data=d,family=poisson)
summary(fit.all)
##
## Call:
## glm(formula = y ~ x + f, family = poisson, data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.398 -0.734 -0.202 0.679 2.432
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2631 0.3696 3.42 0.00063 ***
## x 0.0801 0.0370 2.16 0.03062 *
## fT -0.0320 0.0744 -0.43 0.66703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 89.507 on 99 degrees of freedom
## Residual deviance: 84.808 on 97 degrees of freedom
## AIC: 476.6
##
## Number of Fisher Scoring iterations: 4
logLik(fit.all) # 最大対数尤度
## 'log Lik.' -235.3 (df=3)
応答変数\( y \)を\( \log y \)のように変数変換して直線回帰することと、ポアソン回帰はまったく別ものであることに注意してください。試してみればわかりますが、両者の推定結果は一致しません。
fit.lm <- lm(log(y)~log(x),data=d)
summary(fit.lm)
##
## Call:
## lm(formula = log(y) ~ log(x), data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2416 -0.2258 -0.0174 0.2817 0.7469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.246 0.826 -0.30 0.7663
## log(x) 0.972 0.358 2.72 0.0078 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.36 on 98 degrees of freedom
## Multiple R-squared: 0.07, Adjusted R-squared: 0.0606
## F-statistic: 7.38 on 1 and 98 DF, p-value: 0.00779
xx <- seq(-5,25,length=100)
yy.lm <- predict(fit.lm,newdata=data.frame(x=xx),type='response')
yy.x <- predict(fit.x,newdata=data.frame(x=xx),type='response')
plot(d[,c(2,1)],pch=c(21,19)[d$f])
legend("topleft",legend=c("C","T"),pch=c(21,19))
lines(xx,exp(yy.lm),lwd=2,col=2)
lines(xx,yy.x,lwd=2,col=3)
legend("topright",legend=c("fit.lm","fit.x"),lwd=2.5,col=c(2,3))
# 範囲を広げて表示
plot(d[,c(2,1)],pch=c(21,19)[d$f],xlim=c(-1,25),ylim=c(-1,25))
legend("topleft",legend=c("C","T"),pch=c(21,19))
lines(xx,exp(yy.lm),lwd=2,col=2)
lines(xx,yy.x,lwd=2,col=3)
legend("topright",legend=c("fit.lm","fit.x"),lwd=2.5,col=c(2,3))