http://www.ats.ucla.edu/stat/r/examples/mlm_imm/immch4.htm を参考にした。
演習用に作ったデータなのでこの結果を用いて実践の改善を図ろうとしないこと
変数 | 内容 |
---|---|
schid | 学校番号 |
stuid | 生徒番号 |
ses | 生徒の家庭のSES |
meanses | 学校の平均SES |
homework | 1週間あたり宿題時間 |
white | 人種:白人を1,それ以外を0 |
parented | 親の学歴 |
public | 公立私立:公立を1 |
ratio | PT比 |
percmin | (不明) |
math | 数学得点 |
sex | 性別 |
race | 民族 |
sctype | (学校のタイプ?) |
cstr | (不明) |
scsize | 学校規模 |
urban | 都市 |
region | 地域 |
# データ読み込み
dat <- read.csv("dat.csv")
# 学校別のサブセット
s26537 <- subset(dat, schid == 26537)
s47583 <- subset(dat, schid == 47583)
s54344 <- subset(dat, schid == 54344)
# 学校別に色分けした散布図
plot(s26537$homework, s26537$math, pch = 16, col = 2, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
par(new = T)
plot(s47583$homework, s47583$math, pch = 16, col = 3, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
par(new = T)
plot(s54344$homework, s54344$math, pch = 16, col = 4, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
# 回帰直線
lmres <- lm(math ~ homework, data = dat) #回帰係数を求めて格納
abline(lmres, lwd = 2, lty = 3)
# 学校別に色分けした散布図
plot(s26537$homework, s26537$math, pch = 16, col = 2, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
par(new = T)
plot(s47583$homework, s47583$math, pch = 16, col = 3, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
par(new = T)
plot(s54344$homework, s54344$math, pch = 16, col = 4, xlab = "", ylab = "",
xlim = c(0, 7), ylim = c(0, 75))
# 回帰直線
lm26537 <- lm(math ~ homework, data = s26537)
lm47583 <- lm(math ~ homework, data = s47583)
lm54344 <- lm(math ~ homework, data = s54344)
abline(lm26537, lwd = 2, col = 2)
abline(lm47583, lwd = 2, col = 3)
abline(lm54344, lwd = 2, col = 4)
abline(lmres, lwd = 2, lty = 3)
ランダム切片モデルは切片\( \beta_0 \)が集団によって異なるため,以下の式で表現する。 \[ Y = \beta_0 + \beta_1 X_1 + r \] \[ \beta_0 = \gamma_{00} + u_0 \]
この例では\( Y \)が数学得点(math),\( X \)が宿題時間(homework),\( r \)が誤差となる。
library(lme4)
## Loading required package: lattice
## Loading required package: Matrix
res <- lmer(math ~ homework + (1 | schid), data = dat)
summary(res)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ homework + (1 | schid)
## Data: dat
##
## REML criterion at convergence: 370.2
##
## Random effects:
## Groups Name Variance Std.Dev.
## schid (Intercept) 32.0 5.66
## Residual 48.6 6.97
## Number of obs: 55, groups: schid, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.899 3.705 11.04
## homework 3.653 0.629 5.81
##
## Correlation of Fixed Effects:
## (Intr)
## homework -0.396
summary(lmres)
##
## Call:
## lm(formula = math ~ homework, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.676 -6.934 -0.297 4.445 21.324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.918 1.990 19.55 < 2e-16 ***
## homework 4.379 0.709 6.18 9.3e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.27 on 53 degrees of freedom
## Multiple R-squared: 0.419, Adjusted R-squared: 0.408
## F-statistic: 38.2 on 1 and 53 DF, p-value: 9.33e-08
cor(dat$math, dat$ses)
## [1] 0.4814
cor(dat$math, dat$meanses)
## [1] 0.4006
cor(dat$ses, dat$homework)
## [1] 0.1965
cor(s26537$homework, s26537$math)
## [1] 0.7235
cor(s47583$homework, s47583$math)
## [1] 0.5631
cor(s54344$homework, s54344$math)
## [1] 0.5871
cor(s26537$ses, s26537$math)
## [1] 0.1195
cor(s47583$ses, s47583$math)
## [1] 0.5125
cor(s54344$ses, s54344$math)
## [1] 0.3738
cor(s26537$ses, s26537$homework)
## [1] 0.188
cor(s47583$ses, s47583$homework)
## [1] 0.4574
cor(s54344$ses, s54344$homework)
## [1] -0.2506
\[ Y = \beta_0 + \beta_1 X_1 + r \] \[ \beta_1 = \gamma_{00} + u_0 \]
lmer(math~homework+(0+homework|schid), data=dat)