pacman::p_load(mlmRev, tidyverse, lme4) #pacman::p_load()同時取代install.packages()及library(),功能與p_load()一樣
data(Hsb82) #Loads specified data sets
head(Hsb82)
## school minrty sx ses mAch meanses sector cses
## 1 1224 No Female -1.528 5.876 -0.434383 Public -1.09361702
## 2 1224 No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3 1224 No Male -0.528 20.349 -0.434383 Public -0.09361702
## 4 1224 No Male -0.668 8.781 -0.434383 Public -0.23361702
## 5 1224 No Male -0.158 17.898 -0.434383 Public 0.27638298
## 6 1224 No Male 0.022 4.583 -0.434383 Public 0.45638298
dta <- Hsb82 #複製data set到dta
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
"Ave_SES", "Sector", "C_SES")
names(x) <- value;x is an R object and value is a character vector of up to the same length as x.
C()是concatenation,可串接文字或數字。
str(dta) #檢視資料結構
## 'data.frame': 7185 obs. of 8 variables:
## $ School : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
## $ Minority: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
## $ SES : num -1.528 -0.588 -0.528 -0.668 -0.158 ...
## $ Math : num 5.88 19.71 20.35 8.78 17.9 ...
## $ Ave_SES : num -0.434 -0.434 -0.434 -0.434 -0.434 ...
## $ Sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
## $ C_SES : num -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
summary(m1 <- lmer(Math ~ (1 | School), data = dta))#估計不同學校對數學成績的影響,(1 | School)是隨機截距項的R語法,意思是假設每個學校的截距不同,其中"1"代表截距項。
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
## Data: dta
##
## REML criterion at convergence: 47116.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0631 -0.7539 0.0267 0.7606 2.7426
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 8.614 2.935
## Residual 39.148 6.257
## Number of obs: 7185, groups: School, 160
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 12.6370 0.2444 51.71
m1 <- lme4::lmer(Math ~ (1 | School), data=dta)
sjPlot::tab_model(m1, show.p=FALSE, show.r2=FALSE)
Math | ||
---|---|---|
Predictors | Estimates | CI |
(Intercept) | 12.64 | 12.16 – 13.12 |
Random Effects | ||
σ2 | 39.15 | |
τ00 School | 8.61 | |
ICC | 0.18 | |
N School | 160 | |
Observations | 7185 |
1.全部學校的數學平均的估計值為Intercept=12.64,平均數學成績的標準差是隨機效果的標準偏差Std.Dev.=√8.61=2.934。
2.全部學生的數學平均為12.64,平均數學成績的標準差為√(8.61+39.15)=6.911。
3.在同一學校內的學生數學成績之間的相關性為0.18。
4.大約18%的數學分數變異可歸因於學校的差異。
summary(m2 <- lmer(Math ~ Ave_SES + (1 | School), data = dta))#以平均社會經濟狀態作為固定效果
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + (1 | School)
## Data: dta
##
## REML criterion at convergence: 46961.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.13493 -0.75254 0.02413 0.76766 2.78515
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 2.639 1.624
## Residual 39.157 6.258
## Number of obs: 7185, groups: School, 160
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 12.6846 0.1493 84.97
## Ave_SES 5.8635 0.3615 16.22
##
## Correlation of Fixed Effects:
## (Intr)
## Ave_SES 0.010
m2 <- lme4::lmer(Math ~ Ave_SES + (1 | School), data=dta)
sjPlot::tab_model(m1, m2, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
Math | Math | |||
---|---|---|---|---|
Predictors | Estimates | std. Error | Estimates | std. Error |
(Intercept) | 12.64 | 0.24 | 12.68 | 0.15 |
Ave_SES | 5.86 | 0.36 | ||
Random Effects | ||||
σ2 | 39.15 | 39.16 | ||
τ00 | 8.61 School | 2.64 School | ||
ICC | 0.18 | 0.06 |
1.The estimated mean of gain scores for all children is 12.68 with an SD of √(2.64+39.16)=6.465
2.For one point increase in Ave_SES (Average Socio-economic status), gain score increase by 5.86 point, on average.
3.Adjusting for individual Ave_SES (Average Socio-economic status), correlation between gain scores among children attending the same school is 0.06.
4.About 0.03%〔=(39.16-39.15)/39.15〕 of variance in gain scores can be attributed to differences in Average Socio-economic status among children attending different schools.
5.About 12.48% (=〔(39.15+8.61)-(39.16+2.64)〕/(39.15+8.61)) of variance in gain scores can be attributed to differences in Average Socio-economic status among pupils attending the same school
plot(m2, xlab = "Fitted values", ylab = "Pearson residuals",
main = "Model 2", pch = 20, cex = .5, type = c("p", "g"))
診斷模型:殘差圖顯示模型符合常態假設。
summary(m0a <- lm(Math ~ Ave_SES, data = dta))#平均社經狀態對數學成績的影響
##
## Call:
## lm(formula = Math ~ Ave_SES, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.4520 -4.8332 0.1735 5.1117 16.2787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.74703 0.07621 167.27 <2e-16 ***
## Ave_SES 5.71688 0.18429 31.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.46 on 7183 degrees of freedom
## Multiple R-squared: 0.1181, Adjusted R-squared: 0.118
## F-statistic: 962.3 on 1 and 7183 DF, p-value: < 2.2e-16
summary(m0a <- lm(Math ~ Ave_SES, data = dta))
##
## Call:
## lm(formula = Math ~ Ave_SES, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.4520 -4.8332 0.1735 5.1117 16.2787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.74703 0.07621 167.27 <2e-16 ***
## Ave_SES 5.71688 0.18429 31.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.46 on 7183 degrees of freedom
## Multiple R-squared: 0.1181, Adjusted R-squared: 0.118
## F-statistic: 962.3 on 1 and 7183 DF, p-value: < 2.2e-16
平均而言,Ave_SES增加一單位,數學成績增加5.72分
theme_set(theme_bw())
ggplot(dta, aes(Ave_SES, Math)) +
geom_smooth(method = "lm") +
geom_point(alpha = .2) +
labs(x = "School mean SES", y = "Math achievement score")
## `geom_smooth()` using formula 'y ~ x'
dta_mmath <- dta %>%
group_by(School) %>% #group_by(School)依據學校分群分析
mutate( ave_math = mean(Math)) %>% #mutate新增變數ave_math,計算各校數學平均分數mean(Math)
select( School, Ave_SES, ave_math ) %>% #select選要分析的欄位(Column)
unique %>% #丟掉重複選取的學校
as.data.frame
ggplot(dta_mmath, aes(Ave_SES, ave_math)) + #畫出各校數學平均分數與平均社經狀態的關係
geom_smooth(method = "lm") +
geom_point(alpha = .2)+
labs(x = "Mean School SES", y = "Mean School Math Scores")
## `geom_smooth()` using formula 'y ~ x'