ANALISIS MULTILEVEL
Projek Magang
***
Sumber Referensi
Referensi : klik disini
Data : lihat disini
R Packages
General
library(broom)
library(magrittr)
library(tidyverse)
library(knitr)Multilevel
library(broom.mixed)
library(car)
library(ggeffects)
library(lme4)
library(lmerTest)Prepare Data
Import Data
Data Tiap Siswa
| school | minority | sex | ses | mathach |
|---|---|---|---|---|
| 1224 | No | Female | -1.528 | 5.876 |
| 1224 | No | Female | -0.588 | 19.708 |
| 1224 | No | Male | -0.528 | 20.349 |
| 1224 | No | Male | -0.668 | 8.781 |
| 1224 | No | Male | -0.158 | 17.898 |
| 1224 | No | Male | 0.022 | 4.583 |
| 1224 | No | Female | -0.618 | -2.832 |
| 1224 | No | Male | -0.998 | 0.523 |
| 1224 | No | Female | -0.888 | 1.527 |
| 1224 | No | Male | -0.458 | 21.521 |
Data Sekolah
| school | sector |
|---|---|
| 1224 | Public |
| 1288 | Public |
| 1296 | Public |
| 1308 | Catholic |
| 1317 | Catholic |
| 1358 | Public |
| 1374 | Public |
| 1433 | Catholic |
| 1436 | Catholic |
| 1461 | Public |
Pengenalan Data
Jumlah Sekolah ?
## [1] 160
Rata-rata Jumlah Siswa tiap Sekolah
## # A tibble: 1 × 1
## `mean(n)`
## <dbl>
## 1 44.9
Menghitung mean student SES di tiap sekolah
stud_data%<>%group_by(school)%>%mutate(mean.ses= mean(ses))%>%ungroup
kable(stud_data[1:10,])| school | minority | sex | ses | mathach | mean.ses |
|---|---|---|---|---|---|
| 1224 | No | Female | -1.528 | 5.876 | -0.434383 |
| 1224 | No | Female | -0.588 | 19.708 | -0.434383 |
| 1224 | No | Male | -0.528 | 20.349 | -0.434383 |
| 1224 | No | Male | -0.668 | 8.781 | -0.434383 |
| 1224 | No | Male | -0.158 | 17.898 | -0.434383 |
| 1224 | No | Male | 0.022 | 4.583 | -0.434383 |
| 1224 | No | Female | -0.618 | -2.832 | -0.434383 |
| 1224 | No | Male | -0.998 | 0.523 | -0.434383 |
| 1224 | No | Female | -0.888 | 1.527 | -0.434383 |
| 1224 | No | Male | -0.458 | 21.521 | -0.434383 |
Membuat deviasi Student’s SES dari rata-rata sekolah
(stud_data%<>%mutate(ses.dev = ses - mean.ses))## # A tibble: 7,185 × 7
## school minority sex ses mathach mean.ses ses.dev
## <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1224 No Female -1.53 5.88 -0.434 -1.09
## 2 1224 No Female -0.588 19.7 -0.434 -0.154
## 3 1224 No Male -0.528 20.3 -0.434 -0.0936
## 4 1224 No Male -0.668 8.78 -0.434 -0.234
## 5 1224 No Male -0.158 17.9 -0.434 0.276
## 6 1224 No Male 0.022 4.58 -0.434 0.456
## 7 1224 No Female -0.618 -2.83 -0.434 -0.184
## 8 1224 No Male -0.998 0.523 -0.434 -0.564
## 9 1224 No Female -0.888 1.53 -0.434 -0.454
## 10 1224 No Male -0.458 21.5 -0.434 -0.0236
## # … with 7,175 more rows
Menggabungkan dengan variabel level sekolah
df <- left_join(stud_data, school_data, by="school")
kable(df[1:10,])| school | minority | sex | ses | mathach | mean.ses | ses.dev | sector |
|---|---|---|---|---|---|---|---|
| 1224 | No | Female | -1.528 | 5.876 | -0.434383 | -1.093617 | Public |
| 1224 | No | Female | -0.588 | 19.708 | -0.434383 | -0.153617 | Public |
| 1224 | No | Male | -0.528 | 20.349 | -0.434383 | -0.093617 | Public |
| 1224 | No | Male | -0.668 | 8.781 | -0.434383 | -0.233617 | Public |
| 1224 | No | Male | -0.158 | 17.898 | -0.434383 | 0.276383 | Public |
| 1224 | No | Male | 0.022 | 4.583 | -0.434383 | 0.456383 | Public |
| 1224 | No | Female | -0.618 | -2.832 | -0.434383 | -0.183617 | Public |
| 1224 | No | Male | -0.998 | 0.523 | -0.434383 | -0.563617 | Public |
| 1224 | No | Female | -0.888 | 1.527 | -0.434383 | -0.453617 | Public |
| 1224 | No | Male | -0.458 | 21.521 | -0.434383 | -0.023617 | Public |
Menggroupkan ID sekolah menjadi factor
Jumlah Sekolah :
| sector | n |
|---|---|
| Public | 90 |
| Catholic | 70 |
Jumlah Siswa :
| sector | n |
|---|---|
| Public | 3642 |
| Catholic | 3543 |
Separate Analyses by School
Scatter Plot
# Samplel random 20 ID sekolah catholic
set.seed(1129)
school.IDs <- school_data %>%
filter(sector=="Catholic") %>%
sample_n(20) %>%
pull(school)
school.IDs## [1] "5667" "4223" "9198" "4253" "5650" "8150" "5761" "2658" "3838" "8193"
## [11] "4292" "7364" "3992" "5404" "5192" "1462" "4042" "3020" "7172" "1477"
Memfilter data sesuai dengan sampel yang terambil
(df.cat <- df %>%
filter(school %in% school.IDs))## # A tibble: 1,011 × 8
## school minority sex ses mathach mean.ses ses.dev sector
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <fct>
## 1 1462 Yes Male 0.162 1.97 -0.669 0.831 Catholic
## 2 1462 Yes Male -0.758 7.72 -0.669 -0.0886 Catholic
## 3 1462 Yes Male -0.708 19.0 -0.669 -0.0386 Catholic
## 4 1462 Yes Male -0.818 20.1 -0.669 -0.149 Catholic
## 5 1462 Yes Male -1.92 22.3 -0.669 -1.25 Catholic
## 6 1462 Yes Male -0.948 14.3 -0.669 -0.279 Catholic
## 7 1462 Yes Male -0.798 10.2 -0.669 -0.129 Catholic
## 8 1462 Yes Male -1.46 9.61 -0.669 -0.789 Catholic
## 9 1462 Yes Male -0.158 6.12 -0.669 0.511 Catholic
## 10 1462 Yes Male -0.378 23.8 -0.669 0.291 Catholic
## # … with 1,001 more rows
Plot Math dan SES untuk 20 sekolah catholic
Plot Math dan SES untuk 20 sekolah public
Linear Regressions
Estimasi Model Linier
Mengestimasi model linier untuk mathach berdasarkan prediksi SES.dev secara terpisah untuk masing masing 160 sekolah.
# Susun kerangka data siswa berdasarkan sekolah
nested.df <- df %>% group_by(school) %>%nest()
# Membuat kerangka data tingkat sekolah (baris=sekolah)
nested.df## # A tibble: 160 × 2
## # Groups: school [160]
## school data
## <fct> <list>
## 1 1224 <tibble [47 × 7]>
## 2 1288 <tibble [25 × 7]>
## 3 1296 <tibble [48 × 7]>
## 4 1308 <tibble [20 × 7]>
## 5 1317 <tibble [48 × 7]>
## 6 1358 <tibble [30 × 7]>
## 7 1374 <tibble [28 × 7]>
## 8 1433 <tibble [35 × 7]>
## 9 1436 <tibble [44 × 7]>
## 10 1461 <tibble [33 × 7]>
## # … with 150 more rows
Dengan menggunakan kerangka data yang bertingkat tadi, kita dapat
memasukan model linier terpisah di setiap kerangka data sekolah.
Model linier (lmodels) merupakan list dari model linier untuk tiap
sekolah.
lmodels <- nested.df %>%
# Mendapatkan dataframe semua sekolah
pull(data) %>%
# Me-run setiap sekolah dengan fungsi map
purrr::map(~ lm(mathach ~ ses.dev, data= .x))
head(lmodels)## [[1]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 9.715 2.509
##
##
## [[2]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 13.511 3.255
##
##
## [[3]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 7.636 1.076
##
##
## [[4]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 16.256 0.126
##
##
## [[5]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 13.178 1.274
##
##
## [[6]]
##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 11.206 5.068
Model Linier dalam Data Frame
Kita dapat membuat list model linier ke dalam dataframe awal tadi, menambahkan tiap model ke baris sekolah seperti dibawah ini:
## # A tibble: 160 × 3
## # Groups: school [160]
## school data model
## <fct> <list> <list>
## 1 1224 <tibble [47 × 7]> <lm>
## 2 1288 <tibble [25 × 7]> <lm>
## 3 1296 <tibble [48 × 7]> <lm>
## 4 1308 <tibble [20 × 7]> <lm>
## 5 1317 <tibble [48 × 7]> <lm>
## 6 1358 <tibble [30 × 7]> <lm>
## 7 1374 <tibble [28 × 7]> <lm>
## 8 1433 <tibble [35 × 7]> <lm>
## 9 1436 <tibble [44 × 7]> <lm>
## 10 1461 <tibble [33 × 7]> <lm>
## # … with 150 more rows
Mengambil model linier untuk sekolah tertentu
nested.df %>%
filter(school=="1224") %>%
pull(model) %>%
extract2(1)##
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
##
## Coefficients:
## (Intercept) ses.dev
## 9.715 2.509
Menggunakan tidy output
nested.df %>%
filter(school=="1224") %>%
pull(model) %>%
extract2(1) %>%
broom::tidy()## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 9.72 1.10 8.87 1.94e-11
## 2 ses.dev 2.51 1.77 1.42 1.62e- 1
Menambahkan semua estimasi ke dalam dataframe
nested.df %<>%
mutate(model.results = purrr::map(model,
broom::tidy)
)
nested.df %>%unnest(model.results)## # A tibble: 320 × 8
## # Groups: school [160]
## school data model term estimate std.e…¹ stati…² p.value
## <fct> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1224 <tibble [47 × 7]> <lm> (Intercept) 9.72 1.10 8.87 1.94e-11
## 2 1224 <tibble [47 × 7]> <lm> ses.dev 2.51 1.77 1.42 1.62e- 1
## 3 1288 <tibble [25 × 7]> <lm> (Intercept) 13.5 1.36 9.91 9.12e-10
## 4 1288 <tibble [25 × 7]> <lm> ses.dev 3.26 2.08 1.57 1.31e- 1
## 5 1296 <tibble [48 × 7]> <lm> (Intercept) 7.64 0.774 9.86 6.27e-13
## 6 1296 <tibble [48 × 7]> <lm> ses.dev 1.08 1.21 0.890 3.78e- 1
## 7 1308 <tibble [20 × 7]> <lm> (Intercept) 16.3 1.40 11.6 9.02e-10
## 8 1308 <tibble [20 × 7]> <lm> ses.dev 0.126 3.00 0.0420 9.67e- 1
## 9 1317 <tibble [48 × 7]> <lm> (Intercept) 13.2 0.790 16.7 3.97e-21
## 10 1317 <tibble [48 × 7]> <lm> ses.dev 1.27 1.44 0.887 3.80e- 1
## # … with 310 more rows, and abbreviated variable names ¹std.error, ²statistic
Select bagian yang penting
lm.coeff <- nested.df %>%
unnest(model.results) %>%
dplyr::select(school, term, estimate)
lm.coeff## # A tibble: 320 × 3
## # Groups: school [160]
## school term estimate
## <fct> <chr> <dbl>
## 1 1224 (Intercept) 9.72
## 2 1224 ses.dev 2.51
## 3 1288 (Intercept) 13.5
## 4 1288 ses.dev 3.26
## 5 1296 (Intercept) 7.64
## 6 1296 ses.dev 1.08
## 7 1308 (Intercept) 16.3
## 8 1308 ses.dev 0.126
## 9 1317 (Intercept) 13.2
## 10 1317 ses.dev 1.27
## # … with 310 more rows
Reshape dan Rename
lm.coeff %<>%
# Intersep dan slope ke dalam dua kolom
pivot_wider(names_from = term, values_from = estimate) %>%
# Rename columns
dplyr::select(school, intercept = `(Intercept)`, slope = ses.dev)
lm.coeff## # A tibble: 160 × 3
## # Groups: school [160]
## school intercept slope
## <fct> <dbl> <dbl>
## 1 1224 9.72 2.51
## 2 1288 13.5 3.26
## 3 1296 7.64 1.08
## 4 1308 16.3 0.126
## 5 1317 13.2 1.27
## 6 1358 11.2 5.07
## 7 1374 9.73 3.85
## 8 1433 19.7 1.85
## 9 1436 18.1 1.60
## 10 1461 16.8 6.27
## # … with 150 more rows
Menambahakan mean.SES ke dalam datafram
Dataframe akan berisi kolom IDsekolah, sektor, mean.ses, intersep, dan slope.
lm.df <- df %>%
dplyr::select(school, sector, mean.ses) %>%
distinct
(lm.df %<>%
left_join(lm.coeff, by="school")
)## # A tibble: 160 × 5
## school sector mean.ses intercept slope
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1224 Public -0.434 9.72 2.51
## 2 1288 Public 0.122 13.5 3.26
## 3 1296 Public -0.426 7.64 1.08
## 4 1308 Catholic 0.528 16.3 0.126
## 5 1317 Catholic 0.345 13.2 1.27
## 6 1358 Public -0.0197 11.2 5.07
## 7 1374 Public -0.0126 9.73 3.85
## 8 1433 Catholic 0.712 19.7 1.85
## 9 1436 Catholic 0.563 18.1 1.60
## 10 1461 Public 0.677 16.8 6.27
## # … with 150 more rows
Hierarchical Linier Model
Variance Components
Simple Variance Components Model
mod1 <-lmer(mathach~1+(1|school),data=df) #estimasi dengan Restricted Maximum Likelihood (REML)
summary(mod1)## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mathach ~ 1 + (1 | school)
## Data: df
##
## REML criterion at convergence: 47116.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0631 -0.7539 0.0267 0.7606 2.7426
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 8.614 2.935
## Residual 39.148 6.257
## Number of obs: 7185, groups: school, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.6370 0.2444 156.6473 51.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hasil denga tidy format
(mod1.res<-tidy(mod1))## # A tibble: 3 × 8
## effect group term estimate std.error statis…¹ df p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 12.6 0.244 51.7 157. 2.34e-100
## 2 ran_pars school sd__(Intercept) 2.93 NA NA NA NA
## 3 ran_pars Residual sd__Observation 6.26 NA NA NA NA
## # … with abbreviated variable name ¹statistic
Test signifikansi efek sekolah, estimasi single-level model
mod1_sl <- lm(mathach ~ 1,
data=df)
# Membandingkan dua model dengan Likelihood Ratio Test (LRT)
anova(mod1, mod1_sl) ## Data: df
## Models:
## mod1_sl: mathach ~ 1
## mod1: mathach ~ 1 + (1 | school)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1_sl 2 48104 48117 -24050 48100
## mod1 3 47122 47142 -23558 47116 983.92 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimasi Varians level-sekolah
(sigma2_u <- mod1.res %>%
filter(effect == "ran_pars", group == "school") %>%
# Standard deviation
pull(estimate) %>%
# ^2 = Variance
.^2)## [1] 8.614025
Estimasi varians level-individu
(sigma2_e <- mod1.res %>%
filter(effect == "ran_pars", group == "Residual") %>%
# Standard deviation
pull(estimate) %>%
# ^2 = Variance
.^2)## [1] 39.14832
Variance Partition Coefficient
# Variance Partition Coefficient
sigma2_u/(sigma2_u + sigma2_e)## [1] 0.1803518
Random Intercept
Random Intercept dengan fixed slope untuk SES tiap siswa
mod2 <-lmer(mathach~1+ses.dev+(1 | school),
data=df)
summary(mod2)## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mathach ~ 1 + ses.dev + (1 | school)
## Data: df
##
## REML criterion at convergence: 46724
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0969 -0.7322 0.0194 0.7572 2.9147
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 8.672 2.945
## Residual 37.010 6.084
## Number of obs: 7185, groups: school, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.6361 0.2445 156.7405 51.68 <2e-16 ***
## ses.dev 2.1912 0.1087 7022.0237 20.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## ses.dev 0.000
ANOVA
Anova(mod2)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: mathach
## Chisq Df Pr(>Chisq)
## ses.dev 406.68 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Tidy
(mod2.res <- tidy(mod2))## # A tibble: 4 × 8
## effect group term estimate std.error statis…¹ df p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 12.6 0.244 51.7 157. 2.29e-100
## 2 fixed <NA> ses.dev 2.19 0.109 20.2 7022. 5.77e- 88
## 3 ran_pars school sd__(Intercept) 2.94 NA NA NA NA
## 4 ran_pars Residual sd__Observation 6.08 NA NA NA NA
## # … with abbreviated variable name ¹statistic
Estimasi fixed effect level populasi :Intercept dan SES slope
mod2.res %>%filter(effect == "fixed")## # A tibble: 2 × 8
## effect group term estimate std.error statistic df p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fixed <NA> (Intercept) 12.6 0.244 51.7 157. 2.29e-100
## 2 fixed <NA> ses.dev 2.19 0.109 20.2 7022. 5.77e- 88
Random Effect Parameters
mod2.res %>% filter(effect == "ran_pars")## # A tibble: 2 × 8
## effect group term estimate std.error statistic df p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ran_pars school sd__(Intercept) 2.94 NA NA NA NA
## 2 ran_pars Residual sd__Observation 6.08 NA NA NA NA
Estimasi Varians Level Sekolaj
(sigma2_u <- mod2.res %>%
filter(effect == "ran_pars", group == "school") %>%
# Standard deviation
pull(estimate) %>%
# ^2 = Variance
.^2)## [1] 8.672289
Estimasi Varians Level Individu
(sigma2_e <- mod2.res %>%
filter(effect == "ran_pars", group == "Residual") %>%
# Standard deviation
pull(estimate) %>%
# ^2 = Variance
.^2)## [1] 37.01041
Variance Partition Coefficient
sigma2_u/(sigma2_u + sigma2_e)## [1] 0.1898375