ANALISIS MULTILEVEL

Projek Magang

🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐤 🐘 ✂ ***

Sumber Referensi

Referensi : klik disini
Data : lihat disini

R Packages

General

library(broom)
library(magrittr)
library(tidyverse)
library(knitr)

Multilevel

library(broom.mixed)
library(car)
library(ggeffects)
library(lme4)
library(lmerTest)

Prepare Data

Import Data

Data Tiap Siswa

school	minority	sex	ses	mathach
1224	No	Female	-1.528	5.876
1224	No	Female	-0.588	19.708
1224	No	Male	-0.528	20.349
1224	No	Male	-0.668	8.781
1224	No	Male	-0.158	17.898
1224	No	Male	0.022	4.583
1224	No	Female	-0.618	-2.832
1224	No	Male	-0.998	0.523
1224	No	Female	-0.888	1.527
1224	No	Male	-0.458	21.521

Data Sekolah

school	sector
1224	Public
1288	Public
1296	Public
1308	Catholic
1317	Catholic
1358	Public
1374	Public
1433	Catholic
1436	Catholic
1461	Public

Pengenalan Data

Jumlah Sekolah ?

## [1] 160

Rata-rata Jumlah Siswa tiap Sekolah

## # A tibble: 1 × 1
##   `mean(n)`
##       <dbl>
## 1      44.9

Menghitung mean student SES di tiap sekolah

stud_data%<>%group_by(school)%>%mutate(mean.ses= mean(ses))%>%ungroup
kable(stud_data[1:10,])

school	minority	sex	ses	mathach	mean.ses
1224	No	Female	-1.528	5.876	-0.434383
1224	No	Female	-0.588	19.708	-0.434383
1224	No	Male	-0.528	20.349	-0.434383
1224	No	Male	-0.668	8.781	-0.434383
1224	No	Male	-0.158	17.898	-0.434383
1224	No	Male	0.022	4.583	-0.434383
1224	No	Female	-0.618	-2.832	-0.434383
1224	No	Male	-0.998	0.523	-0.434383
1224	No	Female	-0.888	1.527	-0.434383
1224	No	Male	-0.458	21.521	-0.434383

Membuat deviasi Student’s SES dari rata-rata sekolah

(stud_data%<>%mutate(ses.dev = ses - mean.ses))

## # A tibble: 7,185 × 7
##    school minority sex       ses mathach mean.ses ses.dev
##    <chr>  <fct>    <fct>   <dbl>   <dbl>    <dbl>   <dbl>
##  1 1224   No       Female -1.53    5.88    -0.434 -1.09  
##  2 1224   No       Female -0.588  19.7     -0.434 -0.154 
##  3 1224   No       Male   -0.528  20.3     -0.434 -0.0936
##  4 1224   No       Male   -0.668   8.78    -0.434 -0.234 
##  5 1224   No       Male   -0.158  17.9     -0.434  0.276 
##  6 1224   No       Male    0.022   4.58    -0.434  0.456 
##  7 1224   No       Female -0.618  -2.83    -0.434 -0.184 
##  8 1224   No       Male   -0.998   0.523   -0.434 -0.564 
##  9 1224   No       Female -0.888   1.53    -0.434 -0.454 
## 10 1224   No       Male   -0.458  21.5     -0.434 -0.0236
## # … with 7,175 more rows

Menggabungkan dengan variabel level sekolah

df <- left_join(stud_data, school_data, by="school")
kable(df[1:10,])

school	minority	sex	ses	mathach	mean.ses	ses.dev	sector
1224	No	Female	-1.528	5.876	-0.434383	-1.093617	Public
1224	No	Female	-0.588	19.708	-0.434383	-0.153617	Public
1224	No	Male	-0.528	20.349	-0.434383	-0.093617	Public
1224	No	Male	-0.668	8.781	-0.434383	-0.233617	Public
1224	No	Male	-0.158	17.898	-0.434383	0.276383	Public
1224	No	Male	0.022	4.583	-0.434383	0.456383	Public
1224	No	Female	-0.618	-2.832	-0.434383	-0.183617	Public
1224	No	Male	-0.998	0.523	-0.434383	-0.563617	Public
1224	No	Female	-0.888	1.527	-0.434383	-0.453617	Public
1224	No	Male	-0.458	21.521	-0.434383	-0.023617	Public

Menggroupkan ID sekolah menjadi factor

Jumlah Sekolah :

sector	n
Public	90
Catholic	70

Jumlah Siswa :

sector	n
Public	3642
Catholic	3543

Separate Analyses by School

Scatter Plot

# Samplel random 20 ID sekolah catholic
set.seed(1129)
school.IDs <- school_data %>%
  filter(sector=="Catholic") %>%
  sample_n(20) %>% 
  pull(school)

school.IDs

##  [1] "5667" "4223" "9198" "4253" "5650" "8150" "5761" "2658" "3838" "8193"
## [11] "4292" "7364" "3992" "5404" "5192" "1462" "4042" "3020" "7172" "1477"

Memfilter data sesuai dengan sampel yang terambil

(df.cat <- df %>%
    filter(school %in% school.IDs))

## # A tibble: 1,011 × 8
##    school minority sex      ses mathach mean.ses ses.dev sector  
##    <fct>  <fct>    <fct>  <dbl>   <dbl>    <dbl>   <dbl> <fct>   
##  1 1462   Yes      Male   0.162    1.97   -0.669  0.831  Catholic
##  2 1462   Yes      Male  -0.758    7.72   -0.669 -0.0886 Catholic
##  3 1462   Yes      Male  -0.708   19.0    -0.669 -0.0386 Catholic
##  4 1462   Yes      Male  -0.818   20.1    -0.669 -0.149  Catholic
##  5 1462   Yes      Male  -1.92    22.3    -0.669 -1.25   Catholic
##  6 1462   Yes      Male  -0.948   14.3    -0.669 -0.279  Catholic
##  7 1462   Yes      Male  -0.798   10.2    -0.669 -0.129  Catholic
##  8 1462   Yes      Male  -1.46     9.61   -0.669 -0.789  Catholic
##  9 1462   Yes      Male  -0.158    6.12   -0.669  0.511  Catholic
## 10 1462   Yes      Male  -0.378   23.8    -0.669  0.291  Catholic
## # … with 1,001 more rows

Plot Math dan SES untuk 20 sekolah catholic

Plot Math dan SES untuk 20 sekolah public

Linear Regressions

Estimasi Model Linier

Mengestimasi model linier untuk mathach berdasarkan prediksi SES.dev secara terpisah untuk masing masing 160 sekolah.

# Susun kerangka data siswa berdasarkan sekolah
nested.df <- df %>% group_by(school) %>%nest()

# Membuat kerangka data tingkat sekolah (baris=sekolah)
nested.df

## # A tibble: 160 × 2
## # Groups:   school [160]
##    school data             
##    <fct>  <list>           
##  1 1224   <tibble [47 × 7]>
##  2 1288   <tibble [25 × 7]>
##  3 1296   <tibble [48 × 7]>
##  4 1308   <tibble [20 × 7]>
##  5 1317   <tibble [48 × 7]>
##  6 1358   <tibble [30 × 7]>
##  7 1374   <tibble [28 × 7]>
##  8 1433   <tibble [35 × 7]>
##  9 1436   <tibble [44 × 7]>
## 10 1461   <tibble [33 × 7]>
## # … with 150 more rows

Dengan menggunakan kerangka data yang bertingkat tadi, kita dapat memasukan model linier terpisah di setiap kerangka data sekolah.
Model linier (lmodels) merupakan list dari model linier untuk tiap sekolah.

lmodels <- nested.df %>%
  # Mendapatkan dataframe semua sekolah
  pull(data) %>%
  # Me-run setiap sekolah dengan fungsi map
  purrr::map(~ lm(mathach ~ ses.dev, data= .x))

head(lmodels)

## [[1]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##       9.715        2.509  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##      13.511        3.255  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##       7.636        1.076  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##      16.256        0.126  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##      13.178        1.274  
## 
## 
## [[6]]
## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##      11.206        5.068

Model Linier dalam Data Frame

Kita dapat membuat list model linier ke dalam dataframe awal tadi, menambahkan tiap model ke baris sekolah seperti dibawah ini:

## # A tibble: 160 × 3
## # Groups:   school [160]
##    school data              model 
##    <fct>  <list>            <list>
##  1 1224   <tibble [47 × 7]> <lm>  
##  2 1288   <tibble [25 × 7]> <lm>  
##  3 1296   <tibble [48 × 7]> <lm>  
##  4 1308   <tibble [20 × 7]> <lm>  
##  5 1317   <tibble [48 × 7]> <lm>  
##  6 1358   <tibble [30 × 7]> <lm>  
##  7 1374   <tibble [28 × 7]> <lm>  
##  8 1433   <tibble [35 × 7]> <lm>  
##  9 1436   <tibble [44 × 7]> <lm>  
## 10 1461   <tibble [33 × 7]> <lm>  
## # … with 150 more rows

Mengambil model linier untuk sekolah tertentu

nested.df %>%
  filter(school=="1224") %>%
  pull(model) %>%
  extract2(1)

## 
## Call:
## lm(formula = mathach ~ ses.dev, data = .x)
## 
## Coefficients:
## (Intercept)      ses.dev  
##       9.715        2.509

Menggunakan tidy output

nested.df %>%
  filter(school=="1224") %>%
  pull(model) %>%
  extract2(1) %>%
  broom::tidy()

## # A tibble: 2 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)     9.72      1.10      8.87 1.94e-11
## 2 ses.dev         2.51      1.77      1.42 1.62e- 1

Menambahkan semua estimasi ke dalam dataframe

nested.df %<>%
  mutate(model.results = purrr::map(model, 
                            broom::tidy)
  )

nested.df %>%unnest(model.results)

## # A tibble: 320 × 8
## # Groups:   school [160]
##    school data              model  term        estimate std.e…¹ stati…²  p.value
##    <fct>  <list>            <list> <chr>          <dbl>   <dbl>   <dbl>    <dbl>
##  1 1224   <tibble [47 × 7]> <lm>   (Intercept)    9.72    1.10   8.87   1.94e-11
##  2 1224   <tibble [47 × 7]> <lm>   ses.dev        2.51    1.77   1.42   1.62e- 1
##  3 1288   <tibble [25 × 7]> <lm>   (Intercept)   13.5     1.36   9.91   9.12e-10
##  4 1288   <tibble [25 × 7]> <lm>   ses.dev        3.26    2.08   1.57   1.31e- 1
##  5 1296   <tibble [48 × 7]> <lm>   (Intercept)    7.64    0.774  9.86   6.27e-13
##  6 1296   <tibble [48 × 7]> <lm>   ses.dev        1.08    1.21   0.890  3.78e- 1
##  7 1308   <tibble [20 × 7]> <lm>   (Intercept)   16.3     1.40  11.6    9.02e-10
##  8 1308   <tibble [20 × 7]> <lm>   ses.dev        0.126   3.00   0.0420 9.67e- 1
##  9 1317   <tibble [48 × 7]> <lm>   (Intercept)   13.2     0.790 16.7    3.97e-21
## 10 1317   <tibble [48 × 7]> <lm>   ses.dev        1.27    1.44   0.887  3.80e- 1
## # … with 310 more rows, and abbreviated variable names ¹std.error, ²statistic

Select bagian yang penting

lm.coeff <- nested.df %>%
  unnest(model.results) %>%
  dplyr::select(school, term, estimate)
lm.coeff

## # A tibble: 320 × 3
## # Groups:   school [160]
##    school term        estimate
##    <fct>  <chr>          <dbl>
##  1 1224   (Intercept)    9.72 
##  2 1224   ses.dev        2.51 
##  3 1288   (Intercept)   13.5  
##  4 1288   ses.dev        3.26 
##  5 1296   (Intercept)    7.64 
##  6 1296   ses.dev        1.08 
##  7 1308   (Intercept)   16.3  
##  8 1308   ses.dev        0.126
##  9 1317   (Intercept)   13.2  
## 10 1317   ses.dev        1.27 
## # … with 310 more rows

Reshape dan Rename

lm.coeff %<>%
  # Intersep dan slope ke dalam dua kolom
  pivot_wider(names_from = term, values_from = estimate) %>%
  # Rename columns
  dplyr::select(school, intercept = `(Intercept)`, slope = ses.dev)

lm.coeff

## # A tibble: 160 × 3
## # Groups:   school [160]
##    school intercept slope
##    <fct>      <dbl> <dbl>
##  1 1224        9.72 2.51 
##  2 1288       13.5  3.26 
##  3 1296        7.64 1.08 
##  4 1308       16.3  0.126
##  5 1317       13.2  1.27 
##  6 1358       11.2  5.07 
##  7 1374        9.73 3.85 
##  8 1433       19.7  1.85 
##  9 1436       18.1  1.60 
## 10 1461       16.8  6.27 
## # … with 150 more rows

Menambahakan mean.SES ke dalam datafram

Dataframe akan berisi kolom IDsekolah, sektor, mean.ses, intersep, dan slope.

lm.df <- df %>% 
  dplyr::select(school, sector, mean.ses) %>%
  distinct
(lm.df %<>%
    left_join(lm.coeff, by="school")
  )

## # A tibble: 160 × 5
##    school sector   mean.ses intercept slope
##    <fct>  <fct>       <dbl>     <dbl> <dbl>
##  1 1224   Public    -0.434       9.72 2.51 
##  2 1288   Public     0.122      13.5  3.26 
##  3 1296   Public    -0.426       7.64 1.08 
##  4 1308   Catholic   0.528      16.3  0.126
##  5 1317   Catholic   0.345      13.2  1.27 
##  6 1358   Public    -0.0197     11.2  5.07 
##  7 1374   Public    -0.0126      9.73 3.85 
##  8 1433   Catholic   0.712      19.7  1.85 
##  9 1436   Catholic   0.563      18.1  1.60 
## 10 1461   Public     0.677      16.8  6.27 
## # … with 150 more rows

Hierarchical Linier Model

Variance Components

Simple Variance Components Model

mod1 <-lmer(mathach~1+(1|school),data=df) #estimasi dengan Restricted Maximum Likelihood (REML)
summary(mod1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mathach ~ 1 + (1 | school)
##    Data: df
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  school, 160
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  12.6370     0.2444 156.6473   51.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hasil denga tidy format

(mod1.res<-tidy(mod1))

## # A tibble: 3 × 8
##   effect   group    term            estimate std.error statis…¹    df    p.value
##   <chr>    <chr>    <chr>              <dbl>     <dbl>    <dbl> <dbl>      <dbl>
## 1 fixed    <NA>     (Intercept)        12.6      0.244     51.7  157.  2.34e-100
## 2 ran_pars school   sd__(Intercept)     2.93    NA         NA     NA  NA        
## 3 ran_pars Residual sd__Observation     6.26    NA         NA     NA  NA        
## # … with abbreviated variable name ¹statistic

Test signifikansi efek sekolah, estimasi single-level model

mod1_sl <- lm(mathach ~ 1, 
              data=df)

# Membandingkan dua model dengan Likelihood Ratio Test (LRT)
anova(mod1, mod1_sl)

## Data: df
## Models:
## mod1_sl: mathach ~ 1
## mod1: mathach ~ 1 + (1 | school)
##         npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## mod1_sl    2 48104 48117 -24050    48100                         
## mod1       3 47122 47142 -23558    47116 983.92  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimasi Varians level-sekolah

(sigma2_u <- mod1.res %>%
    filter(effect == "ran_pars", group == "school") %>%
    # Standard deviation
    pull(estimate) %>%
    # ^2 = Variance
    .^2)

## [1] 8.614025

Estimasi varians level-individu

(sigma2_e <- mod1.res %>%
    filter(effect == "ran_pars", group == "Residual") %>%
    # Standard deviation
    pull(estimate) %>%
    # ^2 = Variance
    .^2)

## [1] 39.14832

Variance Partition Coefficient

# Variance Partition Coefficient
sigma2_u/(sigma2_u + sigma2_e)

## [1] 0.1803518

Random Intercept

Random Intercept dengan fixed slope untuk SES tiap siswa

mod2 <-lmer(mathach~1+ses.dev+(1 | school), 
             data=df)
summary(mod2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mathach ~ 1 + ses.dev + (1 | school)
##    Data: df
## 
## REML criterion at convergence: 46724
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0969 -0.7322  0.0194  0.7572  2.9147 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept)  8.672   2.945   
##  Residual             37.010   6.084   
## Number of obs: 7185, groups:  school, 160
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   12.6361     0.2445  156.7405   51.68   <2e-16 ***
## ses.dev        2.1912     0.1087 7022.0237   20.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr)
## ses.dev 0.000

ANOVA

Anova(mod2)

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: mathach
##          Chisq Df Pr(>Chisq)    
## ses.dev 406.68  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tidy

(mod2.res <- tidy(mod2))

## # A tibble: 4 × 8
##   effect   group    term            estimate std.error statis…¹    df    p.value
##   <chr>    <chr>    <chr>              <dbl>     <dbl>    <dbl> <dbl>      <dbl>
## 1 fixed    <NA>     (Intercept)        12.6      0.244     51.7  157.  2.29e-100
## 2 fixed    <NA>     ses.dev             2.19     0.109     20.2 7022.  5.77e- 88
## 3 ran_pars school   sd__(Intercept)     2.94    NA         NA     NA  NA        
## 4 ran_pars Residual sd__Observation     6.08    NA         NA     NA  NA        
## # … with abbreviated variable name ¹statistic

Estimasi fixed effect level populasi :Intercept dan SES slope

mod2.res %>%filter(effect == "fixed")

## # A tibble: 2 × 8
##   effect group term        estimate std.error statistic    df   p.value
##   <chr>  <chr> <chr>          <dbl>     <dbl>     <dbl> <dbl>     <dbl>
## 1 fixed  <NA>  (Intercept)    12.6      0.244      51.7  157. 2.29e-100
## 2 fixed  <NA>  ses.dev         2.19     0.109      20.2 7022. 5.77e- 88

Random Effect Parameters

mod2.res %>%  filter(effect == "ran_pars")

## # A tibble: 2 × 8
##   effect   group    term            estimate std.error statistic    df p.value
##   <chr>    <chr>    <chr>              <dbl>     <dbl>     <dbl> <dbl>   <dbl>
## 1 ran_pars school   sd__(Intercept)     2.94        NA        NA    NA      NA
## 2 ran_pars Residual sd__Observation     6.08        NA        NA    NA      NA

Estimasi Varians Level Sekolaj

(sigma2_u <- mod2.res %>%
  filter(effect == "ran_pars", group == "school") %>%
  # Standard deviation
  pull(estimate) %>%
  # ^2 = Variance
  .^2)

## [1] 8.672289

Estimasi Varians Level Individu

(sigma2_e <- mod2.res %>%
    filter(effect == "ran_pars", group == "Residual") %>%
    # Standard deviation
    pull(estimate) %>%
    # ^2 = Variance
    .^2)

## [1] 37.01041

Variance Partition Coefficient

sigma2_u/(sigma2_u + sigma2_e)

## [1] 0.1898375