Kết nối dữ liệu

setwd("d:/R Studio/Multilevel")

library(lme4)
## Loading required package: Matrix
library(foreign)

dulieu <-read.dta("hoc.dta")

head(dulieu)
##   school student        y       x1 x2    x3
## 1      1       1   2.6132   6.1906  1 Mixed
## 2      1       2   1.3407   2.0580  1 Mixed
## 3      1       3 -17.2390 -13.6460  0 Mixed
## 4      1       4   9.6759   2.0580  1 Mixed
## 5      1       5   5.4434   3.7110  1 Mixed
## 6      1       6  17.3490  21.8940  0 Mixed

Varying-intercept model

null <-lmer(data=dulieu,y~ (1|school),REML=FALSE)
summary(null)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: y ~ (1 | school)
##    Data: dulieu
## 
##      AIC      BIC   logLik deviance df.resid 
##  29709.0  29727.9 -14851.5  29703.0     4056 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9471 -0.6486  0.0117  0.6992  3.6579 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 16.86    4.107   
##  Residual             84.78    9.207   
## Number of obs: 4059, groups:  school, 65
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  -0.1317     0.5363  -0.246
IC_null<- 16.86 /(16.86 + 84.78)
IC_null
## [1] 0.1658796

IC -> 0: Hồi quy OLS IC -> 1: Phương sai các cá thể là bằng nhau lmer(data=dulieu,y ~ x1 + (x1|school),REML=FALSE)

Run random intercept model

ri <-lmer(data=dulieu,y ~ x1 + (1|school),REML=FALSE)
summary(ri)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: y ~ x1 + (1 | school)
##    Data: dulieu
## 
##      AIC      BIC   logLik deviance df.resid 
##  28057.6  28082.8 -14024.8  28049.6     4055 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7162 -0.6304  0.0287  0.6844  3.2680 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept)  9.213   3.035   
##  Residual             56.573   7.521   
## Number of obs: 4059, groups:  school, 65
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  0.02387    0.40023    0.06
## x1           0.56337    0.01247   45.20
## 
## Correlation of Fixed Effects:
##    (Intr)
## x1 0.008
IC_ri <-  9.2 /(9.2 +56.6)
IC_ri
## [1] 0.1398176

Run random coefficients (Run random intercept and slope model)

rc <-lmer(data=dulieu,y ~ x1 + (1 + x1|school),REML=FALSE)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00534721 (tol = 0.002, component 1)
summary(rc)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: y ~ x1 + (1 + x1 | school)
##    Data: dulieu
## 
##      AIC      BIC   logLik deviance df.resid 
##  28021.2  28059.1 -14004.6  28009.2     4053 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8312 -0.6325  0.0340  0.6832  3.4561 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  school   (Intercept)  9.04467 3.0074       
##           x1           0.01453 0.1205   0.50
##  Residual             55.36543 7.4408       
## Number of obs: 4059, groups:  school, 65
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -0.11506    0.39783  -0.289
## x1           0.55673    0.01993  27.928
## 
## Correlation of Fixed Effects:
##    (Intr)
## x1 0.365 
## convergence code: 0
## Model failed to converge with max|grad| = 0.00534721 (tol = 0.002, component 1)
IC_rc <-  ( 0.01453 + 9.04467) /  ( 0.01453 + 9.04467 + 55.36543 )
IC_rc
## [1] 0.140617

LR test

LR test = -2 (H0 -H1)

Cách 1

Pvalue < 0.05, bác bỏ H0 chấp nhận H1

LR <- -2 *( -14024.8 + 14004.6 )
LR
## [1] 40.4
dchisq(LR, 6)   # df của mô hình H1
## [1] 1.721449e-07

Cách 2

require(lmtest)
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
lrtest (ri, rc)
## Likelihood ratio test
## 
## Model 1: y ~ x1 + (1 | school)
## Model 2: y ~ x1 + (1 + x1 | school)
##   #Df LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -14025                         
## 2   6 -14005  2 40.372  1.711e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ta chọn rc: y = -0.12 + 0.56x1

Ước lượng BLUP

moi <- ranef(rc)
u2 <-ranef(rc)$school[[1]] 
u1 <-ranef(rc)$school[[2]]                        

moi2 <-data.frame(u1,u2)
head(moi2,10)
##              u1         u2
## 1   0.124960584  3.7493416
## 2   0.164705637  4.7021799
## 3   0.080848917  4.7977664
## 4   0.127170791  0.3502078
## 5   0.072040888  2.4628077
## 6   0.058610082  5.1838995
## 7  -0.148861472  3.6409964
## 8   0.006884282 -0.1219091
## 9  -0.088606514 -1.7679234
## 10 -0.136053336 -3.1391128
moi3 <-data.frame(heso =u1 + 0.56, hangso=u2-1.12)
head(moi3,10)
##         heso     hangso
## 1  0.6849606  2.6293416
## 2  0.7247056  3.5821799
## 3  0.6408489  3.6777664
## 4  0.6871708 -0.7697922
## 5  0.6320409  1.3428077
## 6  0.6186101  4.0638995
## 7  0.4111385  2.5209964
## 8  0.5668843 -1.2419091
## 9  0.4713935 -2.8879234
## 10 0.4239467 -4.2591128