1 Hw1

1.1 load data and see it

pacman::p_load(mlmRev, tidyverse, lme4, nlme)
dta<-read.table("D:/sheu/iq_language.txt", h=T)
head(dta)

##   School Pupil   IQ Language Group_size       IQ_c School_mean Group_mean
## 1      1 17001 15.0       46         29  3.1659379    -1.51406        5.9
## 2      1 17002 14.5       45         29  2.6659379    -1.51406        5.9
## 3      1 17003  9.5       33         29 -2.3340621    -1.51406        5.9
## 4      1 17004 11.0       46         29 -0.8340621    -1.51406        5.9
## 5      1 17005  8.0       20         29 -3.8340621    -1.51406        5.9
## 6      1 17006  9.5       30         29 -2.3340621    -1.51406        5.9
##        SES_c
## 1  -4.811981
## 2 -17.811981
## 3 -12.811981
## 4  -4.811981
## 5 -17.811981
## 6 -17.811981

str(dta)

## 'data.frame':    2287 obs. of  9 variables:
##  $ School     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil      : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ IQ         : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ Language   : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ Group_size : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ IQ_c       : num  3.166 2.666 -2.334 -0.834 -3.834 ...
##  $ School_mean: num  -1.51 -1.51 -1.51 -1.51 -1.51 ...
##  $ Group_mean : num  5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 ...
##  $ SES_c      : num  -4.81 -17.81 -12.81 -4.81 -17.81 ...

dta[,1:2] <- apply(dta[,1:2], 2, as.factor)

1.2 model

summary(m0 <- lmer(Language ~ (1|School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 16253.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11618 -0.65703  0.07597  0.74128  2.50639 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 19.63    4.431   
##  Residual             64.56    8.035   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  40.3624     0.4282   94.26

summary(m1 <- lmer(Language ~ IQ_c + (1|School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 15255.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0939 -0.6375  0.0579  0.7061  3.1448 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  9.602   3.099   
##  Residual             42.245   6.500   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 40.60823    0.30819   131.8
## IQ_c         2.48759    0.07008    35.5
## 
## Correlation of Fixed Effects:
##      (Intr)
## IQ_c 0.018

sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)

	Language		Language
Predictors	Estimates	std. Error	Estimates	std. Error
(Intercept)	40.36	0.43	40.61	0.31
IQ_c			2.49	0.07
Random Effects
σ²	64.56		42.24
τ₀₀	19.63 _School		9.60 _School
ICC	0.23		0.19

The estimated mean of Language scores for all children is 40.61

For one point increase in IQ_c(School mean centered IQ), Language score increase by 2.49 point, on average.

About 34.57%〔=(64.56-42.24)/64.56〕 of variance in gain scores can be attributed to differences in IQ_c(School mean centered IQ) children attending different schools.

1.3 anova

anova(m0, m1)

## refitting model(s) with ML (instead of REML)

## Data: dta
## Models:
## m0: Language ~ (1 | School)
## m1: Language ~ IQ_c + (1 | School)
##    Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m0  3 16259 16276 -8126.6    16253                             
## m1  4 15260 15283 -7625.9    15252 1001.4      1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2 Hw2

2.1 load data and see it

pacman::p_load(foreign,tidyverse, lme4, merTools)
fLoc <- "D:/sheu/eg_hlm.sav"

# read spss format and covert it to data frame 
dta2 <- read.spss(fLoc) %>% as.data.frame


# check data structure
str(dta2)

## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...

2.2 plot

# set black-and-white theme
theme_set(theme_bw())

#
ggplot(dta2, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

2.3 model

# nested specification with random intercepts only
# fixed-effect for slope
summary(m21 <- lmer(math ~ year + (1 | school/cid), data = dta2))

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta2
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031

# random slopes
summary(m22 <- lmer(math ~ ( year | school/cid), data = dta2))

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta2
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64127  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92436  0.9614       
##             year        0.60793  0.7797   0.92
##  Residual               0.30115  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64424    0.05462  -30.11
## convergence code: 0
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)

# compare models
anova(m21, m22)

## refitting model(s) with ML (instead of REML)

## Data: dta2
## Models:
## m21: math ~ year + (1 | school/cid)
## m22: math ~ (year | school/cid)
##     Df   AIC   BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## m21  5 16757 16792 -8373.5    16747                            
## m22  8 16566 16621 -8275.0    16550 197.1      3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# merTools
# estimate random-effects parameters by simulation
m2_re <- REsim(m22)

# plot for random effects
plotREsim(m2_re)

# residual plots
plot(m21, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

#
plot(m22, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

# The end

Hw

tjlee

2020-10-12