First we load our data
load("~/Google Drive/dem7903_App_Hier/data/eclsk.Rdata")
names(eclsk)<-tolower(names(eclsk))
library (car)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(arm)
## Loading required package: MASS
##
## arm (Version 1.7-05, built: 2014-8-1)
##
## Working directory is /Users/ozd504/Google Drive/dem7903_App_Hier/code/code14
##
##
## Attaching package: 'arm'
##
## The following object is masked from 'package:car':
##
## logit
#get out only the variables I'm going to use for this example
myvars<-c("gender", "kurban_r", "race", "w1povrty", "wkmomed","p2homecm", "p2cover", "p2curmar", "p2sprhhm", "c2r4mtsc", "p2dentis", "s2_id", "s2kpupri","s2kminor" )
#subset the data
eclsk.sub<-eclsk[,myvars]
rm(eclsk); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1290867 69.0 1967602 105.1 1967602 105.1
## Vcells 2363459 18.1 489978910 3738.3 408904644 3119.7
Next, I do some recoding of variables using a mixture of the ifelse() function and the recode () function.
#recode our outcomes, the first is the child's math standardized test score in Kindergarten
eclsk.sub$math<-ifelse(eclsk.sub$c2r4mtsc<0, NA, eclsk.sub$c2r4mtsc)
#the second outcome is whether each child has seen a dentist within the last year
eclsk.sub$dentist<-recode(eclsk.sub$p2dentis, recodes = "2:3=1; -1:-9= NA; else=0")
#First we recode some Child characteristics
#Child's sex: recode as male =1
eclsk.sub$male<-recode(eclsk.sub$gender, recodes="1=1; 2=0; -9=NA")
#Recode race with white, non Hispanic as reference using dummy vars
eclsk.sub$hisp<-recode (eclsk.sub$race, recodes="3:4=1;-9=NA; else=0")
eclsk.sub$black<-recode (eclsk.sub$race, recodes="2=1;-9=NA; else=0")
eclsk.sub$asian<-recode (eclsk.sub$race, recodes="5=1;-9=NA; else=0")
eclsk.sub$nahn<-recode (eclsk.sub$race, recodes="6:7=1;-9=NA; else=0")
eclsk.sub$other<-recode (eclsk.sub$race, recodes="8=1;-9=NA; else=0")
#insurance coverage
eclsk.sub$covered<-recode(eclsk.sub$p2cover, recodes="1=1; -1:-9=NA; else=0")
#Then we recode some parent/mother characteristics
#Mother's education, recode as 2 dummys with HS = reference
eclsk.sub$mlths<-recode(eclsk.sub$wkmomed, recodes = "1:2=1; 3:9=0; else = NA")
eclsk.sub$mgths<-recode(eclsk.sub$wkmomed, recodes = "1:3=0; 4:9=1; else =NA")
#marital status, recode as 2 dummys, ref= married
eclsk.sub$single<-recode(eclsk.sub$p2curmar, recodes="5=1; -7:-9=NA; else=0")
eclsk.sub$notmar<-recode(eclsk.sub$p2curmar, recodes="2:4=1; -7:-9=NA; else=0")
#Then we do some household level variables
#Urban residence = 1
eclsk.sub$urban<-recode(eclsk.sub$kurban_r, recodes = "1:2=1; 3=0")
#poverty level in poverty = 1
eclsk.sub$pov<-recode(eclsk.sub$w1povrty , recodes ="1=1; 2=0")
#Household size
eclsk.sub$hhsize<-eclsk.sub$p2sprhhm
#school is private
eclsk.sub$privsch<-ifelse(eclsk.sub$s2kpupri==1, 1, 0)
#school has >50% minority student body
eclsk.sub$minorsch<-recode(eclsk.sub$s2kminor, recodes="1:3=0; 4:5=1; else=NA" )
#Show the first few lines of the data
head(eclsk.sub)
## gender kurban_r race w1povrty wkmomed p2homecm p2cover p2curmar p2sprhhm
## 1 2 1 1 2 6 1 1 1 4
## 2 2 1 1 2 3 1 1 1 4
## 3 2 1 1 2 9 1 1 1 5
## 4 1 1 1 2 6 1 1 1 4
## 5 2 1 1 2 6 1 1 1 4
## 6 1 1 1 2 8 2 1 1 4
## c2r4mtsc p2dentis s2_id s2kpupri s2kminor math dentist male hisp black
## 1 62.24 2 0001 1 1 62.24 1 0 0 0
## 2 67.12 3 0001 1 1 67.12 1 0 0 0
## 3 49.10 2 0001 1 1 49.10 1 0 0 0
## 4 50.50 2 0001 1 1 50.50 1 1 0 0
## 5 54.24 2 0001 1 1 54.24 1 0 0 0
## 6 51.25 2 0001 1 1 51.25 1 1 0 0
## asian nahn other covered mlths mgths single notmar urban pov hhsize
## 1 0 0 0 1 0 1 0 0 1 0 4
## 2 0 0 0 1 0 0 0 0 1 0 4
## 3 0 0 0 1 0 1 0 0 1 0 5
## 4 0 0 0 1 0 1 0 0 1 0 4
## 5 0 0 0 1 0 1 0 0 1 0 4
## 6 0 0 0 1 0 1 0 0 1 0 4
## privsch minorsch
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
The First model will fit a model that considers the individual and family level variables and a random intercept only
fit1<-lmer(math~male+hisp+black+asian+nahn+other+mlths+mgths+single+notmar+urban+pov+hhsize+(1|s2_id), data=eclsk.sub, REML=T)
display(fit1)
## lmer(formula = math ~ male + hisp + black + asian + nahn + other +
## mlths + mgths + single + notmar + urban + pov + hhsize +
## (1 | s2_id), data = eclsk.sub, REML = T)
## coef.est coef.se
## (Intercept) 52.46 0.40
## male 0.23 0.14
## hisp -3.53 0.24
## black -3.95 0.28
## asian 0.65 0.36
## nahn -2.89 0.53
## other -1.93 0.45
## mlths -2.36 0.26
## mgths 2.64 0.17
## single -1.72 0.25
## notmar -1.23 0.22
## urban 0.88 0.31
## pov -2.41 0.22
## hhsize -0.29 0.06
##
## Error terms:
## Groups Name Std.Dev.
## s2_id (Intercept) 3.18
## Residual 8.26
## ---
## number of obs: 14630, groups: s2_id, 1127
## AIC = 104465, DIC = 104400
## deviance = 104416.7
The second model considers both random intercepts and random slopes, in this case, i’m only considering a random slope for poverty status
fit2<-lmer(math~male+hisp+black+asian+nahn+other+mlths+mgths+single+notmar+urban+pov+hhsize+(1+pov|s2_id), data = eclsk.sub, REML = T)
display(fit2)
## lmer(formula = math ~ male + hisp + black + asian + nahn + other +
## mlths + mgths + single + notmar + urban + pov + hhsize +
## (1 + pov | s2_id), data = eclsk.sub, REML = T)
## coef.est coef.se
## (Intercept) 52.47 0.40
## male 0.23 0.14
## hisp -3.54 0.24
## black -3.92 0.28
## asian 0.66 0.36
## nahn -2.95 0.53
## other -1.94 0.45
## mlths -2.37 0.26
## mgths 2.63 0.17
## single -1.71 0.25
## notmar -1.22 0.22
## urban 0.86 0.31
## pov -2.52 0.22
## hhsize -0.29 0.06
##
## Error terms:
## Groups Name Std.Dev. Corr
## s2_id (Intercept) 3.25
## pov 0.78 -0.60
## Residual 8.26
## ---
## number of obs: 14630, groups: s2_id, 1127
## AIC = 104466, DIC = 104397
## deviance = 104413.6
anova (fit1, fit2)
## refitting model(s) with ML (instead of REML)
## Data: eclsk.sub
## Models:
## fit1: math ~ male + hisp + black + asian + nahn + other + mlths + mgths +
## fit1: single + notmar + urban + pov + hhsize + (1 | s2_id)
## fit2: math ~ male + hisp + black + asian + nahn + other + mlths + mgths +
## fit2: single + notmar + urban + pov + hhsize + (1 + pov | s2_id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit1 16 104449 104570 -52208 104417
## fit2 18 104450 104586 -52207 104414 3.05 2 0.22
In this case, fitting the random slope for poverty isn’t adding anything to the model I interpret this as kids living in housholds below the poverty line face a systematic disadvantage, regardless of school
Now, let’s plot the regression lines for each school!
#get the random effects
rancoefs2<-ranef(fit2)
#here are the first 10 schools random effects
head(rancoefs2$s2_id, n=10)
## (Intercept) pov
## 0001 0.46266 -0.067041
## 0002 -0.96343 0.277943
## 0005 -1.99301 0.202019
## 0006 -0.82630 0.103789
## 0007 -0.00813 0.001178
## 0009 -3.92873 0.489163
## 0010 -4.96850 0.746682
## 0011 -2.75480 0.399181
## 0013 5.60871 -0.812722
## 0014 -0.99225 0.143781
#Histograms
par(mfrow=c(1,2))
hist(rancoefs2$s2_id[,"(Intercept)"], main = "Distribution of Random Intercepts")
hist(rancoefs2$s2_id[,"pov"], main = "Distribution of Random Slopes")
#Plot of random slopes, I iterate over all schools, drawing lines
par(mfrow=c(1,1))
plot(NULL, ylim=c(35, 65), xlim=c(0,1),ylab="Math Score", xlab="HH Poverty Status")
title (main="Random Slope and Intercept Model")
cols=sample(rainbow(n=50), size = dim(rancoefs2$s2_id)[1], replace = T)
for (i in 1: dim(rancoefs2$s2_id)[1]){
abline(a=fixef(fit2)[1]+rancoefs2$s2_id[[1]][i], b=fixef(fit2)[13]+rancoefs2$s2_id[[2]][i], col=cols[i],lwd=.5 )
}
Here is the ICC from fit 2
VarCorr(fit2)
## Groups Name Std.Dev. Corr
## s2_id (Intercept) 3.251
## pov 0.785 -0.60
## Residual 8.260
VarCorr(fit2)$s2_id[1]/((attr(VarCorr(fit1), "sc")^2)+VarCorr(fit2)$s2_id[1]+VarCorr(fit2)$s2_id[4])
## [1] 0.133
#the "sc" attribute is the residual variance