Q1
#mathgainijk(j) = b0j + b1k(j) + β2 × mathkindijk(j) + β3 × sexijk(j)
#+ β4 × minorityijk(j) + β5 × sesijk(j) + εijk(j)
# i is student index, j is school index and k is class index
require(pacman)
## Loading required package: pacman
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(alr3)
## Loading required package: alr3
## Loading required package: car
require(MuMIn)
## Loading required package: MuMIn
require(ggplot2)
## Loading required package: ggplot2
require(WWGbook)
## Loading required package: WWGbook
dta<-classroom
str(dta)
## 'data.frame': 1190 obs. of 12 variables:
## $ sex : int 1 0 1 0 0 1 0 0 1 0 ...
## $ minority: int 1 1 1 1 1 1 1 1 1 1 ...
## $ mathkind: int 448 460 511 449 425 450 452 443 422 480 ...
## $ mathgain: int 32 109 56 83 53 65 51 66 88 -7 ...
## $ ses : num 0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
## $ yearstea: num 1 1 1 2 2 2 2 2 2 2 ...
## $ mathknow: num NA NA NA -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 ...
## $ housepov: num 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 ...
## $ mathprep: num 2 2 2 3.25 3.25 3.25 3.25 3.25 3.25 3.25 ...
## $ classid : int 160 160 160 217 217 217 217 217 217 217 ...
## $ schoolid: int 1 1 1 1 1 1 1 1 1 1 ...
## $ childid : int 1 2 3 4 5 6 7 8 9 10 ...
# lm plot
ggplot(dta, aes(x =mathkind , y = mathgain ,group = classid))+
stat_smooth(method = "lm", se=F , color='skyblue')+ #every class
stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+ #total data
labs(x='mathkind score',y='mathgain score')+
theme_bw()

# lm plot
ggplot(dta, aes(x =mathkind , y = mathgain ,group = schoolid))+
stat_smooth(method = "lm", se=F , color='skyblue')+ #every school
stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+ #total data
labs(x='mathkind score',y='mathgain score')+
theme_bw()

#deal with different intercept among different schools and classes
summary(m0 <- lmer(mathgain~(1|classid)+(1|schoolid)+mathkind+sex+minority+ses, data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | classid) + (1 | schoolid) + mathkind + sex +
## minority + ses
## Data: dta
##
## REML criterion at convergence: 11385.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8257 -0.6110 -0.0337 0.5538 4.2678
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 83.28 9.126
## schoolid (Intercept) 75.20 8.672
## Residual 734.57 27.103
## Number of obs: 1190, groups: classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 282.79034 10.85323 26.056
## mathkind -0.46980 0.02227 -21.100
## sex -1.25119 1.65773 -0.755
## minority -8.26213 2.34011 -3.531
## ses 5.34638 1.24109 4.308
##
## Correlation of Fixed Effects:
## (Intr) mthknd sex minrty
## mathkind -0.978
## sex -0.044 -0.031
## minority -0.307 0.163 -0.018
## ses 0.140 -0.168 0.019 0.163
#Using classese and school to predict
summary(m1 <- lmer(mathgain~(1|schoolid)+(1|classid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | schoolid) + (1 | classid)
## Data: dta
##
## REML criterion at convergence: 11768.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6441 -0.5984 -0.0336 0.5334 5.6335
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 99.23 9.961
## schoolid (Intercept) 77.49 8.803
## Residual 1028.23 32.066
## Number of obs: 1190, groups: classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 57.427 1.443 39.79
fs<-as.data.frame(c(AIC(m0,k = 2),AICc(m0,k = 2),BIC(m0)))
row.names(fs)<-c("AIC","AICC","BIC")
names(fs)<-"Fit Statistics"
fs
## Fit Statistics
## AIC 11401.80
## AICC 11401.92
## BIC 11442.46
Q3
#relate a student's English test score to his or her Ravens test score in year one
#Engijk(j) = b0j + b1k(j) + β1 × Ravensij + εijk(j),
#i is student index, j is school index and k is class index.
pacman::p_load(faraway,dplyr)
dta<-jsp
str(dta)
## 'data.frame': 3236 obs. of 9 variables:
## $ school : Factor w/ 49 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ class : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : Factor w/ 2 levels "boy","girl": 2 2 2 1 1 1 1 1 1 1 ...
## $ social : Factor w/ 9 levels "1","2","3","4",..: 9 9 9 2 2 2 2 2 9 9 ...
## $ raven : num 23 23 23 15 15 22 22 22 14 14 ...
## $ id : Factor w/ 1192 levels "1","2","3","4",..: 1 1 1 2 2 3 3 3 4 4 ...
## $ english: num 72 80 39 7 17 88 89 83 12 25 ...
## $ math : num 23 24 23 14 11 36 32 39 24 26 ...
## $ year : num 0 1 2 0 1 0 1 2 0 1 ...
# lm plot
ggplot(dta, aes(x =raven , y = english ,group = school))+
stat_smooth(method = "lm", se=F , color='grey')+ #every school
stat_smooth(aes(group = class ), method = "lm", se=F , color='#56B4E9')+ #every class
stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+ #total data
labs(x='raven score',y='englis score')+
theme_bw()

#class nest within school
summary(mr<-lmer(english~raven+(1|class/school)+(1|school),data=dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: english ~ raven + (1 | class/school) + (1 | school)
## Data: dta
##
## REML criterion at convergence: 29107.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.90145 -0.76284 0.05924 0.78127 2.82146
##
## Random effects:
## Groups Name Variance Std.Dev.
## school:class (Intercept) 40.58 6.370
## school (Intercept) 15.59 3.949
## class (Intercept) 0.00 0.000
## Residual 451.47 21.248
## Number of obs: 3236, groups: school:class, 94; school, 49; class, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 9.24949 2.01350 4.594
## raven 1.69687 0.06988 24.282
##
## Correlation of Fixed Effects:
## (Intr)
## raven -0.873
#How can I deal with the students reapeat measures in the model?
Q5
#how the language test score depends on the pupil's intelligence
#and his family's socio-economic status,
#and on some school variables.
dta<-read.table("iq_language.txt", header = TRUE)
str(dta)
## 'data.frame': 2287 obs. of 9 variables:
## $ School : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ Language : int 46 45 33 46 20 30 30 57 36 36 ...
## $ Group_size : int 29 29 29 29 29 29 29 29 29 29 ...
## $ IQ_c : num 3.166 2.666 -2.334 -0.834 -3.834 ...
## $ School_mean: num -1.51 -1.51 -1.51 -1.51 -1.51 ...
## $ Group_mean : num 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 ...
## $ SES_c : num -4.81 -17.81 -12.81 -4.81 -17.81 ...
summary(ml<-lmer(Language~ IQ_c+ SES_c+ (1|School),dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + SES_c + (1 | School)
## Data: dta
##
## REML criterion at convergence: 15140.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0086 -0.6609 0.0571 0.7075 3.0931
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 9.212 3.035
## Residual 40.049 6.328
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.67563 0.30154 134.89
## IQ_c 2.25325 0.07138 31.57
## SES_c 0.16538 0.01479 11.18
##
## Correlation of Fixed Effects:
## (Intr) IQ_c
## IQ_c 0.011
## SES_c 0.020 -0.293
plot(ml)

#ICC
print(vcml<-VarCorr(ml),comp='Variance')
## Groups Name Variance
## School (Intercept) 9.212
## Residual 40.049
vcml<-as.data.frame(vcml)
vcml[1,4] /sum( vcml [,4] )
## [1] 0.1870047
#Fit Statistics
fs5<-as.data.frame(c(AIC(ml,k = 2),AICc(ml,k = 2),BIC(ml)))
row.names(fs5)<-c("AIC","AICC","BIC")
names(fs5)<-"Fit Statistics"
fs5
## Fit Statistics
## AIC 15150.61
## AICC 15150.63
## BIC 15179.28
Q7
require(lme4)
require(ggplot2)
dta <- read.csv("attitudes2Sci.csv" ,header = T ,sep = ",")
str(dta)
## 'data.frame': 1385 obs. of 7 variables:
## $ State : Factor w/ 2 levels "ACT","NSW": 1 1 1 1 1 1 1 1 1 1 ...
## $ PrivPub: Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ school : int 1 1 1 1 1 1 1 1 1 1 ...
## $ class : int 1 1 1 1 1 1 1 1 1 1 ...
## $ sex : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 2 2 ...
## $ like : int 8 6 5 2 5 6 3 7 6 3 ...
## $ Class : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
summary(dta)
## State PrivPub school class sex
## ACT:1336 private:452 Min. : 1.00 Min. :1.000 f :691
## NSW: 49 public :933 1st Qu.:13.00 1st Qu.:1.000 m :692
## Median :21.00 Median :1.000 NA's: 2
## Mean :21.16 Mean :1.527
## 3rd Qu.:31.00 3rd Qu.:2.000
## Max. :41.00 Max. :4.000
## like Class
## Min. :1.000 Min. : 1.10
## 1st Qu.:4.000 1st Qu.:13.10
## Median :5.000 Median :21.10
## Mean :5.082 Mean :21.31
## 3rd Qu.:6.000 3rd Qu.:31.10
## Max. :8.000 Max. :41.20
dta[c(3,4,7)]<-lapply(dta[c(3,4,7)], as.factor)
#plot 41 schools
ggplot(dta,aes(x=class ,y=like )) +
geom_point() +
stat_smooth(aes(group = 1), method = "lm", se = F, col = "skyblue") +
facet_wrap( ~ school)

summary(ma1 <- lmer(like~sex+PrivPub+class+(1|State)+(1|PrivPub)+(1|school)+(1|school/class), data = dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Linear mixed model fit by REML ['lmerMod']
## Formula: like ~ sex + PrivPub + class + (1 | State) + (1 | PrivPub) +
## (1 | school) + (1 | school/class)
## Data: dta
##
## REML criterion at convergence: 5546.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6808 -0.6828 0.1369 0.6905 2.5788
##
## Random effects:
## Groups Name Variance Std.Dev.
## class.school (Intercept) 0.3277 0.5724
## school (Intercept) 0.0000 0.0000
## school.1 (Intercept) 0.0000 0.0000
## PrivPub (Intercept) 0.2058 0.4536
## State (Intercept) 0.0000 0.0000
## Residual 3.0523 1.7471
## Number of obs: 1383, groups:
## class:school, 66; school, 41; PrivPub, 2; State, 2
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.64314 0.48848 9.505
## sexm 0.18234 0.09842 1.853
## PrivPubpublic 0.40395 0.66925 0.604
## class2 0.28282 0.20233 1.398
## class3 0.13431 0.33898 0.396
## class4 0.02178 0.41352 0.053
##
## Correlation of Fixed Effects:
## (Intr) sexm PrvPbp class2 class3
## sexm -0.096
## PrivPubpblc -0.709 0.000
## class2 -0.116 0.000 -0.006
## class3 -0.091 -0.050 0.024 0.174
## class4 -0.104 -0.039 0.049 0.140 0.102
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(ma2 <- lmer(like~(1|school), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: like ~ (1 | school)
## Data: dta
##
## REML criterion at convergence: 5595.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5168 -0.6764 0.1319 0.6860 1.8570
##
## Random effects:
## Groups Name Variance Std.Dev.
## school (Intercept) 0.1894 0.4352
## Residual 3.2228 1.7952
## Number of obs: 1385, groups: school, 41
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.1346 0.0867 59.22