Q1

#mathgainijk(j) = b0j + b1k(j) + β2 × mathkindijk(j) + β3 × sexijk(j) 
                     #+ β4 × minorityijk(j) + β5 × sesijk(j) + εijk(j)
        # i is student index, j is school index and k is class index
require(pacman)
## Loading required package: pacman
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(alr3)
## Loading required package: alr3
## Loading required package: car
require(MuMIn)
## Loading required package: MuMIn
require(ggplot2)
## Loading required package: ggplot2
require(WWGbook)
## Loading required package: WWGbook
dta<-classroom
str(dta)
## 'data.frame':    1190 obs. of  12 variables:
##  $ sex     : int  1 0 1 0 0 1 0 0 1 0 ...
##  $ minority: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ mathkind: int  448 460 511 449 425 450 452 443 422 480 ...
##  $ mathgain: int  32 109 56 83 53 65 51 66 88 -7 ...
##  $ ses     : num  0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
##  $ yearstea: num  1 1 1 2 2 2 2 2 2 2 ...
##  $ mathknow: num  NA NA NA -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 ...
##  $ housepov: num  0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 ...
##  $ mathprep: num  2 2 2 3.25 3.25 3.25 3.25 3.25 3.25 3.25 ...
##  $ classid : int  160 160 160 217 217 217 217 217 217 217 ...
##  $ schoolid: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ childid : int  1 2 3 4 5 6 7 8 9 10 ...
# lm plot
ggplot(dta, aes(x =mathkind , y = mathgain  ,group = classid))+
  stat_smooth(method = "lm", se=F , color='skyblue')+                 #every class
  stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+   #total data
  labs(x='mathkind score',y='mathgain score')+
  theme_bw()

# lm plot
ggplot(dta, aes(x =mathkind , y = mathgain  ,group = schoolid))+
  stat_smooth(method = "lm", se=F , color='skyblue')+                 #every school
  stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+   #total data
  labs(x='mathkind score',y='mathgain score')+
  theme_bw()

#deal with different intercept among different schools and classes
summary(m0 <- lmer(mathgain~(1|classid)+(1|schoolid)+mathkind+sex+minority+ses, data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | classid) + (1 | schoolid) + mathkind + sex +  
##     minority + ses
##    Data: dta
## 
## REML criterion at convergence: 11385.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.8257 -0.6110 -0.0337  0.5538  4.2678 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept)  83.28    9.126  
##  schoolid (Intercept)  75.20    8.672  
##  Residual             734.57   27.103  
## Number of obs: 1190, groups:  classid, 312; schoolid, 107
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 282.79034   10.85323  26.056
## mathkind     -0.46980    0.02227 -21.100
## sex          -1.25119    1.65773  -0.755
## minority     -8.26213    2.34011  -3.531
## ses           5.34638    1.24109   4.308
## 
## Correlation of Fixed Effects:
##          (Intr) mthknd sex    minrty
## mathkind -0.978                     
## sex      -0.044 -0.031              
## minority -0.307  0.163 -0.018       
## ses       0.140 -0.168  0.019  0.163
#Using classese and school to predict
summary(m1 <- lmer(mathgain~(1|schoolid)+(1|classid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | schoolid) + (1 | classid)
##    Data: dta
## 
## REML criterion at convergence: 11768.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6441 -0.5984 -0.0336  0.5334  5.6335 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept)   99.23   9.961  
##  schoolid (Intercept)   77.49   8.803  
##  Residual             1028.23  32.066  
## Number of obs: 1190, groups:  classid, 312; schoolid, 107
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   57.427      1.443   39.79
fs<-as.data.frame(c(AIC(m0,k = 2),AICc(m0,k = 2),BIC(m0)))
row.names(fs)<-c("AIC","AICC","BIC")
names(fs)<-"Fit Statistics"
fs
##      Fit Statistics
## AIC        11401.80
## AICC       11401.92
## BIC        11442.46

Q3

#relate a student's English test score to his or her Ravens test score in year one
#Engijk(j) = b0j + b1k(j) + β1 × Ravensij + εijk(j),
#i is student index, j is school index and k is class index. 

pacman::p_load(faraway,dplyr)
dta<-jsp
str(dta)
## 'data.frame':    3236 obs. of  9 variables:
##  $ school : Factor w/ 49 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ class  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender : Factor w/ 2 levels "boy","girl": 2 2 2 1 1 1 1 1 1 1 ...
##  $ social : Factor w/ 9 levels "1","2","3","4",..: 9 9 9 2 2 2 2 2 9 9 ...
##  $ raven  : num  23 23 23 15 15 22 22 22 14 14 ...
##  $ id     : Factor w/ 1192 levels "1","2","3","4",..: 1 1 1 2 2 3 3 3 4 4 ...
##  $ english: num  72 80 39 7 17 88 89 83 12 25 ...
##  $ math   : num  23 24 23 14 11 36 32 39 24 26 ...
##  $ year   : num  0 1 2 0 1 0 1 2 0 1 ...
# lm plot
ggplot(dta, aes(x =raven , y = english ,group = school))+
  stat_smooth(method = "lm", se=F , color='grey')+                         #every school
  stat_smooth(aes(group = class ), method = "lm", se=F , color='#56B4E9')+   #every class
  stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+        #total data
  labs(x='raven score',y='englis score')+
  theme_bw()

#class nest within school
summary(mr<-lmer(english~raven+(1|class/school)+(1|school),data=dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: english ~ raven + (1 | class/school) + (1 | school)
##    Data: dta
## 
## REML criterion at convergence: 29107.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.90145 -0.76284  0.05924  0.78127  2.82146 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  school:class (Intercept)  40.58    6.370  
##  school       (Intercept)  15.59    3.949  
##  class        (Intercept)   0.00    0.000  
##  Residual                 451.47   21.248  
## Number of obs: 3236, groups:  school:class, 94; school, 49; class, 4
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  9.24949    2.01350   4.594
## raven        1.69687    0.06988  24.282
## 
## Correlation of Fixed Effects:
##       (Intr)
## raven -0.873
#How can I deal with the students reapeat measures in the model?

Q5

#how the language test score depends on the pupil's intelligence 
                                        #and his family's socio-economic status,
                                        #and on some school variables.

dta<-read.table("iq_language.txt", header = TRUE)
str(dta)
## 'data.frame':    2287 obs. of  9 variables:
##  $ School     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil      : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ IQ         : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ Language   : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ Group_size : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ IQ_c       : num  3.166 2.666 -2.334 -0.834 -3.834 ...
##  $ School_mean: num  -1.51 -1.51 -1.51 -1.51 -1.51 ...
##  $ Group_mean : num  5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 ...
##  $ SES_c      : num  -4.81 -17.81 -12.81 -4.81 -17.81 ...
summary(ml<-lmer(Language~ IQ_c+ SES_c+ (1|School),dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + SES_c + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 15140.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0086 -0.6609  0.0571  0.7075  3.0931 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  9.212   3.035   
##  Residual             40.049   6.328   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 40.67563    0.30154  134.89
## IQ_c         2.25325    0.07138   31.57
## SES_c        0.16538    0.01479   11.18
## 
## Correlation of Fixed Effects:
##       (Intr) IQ_c  
## IQ_c   0.011       
## SES_c  0.020 -0.293
plot(ml)

#ICC
print(vcml<-VarCorr(ml),comp='Variance')
##  Groups   Name        Variance
##  School   (Intercept)  9.212  
##  Residual             40.049
vcml<-as.data.frame(vcml)
vcml[1,4] /sum( vcml [,4] )
## [1] 0.1870047
#Fit Statistics
fs5<-as.data.frame(c(AIC(ml,k = 2),AICc(ml,k = 2),BIC(ml)))
row.names(fs5)<-c("AIC","AICC","BIC")
names(fs5)<-"Fit Statistics"
fs5
##      Fit Statistics
## AIC        15150.61
## AICC       15150.63
## BIC        15179.28

Q7

require(lme4)
require(ggplot2)

dta <- read.csv("attitudes2Sci.csv" ,header = T ,sep = ",")
str(dta)
## 'data.frame':    1385 obs. of  7 variables:
##  $ State  : Factor w/ 2 levels "ACT","NSW": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PrivPub: Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ school : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ class  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ sex    : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 2 2 ...
##  $ like   : int  8 6 5 2 5 6 3 7 6 3 ...
##  $ Class  : num  1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
summary(dta)
##  State         PrivPub        school          class         sex     
##  ACT:1336   private:452   Min.   : 1.00   Min.   :1.000   f   :691  
##  NSW:  49   public :933   1st Qu.:13.00   1st Qu.:1.000   m   :692  
##                           Median :21.00   Median :1.000   NA's:  2  
##                           Mean   :21.16   Mean   :1.527             
##                           3rd Qu.:31.00   3rd Qu.:2.000             
##                           Max.   :41.00   Max.   :4.000             
##       like           Class      
##  Min.   :1.000   Min.   : 1.10  
##  1st Qu.:4.000   1st Qu.:13.10  
##  Median :5.000   Median :21.10  
##  Mean   :5.082   Mean   :21.31  
##  3rd Qu.:6.000   3rd Qu.:31.10  
##  Max.   :8.000   Max.   :41.20
dta[c(3,4,7)]<-lapply(dta[c(3,4,7)], as.factor)

#plot 41 schools
ggplot(dta,aes(x=class ,y=like )) + 
  geom_point() +
  stat_smooth(aes(group = 1), method = "lm", se = F, col = "skyblue") +
  facet_wrap( ~ school) 

summary(ma1 <- lmer(like~sex+PrivPub+class+(1|State)+(1|PrivPub)+(1|school)+(1|school/class), data = dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge: degenerate Hessian with 1 negative
## eigenvalues
## Linear mixed model fit by REML ['lmerMod']
## Formula: like ~ sex + PrivPub + class + (1 | State) + (1 | PrivPub) +  
##     (1 | school) + (1 | school/class)
##    Data: dta
## 
## REML criterion at convergence: 5546.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6808 -0.6828  0.1369  0.6905  2.5788 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  class.school (Intercept) 0.3277   0.5724  
##  school       (Intercept) 0.0000   0.0000  
##  school.1     (Intercept) 0.0000   0.0000  
##  PrivPub      (Intercept) 0.2058   0.4536  
##  State        (Intercept) 0.0000   0.0000  
##  Residual                 3.0523   1.7471  
## Number of obs: 1383, groups:  
## class:school, 66; school, 41; PrivPub, 2; State, 2
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)    4.64314    0.48848   9.505
## sexm           0.18234    0.09842   1.853
## PrivPubpublic  0.40395    0.66925   0.604
## class2         0.28282    0.20233   1.398
## class3         0.13431    0.33898   0.396
## class4         0.02178    0.41352   0.053
## 
## Correlation of Fixed Effects:
##             (Intr) sexm   PrvPbp class2 class3
## sexm        -0.096                            
## PrivPubpblc -0.709  0.000                     
## class2      -0.116  0.000 -0.006              
## class3      -0.091 -0.050  0.024  0.174       
## class4      -0.104 -0.039  0.049  0.140  0.102
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
summary(ma2 <- lmer(like~(1|school), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: like ~ (1 | school)
##    Data: dta
## 
## REML criterion at convergence: 5595.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5168 -0.6764  0.1319  0.6860  1.8570 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  school   (Intercept) 0.1894   0.4352  
##  Residual             3.2228   1.7952  
## Number of obs: 1385, groups:  school, 41
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   5.1346     0.0867   59.22