library

pkgs <- c("nlme", "dplyr", "magrittr", "tidyr", "ggplot2", "lmtest","nlme","HLMdiag","alr4","lme4")
lapply(pkgs, library, character.only = TRUE)

Question 1

dta1 <- read.csv("http://www-personal.umich.edu/~bwest/classroom.csv",header = T,sep = ",")
dta1_group <- group_by(dta1,schoolid) %>% summarise(mean(ses))
dta1 %<>% left_join(dta1_group,by = "schoolid")
colnames(dta1)[13] <- c("mean_ses")

##Histogram
ot <- theme_set(theme_bw())
bw <- 2*IQR(dta1$mathkind)/(dim(dta1)[1]^(1/3))

ggplot(dta1, aes(mathkind)) +
  stat_bin(aes(fill = ..count..), binwidth = bw) +
  labs(x = "Math scores in the spring of kindergarten ")

##box plots
ggplot(dta1,
       aes(reorder(schoolid, mathkind, mean), mathkind, color = mean_ses)) +
  geom_boxplot() +
  geom_point(size = 0.6) +
  labs(x = "School ID", y = "Math scores")+
  theme(axis.text.x = element_text(size = 6))+
  ggtitle("Schools boxplot with Mean_ses")

##model 1
summary(r1_1 <- lmer(mathgain ~ mathkind+sex+minority+ses+(1 | schoolid)
                   +(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) +  
##     (1 | schoolid:classid)
##    Data: dta1
## 
## REML criterion at convergence: 11385.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.8257 -0.6110 -0.0337  0.5538  4.2678 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  schoolid:classid (Intercept)  83.28    9.126  
##  schoolid         (Intercept)  75.20    8.672  
##  Residual                     734.57   27.103  
## Number of obs: 1190, groups:  schoolid:classid, 312; schoolid, 107
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 282.79034   10.85323  26.056
## mathkind     -0.46980    0.02227 -21.100
## sex          -1.25119    1.65773  -0.755
## minority     -8.26213    2.34011  -3.531
## ses           5.34638    1.24109   4.308
## 
## Correlation of Fixed Effects:
##          (Intr) mthknd sex    minrty
## mathkind -0.978                     
## sex      -0.044 -0.031              
## minority -0.307  0.163 -0.018       
## ses       0.140 -0.168  0.019  0.163
confint(r1_1, oldNames = FALSE)
## Computing profile confidence intervals ...
##                                       2.5 %      97.5 %
## sd_(Intercept)|schoolid:classid   5.6783458  12.1890239
## sd_(Intercept)|schoolid           5.4292074  11.4919445
## sigma                            25.8529040  28.3580141
## (Intercept)                     261.4196447 304.0236327
## mathkind                         -0.5133662  -0.4259398
## sex                              -4.4952285   1.9988740
## minority                        -12.8443439  -3.6777648
## ses                               2.9110198   7.7744855
##model 2
summary(r1_2 <- lmer(mathgain ~ (1 | schoolid)
                   +(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | schoolid) + (1 | schoolid:classid)
##    Data: dta1
## 
## REML criterion at convergence: 11768.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6441 -0.5984 -0.0336  0.5334  5.6335 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  schoolid:classid (Intercept)   99.23   9.961  
##  schoolid         (Intercept)   77.49   8.803  
##  Residual                     1028.23  32.066  
## Number of obs: 1190, groups:  schoolid:classid, 312; schoolid, 107
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   57.427      1.443   39.79
confint(r1_2, oldNames = FALSE)
## Computing profile confidence intervals ...
##                                     2.5 %   97.5 %
## sd_(Intercept)|schoolid:classid  5.155825 13.87863
## sd_(Intercept)|schoolid          4.434806 12.23891
## sigma                           30.622299 33.62319
## (Intercept)                     54.577338 60.26542
##model 3
dta1$sex <- as.factor(dta1$sex)
dta1$minority <- as.factor(dta1$minority)

summary(r1_3 <- lmer(mathgain ~ mathkind+sex+minority+ses+(1 | schoolid)
                     +(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) +  
##     (1 | schoolid:classid)
##    Data: dta1
## 
## REML criterion at convergence: 11385.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.8257 -0.6110 -0.0337  0.5538  4.2678 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  schoolid:classid (Intercept)  83.28    9.126  
##  schoolid         (Intercept)  75.20    8.672  
##  Residual                     734.57   27.103  
## Number of obs: 1190, groups:  schoolid:classid, 312; schoolid, 107
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 282.79034   10.85323  26.056
## mathkind     -0.46980    0.02227 -21.100
## sex1         -1.25119    1.65773  -0.755
## minority1    -8.26213    2.34011  -3.531
## ses           5.34638    1.24109   4.308
## 
## Correlation of Fixed Effects:
##           (Intr) mthknd sex1   mnrty1
## mathkind  -0.978                     
## sex1      -0.044 -0.031              
## minority1 -0.307  0.163 -0.018       
## ses        0.140 -0.168  0.019  0.163
confint(r1_3, oldNames = FALSE)
## Computing profile confidence intervals ...
##                                       2.5 %      97.5 %
## sd_(Intercept)|schoolid:classid   5.6783458  12.1890239
## sd_(Intercept)|schoolid           5.4292074  11.4919445
## sigma                            25.8529040  28.3580141
## (Intercept)                     261.4196447 304.0236327
## mathkind                         -0.5133662  -0.4259398
## sex1                             -4.4952285   1.9988740
## minority1                       -12.8443439  -3.6777648
## ses                               2.9110198   7.7744855
# residual plots(model 1)
plot(r1_1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

# residual plots(model 2)
plot(r1_2, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

# residual plots(model 2)
plot(r1_3, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

Question2

dta2 <- read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/language_arith.csv",header = T,sep = ",")
str(dta2)
## 'data.frame':    2287 obs. of  4 variables:
##  $ School    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil     : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ Language  : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ Arithmetic: int  24 19 24 26 9 13 13 30 23 22 ...
cor(dta2[,c(3,4)])
##            Language Arithmetic
## Language   1.000000   0.694228
## Arithmetic 0.694228   1.000000
dta2$School <- as.factor(dta2$School)
dta2$Pupil <- as.factor(dta2$Pupil)
dta2_m <- dta2 %>% 
  group_by(School) %>% 
  mutate( ave_lan = mean(Language),ave_ari = mean(Arithmetic)) %>% 
  select( School, ave_lan, ave_ari ) %>%  
  unique %>%
  as.data.frame


##ggplot
ggplot() +
  geom_point(data = dta2,
             aes(x = Arithmetic, y = Language), col = "cyan") +
  stat_smooth(data = dta2, method = "lm", se = FALSE,
              aes(x = Arithmetic, y = Language), col = "cyan") +
  geom_point(data = dta2_m,
             aes(x = ave_ari, y =ave_lan), col = "blue") +
  stat_smooth(data = dta2_m, method = "lm", se = FALSE,
              aes(x = ave_ari, y = ave_lan), col = "blue") +
  theme_bw()

##lmer
summary(r2_1 <- lmer(Language ~ (1 | School), data = dta2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
##    Data: dta2
## 
## REML criterion at convergence: 16253.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11618 -0.65703  0.07597  0.74128  2.50639 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 19.63    4.431   
##  Residual             64.56    8.035   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  40.3624     0.4282   94.26
confint(r2_1, oldNames = FALSE)
## Computing profile confidence intervals ...
##                           2.5 %    97.5 %
## sd_(Intercept)|School  3.772867  5.162595
## sigma                  7.800685  8.282162
## (Intercept)           39.513798 41.201851
summary(r2_2 <- lmer(Arithmetic ~ (1 | School), data = dta2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Arithmetic ~ (1 | School)
##    Data: dta2
## 
## REML criterion at convergence: 14695.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.80522 -0.70245  0.08321  0.75577  2.68341 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 12.62    3.552   
##  Residual             32.28    5.682   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  18.9473     0.3365   56.31
confint(r2_2, oldNames = FALSE)
## Computing profile confidence intervals ...
##                           2.5 %    97.5 %
## sd_(Intercept)|School  3.063710  4.100567
## sigma                  5.516162  5.855927
## (Intercept)           18.282054 19.607176
# residual plots(model 1)
plot(r2_1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

# residual plots(model 2)
plot(r2_2, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

##Question 4

dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/course_eval.txt",header = T)
str(dta4)
## 'data.frame':    463 obs. of  7 variables:
##  $ Score   : num  4.3 4.5 3.7 4.3 4.4 4.2 4 3.4 4.5 3.9 ...
##  $ Beauty  : num  0.202 -0.826 -0.66 -0.766 1.421 ...
##  $ Gender  : int  1 0 0 1 1 0 1 1 1 0 ...
##  $ Age     : int  36 59 51 40 31 62 33 51 33 47 ...
##  $ Minority: int  1 0 0 0 0 0 0 0 0 0 ...
##  $ Tenure  : int  0 1 1 1 0 1 0 1 0 0 ...
##  $ Course  : int  3 0 4 2 0 0 4 0 0 4 ...
dta4$Gender <- as.factor(dta4$Gender)
dta4$Minority <- as.factor(dta4$Minority)
dta4$Tenure <- as.factor(dta4$Tenure)
dta4$Course <- as.factor(dta4$Course)
dta4_m <- dta4 %>% 
  group_by(Course) %>% 
  mutate( ave_sco = mean(Score),ave_bea = mean(Beauty)) %>% 
  select( Course, ave_sco, ave_bea ) %>%  
  unique %>%
  as.data.frame

##ggplot
ggplot()+
  geom_point(data = dta4, aes(x = Beauty, y = Score), col = "blue") +
  stat_smooth(data = dta4_m, method = "lm", se = F,
              aes(x = ave_bea, y = ave_sco), col = "gray50") +
  labs(x = "Beauty score", y = "Average teaching evaluation")+
  theme_bw()

##ggplot individual
ggplot(data = dta4, aes(x = Beauty, y = Score, group = Course)) +
  geom_point(col = "blue", shape = 21) +
  stat_smooth(data = dta4, method = "lm", se = FALSE,
              aes(x = Beauty, y = Score, group = Course), col = "black") +
  facet_wrap(~ Course, ncol = 6) +
  labs(x = "Beauty score", y = "Average teaching evaluation")+
  theme_bw()

##lmer
summary(r4 <- lmer(data = dta4, Score ~ Beauty + Age + Gender + Minority +
                     Tenure + (1|Course) -1)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course) -      1
##    Data: dta4
## 
## REML criterion at convergence: 749.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6110 -0.6666  0.0876  0.7275  2.0388 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Course   (Intercept) 0.07618  0.2760  
##  Residual             0.26282  0.5127  
## Number of obs: 463, groups:  Course, 31
## 
## Fixed effects:
##            Estimate Std. Error t value
## Beauty     0.131483   0.033863   3.883
## Age       -0.003687   0.002972  -1.240
## Gender0    4.352253   0.159658  27.260
## Gender1    4.120128   0.151891  27.126
## Minority1 -0.127408   0.075157  -1.695
## Tenure1   -0.091118   0.054955  -1.658
## 
## Correlation of Fixed Effects:
##           Beauty Age    Gendr0 Gendr1 Mnrty1
## Age        0.265                            
## Gender0   -0.229 -0.869                     
## Gender1   -0.253 -0.853  0.943              
## Minority1  0.035  0.090 -0.174 -0.208       
## Tenure1    0.047 -0.316  0.059  0.110  0.044
confint(r4, oldNames = FALSE)
## Computing profile confidence intervals ...
##                              2.5 %      97.5 %
## sd_(Intercept)|Course  0.143647959 0.414152370
## sigma                  0.477673520 0.546202579
## Beauty                 0.065589892 0.197889011
## Age                   -0.009482271 0.002227762
## Gender0                4.036616248 4.664959749
## Gender1                3.821152922 4.417403842
## Minority1             -0.273645083 0.020371627
## Tenure1               -0.198254748 0.016346769
# residual plots
plot(r4, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

##Question 5

dta5 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/iq_language.txt",header = T)
dta5$School <- as.factor(dta5$School)
str(dta5)
## 'data.frame':    2287 obs. of  9 variables:
##  $ School     : Factor w/ 131 levels "1","2","10","12",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil      : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ IQ         : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ Language   : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ Group_size : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ IQ_c       : num  3.166 2.666 -2.334 -0.834 -3.834 ...
##  $ School_mean: num  -1.51 -1.51 -1.51 -1.51 -1.51 ...
##  $ Group_mean : num  5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 ...
##  $ SES_c      : num  -4.81 -17.81 -12.81 -4.81 -17.81 ...
##lmer
summary(r5 <- lmer(data = dta5, Language ~ IQ_c
                      + (1|School))
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + (1 | School)
##    Data: dta5
## 
## REML criterion at convergence: 15255.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0939 -0.6375  0.0579  0.7061  3.1448 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  9.602   3.099   
##  Residual             42.245   6.500   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 40.60823    0.30819   131.8
## IQ_c         2.48759    0.07008    35.5
## 
## Correlation of Fixed Effects:
##      (Intr)
## IQ_c 0.018
confint(r5, oldNames = FALSE)
## Computing profile confidence intervals ...
##                           2.5 %    97.5 %
## sd_(Intercept)|School  2.619559  3.628170
## sigma                  6.308665  6.697510
## (Intercept)           39.997753 41.212270
## IQ_c                   2.349851  2.626424
# residual plots
plot(r5, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)