pkgs <- c("nlme", "dplyr", "magrittr", "tidyr", "ggplot2", "lmtest","nlme","HLMdiag","alr4","lme4")
lapply(pkgs, library, character.only = TRUE)
dta1 <- read.csv("http://www-personal.umich.edu/~bwest/classroom.csv",header = T,sep = ",")
dta1_group <- group_by(dta1,schoolid) %>% summarise(mean(ses))
dta1 %<>% left_join(dta1_group,by = "schoolid")
colnames(dta1)[13] <- c("mean_ses")
##Histogram
ot <- theme_set(theme_bw())
bw <- 2*IQR(dta1$mathkind)/(dim(dta1)[1]^(1/3))
ggplot(dta1, aes(mathkind)) +
stat_bin(aes(fill = ..count..), binwidth = bw) +
labs(x = "Math scores in the spring of kindergarten ")
##box plots
ggplot(dta1,
aes(reorder(schoolid, mathkind, mean), mathkind, color = mean_ses)) +
geom_boxplot() +
geom_point(size = 0.6) +
labs(x = "School ID", y = "Math scores")+
theme(axis.text.x = element_text(size = 6))+
ggtitle("Schools boxplot with Mean_ses")
##model 1
summary(r1_1 <- lmer(mathgain ~ mathkind+sex+minority+ses+(1 | schoolid)
+(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) +
## (1 | schoolid:classid)
## Data: dta1
##
## REML criterion at convergence: 11385.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8257 -0.6110 -0.0337 0.5538 4.2678
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid:classid (Intercept) 83.28 9.126
## schoolid (Intercept) 75.20 8.672
## Residual 734.57 27.103
## Number of obs: 1190, groups: schoolid:classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 282.79034 10.85323 26.056
## mathkind -0.46980 0.02227 -21.100
## sex -1.25119 1.65773 -0.755
## minority -8.26213 2.34011 -3.531
## ses 5.34638 1.24109 4.308
##
## Correlation of Fixed Effects:
## (Intr) mthknd sex minrty
## mathkind -0.978
## sex -0.044 -0.031
## minority -0.307 0.163 -0.018
## ses 0.140 -0.168 0.019 0.163
confint(r1_1, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|schoolid:classid 5.6783458 12.1890239
## sd_(Intercept)|schoolid 5.4292074 11.4919445
## sigma 25.8529040 28.3580141
## (Intercept) 261.4196447 304.0236327
## mathkind -0.5133662 -0.4259398
## sex -4.4952285 1.9988740
## minority -12.8443439 -3.6777648
## ses 2.9110198 7.7744855
##model 2
summary(r1_2 <- lmer(mathgain ~ (1 | schoolid)
+(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ (1 | schoolid) + (1 | schoolid:classid)
## Data: dta1
##
## REML criterion at convergence: 11768.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6441 -0.5984 -0.0336 0.5334 5.6335
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid:classid (Intercept) 99.23 9.961
## schoolid (Intercept) 77.49 8.803
## Residual 1028.23 32.066
## Number of obs: 1190, groups: schoolid:classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 57.427 1.443 39.79
confint(r1_2, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|schoolid:classid 5.155825 13.87863
## sd_(Intercept)|schoolid 4.434806 12.23891
## sigma 30.622299 33.62319
## (Intercept) 54.577338 60.26542
##model 3
dta1$sex <- as.factor(dta1$sex)
dta1$minority <- as.factor(dta1$minority)
summary(r1_3 <- lmer(mathgain ~ mathkind+sex+minority+ses+(1 | schoolid)
+(1 | schoolid:classid), data = dta1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) +
## (1 | schoolid:classid)
## Data: dta1
##
## REML criterion at convergence: 11385.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8257 -0.6110 -0.0337 0.5538 4.2678
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid:classid (Intercept) 83.28 9.126
## schoolid (Intercept) 75.20 8.672
## Residual 734.57 27.103
## Number of obs: 1190, groups: schoolid:classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 282.79034 10.85323 26.056
## mathkind -0.46980 0.02227 -21.100
## sex1 -1.25119 1.65773 -0.755
## minority1 -8.26213 2.34011 -3.531
## ses 5.34638 1.24109 4.308
##
## Correlation of Fixed Effects:
## (Intr) mthknd sex1 mnrty1
## mathkind -0.978
## sex1 -0.044 -0.031
## minority1 -0.307 0.163 -0.018
## ses 0.140 -0.168 0.019 0.163
confint(r1_3, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|schoolid:classid 5.6783458 12.1890239
## sd_(Intercept)|schoolid 5.4292074 11.4919445
## sigma 25.8529040 28.3580141
## (Intercept) 261.4196447 304.0236327
## mathkind -0.5133662 -0.4259398
## sex1 -4.4952285 1.9988740
## minority1 -12.8443439 -3.6777648
## ses 2.9110198 7.7744855
# residual plots(model 1)
plot(r1_1, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
# residual plots(model 2)
plot(r1_2, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
# residual plots(model 2)
plot(r1_3, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
dta2 <- read.csv("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/language_arith.csv",header = T,sep = ",")
str(dta2)
## 'data.frame': 2287 obs. of 4 variables:
## $ School : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ Language : int 46 45 33 46 20 30 30 57 36 36 ...
## $ Arithmetic: int 24 19 24 26 9 13 13 30 23 22 ...
cor(dta2[,c(3,4)])
## Language Arithmetic
## Language 1.000000 0.694228
## Arithmetic 0.694228 1.000000
dta2$School <- as.factor(dta2$School)
dta2$Pupil <- as.factor(dta2$Pupil)
dta2_m <- dta2 %>%
group_by(School) %>%
mutate( ave_lan = mean(Language),ave_ari = mean(Arithmetic)) %>%
select( School, ave_lan, ave_ari ) %>%
unique %>%
as.data.frame
##ggplot
ggplot() +
geom_point(data = dta2,
aes(x = Arithmetic, y = Language), col = "cyan") +
stat_smooth(data = dta2, method = "lm", se = FALSE,
aes(x = Arithmetic, y = Language), col = "cyan") +
geom_point(data = dta2_m,
aes(x = ave_ari, y =ave_lan), col = "blue") +
stat_smooth(data = dta2_m, method = "lm", se = FALSE,
aes(x = ave_ari, y = ave_lan), col = "blue") +
theme_bw()
##lmer
summary(r2_1 <- lmer(Language ~ (1 | School), data = dta2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
## Data: dta2
##
## REML criterion at convergence: 16253.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.11618 -0.65703 0.07597 0.74128 2.50639
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 19.63 4.431
## Residual 64.56 8.035
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.3624 0.4282 94.26
confint(r2_1, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|School 3.772867 5.162595
## sigma 7.800685 8.282162
## (Intercept) 39.513798 41.201851
summary(r2_2 <- lmer(Arithmetic ~ (1 | School), data = dta2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Arithmetic ~ (1 | School)
## Data: dta2
##
## REML criterion at convergence: 14695.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.80522 -0.70245 0.08321 0.75577 2.68341
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 12.62 3.552
## Residual 32.28 5.682
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 18.9473 0.3365 56.31
confint(r2_2, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|School 3.063710 4.100567
## sigma 5.516162 5.855927
## (Intercept) 18.282054 19.607176
# residual plots(model 1)
plot(r2_1, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
# residual plots(model 2)
plot(r2_2, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
##Question 4
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/course_eval.txt",header = T)
str(dta4)
## 'data.frame': 463 obs. of 7 variables:
## $ Score : num 4.3 4.5 3.7 4.3 4.4 4.2 4 3.4 4.5 3.9 ...
## $ Beauty : num 0.202 -0.826 -0.66 -0.766 1.421 ...
## $ Gender : int 1 0 0 1 1 0 1 1 1 0 ...
## $ Age : int 36 59 51 40 31 62 33 51 33 47 ...
## $ Minority: int 1 0 0 0 0 0 0 0 0 0 ...
## $ Tenure : int 0 1 1 1 0 1 0 1 0 0 ...
## $ Course : int 3 0 4 2 0 0 4 0 0 4 ...
dta4$Gender <- as.factor(dta4$Gender)
dta4$Minority <- as.factor(dta4$Minority)
dta4$Tenure <- as.factor(dta4$Tenure)
dta4$Course <- as.factor(dta4$Course)
dta4_m <- dta4 %>%
group_by(Course) %>%
mutate( ave_sco = mean(Score),ave_bea = mean(Beauty)) %>%
select( Course, ave_sco, ave_bea ) %>%
unique %>%
as.data.frame
##ggplot
ggplot()+
geom_point(data = dta4, aes(x = Beauty, y = Score), col = "blue") +
stat_smooth(data = dta4_m, method = "lm", se = F,
aes(x = ave_bea, y = ave_sco), col = "gray50") +
labs(x = "Beauty score", y = "Average teaching evaluation")+
theme_bw()
##ggplot individual
ggplot(data = dta4, aes(x = Beauty, y = Score, group = Course)) +
geom_point(col = "blue", shape = 21) +
stat_smooth(data = dta4, method = "lm", se = FALSE,
aes(x = Beauty, y = Score, group = Course), col = "black") +
facet_wrap(~ Course, ncol = 6) +
labs(x = "Beauty score", y = "Average teaching evaluation")+
theme_bw()
##lmer
summary(r4 <- lmer(data = dta4, Score ~ Beauty + Age + Gender + Minority +
Tenure + (1|Course) -1)
)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course) - 1
## Data: dta4
##
## REML criterion at convergence: 749.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6110 -0.6666 0.0876 0.7275 2.0388
##
## Random effects:
## Groups Name Variance Std.Dev.
## Course (Intercept) 0.07618 0.2760
## Residual 0.26282 0.5127
## Number of obs: 463, groups: Course, 31
##
## Fixed effects:
## Estimate Std. Error t value
## Beauty 0.131483 0.033863 3.883
## Age -0.003687 0.002972 -1.240
## Gender0 4.352253 0.159658 27.260
## Gender1 4.120128 0.151891 27.126
## Minority1 -0.127408 0.075157 -1.695
## Tenure1 -0.091118 0.054955 -1.658
##
## Correlation of Fixed Effects:
## Beauty Age Gendr0 Gendr1 Mnrty1
## Age 0.265
## Gender0 -0.229 -0.869
## Gender1 -0.253 -0.853 0.943
## Minority1 0.035 0.090 -0.174 -0.208
## Tenure1 0.047 -0.316 0.059 0.110 0.044
confint(r4, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|Course 0.143647959 0.414152370
## sigma 0.477673520 0.546202579
## Beauty 0.065589892 0.197889011
## Age -0.009482271 0.002227762
## Gender0 4.036616248 4.664959749
## Gender1 3.821152922 4.417403842
## Minority1 -0.273645083 0.020371627
## Tenure1 -0.198254748 0.016346769
# residual plots
plot(r4, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
##Question 5
dta5 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/iq_language.txt",header = T)
dta5$School <- as.factor(dta5$School)
str(dta5)
## 'data.frame': 2287 obs. of 9 variables:
## $ School : Factor w/ 131 levels "1","2","10","12",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ Language : int 46 45 33 46 20 30 30 57 36 36 ...
## $ Group_size : int 29 29 29 29 29 29 29 29 29 29 ...
## $ IQ_c : num 3.166 2.666 -2.334 -0.834 -3.834 ...
## $ School_mean: num -1.51 -1.51 -1.51 -1.51 -1.51 ...
## $ Group_mean : num 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 5.9 ...
## $ SES_c : num -4.81 -17.81 -12.81 -4.81 -17.81 ...
##lmer
summary(r5 <- lmer(data = dta5, Language ~ IQ_c
+ (1|School))
)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + (1 | School)
## Data: dta5
##
## REML criterion at convergence: 15255.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0939 -0.6375 0.0579 0.7061 3.1448
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 9.602 3.099
## Residual 42.245 6.500
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.60823 0.30819 131.8
## IQ_c 2.48759 0.07008 35.5
##
## Correlation of Fixed Effects:
## (Intr)
## IQ_c 0.018
confint(r5, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|School 2.619559 3.628170
## sigma 6.308665 6.697510
## (Intercept) 39.997753 41.212270
## IQ_c 2.349851 2.626424
# residual plots
plot(r5, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)