Reproduce the results in homework example using the rmarkdown file and publish it with a web link on the course moodle page.
Kreft and de Leeuw (1998) obtained a sub-sample of students in eighth grade from the National Education Longitudinal Study of 1988 (NELS–88) collected by the National Center for Educational Statistics of the U.S. Department of Education.
The students are nested in schools.
Here, we consider the following subset of the variables:
schid: school identifier
math: continuous measure of achievement in mathematics (standardized to have a mean of 50 and a standard deviation of 10)
homework: number of hours of homework done per week
library(tidyverse)
library(haven)
fL <- "https://stats.idre.ucla.edu/stat/examples/imm/imm10.dta"
dta <- read_dta(fL)
glimpse(dta)
Observations: 260
Variables: 19
$ schid <dbl> 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472,...
$ stuid <dbl> 3, 8, 13, 17, 27, 28, 30, 36, 37, 42, 52, 53, 61, 64,...
$ ses <dbl> -0.13, -0.39, -0.80, -0.72, -0.74, -0.58, -0.83, -0.5...
$ meanses <dbl> -0.48261, -0.48261, -0.48261, -0.48261, -0.48261, -0....
$ homework <dbl> 1, 0, 0, 1, 2, 1, 5, 1, 1, 2, 1, 1, 1, 2, 1, 4, 1, 2,...
$ white <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ parented <dbl> 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 1, 2, 3, 3, 1,...
$ public <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ ratio <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 1...
$ percmin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ math <dbl> 48, 48, 53, 42, 43, 57, 33, 64, 36, 56, 48, 48, 44, 3...
$ sex <dbl> 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2,...
$ race <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
$ sctype <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ cstr <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ scsize <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,...
$ urban <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ region <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ schnum <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
# make schid a factor type
dta$schid <- factor(dta$schid)
coef(m0 <- nlme::lmList(math ~ homework | schid, data=dta))
(Intercept) homework
7472 50.684 -3.5538
7829 49.012 -2.9201
7930 38.750 7.9091
24725 34.394 5.5927
25456 53.939 -4.7184
25642 49.259 -2.4861
62821 59.210 1.0946
68448 36.055 6.4963
68493 38.520 5.8600
72292 37.714 6.3351
ggplot(data=dta, aes(homework, math, color=schid)) +
geom_point(alpha=0.5) +
stat_smooth(aes(group=1), method="lm", formula=y~x) +
stat_smooth(method="lm", formula=y ~ x, se=F) +
labs(x="Homework (hours per week)",
y="Math score") +
guides(color=guide_legend(ncol=2))+
theme_minimal() +
theme(legend.position=c(.9,.3))
library(lme4)
summary(m1 <- lmer(math ~ homework + (homework | schid), data=dta))
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ homework + (homework | schid)
Data: dta
REML criterion at convergence: 1764
Scaled residuals:
Min 1Q Median 3Q Max
-2.5111 -0.5357 0.0175 0.6121 2.5708
Random effects:
Groups Name Variance Std.Dev. Corr
schid (Intercept) 69.3 8.33
homework 22.5 4.74 -0.81
Residual 43.1 6.56
Number of obs: 260, groups: schid, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 44.77 2.74 16.32
homework 2.04 1.55 1.31
Correlation of Fixed Effects:
(Intr)
homework -0.804
library(lattice)
qqmath(ranef(m1))
$schid
plot(m1, resid(., scaled=TRUE) ~ fitted(.) | schid,
xlab="Fitted values", ylab= "Standardized residuals",
abline=0, lty=3)
qqmath(m1, grid=TRUE)
Download the data file from the language and math example first to a data folder. In Rstudio, open the R script and compile a report in html directly or use the command
knitr::spin("foo.R", knit=FALSE)
to render it as an RMD file format first.
library(tidyverse)
dta <- read.csv("~/data/langMath.csv", h=T)
dta_a <- dta %>%
group_by(School) %>%
summarize(ave_lang = mean(Lang, na.rm=TRUE),
ave_arith = mean(Arith, na.rm=TRUE))
ggplot(data=dta, aes(x=Arith, y=Lang)) +
geom_point(color="skyblue") +
stat_smooth(method="lm", formula=y ~ x, se=F, col="skyblue") +
geom_point(data=dta_a, aes(ave_arith, ave_lang), color="steelblue") +
stat_smooth(data=dta_a, aes(ave_arith, ave_lang),
method="lm", formula= y ~ x, se=F, color="steelblue") +
labs(x="Arithmetic score",
y="Language score") +
theme_bw()
Draw a scatter plot of the variable normexam against the variable standLRT from the data set Exam{mlmRev}. Superimpose on it another scatter plot using the means of the same two variables by school. You can start by editing this rmarkdown template in RStudio before submitting the output to moodle.
Please see the result in here
Edit this rmarkdown template in RStudio to reproduce the following histogram for correlation coefficients between written and course variables by school from the data set Gcsemv{mlmRev}. The two vertical lines indicate averaged correlations over schools and correlation computed over individuals ignoring school label. Which is which?
histogram
# install.package("mlmRev")
library(mlmRev)
# load the data from the package
data(Gcsemv, package="mlmRev")
# invoke help document
?Gcsemv
# view first 6 lines
head(Gcsemv)
school student gender written course
1 20920 16 M 23 NA
2 20920 25 F NA 71.2
3 20920 27 F 39 76.8
4 20920 31 F 36 87.9
5 20920 42 M 16 44.4
6 20920 62 F 36 NA
with(Gcsemv, cor(written, course, use="pairwise"))
[1] 0.47417
# compute the means by school
course_schavg <- with(Gcsemv, tapply(course, school, mean, na.rm=T))
written_schavg <- with(Gcsemv, tapply(written, school, mean, na.rm=T))
cor(course_schavg, written_schavg)
[1] 0.39568
library(tidyverse)
dta <- Gcsemv %>%
group_by(school) %>%
mutate(r_sch = cor(course, written, use="pairwise"))
dtar <- dta[!duplicated(dta$school),"r_sch"]
ggplot(data=dtar, aes(r_sch)) +
geom_histogram(fill="skyblue") +
geom_vline(xintercept=c(0.39568, 0.47414),
col=c("peru","black"), # 修改模板中的實線顏色,將red改成black,以符合示範圖
lty=c(3,1)) + # 1為實線,3為虛線
labs(x="Estimated correlation coefficients",
y="Counts") +
theme_bw()
圖中二條直線,實線為以樣本個體的course和written的correlation,虛線則為在school的分群下course和written的平均數之間的correlation。
Reproduce the results of the loudness example by completing the R markdown file
Ten subjects were played tones at each of 5 loudnesses, in random order. Subjects were asked to draw a line on paper whose length matched the loudness of the tone. Each subject repeated each loudness 3 times, for a total of 30 trials per subject. Here is the mean of the 3 log-lengths for each loudness, the sd of the three log-lengths, and the number of replications, which is always 3.
A data frame with 50 observations on the following 5 variables.
subject
a factor with unique values for each subject
loudness
either 50, 60, 70, 80 or 90 db. Decibels are a logrithmic scale
y
a numeric vector giving the mean of the log-lengths of three lines drawn.
sd
a numeric vector, giving the sd of the three log lengths
n
a numeric vector, equal to the constant value 3
# install.packages("alr4")
library(alr4)
data(Stevens, package="alr4")
library(tidyverse)
library(lme4)
glimpse(Stevens)
Observations: 50
Variables: 5
$ subject <fct> A, A, A, A, A, B, B, B, B, B, C, C, C, C, C, D, D, D,...
$ loudness <dbl> 50, 60, 70, 80, 90, 50, 60, 70, 80, 90, 50, 60, 70, 8...
$ y <dbl> 0.379, 1.727, 3.016, 3.924, 4.413, 0.260, 0.833, 2.00...
$ sd <dbl> 0.507, 0.904, 0.553, 0.363, 0.092, 0.077, 0.287, 0.39...
$ n <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,...
coef(m0 <- nlme::lmList(y ~ loudness | subject, data=Stevens))
(Intercept) loudness
A -4.4937 0.10265
B -4.5853 0.09355
C -2.2936 0.06950
D -2.9194 0.07506
E -5.3851 0.10391
F -2.4212 0.07740
G -2.8109 0.06497
H -3.4207 0.08223
I -1.9487 0.06561
J -1.7591 0.05525
ggplot(data=Stevens, aes(x=loudness, y=y)) +
geom_point(alpha=0.5) +
stat_smooth(method="lm", formula=y ~ x, se=F) +
facet_grid(. ~ subject) +
labs(x="Loudness (in db)",
y="Mean Line length (in log)") +
theme_minimal()
library(lme4)
summary(m1 <- lmer(y ~ loudness + (1 | subject), data=Stevens))
Linear mixed model fit by REML ['lmerMod']
Formula: y ~ loudness + (1 | subject)
Data: Stevens
REML criterion at convergence: 57.1
Scaled residuals:
Min 1Q Median 3Q Max
-2.8081 -0.5060 0.0184 0.5530 1.7721
Random effects:
Groups Name Variance Std.Dev.
subject (Intercept) 0.1428 0.378
Residual 0.0986 0.314
Number of obs: 50, groups: subject, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) -3.20377 0.25405 -12.6
loudness 0.07901 0.00314 25.2
Correlation of Fixed Effects:
(Intr)
loudness -0.865
library(lattice)
qqmath(ranef(m1))
$subject
plot(m1, resid(., scaled=TRUE) ~ fitted(.) | subject,
xlab="Fitted values", ylab= "Standardized residuals",
abline=0, lty=3)
qqmath(m1, grid=TRUE)