# mixed-effects cumulative logit model (proportional odds)
# loading package
pacman::p_load(tidyverse, HH, ordinal)
dta2 <- read.table("C:/Users/user/Desktop/multilevel/1123/teach_again.txt", h=T)
head(dta2)
## Answer Task School
## 1 Yes -0.2642783 S01
## 2 Yes 0.5709041 S01
## 3 Yes 0.1329710 S01
## 4 Unsure -0.2642783 S01
## 5 No -1.0994610 S01
## 6 Yes 0.5302202 S01
## Answer No Unsure Yes
## School
## S01 5 4 15
## S02 9 9 20
## S03 14 10 17
## S04 0 1 8
## S05 1 5 23
## S06 9 12 18
## S07 12 4 26
## S08 7 5 18
## S09 2 8 18
## S10 21 27 69
## S11 14 12 32
## S12 15 19 18
## S13 13 6 18
## S14 0 1 7
## S15 17 11 25
## S16 13 10 22
dta2p <- prop.table(with(dta2, ftable(School, Answer)), 1)
aggregate(Task ~ School, mean, data = dta2)
## School Task
## 1 S01 0.02089652
## 2 S02 -0.11779710
## 3 S03 -0.17287423
## 4 S04 0.08085690
## 5 S05 -0.14908077
## 6 S06 0.23740790
## 7 S07 -0.13334256
## 8 S08 0.18657692
## 9 S09 0.42902949
## 10 S10 0.01057772
## 11 S11 -0.15770350
## 12 S12 -0.08074803
## 13 S13 0.13885760
## 14 S14 0.89935413
## 15 S15 -0.12168721
## 16 S16 -0.01327148
## 'data.frame': 16 obs. of 3 variables:
## $ No : num 5 9 14 0 1 9 12 7 2 21 ...
## $ Unsure: num 4 9 10 1 5 12 4 5 8 27 ...
## $ Yes : num 15 20 17 8 23 18 26 18 18 69 ...
# set the data frame and set "Answer" as the factor
dta2pl <- as.data.frame(dta2p) %>%
mutate(Answer = factor(Answer))
ggplot(dta2pl, aes(Answer, Freq, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
ylim(c(0, 1)) +
labs(x = "Answer", y = "Categorical response proprtions")
#
ggplot(dta2pl, aes(Task, Freq, color = Answer)) +
geom_point()+
stat_smooth(method = "lm", se=F) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.9, .5))
## `geom_smooth()` using formula 'y ~ x'
# caculate no, unsure and yes cumsum
dta2cp <- data.frame(School = levels(dta2pl$School),
as.data.frame(t(apply(dta2p, 1, cumsum))))
# wide to long form
dta2cpl <- gather(dta2cp, Answer, Prop, 2:4) %>%
mutate(Answer = factor(Answer))
# theme_set
ot <- theme_set(theme_bw())
#
ggplot(dta2cpl, aes(Answer, Prop, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
labs(x = "Answer", y = "Cumulative proportions")
# ordinal package
# cumulative mixed proportional odds model
dta2 <- dta2 %>%
mutate(Answer = factor(Answer))
summary(m0 <- clmm(Answer ~ Task + (1 | School), data=dta2))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: Answer ~ Task + (1 | School)
## data: dta2
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 650 -642.14 1292.27 177(393) 1.91e-04 6.4e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 0.09088 0.3015
## Number of groups: School 16
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Task 0.36488 0.08792 4.15 3.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## No|Unsure -1.2659 0.1301 -9.731
## Unsure|Yes -0.2169 0.1176 -1.844
dta2_m0 <- data.frame(na.omit(dta2), phat = fitted(m0))
#
ggplot(dta2_m0, aes(Task, phat, color = Answer)) +
geom_point(alpha = .2, pch = 20)+
geom_point(data = dta2pl, aes(Task, Freq, color = Answer)) +
stat_smooth(method = "lm", se=F, alpha = .2) +
stat_smooth(data = dta2pl, aes(Task, Freq, color = Answer),
method = "lm", se=F, linetype = "dotted") +
scale_y_continuous(limits=(c(0, 1))) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#
pn <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Yes"))
# add phat = 0 to S04 and S14 in the No answer category
# fix(pn)
pn <- rbind(pn, c("S04", 0), c("S14", 0))
# put them in the right order by school
pn <- pn[order(pn$School),]
# append predicted prob to the observed p-table
dta2pl$phat <- c(pn[,2], pu[,2], py[,2])
dta2pl <- dta2pl %>%
mutate(phat = as.numeric(phat))
# plot observed categ. prop and fitted prob. against Task
ggplot(dta2pl, aes(Task, Freq, color = Answer)) +
geom_point(alpha = .3)+
stat_smooth(method = "lm", se = F) +
geom_point(aes(Task, phat, color = Answer), pch = 1)+
stat_smooth(aes(Task, phat, color = Answer),
method = "lm", se = F, lty = 2, lwd = .8) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
theme(legend.position = c(.9, .5))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'