1 data input

# mixed-effects cumulative logit model (proportional odds)
# loading package
pacman::p_load(tidyverse, HH, ordinal)

dta2 <- read.table("C:/Users/user/Desktop/multilevel/1123/teach_again.txt", h=T)

head(dta2)

##   Answer       Task School
## 1    Yes -0.2642783    S01
## 2    Yes  0.5709041    S01
## 3    Yes  0.1329710    S01
## 4 Unsure -0.2642783    S01
## 5     No -1.0994610    S01
## 6    Yes  0.5302202    S01

2 Descriptive

with(dta2, ftable(School, Answer))

##        Answer No Unsure Yes
## School                     
## S01            5      4  15
## S02            9      9  20
## S03           14     10  17
## S04            0      1   8
## S05            1      5  23
## S06            9     12  18
## S07           12      4  26
## S08            7      5  18
## S09            2      8  18
## S10           21     27  69
## S11           14     12  32
## S12           15     19  18
## S13           13      6  18
## S14            0      1   7
## S15           17     11  25
## S16           13     10  22

dta2p <- prop.table(with(dta2, ftable(School, Answer)), 1)
aggregate(Task ~ School, mean, data = dta2)

##    School        Task
## 1     S01  0.02089652
## 2     S02 -0.11779710
## 3     S03 -0.17287423
## 4     S04  0.08085690
## 5     S05 -0.14908077
## 6     S06  0.23740790
## 7     S07 -0.13334256
## 8     S08  0.18657692
## 9     S09  0.42902949
## 10    S10  0.01057772
## 11    S11 -0.15770350
## 12    S12 -0.08074803
## 13    S13  0.13885760
## 14    S14  0.89935413
## 15    S15 -0.12168721
## 16    S16 -0.01327148

3 HH packge for likert scale plots

#as.numeric
m <- as.numeric(with(dta2, table(School, Answer)))

#as.data.frame
m <- as.data.frame(matrix(m, 16, 3))

#columname
names(m) <- c("No", "Unsure", "Yes")

# rowname
rownames(m) <- levels(dta2$School)
str(m)

## 'data.frame':    16 obs. of  3 variables:
##  $ No    : num  5 9 14 0 1 9 12 7 2 21 ...
##  $ Unsure: num  4 9 10 1 5 12 4 5 8 27 ...
##  $ Yes   : num  15 20 17 8 23 18 26 18 18 69 ...

# total of answer
m$tot <- apply(m, 1, sum)

# No+Unsure/Total
m <- m[order((m[,1]+m[,2])/m[,4]), ]

4 Likert plot

#delete tot
likert(m[, -4], as.percent = T, main="", ylab="")

5 category response proportion and lm

# set the data frame and set "Answer" as the factor 
dta2pl <- as.data.frame(dta2p) %>% 
  mutate(Answer = factor(Answer))

# add the mean value of Task
dta2pl$Task <- rep(aggregate(Task ~ School, mean, data = dta2)[,2],3)

ggplot(dta2pl, aes(Answer, Freq, group = School)) +
  geom_point(alpha = .5)+
  geom_line(alpha = .5) +
  ylim(c(0, 1)) +
  labs(x = "Answer", y = "Categorical response proprtions")

#
ggplot(dta2pl, aes(Task, Freq, color = Answer)) +
  geom_point()+
  stat_smooth(method = "lm", se=F) +
  scale_y_continuous(limits=c(0, 1)) +
  labs(x = "Task", y = "Categorical response proprtions") +
  theme(legend.position = c(.9, .5))

## `geom_smooth()` using formula 'y ~ x'

6 cumulative proportion of response

# caculate no, unsure and yes cumsum
dta2cp <- data.frame(School = levels(dta2pl$School),
                    as.data.frame(t(apply(dta2p, 1, cumsum))))

# wide to long form
dta2cpl <- gather(dta2cp, Answer, Prop, 2:4) %>%
          mutate(Answer = factor(Answer))
# theme_set
ot <- theme_set(theme_bw())

#
ggplot(dta2cpl, aes(Answer, Prop, group = School)) +
  geom_point(alpha = .5)+
  geom_line(alpha = .5) +
  labs(x = "Answer", y = "Cumulative proportions")

7 ordinal GLMM

# ordinal package
# cumulative mixed proportional odds model

dta2 <- dta2 %>%
  mutate(Answer = factor(Answer))

summary(m0 <- clmm(Answer ~ Task + (1 | School), data=dta2))

## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: Answer ~ Task + (1 | School)
## data:    dta2
## 
##  link  threshold nobs logLik  AIC     niter    max.grad cond.H 
##  logit flexible  650  -642.14 1292.27 177(393) 1.91e-04 6.4e+01
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  School (Intercept) 0.09088  0.3015  
## Number of groups:  School 16 
## 
## Coefficients:
##      Estimate Std. Error z value Pr(>|z|)    
## Task  0.36488    0.08792    4.15 3.32e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## No|Unsure   -1.2659     0.1301  -9.731
## Unsure|Yes  -0.2169     0.1176  -1.844

dta2_m0 <- data.frame(na.omit(dta2), phat = fitted(m0))
#

ggplot(dta2_m0, aes(Task, phat, color = Answer)) +
  geom_point(alpha = .2, pch = 20)+
  geom_point(data = dta2pl, aes(Task, Freq, color = Answer)) +
  stat_smooth(method = "lm", se=F, alpha = .2) +
  stat_smooth(data = dta2pl, aes(Task, Freq, color = Answer),
              method = "lm", se=F, linetype = "dotted") +
  scale_y_continuous(limits=(c(0, 1))) +
  labs(x = "Task", y = "Categorical response proprtions") +
  theme(legend.position = c(.1, .8))

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

8 This is done because there are 0 responses in the frequency table

#
pn <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Yes"))

# add phat = 0 to S04 and S14 in the No answer category
# fix(pn)
pn <- rbind(pn, c("S04", 0), c("S14", 0)) 

# put them in the right order by school
pn <- pn[order(pn$School),]

# append predicted prob to the observed p-table
dta2pl$phat <- c(pn[,2], pu[,2], py[,2])

dta2pl <- dta2pl %>% 
  mutate(phat = as.numeric(phat))

# plot observed categ. prop and fitted prob. against Task
ggplot(dta2pl, aes(Task, Freq, color = Answer)) +
  geom_point(alpha = .3)+
  stat_smooth(method = "lm", se = F) +
  geom_point(aes(Task, phat, color = Answer), pch = 1)+
  stat_smooth(aes(Task, phat, color = Answer),
                  method = "lm", se = F, lty = 2, lwd = .8) +
  scale_y_continuous(limits=c(0, 1)) +
  labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
  theme(legend.position = c(.9, .5))

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

inclass2

1 data input

2 Descriptive

3 HH packge for likert scale plots

4 Likert plot

5 category response proportion and lm

6 cumulative proportion of response

7 ordinal GLMM

8 This is done because there are 0 responses in the frequency table