dtaCE2 <- read.table("C:/Users/ASUS/Desktop/data/teach_again.txt", h=T)
head(dtaCE2)
## Answer Task School
## 1 Yes -0.2642783 S01
## 2 Yes 0.5709041 S01
## 3 Yes 0.1329710 S01
## 4 Unsure -0.2642783 S01
## 5 No -1.0994610 S01
## 6 Yes 0.5302202 S01
with(dtaCE2, ftable(School, Answer))
## Answer No Unsure Yes
## School
## S01 5 4 15
## S02 9 9 20
## S03 14 10 17
## S04 0 1 8
## S05 1 5 23
## S06 9 12 18
## S07 12 4 26
## S08 7 5 18
## S09 2 8 18
## S10 21 27 69
## S11 14 12 32
## S12 15 19 18
## S13 13 6 18
## S14 0 1 7
## S15 17 11 25
## S16 13 10 22
prop.table(with(dtaCE2, ftable(School, Answer)),1)
## Answer No Unsure Yes
## School
## S01 0.20833333 0.16666667 0.62500000
## S02 0.23684211 0.23684211 0.52631579
## S03 0.34146341 0.24390244 0.41463415
## S04 0.00000000 0.11111111 0.88888889
## S05 0.03448276 0.17241379 0.79310345
## S06 0.23076923 0.30769231 0.46153846
## S07 0.28571429 0.09523810 0.61904762
## S08 0.23333333 0.16666667 0.60000000
## S09 0.07142857 0.28571429 0.64285714
## S10 0.17948718 0.23076923 0.58974359
## S11 0.24137931 0.20689655 0.55172414
## S12 0.28846154 0.36538462 0.34615385
## S13 0.35135135 0.16216216 0.48648649
## S14 0.00000000 0.12500000 0.87500000
## S15 0.32075472 0.20754717 0.47169811
## S16 0.28888889 0.22222222 0.48888889
dta_a<-aggregate(Task ~ School, mean, data = dtaCE2)
dta_a
## School Task
## 1 S01 0.02089652
## 2 S02 -0.11779710
## 3 S03 -0.17287423
## 4 S04 0.08085690
## 5 S05 -0.14908077
## 6 S06 0.23740790
## 7 S07 -0.13334256
## 8 S08 0.18657692
## 9 S09 0.42902949
## 10 S10 0.01057772
## 11 S11 -0.15770350
## 12 S12 -0.08074803
## 13 S13 0.13885760
## 14 S14 0.89935413
## 15 S15 -0.12168721
## 16 S16 -0.01327148
library(HH)
## Loading required package: lattice
## Loading required package: grid
## Loading required package: latticeExtra
## Loading required package: multcomp
## Warning: package 'multcomp' was built under R version 4.0.3
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 4.0.3
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Loading required package: gridExtra
m <- as.numeric(with(dtaCE2, table(School, Answer)))
m <- as.data.frame(matrix(m, 16, 3))
names(m) <- c("No", "Unsure", "Yes")
rownames(m) <- levels(dtaCE2$School)
m$tot <- apply(m, 1, sum)
m <- m[order((m[,1]+m[,2])/m[,4]), ]
likert(m[, -4], as.percent = T, main="", ylab="")
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dtap <- prop.table(with(dtaCE2, ftable(School, Answer)), 1)
dtapl <- as.data.frame(dtap) %>%
mutate(Answer = factor(Answer))
head(dtapl)
## School Answer Freq
## 1 S01 No 0.20833333
## 2 S02 No 0.23684211
## 3 S03 No 0.34146341
## 4 S04 No 0.00000000
## 5 S05 No 0.03448276
## 6 S06 No 0.23076923
#make a cumsum data frame
dtapl$Task <- rep(aggregate(Task ~ School, mean, data = dtaCE2)[,2],3)
dtacp <- data.frame(School = levels(dtapl$School),
as.data.frame(t(apply(dtap, 1, cumsum))))
# transform to long form
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
dtacpl <- gather(dtacp, Answer, Prop, 2:4) %>%
mutate( Answer = factor(Answer))
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
##
## layer
ggplot(dtapl, aes(Answer, Freq, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
ylim(c(0, 1)) +
labs(x = "Answer", y = "Categorical response proprtions")
ggplot(dtacpl, aes(Answer, Prop, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
labs(x = "Answer", y = "Cumulative proportions")
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
geom_point()+
stat_smooth(method = "lm", se=F) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.9, .5))
## `geom_smooth()` using formula 'y ~ x'
# Analysis
cumulative mixed proportional odds model
# ordinal package
library(ordinal)
## Warning: package 'ordinal' was built under R version 4.0.3
##
## Attaching package: 'ordinal'
## The following object is masked from 'package:dplyr':
##
## slice
summary(m0 <- clmm(factor(Answer) ~ Task + (1 | School), data=dtaCE2))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: factor(Answer) ~ Task + (1 | School)
## data: dtaCE2
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 650 -642.14 1292.27 177(393) 1.91e-04 6.4e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 0.09088 0.3015
## Number of groups: School 16
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Task 0.36488 0.08792 4.15 3.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## No|Unsure -1.2659 0.1301 -9.731
## Unsure|Yes -0.2169 0.1176 -1.844
dta2_m0<- data.frame(dtaCE2, phat = fitted(m0))
ggplot(dta2_m0, aes(Task, phat, color = Answer)) +
geom_point(alpha = .2, pch = 20)+
geom_point(data = dtapl, aes(Task, Freq, color = Answer)) +
stat_smooth(method = "lm", se=F, alpha = .2) +
stat_smooth(data = dtapl, aes(Task, Freq, color = Answer),
method = "lm", se=F, linetype = "dotted") +
scale_y_continuous(limits=(c(0, 1))) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# This is done because there are 0 responses in the frequency table
pn <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Yes"))
# add phat = 0 to S04 and S14 in the No answer category
#fix(pn)
pn <- rbind(pn, c("S04", 0), c("S14", 0))
# put them in the right order by school
pn <- pn[order(pn$School),]
# append predicted prob to the observed p-table
dtapl$phat <- c(pn[,2], pu[,2], py[,2])
dtapl$phat<- as.numeric(dtapl$phat)
# plot observed categ. prop and fitted prob. against Task
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
geom_point(alpha = .3)+
stat_smooth(method = "lm", se = F) +
geom_point(aes(Task, phat, color = Answer), pch = 1)+
stat_smooth(aes(Task, phat, color = Answer),
method = "lm", se = F, lty = 2, lwd = .8) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
theme(legend.position = c(.9, .5))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
The End