Teachers from a sample of 16 schools in California and Michigan were asked:
“If you could go back to college and start all over again, would you again choose teaching as a profession?”
The teachers’ perception of task variety was measured by the extent to which teachers followed the same routine each day.
Source: Raudenbush, S.W., Rowan, B., & Cheong, Y. (1993). Teaching as a non-routine task: Implications for the organizational design of schools. Educational Administrative Quarterly, 29(4), 479-500.
Column 1: Response categories: Yes, Not sure, No Column 2: Task routine Column 3: School ID
#load packages
pacman::p_load(tidyverse, HH, ordinal,likert)
#input data
dta <- read.table("C:/Users/Ching-Fang Wu/Documents/data/teach_again.txt", h=T)
#show first 8 lines
head(dta,8)
## Answer Task School
## 1 Yes -0.2642783 S01
## 2 Yes 0.5709041 S01
## 3 Yes 0.1329710 S01
## 4 Unsure -0.2642783 S01
## 5 No -1.0994610 S01
## 6 Yes 0.5302202 S01
## 7 Unsure 0.6115880 S01
## 8 Yes 0.5709041 S01
#
str(dta)
## 'data.frame': 650 obs. of 3 variables:
## $ Answer: chr "Yes" "Yes" "Yes" "Unsure" ...
## $ Task : num -0.264 0.571 0.133 -0.264 -1.099 ...
## $ School: chr "S01" "S01" "S01" "S01" ...
#各校回答No、Unsure、Yes的人數
with(dta, ftable(School, Answer))
## Answer No Unsure Yes
## School
## S01 5 4 15
## S02 9 9 20
## S03 14 10 17
## S04 0 1 8
## S05 1 5 23
## S06 9 12 18
## S07 12 4 26
## S08 7 5 18
## S09 2 8 18
## S10 21 27 69
## S11 14 12 32
## S12 15 19 18
## S13 13 6 18
## S14 0 1 7
## S15 17 11 25
## S16 13 10 22
#各校回答No、Unsure、Yes的比率
prop.table(with(dta, ftable(School, Answer)),1)
## Answer No Unsure Yes
## School
## S01 0.20833333 0.16666667 0.62500000
## S02 0.23684211 0.23684211 0.52631579
## S03 0.34146341 0.24390244 0.41463415
## S04 0.00000000 0.11111111 0.88888889
## S05 0.03448276 0.17241379 0.79310345
## S06 0.23076923 0.30769231 0.46153846
## S07 0.28571429 0.09523810 0.61904762
## S08 0.23333333 0.16666667 0.60000000
## S09 0.07142857 0.28571429 0.64285714
## S10 0.17948718 0.23076923 0.58974359
## S11 0.24137931 0.20689655 0.55172414
## S12 0.28846154 0.36538462 0.34615385
## S13 0.35135135 0.16216216 0.48648649
## S14 0.00000000 0.12500000 0.87500000
## S15 0.32075472 0.20754717 0.47169811
## S16 0.28888889 0.22222222 0.48888889
#各校Task平均數
aggregate(Task ~ School, mean, data = dta)
## School Task
## 1 S01 0.02089652
## 2 S02 -0.11779710
## 3 S03 -0.17287423
## 4 S04 0.08085690
## 5 S05 -0.14908077
## 6 S06 0.23740790
## 7 S07 -0.13334256
## 8 S08 0.18657692
## 9 S09 0.42902949
## 10 S10 0.01057772
## 11 S11 -0.15770350
## 12 S12 -0.08074803
## 13 S13 0.13885760
## 14 S14 0.89935413
## 15 S15 -0.12168721
## 16 S16 -0.01327148
Task是衡量教師每天重複相同事情的程度。S14重複程度最高(Task=0.89935413),S09次之(Task=0.42902949);最低為S03(Task=-0.17287423)。
#as.numeric將資料轉換為數值
m <- as.numeric(with(dta, table(School, Answer)))
#as.data.frame將向量轉換為資料框架
m <- as.data.frame(matrix(m, 16, 3))
#將V1、V2、V3重新命名
names(m) <- c("No", "Unsure", "Yes")
#rownames設定row的名稱
rownames(m) <- levels(dta$School) #levels()可取得因子的level
#新增變數Total,各校No+Unsure+Yes
m$tot <- apply(m, 1, sum)
#計算No及Unsure佔Total比率,由低排到高排序
m <- m[order((m[,1]+m[,2])/m[,4]), ]
#
#likert(m[, -4], as.percent = T, main="", ylab="")
無法排除錯誤訊息 Error in likert(m[, -4], as.percent = T, main = "“, ylab =”“) : unused arguments (as.percent = T, main =”“, ylab =”")
#
dtap <- prop.table(with(dta, ftable(School, Answer)), 1)
#
dtapl <- as.data.frame(dtap) %>%
mutate( Answer = factor(Answer))
#使用aggregate()函數,對dta資料中各School的Task,求算其平均數
#rep([指定的向量],[重複的次數])
dtapl$Task <- rep(aggregate(Task ~ School, mean, data = dta)[,2],3)
dtacp <- data.frame(School = levels(dtapl$School), #用levels()函式查看類別變數存在那些類別
as.data.frame(t(apply(dtap, 1, cumsum)))) #apply(資料集名稱,1表逐列,套用函數名稱)
#t是Matrix Transpose
#
dtacpl <- gather(dtacp, Answer, Prop, 2:4) %>%
mutate( Answer = factor(Answer))
#
ot <- theme_set(theme_bw())
#劃出各校三種回答的比率
ggplot(dtapl, aes(Answer, Freq, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
ylim(c(0, 1)) +
labs(x = "Answer", y = "Categorical response proprtions")
#累加機率
ggplot(dtacpl, aes(Answer, Prop, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
labs(x = "Answer", y = "Cumulative proportions")
#Task重複的比率與三類答案之間的關係
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
geom_point()+
stat_smooth(method = "lm", se=F) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.9, .5))
## `geom_smooth()` using formula 'y ~ x'
# cumulative mixed proportional odds model
dta <- dta %>%
mutate(Answer = factor(Answer))
summary(m0 <- clmm(Answer ~ Task + (1 | School), data=dta))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: Answer ~ Task + (1 | School)
## data: dta
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 650 -642.14 1292.27 177(393) 1.91e-04 6.4e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 0.09088 0.3015
## Number of groups: School 16
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Task 0.36488 0.08792 4.15 3.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## No|Unsure -1.2659 0.1301 -9.731
## Unsure|Yes -0.2169 0.1176 -1.844
#
dta_m0 <- data.frame(na.omit(dta), phat = fitted(m0))
ggplot(dta_m0, aes(Task, phat, color = Answer)) +
geom_point(alpha = .2, pch = 20)+
geom_point(data = dtapl, aes(Task, Freq, color = Answer)) +
stat_smooth(method = "lm", se=F, alpha = .2) +
stat_smooth(data = dtapl, aes(Task, Freq, color = Answer),
method = "lm", se=F, linetype = "dotted") +
scale_y_continuous(limits=(c(0, 1))) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
#
# This is done because there are 0 responses in the frequency table
pn <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Yes"))
# add phat = 0 to S04 and S14 in the No answer category
fix(pn)
# put them in the right order by school
pn <- pn[order(pn$School),]
# append predicted prob to the observed p-table
#dtapl$phat <- c(pn[,2], pu[,2], py[,2])
# plot observed categ. prop and fitted prob. against Task
# ggplot(dtapl, aes(Task, Freq, color = Answer)) +
# geom_point(alpha = .3)+
# stat_smooth(method = "lm", se = F) +
# geom_point(aes(Task, phat, color = Answer), pch = 1)+
# stat_smooth(aes(Task, phat, color = Answer),
# method = "lm", se = F, lty = 2, lwd = .8) +
# scale_y_continuous(limits=c(0, 1)) +
# labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
# theme(legend.position = c(.9, .5))