1 Introduction

Teachers from a sample of 16 schools in California and Michigan were asked:

“If you could go back to college and start all over again, would you again choose teaching as a profession?”

The teachers’ perception of task variety was measured by the extent to which teachers followed the same routine each day.

Source: Raudenbush, S.W., Rowan, B., & Cheong, Y. (1993). Teaching as a non-routine task: Implications for the organizational design of schools. Educational Administrative Quarterly, 29(4), 479-500.

Column 1: Response categories: Yes, Not sure, No Column 2: Task routine Column 3: School ID

2 Data management

#load packages
pacman::p_load(tidyverse, HH, ordinal,likert)

#input data
dta <- read.table("C:/Users/Ching-Fang Wu/Documents/data/teach_again.txt", h=T)

#show first 8 lines
head(dta,8)

##   Answer       Task School
## 1    Yes -0.2642783    S01
## 2    Yes  0.5709041    S01
## 3    Yes  0.1329710    S01
## 4 Unsure -0.2642783    S01
## 5     No -1.0994610    S01
## 6    Yes  0.5302202    S01
## 7 Unsure  0.6115880    S01
## 8    Yes  0.5709041    S01

#
str(dta)

## 'data.frame':    650 obs. of  3 variables:
##  $ Answer: chr  "Yes" "Yes" "Yes" "Unsure" ...
##  $ Task  : num  -0.264 0.571 0.133 -0.264 -1.099 ...
##  $ School: chr  "S01" "S01" "S01" "S01" ...

3 numerical summary

#各校回答No、Unsure、Yes的人數
with(dta, ftable(School, Answer))

##        Answer No Unsure Yes
## School                     
## S01            5      4  15
## S02            9      9  20
## S03           14     10  17
## S04            0      1   8
## S05            1      5  23
## S06            9     12  18
## S07           12      4  26
## S08            7      5  18
## S09            2      8  18
## S10           21     27  69
## S11           14     12  32
## S12           15     19  18
## S13           13      6  18
## S14            0      1   7
## S15           17     11  25
## S16           13     10  22

#各校回答No、Unsure、Yes的比率
prop.table(with(dta, ftable(School, Answer)),1)

##        Answer         No     Unsure        Yes
## School                                        
## S01           0.20833333 0.16666667 0.62500000
## S02           0.23684211 0.23684211 0.52631579
## S03           0.34146341 0.24390244 0.41463415
## S04           0.00000000 0.11111111 0.88888889
## S05           0.03448276 0.17241379 0.79310345
## S06           0.23076923 0.30769231 0.46153846
## S07           0.28571429 0.09523810 0.61904762
## S08           0.23333333 0.16666667 0.60000000
## S09           0.07142857 0.28571429 0.64285714
## S10           0.17948718 0.23076923 0.58974359
## S11           0.24137931 0.20689655 0.55172414
## S12           0.28846154 0.36538462 0.34615385
## S13           0.35135135 0.16216216 0.48648649
## S14           0.00000000 0.12500000 0.87500000
## S15           0.32075472 0.20754717 0.47169811
## S16           0.28888889 0.22222222 0.48888889

#各校Task平均數
aggregate(Task ~ School, mean, data = dta)

##    School        Task
## 1     S01  0.02089652
## 2     S02 -0.11779710
## 3     S03 -0.17287423
## 4     S04  0.08085690
## 5     S05 -0.14908077
## 6     S06  0.23740790
## 7     S07 -0.13334256
## 8     S08  0.18657692
## 9     S09  0.42902949
## 10    S10  0.01057772
## 11    S11 -0.15770350
## 12    S12 -0.08074803
## 13    S13  0.13885760
## 14    S14  0.89935413
## 15    S15 -0.12168721
## 16    S16 -0.01327148

Task是衡量教師每天重複相同事情的程度。S14重複程度最高(Task=0.89935413)，S09次之(Task=0.42902949)；最低為S03(Task=-0.17287423)。

4 HH for likert scale plots

#as.numeric將資料轉換為數值
m <- as.numeric(with(dta, table(School, Answer)))

#as.data.frame將向量轉換為資料框架
m <- as.data.frame(matrix(m, 16, 3))

#將V1、V2、V3重新命名
names(m) <- c("No", "Unsure", "Yes")

#rownames設定row的名稱
rownames(m) <- levels(dta$School) #levels()可取得因子的level

#新增變數Total，各校No+Unsure+Yes
m$tot <- apply(m, 1, sum)

#計算No及Unsure佔Total比率，由低排到高排序
m <- m[order((m[,1]+m[,2])/m[,4]), ]

#
#likert(m[, -4], as.percent = T, main="", ylab="")

無法排除錯誤訊息 Error in likert(m[, -4], as.percent = T, main = "“, ylab =”“) : unused arguments (as.percent = T, main =”“, ylab =”")

#
dtap <- prop.table(with(dta, ftable(School, Answer)), 1)

5 Data Visualization

#
dtapl <- as.data.frame(dtap) %>%
         mutate( Answer = factor(Answer))

#使用aggregate()函數，對dta資料中各School的Task，求算其平均數
#rep([指定的向量],[重複的次數]) 
dtapl$Task <- rep(aggregate(Task ~ School, mean, data = dta)[,2],3)

dtacp <- data.frame(School = levels(dtapl$School), #用levels()函式查看類別變數存在那些類別
                   as.data.frame(t(apply(dtap, 1, cumsum)))) #apply(資料集名稱，1表逐列，套用函數名稱)
#t是Matrix Transpose

#
dtacpl <- gather(dtacp, Answer, Prop, 2:4) %>%
          mutate( Answer = factor(Answer))

#
ot <- theme_set(theme_bw())

#劃出各校三種回答的比率
ggplot(dtapl, aes(Answer, Freq, group = School)) +
 geom_point(alpha = .5)+
 geom_line(alpha = .5) +
 ylim(c(0, 1)) +
 labs(x = "Answer", y = "Categorical response proprtions")

#累加機率
ggplot(dtacpl, aes(Answer, Prop, group = School)) +
 geom_point(alpha = .5)+
 geom_line(alpha = .5) +
 labs(x = "Answer", y = "Cumulative proportions")

#Task重複的比率與三類答案之間的關係
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
 geom_point()+
 stat_smooth(method = "lm", se=F) +
 scale_y_continuous(limits=c(0, 1)) +
 labs(x = "Task", y = "Categorical response proprtions") +
 theme(legend.position = c(.9, .5))

## `geom_smooth()` using formula 'y ~ x'

6 ordinal package

# cumulative mixed proportional odds model
dta <- dta %>%
  mutate(Answer = factor(Answer))

summary(m0 <- clmm(Answer ~ Task + (1 | School), data=dta))

## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: Answer ~ Task + (1 | School)
## data:    dta
## 
##  link  threshold nobs logLik  AIC     niter    max.grad cond.H 
##  logit flexible  650  -642.14 1292.27 177(393) 1.91e-04 6.4e+01
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  School (Intercept) 0.09088  0.3015  
## Number of groups:  School 16 
## 
## Coefficients:
##      Estimate Std. Error z value Pr(>|z|)    
## Task  0.36488    0.08792    4.15 3.32e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## No|Unsure   -1.2659     0.1301  -9.731
## Unsure|Yes  -0.2169     0.1176  -1.844

#
dta_m0 <- data.frame(na.omit(dta), phat = fitted(m0))

ggplot(dta_m0, aes(Task, phat, color = Answer)) +
 geom_point(alpha = .2, pch = 20)+
 geom_point(data = dtapl, aes(Task, Freq, color = Answer)) +
 stat_smooth(method = "lm", se=F, alpha = .2) +
 stat_smooth(data = dtapl, aes(Task, Freq, color = Answer),
             method = "lm", se=F, linetype = "dotted") +
 scale_y_continuous(limits=(c(0, 1))) +
 labs(x = "Task", y = "Categorical response proprtions") +
 theme(legend.position = c(.1, .8))

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#
# This is done because there are 0 responses in the frequency table
pn <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Yes"))

# add phat = 0 to S04 and S14 in the No answer category
fix(pn)

# put them in the right order by school
pn <- pn[order(pn$School),]

# append predicted prob to the observed p-table
#dtapl$phat <- c(pn[,2], pu[,2], py[,2])

# plot observed categ. prop and fitted prob. against Task
# ggplot(dtapl, aes(Task, Freq, color = Answer)) +
 # geom_point(alpha = .3)+
 # stat_smooth(method = "lm", se = F) +
 # geom_point(aes(Task, phat, color = Answer), pch = 1)+
 # stat_smooth(aes(Task, phat, color = Answer),
  #               method = "lm", se = F, lty = 2, lwd = .8) +
 # scale_y_continuous(limits=c(0, 1)) +
 # labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
 # theme(legend.position = c(.9, .5))

Week11 in-class exercise2：mixed-effects cumulative logit model (proportional odds)

Ching-Fang Wu

Sun Nov 29 17:06:38 2020