Source code and equations are published with permission from Prof. Ohtomo, Prof. Nakamura, and Mr. Akiyama.

For more details, refer to 中村(2002).『テストで言語能力は測れるか—言語テストデータ分析入門—』大友（監）桐原書店.

1 Installation

install.packages("CTT")
install.packages("psych")
install.packages("entropy")
install.packages("knitr")
install.packages("DT")

library(CTT)
library(psych)

## Warning: package 'psych' was built under R version 3.3.2

library(entropy)
library(knitr)
library(DT)

1.1 Import Data

# Import Data
## Item
score <- read.csv("http://lang-tech.net/doc/sample.score.csv", header = TRUE, sep = ",")

## Answer key
key <- read.csv("http://lang-tech.net/doc/sample.key.csv", header = TRUE, sep =",")
key <- as.matrix(key)

1.2 Scoring

myScore <- score(score, key, output.scored=TRUE)

2 Descriptive statistics

基礎統計量

result <- describe(myScore$score)
result[,-1]

##      n mean sd median trimmed  mad min max range skew kurtosis   se
## X1 241 9.44  3      9    9.34 2.97   1  19    18 0.26    -0.14 0.19

3 Reliability

信頼性

alpha(myScore$scored, check.keys = FALSE)

## Some items ( V10 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option

## 
## Reliability analysis   
## Call: alpha(x = myScore$scored, check.keys = FALSE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.54      0.53    0.57     0.054 1.1 0.042 0.47 0.15
## 
##  lower alpha upper     95% confidence boundaries
## 0.46 0.54 0.62 
## 
##  Reliability if an item is dropped:
##     raw_alpha std.alpha G6(smc) average_r  S/N alpha se
## V1       0.54      0.53    0.57     0.056 1.12    0.043
## V2       0.54      0.53    0.57     0.056 1.13    0.043
## V3       0.55      0.54    0.58     0.058 1.18    0.042
## V4       0.55      0.54    0.58     0.058 1.17    0.042
## V5       0.54      0.53    0.57     0.056 1.13    0.043
## V6       0.49      0.49    0.53     0.047 0.94    0.047
## V7       0.54      0.54    0.57     0.058 1.16    0.042
## V8       0.54      0.53    0.57     0.056 1.13    0.043
## V9       0.53      0.52    0.56     0.055 1.10    0.043
## V10      0.56      0.56    0.59     0.062 1.25    0.040
## V11      0.53      0.52    0.56     0.054 1.09    0.044
## V12      0.53      0.53    0.57     0.055 1.12    0.043
## V13      0.52      0.51    0.55     0.052 1.04    0.045
## V14      0.52      0.51    0.55     0.053 1.06    0.044
## V15      0.52      0.52    0.56     0.053 1.07    0.044
## V16      0.50      0.49    0.53     0.049 0.97    0.046
## V17      0.47      0.47    0.50     0.044 0.88    0.049
## V18      0.49      0.48    0.52     0.047 0.93    0.048
## V19      0.54      0.53    0.57     0.057 1.14    0.043
## V20      0.54      0.53    0.57     0.056 1.13    0.043
## 
##  Item statistics 
##       n raw.r std.r  r.cor r.drop mean   sd
## V1  241  0.25  0.27  0.171  0.115 0.76 0.43
## V2  241  0.25  0.26  0.153  0.104 0.73 0.44
## V3  241  0.20  0.21  0.078  0.051 0.26 0.44
## V4  241  0.21  0.21  0.087  0.059 0.68 0.47
## V5  241  0.27  0.27  0.161  0.111 0.37 0.48
## V6  241  0.51  0.50  0.497  0.366 0.54 0.50
## V7  241  0.22  0.22  0.106  0.070 0.28 0.45
## V8  241  0.27  0.27  0.163  0.111 0.48 0.50
## V9  241  0.27  0.30  0.212  0.152 0.83 0.38
## V10 241  0.12  0.11 -0.044 -0.043 0.44 0.50
## V11 241  0.32  0.32  0.231  0.170 0.34 0.47
## V12 241  0.29  0.28  0.183  0.128 0.56 0.50
## V13 241  0.38  0.37  0.317  0.231 0.39 0.49
## V14 241  0.37  0.35  0.292  0.213 0.45 0.50
## V15 241  0.34  0.34  0.270  0.195 0.31 0.46
## V16 241  0.47  0.47  0.455  0.335 0.64 0.48
## V17 241  0.59  0.58  0.636  0.461 0.44 0.50
## V18 241  0.53  0.52  0.546  0.401 0.33 0.47
## V19 241  0.22  0.25  0.144  0.096 0.19 0.39
## V20 241  0.27  0.26  0.164  0.105 0.44 0.50
## 
## Non missing response frequency for each item
##          0    1 miss
##  [1,] 0.24 0.76    0
##  [2,] 0.27 0.73    0
##  [3,] 0.74 0.26    0
##  [4,] 0.32 0.68    0
##  [5,] 0.63 0.37    0
##  [6,] 0.46 0.54    0
##  [7,] 0.72 0.28    0
##  [8,] 0.52 0.48    0
##  [9,] 0.17 0.83    0
## [10,] 0.56 0.44    0
## [11,] 0.66 0.34    0
## [12,] 0.44 0.56    0
## [13,] 0.61 0.39    0
## [14,] 0.55 0.45    0
## [15,] 0.69 0.31    0
## [16,] 0.36 0.64    0
## [17,] 0.56 0.44    0
## [18,] 0.67 0.33    0
## [19,] 0.81 0.19    0
## [20,] 0.56 0.44    0

4 Z-score, T-score, Five point scale, Stanine scale

Z-得点, 偏差値, 5段階スケール, ９段階スケール

#Z-score
zscore <- scale(rowSums(myScore$scored), center = TRUE, scale = TRUE)

#T-score
tscore <- zscore*10+50

#five point scale
five.point.scale <- vector("numeric")
i <- 1
for (i in 1:length(myScore$score)){
  five.point.scale[i] <- round(zscore[i]+3, 0)
  if (five.point.scale[i] <= 0){
    five.point.scale[i] <- 1}
  if (five.point.scale[i] >= 6){
    five.point.scale[i] <- 5
  }
}

#stanine scale
stanine.scale <- vector("numeric")
i <- 1
for (i in 1:length(myScore$score)){
  stanine.scale[i] <- round(1.96*zscore[i]+5, 0)
  if (stanine.scale[i] <= 0){
    stanine.scale[i] <- 1}
  if (stanine.scale[i] >= 10){
    stanine.scale[i] <- 9
  }
}

#generate the table

p.id <- 1:length(myScore$score)
list1 <- data.frame(p.id, myScore$score, zscore, tscore, five.point.scale, stanine.scale)
datatable(round(list1,3), colnames = c("ID", "Score", "Z-score", "T-score", "Five.s", "Stanine.s"), rownames = FALSE)

5 Item analysis

5.1 DIFF: Item difficulty index

項目困難度

diff <- colSums(myScore$scored)/nrow(score)
data.matrix(round(diff, 3))

##        [,1]
##  [1,] 0.759
##  [2,] 0.734
##  [3,] 0.257
##  [4,] 0.680
##  [5,] 0.365
##  [6,] 0.539
##  [7,] 0.278
##  [8,] 0.481
##  [9,] 0.830
## [10,] 0.436
## [11,] 0.336
## [12,] 0.560
## [13,] 0.390
## [14,] 0.448
## [15,] 0.311
## [16,] 0.639
## [17,] 0.436
## [18,] 0.332
## [19,] 0.191
## [20,] 0.436

5.2 pU-pL: Item discrimination power index by Upper(27%) - Lower(27%)

上位下位項目弁別力指数

list2 <- cbind(myScore$scored, myScore$score)
sort.list <- list2[order(list2[,ncol(list2)], decreasing = TRUE),]
num <- round(length(myScore$score)*0.27, 0) #number of upper and lowwer groups
tU <- head(sort.list, n = num) #upper group scores
tL <- tail(sort.list, n = num) #lower group scores

ul <- vector("numeric")
i <- 1
for (i in 1:ncol(myScore$scored)){
  pU <- sum(tU[,i]/nrow(tU))
  pL <- sum(tL[,i]/nrow(tL))
  ul[i] <- round(pU-pL, 3)
}

5.3 DISC: Item discrimination power index by point biserial correlation coefficient

点双列相関指数による項目弁別力指数

disc <- vector("numeric")
i <- 1
for (i in 1:ncol(myScore$scored)){
  gU <- subset(myScore$scored, myScore$scored[,i] == 1)
  gL <- subset(myScore$scored, myScore$scored[,i] == 0 )
  disc[i] <- round((sum(gU)/nrow(gU) - sum(gL)/nrow(gL)) / sd(myScore$score) *
                     sqrt(diff[i] * (1-diff[i])), 3)
}

5.4 AENO: Actual equivalent number of options

実質選択肢数

aeno <- vector("numeric")
i <- 1
for (i in 1:ncol(score)){
  per <- table(score[,i]) / nrow(score)
  aeno[i] <- round(2^entropy.empirical(per, unit="log2"), 3)
}

5.5 ADIF: Appropriateness of difficulty

項目困難度適切度

adif <- (1 - abs(((0.5 + 0.5 * (1 / nrow(summary(score))))-diff) * 2))

5.6 ADIS: Appropriateness of discrimination power index

項目弁別力適切度

adis <- (disc^2) / (1 - (disc^2))

5.7 AAEN: Appropriatenss of actual equivalent number of options

実質選択肢数適切度

5.7.1 Ideal entropy

最適平均情報量

q <- -(diff * (log10(diff)/log10(2)) + (nrow(summary(score))-1) * 
         (((1-diff) / (nrow(summary(score))-1))) * (log10((1-diff) / 
         (nrow(summary(score))-1)) / log10(2)))

5.7.2 Ideal actual equivalent number of options

最適実質選択肢数

pamc <- 2^q

5.7.3 AAEN

実質選択肢数適切度

aaen <- (aeno-1) / (pamc-1)

5.8 SADIF: Standard appropriateness of difficulty

標準項目困難度適切度

zadif <- (adif-mean(adif))/sd(adif)
sadif <- zadif*0.1+0.5

5.9 SADIS: Standard appropriateness of discrimination power index

標準項目弁別力適切度

zadis <- (adis-mean(adis))/sd(adis)
sadis <- zadis*0.1+0.5

5.10 SAAEN: Standard appropriatenss of actual equivalent number of options

標準実質選択肢数適切度

zaaen <- (aaen-mean(aaen))/sd(aaen)
saaen <- zaaen*0.1+0.5

5.11 SATOT: Standard appropriateness total

標準適切度の合計

satot <- sadif + sadis + saaen

5.12 RANK: Rank in SATOT order

標準適切度の一覧

i.id <- 1:ncol(myScore$scored)
list3 <- data.frame(i.id, diff, ul,disc, aeno, adif, adis, aaen, sadif, sadis, saaen, satot)
datatable(round(list3,3), colnames = c("ID", "DIFF", "pU-pL", "DISC", "AENO", "ADIF",
                                       "ADIS", "AAEN", "SADIF", "SADIS", "SAAEN", "SATOT"), rownames = FALSE)

Classical Test Theory

sei sumi

3/6/2017