Source code and equations are published with permission from Prof. Ohtomo, Prof. Nakamura, and Mr. Akiyama.
For more details, refer to 中村(2002).『テストで言語能力は測れるか—言語テストデータ分析入門—』大友(監)桐原書店.
install.packages("CTT")
install.packages("psych")
install.packages("entropy")
install.packages("knitr")
install.packages("DT")
library(CTT)
library(psych)
## Warning: package 'psych' was built under R version 3.3.2
library(entropy)
library(knitr)
library(DT)
# Import Data
## Item
score <- read.csv("http://lang-tech.net/doc/sample.score.csv", header = TRUE, sep = ",")
## Answer key
key <- read.csv("http://lang-tech.net/doc/sample.key.csv", header = TRUE, sep =",")
key <- as.matrix(key)
myScore <- score(score, key, output.scored=TRUE)
基礎統計量
result <- describe(myScore$score)
result[,-1]
## n mean sd median trimmed mad min max range skew kurtosis se
## X1 241 9.44 3 9 9.34 2.97 1 19 18 0.26 -0.14 0.19
信頼性
alpha(myScore$scored, check.keys = FALSE)
## Some items ( V10 ) were negatively correlated with the total scale and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: alpha(x = myScore$scored, check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.54 0.53 0.57 0.054 1.1 0.042 0.47 0.15
##
## lower alpha upper 95% confidence boundaries
## 0.46 0.54 0.62
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## V1 0.54 0.53 0.57 0.056 1.12 0.043
## V2 0.54 0.53 0.57 0.056 1.13 0.043
## V3 0.55 0.54 0.58 0.058 1.18 0.042
## V4 0.55 0.54 0.58 0.058 1.17 0.042
## V5 0.54 0.53 0.57 0.056 1.13 0.043
## V6 0.49 0.49 0.53 0.047 0.94 0.047
## V7 0.54 0.54 0.57 0.058 1.16 0.042
## V8 0.54 0.53 0.57 0.056 1.13 0.043
## V9 0.53 0.52 0.56 0.055 1.10 0.043
## V10 0.56 0.56 0.59 0.062 1.25 0.040
## V11 0.53 0.52 0.56 0.054 1.09 0.044
## V12 0.53 0.53 0.57 0.055 1.12 0.043
## V13 0.52 0.51 0.55 0.052 1.04 0.045
## V14 0.52 0.51 0.55 0.053 1.06 0.044
## V15 0.52 0.52 0.56 0.053 1.07 0.044
## V16 0.50 0.49 0.53 0.049 0.97 0.046
## V17 0.47 0.47 0.50 0.044 0.88 0.049
## V18 0.49 0.48 0.52 0.047 0.93 0.048
## V19 0.54 0.53 0.57 0.057 1.14 0.043
## V20 0.54 0.53 0.57 0.056 1.13 0.043
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## V1 241 0.25 0.27 0.171 0.115 0.76 0.43
## V2 241 0.25 0.26 0.153 0.104 0.73 0.44
## V3 241 0.20 0.21 0.078 0.051 0.26 0.44
## V4 241 0.21 0.21 0.087 0.059 0.68 0.47
## V5 241 0.27 0.27 0.161 0.111 0.37 0.48
## V6 241 0.51 0.50 0.497 0.366 0.54 0.50
## V7 241 0.22 0.22 0.106 0.070 0.28 0.45
## V8 241 0.27 0.27 0.163 0.111 0.48 0.50
## V9 241 0.27 0.30 0.212 0.152 0.83 0.38
## V10 241 0.12 0.11 -0.044 -0.043 0.44 0.50
## V11 241 0.32 0.32 0.231 0.170 0.34 0.47
## V12 241 0.29 0.28 0.183 0.128 0.56 0.50
## V13 241 0.38 0.37 0.317 0.231 0.39 0.49
## V14 241 0.37 0.35 0.292 0.213 0.45 0.50
## V15 241 0.34 0.34 0.270 0.195 0.31 0.46
## V16 241 0.47 0.47 0.455 0.335 0.64 0.48
## V17 241 0.59 0.58 0.636 0.461 0.44 0.50
## V18 241 0.53 0.52 0.546 0.401 0.33 0.47
## V19 241 0.22 0.25 0.144 0.096 0.19 0.39
## V20 241 0.27 0.26 0.164 0.105 0.44 0.50
##
## Non missing response frequency for each item
## 0 1 miss
## [1,] 0.24 0.76 0
## [2,] 0.27 0.73 0
## [3,] 0.74 0.26 0
## [4,] 0.32 0.68 0
## [5,] 0.63 0.37 0
## [6,] 0.46 0.54 0
## [7,] 0.72 0.28 0
## [8,] 0.52 0.48 0
## [9,] 0.17 0.83 0
## [10,] 0.56 0.44 0
## [11,] 0.66 0.34 0
## [12,] 0.44 0.56 0
## [13,] 0.61 0.39 0
## [14,] 0.55 0.45 0
## [15,] 0.69 0.31 0
## [16,] 0.36 0.64 0
## [17,] 0.56 0.44 0
## [18,] 0.67 0.33 0
## [19,] 0.81 0.19 0
## [20,] 0.56 0.44 0
Z-得点, 偏差値, 5段階スケール, 9段階スケール
#Z-score
zscore <- scale(rowSums(myScore$scored), center = TRUE, scale = TRUE)
#T-score
tscore <- zscore*10+50
#five point scale
five.point.scale <- vector("numeric")
i <- 1
for (i in 1:length(myScore$score)){
five.point.scale[i] <- round(zscore[i]+3, 0)
if (five.point.scale[i] <= 0){
five.point.scale[i] <- 1}
if (five.point.scale[i] >= 6){
five.point.scale[i] <- 5
}
}
#stanine scale
stanine.scale <- vector("numeric")
i <- 1
for (i in 1:length(myScore$score)){
stanine.scale[i] <- round(1.96*zscore[i]+5, 0)
if (stanine.scale[i] <= 0){
stanine.scale[i] <- 1}
if (stanine.scale[i] >= 10){
stanine.scale[i] <- 9
}
}
#generate the table
p.id <- 1:length(myScore$score)
list1 <- data.frame(p.id, myScore$score, zscore, tscore, five.point.scale, stanine.scale)
datatable(round(list1,3), colnames = c("ID", "Score", "Z-score", "T-score", "Five.s", "Stanine.s"), rownames = FALSE)
項目困難度
diff <- colSums(myScore$scored)/nrow(score)
data.matrix(round(diff, 3))
## [,1]
## [1,] 0.759
## [2,] 0.734
## [3,] 0.257
## [4,] 0.680
## [5,] 0.365
## [6,] 0.539
## [7,] 0.278
## [8,] 0.481
## [9,] 0.830
## [10,] 0.436
## [11,] 0.336
## [12,] 0.560
## [13,] 0.390
## [14,] 0.448
## [15,] 0.311
## [16,] 0.639
## [17,] 0.436
## [18,] 0.332
## [19,] 0.191
## [20,] 0.436
上位下位項目弁別力指数
list2 <- cbind(myScore$scored, myScore$score)
sort.list <- list2[order(list2[,ncol(list2)], decreasing = TRUE),]
num <- round(length(myScore$score)*0.27, 0) #number of upper and lowwer groups
tU <- head(sort.list, n = num) #upper group scores
tL <- tail(sort.list, n = num) #lower group scores
ul <- vector("numeric")
i <- 1
for (i in 1:ncol(myScore$scored)){
pU <- sum(tU[,i]/nrow(tU))
pL <- sum(tL[,i]/nrow(tL))
ul[i] <- round(pU-pL, 3)
}
点双列相関指数による項目弁別力指数
disc <- vector("numeric")
i <- 1
for (i in 1:ncol(myScore$scored)){
gU <- subset(myScore$scored, myScore$scored[,i] == 1)
gL <- subset(myScore$scored, myScore$scored[,i] == 0 )
disc[i] <- round((sum(gU)/nrow(gU) - sum(gL)/nrow(gL)) / sd(myScore$score) *
sqrt(diff[i] * (1-diff[i])), 3)
}
実質選択肢数
aeno <- vector("numeric")
i <- 1
for (i in 1:ncol(score)){
per <- table(score[,i]) / nrow(score)
aeno[i] <- round(2^entropy.empirical(per, unit="log2"), 3)
}
項目困難度適切度
adif <- (1 - abs(((0.5 + 0.5 * (1 / nrow(summary(score))))-diff) * 2))
項目弁別力適切度
adis <- (disc^2) / (1 - (disc^2))
実質選択肢数適切度
最適平均情報量
q <- -(diff * (log10(diff)/log10(2)) + (nrow(summary(score))-1) *
(((1-diff) / (nrow(summary(score))-1))) * (log10((1-diff) /
(nrow(summary(score))-1)) / log10(2)))
最適実質選択肢数
pamc <- 2^q
実質選択肢数適切度
aaen <- (aeno-1) / (pamc-1)
標準項目困難度適切度
zadif <- (adif-mean(adif))/sd(adif)
sadif <- zadif*0.1+0.5
標準項目弁別力適切度
zadis <- (adis-mean(adis))/sd(adis)
sadis <- zadis*0.1+0.5
標準実質選択肢数適切度
zaaen <- (aaen-mean(aaen))/sd(aaen)
saaen <- zaaen*0.1+0.5
標準適切度の合計
satot <- sadif + sadis + saaen
標準適切度の一覧
i.id <- 1:ncol(myScore$scored)
list3 <- data.frame(i.id, diff, ul,disc, aeno, adif, adis, aaen, sadif, sadis, saaen, satot)
datatable(round(list3,3), colnames = c("ID", "DIFF", "pU-pL", "DISC", "AENO", "ADIF",
"ADIS", "AAEN", "SADIF", "SADIS", "SAAEN", "SATOT"), rownames = FALSE)