simlation setting
# n students and n graders
n = 500
#every one graded by p people
p = 10
#outcome category number (e.g. 1-5)
k = 5
# parameters to be estimated
ability = runif(n,0,5)
bias = runif(n,-1,1)
reliable = runif(n,0,1) # or gamma
true_value <- list(ability=ability,
bias=bias,
reliable = reliable)
Student_matrix = matrix(0,n,p)
Grader_matrix = matrix(0,n,p)
Obs_Score = matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
Student_matrix[ii,jj] <- ii
}
}
for(jj in 1:p){
Grader_matrix[,jj] <- sample(1:n,n)
}
#Grader_matrix <- matrix(c(2,1,1,3,3,2),3,2)
#ab = ability; bi = bias, re = reliability, k = catefory of possible outcome
calc_prob<-function(ab,bi,re,k){
tmp<- exp( -((1:k) - (ab + bi))^2 * re )
tmp / sum(tmp)
}
### generate obs data
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
tmp<- rmultinom(1, 1, calc_prob(ability[id_student],
bias[id_grader],
reliable[id_grader],k) )
Obs_Score[ii,jj] = which(tmp==1)
}
}
### calc derivative
d_function<-function(ab,bi,re,k,obs_s){
# ab<-ability[ii]
# bi<-bias[jj]
# re<-reliable[jj]
# k
# obs_s<-Obs_Score[ii,jj]
tmp0 <- -((1:k) - (ab + bi))^2 * re
d1_tmp0 <- ((1:k) - (ab + bi))*2 * re
d2_tmp0 <- ((1:k) - (ab + bi))^2 * (-1)
tmp1<- exp( tmp0 )
d1_tmp1<- exp( tmp0 ) * ((1:k) - (ab + bi))*2 * re
d2_tmp1<- exp( tmp0 ) * ((1:k) - (ab + bi))^2 * (-1)
return( c(d1_tmp0[obs_s] - sum(d1_tmp1) / sum(tmp1),
d2_tmp0[obs_s] - sum(d2_tmp1) / sum(tmp1) ) )
}
calc_likelihood<-function(ab,bi,re,k,obs_s){
tmp0 <- -((1:k) - (ab + bi))^2 * re
tmp1<- exp( tmp0 )
return( tmp0[obs_s] - log(sum(tmp1)) )
}
calc_overall_likelihood<-function(){
Likely <- matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
Likely[ii,jj]<- calc_likelihood(ability[id_student],bias[id_grader],
reliable[id_grader],k,Obs_Score[ii,jj])
}
}
return(sum(Likely))
}
Calc_derivative <- function(){
d_matrix_1 <- matrix(0,n,p)
d_matrix_2 <- matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
res<- d_function(ability[id_student],bias[id_grader],
reliable[id_grader],k,Obs_Score[ii,jj])
d_matrix_1[ii,jj] <- res[1]
d_matrix_2[ii,jj] <- res[2]
}
}
d_ability <- rep(0,n)
d_bias <- rep(0,n)
d_reliable <- rep(0,n)
### Assume first student's ability = 0
for(ii in 2:n){
d_ability[ii] <- sum(d_matrix_1[which(Student_matrix==ii)])
}
for(jj in 1:n){
d_bias[jj] <- sum(d_matrix_1[which(Grader_matrix==jj)])
d_reliable[jj] <- sum(d_matrix_2[which(Grader_matrix==jj)])
}
return(list(d_ability = d_ability,
d_bias = d_bias,
d_reliable = d_reliable))
}
Ground truth
ability <- true_value$ability
bias <- true_value$bias
reliable <- true_value$reliable
true_likelihood <- calc_overall_likelihood()
initialization
set.seed(123)
ability = runif(n,0,5)
ability[1] <- 0
bias = runif(n,-1,1)
reliable = runif(n,0,1) # or gamma
initialization
### SGD (can also try ADAM or other optimization algorithm)
max_it <- 1000
alpha<- 0.1
record<-rep(0,max_it)
# for(ite in 1:max_it){
# res<- Calc_derivative()
# ability = ability + res$d_ability * alpha / sqrt(ite)
# bias = bias + res$d_bias * alpha/ sqrt(ite)
# reliable = reliable + res$d_reliable * alpha/ sqrt(ite)
# record[ite] <- calc_overall_likelihood()
# }
beta1<- 0.9
gamma <- 0.01
B<- 1
for(ite in 1:max_it){
res<- Calc_derivative()
beta2 <- B / (B + ite^(-gamma))
B <- B + ite^(-gamma)
m1<-rep(0,n);v1<-rep(0,n)
m2<-rep(0,n);v2<-rep(0,n)
m3<-rep(0,n);v3<-rep(0,n)
m1 <- beta1 * m1 + (1-beta1) * res$d_ability
v1 <- beta2 * v1 + (1-beta2) * (res$d_ability)^2
m2 <- beta1 * m2 + (1-beta1) * res$d_bias
v2 <- beta2 * v2 + (1-beta2) * (res$d_bias)^2
m3 <- beta1 * m3 + (1-beta1) * res$d_reliable
v3 <- beta2 * v3 + (1-beta2) * (res$d_reliable)^2
ability = ability + m1/(sqrt(v1) + 1e-8)* (1-beta2^ite)/(1-beta1^ite)* alpha/sqrt(ite)
bias = bias + m2/(sqrt(v2) + 1e-8)* (1-beta2^ite)/(1-beta1^ite)* alpha/sqrt(ite)
reliable = reliable + m3/(sqrt(v3) + 1e-8)* (1-beta2^ite)/(1-beta1^ite)* alpha/sqrt(ite)
#reliable[reliable<0.01] <- 0.01
#ability
#ite<-ite + 1
record[ite] <- calc_overall_likelihood()
}
plot((record[-(1:100)]))

max(record)
## [1] -4897.087
true_likelihood
## [1] -5738.26
MSE and MAE (not important, because outliers can be extreme)
### MSE
mean((ability - true_value$ability + bias - true_value$bias )^2)
## [1] 5.248655
mean((ability - true_value$ability )^2)
## [1] 0.6474958
mean((bias - true_value$bias)^2)
## [1] 4.656846
mean((reliable - true_value$reliable)^2)
## [1] 4.081671
mean(abs(ability - true_value$ability + bias - true_value$bias ))
## [1] 1.392053
mean(abs(ability - true_value$ability ))
## [1] 0.5537557
mean(abs(bias - true_value$bias))
## [1] 1.152281
True_mean <- matrix(0,n,p)
est_mean <- matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
True_mean[ii,jj] <- true_value$ability[id_student] + true_value$bias[id_grader]
est_mean[ii,jj] <- ability[id_student] + bias[id_grader]
}
}
plot(density(True_mean),main="ability + bias")
lines(density(est_mean),col=2)

mean((est_mean - True_mean)^2)
## [1] 5.234901
###
calc_unit_variance<-function(ab,bi,re,k){
# ab<-ability[id_student]
# bi<-bias[id_grader]
# re<-reliable[id_grader]
# k
prob <- calc_prob(ab,bi,re,k)
tmp1 <- ((1:k) - (ab + bi)) * prob
tmp2 <- ((1:k) - (ab + bi))^2 * prob
tmp3 <- ((1:k) - (ab + bi))^3 * prob
tmp4 <- ((1:k) - (ab + bi))^4 * prob
res<- rep(0,3)
res[1]<- (sum(tmp2) - (sum(tmp1))^2) * re^2
res[2]<- (sum(tmp3) - (sum(tmp1)* sum(tmp2))) * re
res[3]<- sum(tmp4) - (sum(tmp2))^2
res[2] <- - res[2]
return(res)
}
Calc_variance <- function(){
v_matrix_1 <- matrix(0,n,p)
v_matrix_2 <- matrix(0,n,p)
v_matrix_3 <- matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
res<- calc_unit_variance(ability[id_student],bias[id_grader],
reliable[id_grader],k)
v_matrix_1[ii,jj] <- res[1]
v_matrix_2[ii,jj] <- res[2]
v_matrix_3[ii,jj] <- res[3]
}
}
V_est<- matrix(0,3*n,3*n)
V_count<- matrix(0,3*n,3*n)
for(ii in 1:n){
V_est[ii,ii] <- sum(v_matrix_1[which(Student_matrix==ii)])
}
for(jj in 1:n){
V_est[n+jj,n+jj] <- sum(v_matrix_1[which(Grader_matrix==jj)])
}
for(jj in 1:n){
V_est[2*n+jj,2*n+jj] <- sum(v_matrix_3[which(Grader_matrix==jj)])
}
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj]
# derivative of ab,bi
V_est[id_student,n+id_grader] <- V_est[id_student,n+id_grader] + v_matrix_1[ii,jj]
V_est[n+id_grader,id_student] <- V_est[n+id_grader,id_student] + v_matrix_1[ii,jj]
# derivative of ab,re
V_est[id_student,2*n+id_grader] <- V_est[id_student,2*n+id_grader] + v_matrix_2[ii,jj]
V_est[2*n+id_grader,id_student] <- V_est[2*n+id_grader,id_student] + v_matrix_2[ii,jj]
# derivative of bi,re
V_est[n+id_grader,2*n+id_grader] <- V_est[n+id_grader,2*n+id_grader] + v_matrix_2[ii,jj]
V_est[2*n+id_grader,n+id_grader] <- V_est[2*n+id_grader,n+id_grader] + v_matrix_2[ii,jj]
}
}
return(V_est)
#return(V_count)
}
v0 <- Calc_variance()
v0<- v0[-1,-1]
### get inverse, when full rank
#v1 <- solve(v0)
### get inverse, when not full rank
svd0<- svd(v0)
d <- 1 / svd0$d
d[d>1e3] <- 0
v1<- svd0$u %*% diag(d) %*% t(svd0$v)
tmp<-matrix(0,3*n,3*n)
tmp[-1,-1] <- v1
v1<-tmp
True_mean <- matrix(0,n,p)
est_mean <- matrix(0,n,p)
est_sd <- matrix(0,n,p)
for(ii in 1:n){
for(jj in 1:p){
id_student<- Student_matrix[ii,jj] #studnet
id_grader<- Grader_matrix[ii,jj] # grader
True_mean[ii,jj] <- true_value$ability[id_student] + true_value$bias[id_grader]
est_mean[ii,jj] <- ability[id_student] + bias[id_grader]
chosen <- c(id_student, n+id_grader)
est_sd[ii,jj] <- sqrt(sum(v1[chosen, chosen]))
}
}
True_mean <- rep(0,n)
est_mean <- rep(0,n)
est_sd <- rep(0,n)
for(ii in 1:n){
True_mean[ii] <- true_value$ability[ii] + true_value$bias[ii]
est_mean[ii] <- ability[ii] + bias[ii]
chosen <- c(ii, n+ii)
est_sd[ii] <- sqrt(sum(v1[chosen, chosen]))
}
res<-(est_mean - True_mean)
cat("n sample, every one judge himself\n")
## n sample, every one judge himself
cat("coverage rate (95%CI): ",mean( (res/est_sd<1.96) & (res/est_sd>-1.96) ))
## coverage rate (95%CI): 0.928
sd(res/est_sd)
## [1] 2.587068
plot(density(res/est_sd),main="standardized residual")

sd_est<- sqrt(diag(v1))
sd_reliable <- sd_est[2*n+(1:n)]
tmp1<- (true_value$reliable < reliable + 1.96 * sd_reliable)
tmp2<- (true_value$reliable > reliable - 1.96 * sd_reliable)
plot(density(((true_value$reliable - reliable) / sd_reliable)),main="reliable")

cat("coverage rate (95%CI): ",mean(tmp1&tmp2))
## coverage rate (95%CI): 0.814