# Fetch the NDDT response sheet from S3 and load it into `df`.
# NOTE: the working directory is machine-specific; every relative path used
# further down (CSV files, ./libFM, model outputs) is resolved against it.
setwd("/Users/adarsa/ilimi/github/Data-Science/explore/adhoc/NDDT")
s3_loc <- "s3://lpdev-ekstep/Data-Sciences/explore/adhoc/NDDT/"
nddt_file_name <- "NDDT_Responses.csv"
nddt_file_s3 <- paste0(s3_loc, nddt_file_name)

# `cmd` is the exit status of the AWS CLI call (0 on success).
cmd <- system(paste("aws s3 cp", nddt_file_s3, nddt_file_name))
if (cmd != 0) {
  if (!file.exists(nddt_file_name)) {
    stop(
      "Download of ", nddt_file_s3, " failed (exit status ", cmd,
      ") and no local copy of ", nddt_file_name, " exists.",
      call. = FALSE
    )
  }
  # A local copy exists: carry on, but make the stale-data risk visible.
  warning(
    "Download of ", nddt_file_s3, " failed (exit status ", cmd,
    "); using the existing local copy of ", nddt_file_name, ".",
    call. = FALSE
  )
}

df <- read.csv(nddt_file_name)
# Per-question score table: everything after the first eight columns of `df`,
# rounded to whole numbers.
# NOTE(review): the first eight columns are presumably child/session metadata;
# confirm against the CSV header.
score.df <- round(df[, -seq_len(8)])

# Question ids are the score column names; captured before `student` is added.
qid <- colnames(score.df)
sid <- unique(df$Child.ID)

# Attach the child id so each row can be identified after reshaping.
score.df$student <- df$Child.ID
# Reshape to long format: one row per (student, qid) pair with its score.
# Unattempted pairs are kept as NA for now (na.rm = FALSE).
test.df <- melt(
  score.df,
  id.vars = "student",
  value.name = "score",
  variable.name = "qid",
  na.rm = FALSE
)

# Recode scores below 0.5 to the negative class label -1.
# Assigning into the column vector (instead of `test.df[rows, ]$score`) also
# works when no row matches; the row-subset form errors in that case.
test.df$score[which(test.df$score < 0.5)] <- -1

# get only attempted <qid sid>
train_index <- which(!is.na(test.df$score))
train.df <- test.df[train_index, ]

# assign zeros to missing values. Handled by libFM
test.df$score[is.na(test.df$score)] <- 0
# One-hot encode the `student` and `qid` columns of the full long table; the
# training feature table is the subset of rows that were actually attempted.
test_features_df <- dummy.data.frame(
  as.data.frame(test.df),
  names = c("student", "qid"),
  sep = "_"
)
train_features_df <- test_features_df[train_index, ]

# Write the long-format tables (with header, without row names) that the
# libFM conversion script reads.
write.csv(train.df, file = "libfm_train", row.names = FALSE)
write.csv(test.df, file = "libfm_test", row.names = FALSE)
# Convert the two CSV files to libFM's input format, then train/predict with
# libFM (classification task, ALS method). Both tools are run from the working
# directory. A non-zero exit status aborts the script so that an output file
# left over from an earlier run is never read by mistake.
if (system("./triple_format_to_libfm.pl --in libfm_train,libfm_test --header 1 --target_column 2 --separator \",\" ") != 0) {
  stop("triple_format_to_libfm.pl failed; libFM input files were not regenerated.",
       call. = FALSE)
}
if (system(" ./libFM -train libfm_train.libfm -test libfm_test.libfm -dim '1,1,10' -init_stdev 0.01 -iter 1000 -method 'als' -task c -regular '1,10,10' -rlog 'rlogout' -out 'libfm.output_als' -learn_rate 0.1 -seed 1234 -save_model 'fm.model_als' ") != 0) {
  stop("libFM failed; 'libfm.output_als' is missing or stale.", call. = FALSE)
}

# One prediction per row of the test file, in the same row order as `test.df`.
libfm_output <- read.csv("libfm.output_als", header = FALSE)
# Predictions for the attempted (student, qid) pairs only.
libfm_out_pred <- libfm_output[train_index, ]

# ROCR prediction object: libFM scores against the observed labels of the
# attempted pairs.
pred <- prediction(libfm_out_pred, train.df$score)

# Other ROCR measures tried: "ppv", "sens", "rpp", "acc", "rec", "tpr"
# Recall (y) as a function of precision (x), one point per cutoff.
perf <- ROCR::performance(pred, measure = "rec", x.measure = "prec")

# Pull the single run out of each slot and drop its first entry; the list is
# named after the measure names reported by ROCR.
EvalParams1 <- setNames(
  list(
    perf@x.values[[1]][-1],
    perf@y.values[[1]][-1],
    perf@alpha.values[[1]][-1]
  ),
  c(perf@x.name, perf@y.name, perf@alpha.name)
)
plot_ly(data = EvalParams1, x = Recall, y = Precision, text = paste("threshold:", Cutoff))
#tpr fpr
# ROC curve: true positive rate (y) against false positive rate (x).
perf <- ROCR::performance(pred, "tpr", "fpr")
# Second curve with accuracy as the x measure and tpr as the y measure.
perf1 <- ROCR::performance(pred, "tpr", "acc")

# One entry per cutoff (first entry dropped). Accuracy is the *x* measure of
# `perf1`; its y values are the true positive rate again, so taking
# `perf1@y.values` here would mislabel TPR as accuracy.
EvalParams2 <- list(
  perf@x.values[[1]][-1],
  perf@y.values[[1]][-1],
  perf@alpha.values[[1]][-1],
  perf1@x.values[[1]][-1]
)
names(EvalParams2) <- c("FalsePositiveRate", "TruePositiveRate", "Cutoff", "Accuracy")
plot_ly(data = EvalParams2, x = FalsePositiveRate, y = TruePositiveRate, text = paste("threshold:", Cutoff, "Accuracy:", Accuracy))
## l1q1 l1q2 l1q3 l1q4 l1q5 l1q6
## 0.48339844 0.48144531 0.29199219 0.48828125 0.39941406 0.27636719
## l1q7 l1q8 l1q9 l1q10 l1q11 l1q12
## 0.51660156 0.53710938 0.42089844 0.32324219 0.50000000 0.51855469
## l2q1 l2q2 l2q3 l2q4 l2q5 l2q6
## 0.59765625 0.22753906 0.37402344 0.24121094 0.61328125 0.50000000
## l2q7 l2q8 l2q9 l2q10 l2q11 l2q12
## 0.50585938 0.40039062 0.32031250 0.57128906 0.64453125 0.38281250
## l3q1 l3q2 l3q3 l3q4 l3q5 l3q6
## 0.10839844 0.42871094 0.28417969 0.41992188 0.17089844 0.16796875
## l3q7 l3q8 l3q9 l3q10 l3q11 l3q12
## 0.21386719 0.14062500 0.21777344 0.34179688 0.11230469 0.48925781
## l4q1 l4q2 l4q3 l4q4 l4q5 l4q6
## 0.18457031 0.05078125 0.09863281 0.25390625 0.09082031 0.10156250
## l4q7 l4q8 l4q9 l4q10 l4q11 l4q12
## 0.21875000 0.26660156 0.27929688 0.07617188 0.27636719 0.32324219
## l5q1 l5q2 l5q3 l5q4 l5q5 l5q6
## 0.17675781 0.04101562 0.01855469 0.08398438 0.03027344 0.10937500
## l5q7 l5q8 l5q9 l5q10 l5q11 l5q12
## 0.08593750 0.04492188 0.02929688 0.08105469 0.03710938 0.03125000
# Decision threshold on the libFM score (corresponding to fpr of 5%).
threshold <- 0.64635

# Hard class per libFM output row: +1 above the threshold, -1 at or below it.
# Rows matching neither comparison (e.g. NA) keep the initial 0.
libfm_out_hardclass <- matrix(0, nrow = nrow(libfm_output), ncol = 1)
libfm_out_hardclass[which(libfm_output > threshold)] <- 1
libfm_out_hardclass[which(libfm_output <= threshold)] <- -1

# Hard classes for the attempted (student, qid) pairs only.
train_pred <- libfm_out_hardclass[train_index]
# Per-question confusion rates at the chosen threshold.
# Each result is an (n questions x 1) matrix, filled in the order of
# `question_perf$question`. A question with no actual positives (or no actual
# negatives) yields NaN for the rates that divide by that count.
tpr <- matrix(0, nrow = length(question_perf$question), ncol = 1)
fpr <- matrix(0, nrow = length(question_perf$question), ncol = 1)
tnr <- matrix(0, nrow = length(question_perf$question), ncol = 1)
fnr <- matrix(0, nrow = length(question_perf$question), ncol = 1)
i <- 1
for (question_id in question_perf$question) {
  # Predicted and observed labels of the students who attempted this question.
  stud_answered_pred <- train_pred[which(train.df$qid == question_id)]
  stud_answered_actual <- train.df$score[which(train.df$qid == question_id)]

  # Confusion counts. A false positive is an actual -1 predicted as +1 and a
  # false negative is an actual +1 predicted as -1 (these two were swapped).
  TruePositive <- length(which(stud_answered_pred[which(stud_answered_actual == 1)] == 1))
  FalseNegetive <- length(which(stud_answered_pred[which(stud_answered_actual == 1)] == -1))
  TrueNegetive <- length(which(stud_answered_pred[which(stud_answered_actual == -1)] == -1))
  FalsePositive <- length(which(stud_answered_pred[which(stud_answered_actual == -1)] == 1))

  # Rates: positives-based counts over actual positives, negatives-based
  # counts over actual negatives.
  tpr[i] <- TruePositive / length(which(stud_answered_actual == 1))
  fpr[i] <- FalsePositive / length(which(stud_answered_actual == -1))
  tnr[i] <- TrueNegetive / length(which(stud_answered_actual == -1))
  fnr[i] <- FalseNegetive / length(which(stud_answered_actual == 1))
  i <- i + 1
}
# Per-question tpr and tnr on one chart. The y-range covers both series so
# the overlaid line is not clipped to the range of the first one.
plot(tpr, type = "l", col = 1, lty = 2, ylim = range(tpr, tnr, finite = TRUE))
lines(tnr, type = "l", col = 3, lty = 4)
legend(50, 0.8, c("tpr", "tnr"), col = c(1, 3), text.col = "green4", lty = c(2, 4), bg = "gray90")

# Per-question fpr and fnr on one chart. The legend line types match the
# drawn lines (fnr is drawn with lty = 5).
plot(fpr, type = "l", col = 1, lty = 2, ylim = range(fpr, fnr, finite = TRUE))
lines(fnr, type = "l", col = 10, lty = 5)
legend(5, .5, c("fpr", "fnr"), col = c(1, 10), text.col = "green4", lty = c(2, 5), bg = "gray90")

#print(TruePositive/length(which(stud_answered_actual==1)))
#print(FalsePositive/length(which(stud_answered_actual==-1)))
#print(FalseNegetive/length(which(stud_answered_actual==1)))
#print(TrueNegetive/length(which(stud_answered_actual==-1)))
# Pairwise Manhattan distance between the question coefficients, as a full
# symmetric matrix. Despite its name, `cos_dist` is not a cosine distance.
cos_dist <- as.matrix(
  dist(ques_unary_inter_Wj.als$question_coef, method = "manhattan")
)
#cos_dist<-cos_dist/norm(as.matrix(ques_unary_inter_Wj.als$question_coef))

# Heatmap of the distance matrix, both axes labelled by question number.
plot_ly(
  x = ques_unary_inter_Wj.als$quest_num,
  y = ques_unary_inter_Wj.als$quest_num,
  z = cos_dist,
  type = "heatmap"
)
#cos_dist<-((unary_inter_Wj.als)%*% t(unary_inter_Wj.als))/norm(as.matrix(unary_inter_Wj.als))
#library(heatmaply)
#heatmaply(as.data.frame(cos_dist), k_col = 2, k_row = 3,na.value = "grey50",row_dend_left=TRUE, return_ppxpy = TRUE) %>% layout(margin = list(l = 130, b = 40))
# Node table for the network graph: one node per question id, which serves as
# id, label and hover title.
Nodes <- data.frame(id = qid)
Nodes[["label"]] <- qid
# NOTE(review): `level` is defined outside this section; positions 1015:1074
# presumably hold the group of each of the 60 questions - confirm.
Nodes[["group"]] <- level[1015:1074]
Nodes[["title"]] <- qid
# Edge table: one edge per unordered question pair (strict lower triangle of
# `cos_dist`), weighted by the pairwise distance.
Links <- data.frame(value = cos_dist[lower.tri(cos_dist, diag = FALSE)])

# Linear (column-major) positions of the lower-triangle cells, in the same
# order as the values extracted above.
elements <- which(lower.tri(cos_dist, diag = FALSE))

# Row/column index of each cell, taken from the matrix itself rather than from
# arithmetic with a hard-coded size of 60, so any number of questions works.
row_val <- row(cos_dist)[elements]
Links$from <- qid[row_val]
col_val <- col(cos_dist)[elements]
Links$to <- qid[col_val]

#subset links
# Keep only pairs whose distance exceeds 1.4.
sub_Links <- Links[Links$value > 1.4, ]
# Interactive network of questions: nodes selectable by group or by id, with
# neighbours up to degree 2 highlighted on hover.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
gr <- visNetwork(nodes = Nodes, edges = sub_Links) %>%
  visNodes(shadow = TRUE) %>%
  visOptions(
    selectedBy = "group",
    highlightNearest = list(enabled = TRUE, degree = 2, hover = TRUE),
    nodesIdSelection = TRUE
  ) %>%
  visLegend()

# Save the widget as HTML and include that file in the output.
gr %>% visSave(file = "LibFMCoeffGraph.html")
htmltools::includeHTML("LibFMCoeffGraph.html")