PREPARATION
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/50")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
#Import Labels
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")
Label <- Labels$Score
#Import Features
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/50/Feature Set 1: 50th Percentile.csv")
Features <- Features[-1]
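The recoding and partitioning steps below assume that the feature matrix and the label vector describe the same 1000 reviews in the same order; a minimal consistency check (a sketch, not part of the original run) makes that assumption explicit:
#Sanity check (sketch): one label per feature row
stopifnot(nrow(Features) == length(Label))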
RECODE LABELS FOR ONE-VS-ALL
#Class 2
Label2 <- list()
for(i in seq_along(Label)){
  if(Label[i] == 3 | Label[i] == 4){
    Label2[i] <- 1
  }else{
    Label2[i] <- 0
  }
}
#As Factor
Label2 <- as.factor(unlist(Label2))
#Class 3
Label3 <- list()
for(i in seq_along(Label)){
  if(Label[i] == 5 | Label[i] == 6){
    Label3[i] <- 1
  }else{
    Label3[i] <- 0
  }
}
#As Factor
Label3 <- as.factor(unlist(Label3))
#Class 4
Label4 <- list()
for(i in seq_along(Label)){
  if(Label[i] == 7 | Label[i] == 8){
    Label4[i] <- 1
  }else{
    Label4[i] <- 0
  }
}
#As Factor
Label4 <- as.factor(unlist(Label4))
#Class 5
Label5 <- list()
for(i in seq_along(Label)){
  if(Label[i] == 9 | Label[i] == 10){
    Label5[i] <- 1
  }else{
    Label5[i] <- 0
  }
}
#As Factor
Label5 <- as.factor(unlist(Label5))
#All Labels
All <- list()
for(i in seq_along(Label)){
  if(Label[i] == 9 | Label[i] == 10){
    All[i] <- 5
  }else if(Label[i] == 7 | Label[i] == 8){
    All[i] <- 4
  }else if(Label[i] == 5 | Label[i] == 6){
    All[i] <- 3
  }else{
    All[i] <- 2
  }
}
#As Factor
All <- as.factor(unlist(All))
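The five recoding loops above can also be written in vectorized form; the sketch below is intended to be equivalent for integer scores 1-10 (the .v suffixes are illustrative and not used later):
#Vectorized one-vs-all recoding (sketch)
Label2.v <- factor(as.integer(Label %in% c(3, 4)))
Label3.v <- factor(as.integer(Label %in% c(5, 6)))
Label4.v <- factor(as.integer(Label %in% c(7, 8)))
Label5.v <- factor(as.integer(Label %in% c(9, 10)))
#Collapsed target: scores 1-4 -> 2, 5-6 -> 3, 7-8 -> 4, 9-10 -> 5
All.v <- factor(pmax(2, ceiling(Label / 2)))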
PARTITIONING TRAINING & VALIDATION
#Features
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]
#Labels
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
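Because sample() draws the 80/20 split independently of the class labels, the validation set is not guaranteed to mirror the class proportions; a stratified alternative is sketched below, assuming the caret package (not loaded or used elsewhere in this script):
#Stratified 80/20 split (sketch, assumes caret is installed)
library(caret)
set.seed(1234)
idx <- createDataPartition(All, p = 0.8, list = FALSE)
train.s <- Features[idx, ]
test.s <- Features[-idx, ]
train.labels.s <- All[idx]
test.labels.s <- All[-idx]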
SVM MODEL
#SVM2: scores 3-4 vs. rest
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
#SVM3: scores 5-6 vs. rest
train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
#SVM4: scores 7-8 vs. rest
train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
#SVM5: scores 9-10 vs. rest
train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
#Class probabilities on the validation set
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")
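The four model fits and probability extractions above repeat the same pattern; a compact sketch that loops over the binary label sets and selects each positive-class ("1") probability column by name rather than by position (the Models and PosProb names are illustrative only):
#One-vs-rest SVMs fitted in a loop (sketch)
BinaryLabels <- list("2" = train.labels.2, "3" = train.labels.3,
                     "4" = train.labels.4, "5" = train.labels.5)
Models <- lapply(BinaryLabels, function(y){
  d <- train
  d$Score <- y
  svm(Score ~ ., data = d, scale = FALSE, probability = TRUE)
})
#Matrix of positive-class probabilities, one column per class ("2"..."5")
PosProb <- sapply(Models, function(m){
  attr(predict(m, newdata = test, probability = TRUE), "probabilities")[, "1"]
})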
VOTING
Voting.df <- data.frame(Prob2, Prob3,Prob4,Prob5)
colnames(Voting.df) <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.08719072 0.9128093 0.6532595 0.34674053 0.7062136 0.2937864
## 14 0.01180700 0.9881930 0.9280888 0.07191119 0.7086065 0.2913935
## 16 0.01375576 0.9862442 0.8973203 0.10267970 0.7945674 0.2054326
## 26 0.02268871 0.9773113 0.8764225 0.12357754 0.6575025 0.3424975
## 28 0.03345721 0.9665428 0.8645264 0.13547356 0.7017126 0.2982874
## 29 0.01339542 0.9866046 0.8479731 0.15202688 0.6016552 0.3983448
## Class 5: 0 Class5: 1
## 5 0.9267292 0.07327081
## 14 0.3743564 0.62564361
## 16 0.6028495 0.39715053
## 26 0.6906187 0.30938125
## 28 0.3981108 0.60188923
## 29 0.6404162 0.35958385
#Keep only the positive-class ("1") probability column of each binary model
SEQ <- c(1, 4, 6, 8)
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.08719072 0.34674053 0.2937864 0.07327081
## 14 0.01180700 0.07191119 0.2913935 0.62564361
## 16 0.01375576 0.10267970 0.2054326 0.39715053
## 26 0.02268871 0.12357754 0.3424975 0.30938125
## 28 0.03345721 0.13547356 0.2982874 0.60188923
## 29 0.01339542 0.15202688 0.3983448 0.35958385
Evaluation <- Transformed.Voting.df
#Vote = class with the highest one-vs-rest probability (row-wise which.max)
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max))
Index <- Index + 1   #column 1 corresponds to class 2
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation,100)
## 2 3 4 5 Vote Actual
## 5 0.087190723 0.34674053 0.2937864 0.07327081 3 4
## 14 0.011807000 0.07191119 0.2913935 0.62564361 5 5
## 16 0.013755763 0.10267970 0.2054326 0.39715053 5 5
## 26 0.022688712 0.12357754 0.3424975 0.30938125 4 4
## 28 0.033457210 0.13547356 0.2982874 0.60188923 5 4
## 29 0.013395425 0.15202688 0.3983448 0.35958385 4 4
## 39 0.196424619 0.16682350 0.3305483 0.07171274 4 5
## 40 0.006311111 0.30569632 0.5451396 0.29860299 4 3
## 60 0.009279821 0.05138992 0.2935639 0.46618495 5 5
## 61 0.179890313 0.22045583 0.4306961 0.15076684 4 3
## 72 0.007454634 0.08264296 0.3406109 0.62751420 5 4
## 81 0.018878480 0.10829177 0.3599779 0.41379739 5 3
## 86 0.019742049 0.12449562 0.2473895 0.33521459 5 5
## 90 0.048318096 0.13455068 0.2927701 0.35427070 5 4
## 92 0.030525581 0.13163501 0.3294593 0.20216185 4 4
## 113 0.039657019 0.06546208 0.3793242 0.33748725 4 5
## 116 0.039035417 0.13822583 0.2411203 0.26170062 5 4
## 117 0.024185155 0.10315757 0.2945958 0.34320474 5 5
## 122 0.055870951 0.14284028 0.3208412 0.16039479 4 4
## 123 0.018546251 0.07987575 0.3593784 0.37782153 5 2
## 124 0.014624122 0.13188837 0.3685614 0.25046495 4 4
## 131 0.008747729 0.12314381 0.3189686 0.42790283 5 4
## 135 0.424681902 0.20665430 0.4081185 0.05310929 2 3
## 137 0.009892244 0.07982538 0.3424585 0.32289456 4 5
## 140 0.016888840 0.15473249 0.3119504 0.30996126 4 4
## 142 0.017254054 0.08464047 0.3407115 0.54746410 5 5
## 149 0.021690042 0.07130429 0.3475125 0.60975894 5 4
## 154 0.034984139 0.10419262 0.2561026 0.54465964 5 5
## 156 0.143248797 0.08069949 0.4185158 0.21076247 4 3
## 158 0.394893526 0.22903289 0.5081642 0.01038736 4 3
## 169 0.014495903 0.07554127 0.2713229 0.61153067 5 5
## 185 0.013712189 0.10453185 0.2588369 0.46834796 5 5
## 187 0.008148973 0.08842511 0.3911020 0.50000000 5 5
## 192 0.020031116 0.16494116 0.5000000 0.13787716 4 3
## 194 0.023643895 0.20241996 0.3556112 0.37850745 5 4
## 195 0.024737539 0.14734805 0.2626312 0.45965253 5 4
## 196 0.222638752 0.12634305 0.4905218 0.11358779 4 5
## 197 0.452086773 0.09770692 0.3192206 0.05439899 2 3
## 199 0.007260424 0.10777315 0.2342020 0.74997509 5 5
## 210 0.160304495 0.12829783 0.3357897 0.16630417 4 3
## 216 0.018609392 0.06880404 0.1825177 0.77329708 5 5
## 220 0.020241747 0.21491396 0.3521250 0.23026153 4 4
## 227 0.419086669 0.03236850 0.5773028 0.06430521 4 5
## 234 0.025178301 0.13803935 0.4461902 0.33750258 4 3
## 240 0.028699226 0.09175029 0.3993332 0.37079990 4 5
## 245 0.029194343 0.10769732 0.3095234 0.52019818 5 4
## 249 0.021154075 0.12665268 0.3205186 0.49414718 5 5
## 261 0.023766742 0.13196847 0.3058559 0.49070577 5 3
## 277 0.011715167 0.07709940 0.2596686 0.81414540 5 5
## 283 0.017841680 0.11310118 0.2965839 0.45184630 5 5
## 290 0.013574748 0.09101754 0.2289714 0.79085941 5 4
## 293 0.012712981 0.09598305 0.3746080 0.40104324 5 5
## 302 0.013689316 0.13656948 0.3092763 0.28380868 4 4
## 305 0.021805079 0.09685378 0.3312502 0.60366342 5 4
## 308 0.032633038 0.11528220 0.2498620 0.57709748 5 4
## 311 0.013165131 0.08159990 0.2569642 0.61955179 5 5
## 320 0.017465563 0.09121525 0.2647106 0.73048706 5 2
## 322 0.018638586 0.06814609 0.2253928 0.79188459 5 5
## 330 0.013275525 0.07981217 0.2234993 0.78605074 5 4
## 332 0.020166744 0.11849290 0.3867359 0.36192044 4 4
## 333 0.018759835 0.08359774 0.2896525 0.75434196 5 5
## 339 0.018179899 0.10296821 0.2619691 0.60926468 5 5
## 341 0.018292240 0.10612312 0.3934318 0.44579202 5 4
## 344 0.021341740 0.07212949 0.3203790 0.72006837 5 5
## 349 0.014001532 0.08492201 0.1947592 0.75826662 5 5
## 355 0.015265601 0.07198040 0.2363252 0.83537514 5 5
## 356 0.028123219 0.08848581 0.2922649 0.63285523 5 3
## 365 0.012619262 0.13584438 0.2759820 0.50000000 5 3
## 366 0.012319246 0.11096464 0.2965364 0.40054229 5 4
## 369 0.010979868 0.09340020 0.3176367 0.36998478 5 4
## 371 0.014943784 0.09588605 0.2338813 0.70444867 5 5
## 373 0.014496675 0.08194890 0.3158659 0.66348992 5 5
## 389 0.029987100 0.10151516 0.3030418 0.55199328 5 2
## 390 0.030132556 0.12109115 0.2842531 0.58952905 5 4
## 396 0.015658229 0.07676180 0.3559792 0.48390414 5 4
## 412 0.007122523 0.09890038 0.3332388 0.51537732 5 5
## 413 0.027048481 0.10407992 0.3182199 0.14091527 4 3
## 415 0.011719684 0.10329810 0.3534772 0.50000000 5 4
## 422 0.109850671 0.10581676 0.4182783 0.35684234 4 5
## 425 0.016338304 0.07310734 0.2219466 0.86707020 5 5
## 434 0.018452671 0.06076072 0.3235348 0.28344823 4 5
## 438 0.008467501 0.08938608 0.2847070 0.68898377 5 4
## 441 0.234282919 0.06752392 0.4403016 0.07220685 4 5
## 442 0.027822548 0.08958690 0.2286234 0.70003086 5 5
## 445 0.020369822 0.09381332 0.3947092 0.31026705 4 5
## 447 0.047790350 0.14192253 0.3643009 0.12249222 4 3
## 453 0.032865653 0.15616542 0.4310806 0.43529885 5 4
## 454 0.066676011 0.11678189 0.3075200 0.08321992 4 5
## 462 0.006720221 0.07100283 0.2426965 0.67700698 5 5
## 474 0.034877260 0.10099860 0.3127425 0.50839483 5 3
## 476 0.020068810 0.13604011 0.2818025 0.43678694 5 3
## 493 0.011175347 0.10555166 0.2930956 0.52683188 5 5
## 502 0.012279677 0.17788151 0.4513695 0.32824283 4 4
## 503 0.087956366 0.04518004 0.3202322 0.41206991 5 5
## 506 0.018260901 0.10556564 0.2703494 0.61870380 5 5
## 508 0.015700025 0.06528105 0.3405290 0.75318844 5 5
## 512 0.024807770 0.09440417 0.2564118 0.69445518 5 5
## 513 0.014695971 0.11700314 0.2505463 0.42398297 5 5
## 521 0.054324723 0.08809108 0.2774292 0.65850736 5 2
## 524 0.028208828 0.12365440 0.2973006 0.31540082 5 5
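The Vote column shown above comes from a row-wise which.max(); the same argmax can be computed in vectorized form with base R's max.col(), sketched here (ties.method = "first" reproduces which.max()'s tie handling):
#Vectorized voting (sketch); +1 because column 1 corresponds to class 2
Vote.v <- max.col(as.matrix(Transformed.Voting.df), ties.method = "first") + 1
#all(Vote.v == Evaluation$Vote) is expected to hold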
#Confusion matrix: rows = actual class, columns = predicted vote
CM <- table(Evaluation$Actual, Evaluation$Vote)
CM
##
## 2 3 4 5
## 2 0 0 1 6
## 3 2 0 11 12
## 4 1 1 20 40
## 5 0 0 16 98
#Class frequencies in the validation set
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual == 2))
Length3 <- length(which(Evaluation$Actual == 3))
Length4 <- length(which(Evaluation$Actual == 4))
Length5 <- length(which(Evaluation$Actual == 5))
#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)
#Precision per class (correct / predicted), support-weighted average
Precision <- diag(CM)/colSums(CM)
Precision <- (Precision[1]*Length2+Precision[2]*Length3+Precision[3]*Length4+Precision[4]*Length5)/Overall
#Recall per class (correct / actual), support-weighted average
Recall <- diag(CM)/rowSums(CM)
Recall <- (Recall[1]*Length2+Recall[2]*Length3+Recall[3]*Length4+Recall[4]*Length5)/Overall
Accuracy
## [1] 0.5673077
Precision
## 2
## 0.4685035
Recall
## 2
## 0.5673077
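The accuracy and the support-weighted precision and recall above can be complemented with per-class and macro-averaged figures taken directly from CM (rows = actual, columns = predicted); a sketch:
#Per-class metrics from the confusion matrix (sketch)
prec.c <- diag(CM) / colSums(CM)   #correct / predicted, per class
rec.c <- diag(CM) / rowSums(CM)    #correct / actual, per class
f1.c <- ifelse(prec.c + rec.c == 0, 0, 2 * prec.c * rec.c / (prec.c + rec.c))
#Macro averages give every class equal weight, unlike the weighted values above
Macro.Precision <- mean(prec.c)
Macro.Recall <- mean(rec.c)
Macro.F1 <- mean(f1.c)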