PREPARATION
# Point the session at the project folder for this feature set.
# NOTE(review): this is a hard-coded, user-specific path. The input files read
# further down are addressed by their own "~/..." paths, so this call only
# matters for relative paths used elsewhere — confirm before removing it.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/10-30")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
# Labels: read the spreadsheet and keep its `Score` column as the raw target.
labels_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
Labels <- read_excel(labels_path)
Label <- Labels$Score

# Features: read the CSV and discard its first column.
# NOTE(review): the first column is presumably a row index written on export —
# confirm against the CSV.
features_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/10-30/Feature Set 1: 10 to 30th Percentile.csv"
Features <- read.csv(features_path)[-1]
RECODE LABELS FOR ONE-VS-ALL
# Recode the raw scores into the targets used by the one-vs-all classifiers.
#
# Each binary target (Label2 .. Label5) is a factor that is 1 when the score
# falls in that class's pair of values and 0 otherwise. `All` is the
# multi-class target with values 2..5.
#
# The number of observations is taken from the feature table instead of a
# hard-coded 1000, because every label vector is later indexed with a
# partition vector of length nrow(Features). Missing scores stop the script
# (the original element-wise `if` also failed on NA).
n_obs <- nrow(Features)
scores <- Label[seq_len(n_obs)]
stopifnot(!anyNA(scores))

# Build a 0/1 factor marking the observations whose score is in `positive`.
one_vs_all <- function(scores, positive) {
  as.factor(as.numeric(scores %in% positive))
}

#Class 2
# NOTE(review): only scores 3 and 4 are positive here, while `All` below sends
# every score outside 5..10 (including any 1 or 2) to class 2 — confirm that
# the source data contains no scores below 3.
Label2 <- one_vs_all(scores, c(3, 4))
#Class 3
Label3 <- one_vs_all(scores, c(5, 6))
#Class 4
Label4 <- one_vs_all(scores, c(7, 8))
#Class 5
Label5 <- one_vs_all(scores, c(9, 10))

#All Labels
# Start from the fallback class 2, then overwrite the three explicit ranges.
# The ranges are disjoint, so the assignment order does not matter.
All <- rep(2, length(scores))
All[scores %in% c(5, 6)] <- 3
All[scores %in% c(7, 8)] <- 4
All[scores %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)
PARTITIONING TRAINING & VALIDATION
#Features
# Randomly assign every feature row to partition 1 (training, p = 0.8) or
# partition 2 (validation, p = 0.2). The seed makes the assignment
# reproducible; group sizes are random, not an exact 80/20 split.
set.seed(1234)
# `TRUE` instead of `T`: `T` is an ordinary variable that can be reassigned.
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
# drop = FALSE keeps the result a data frame even with a single feature column.
train <- Features[ind == 1, , drop = FALSE]
test <- Features[ind == 2, , drop = FALSE]
Labels
# Split every label vector with the same partition vector `ind` used for the
# features, so labels stay aligned with the rows of `train` and `test`.
is_train <- ind == 1
is_test <- ind == 2

# Training labels: one binary target per class, plus the multi-class target.
train.labels.2 <- Label2[is_train]
train.labels.3 <- Label3[is_train]
train.labels.4 <- Label4[is_train]
train.labels.5 <- Label5[is_train]
train.labels <- All[is_train]

# Validation labels, in the same order.
test.labels.2 <- Label2[is_test]
test.labels.3 <- Label3[is_test]
test.labels.4 <- Label4[is_test]
test.labels.5 <- Label5[is_test]
test.labels <- All[is_test]
SVM MODEL
# One-vs-all SVMs: one binary classifier per class (2..5).

# Training frames: the shared training features plus that class's 0/1 target
# stored in a `Score` column.
train2 <- train
train3 <- train
train4 <- train
train5 <- train
train2$Score <- train.labels.2
train3$Score <- train.labels.3
train4$Score <- train.labels.4
train5$Score <- train.labels.5

# Fit the models on unscaled features with probability estimates enabled.
# The fits are kept in class order 2, 3, 4, 5.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Score the validation rows with every classifier, requesting probabilities.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# Pull the per-class probability matrices off the prediction objects.
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")
VOTING
# Collect the probability columns of all four classifiers side by side.
#
# The "0"/"1" suffix of each column name is read from the probability matrices
# themselves rather than typed in by hand: the column order of those matrices
# is not the same for every model (the recorded output shows class 2 as
# "1","0" and the others as "0","1"), so fixed names can mislabel columns.
prob_class <- c(colnames(Prob2), colnames(Prob3), colnames(Prob4), colnames(Prob5))
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Two probability columns (negative, positive) are expected per classifier.
stopifnot(length(prob_class) == 8L, ncol(Voting.df) == 8L)
# The alternating "Class " / "Class" prefix reproduces the original column
# names exactly ("Class 2: 1", "Class2: 0", ...).
colnames(Voting.df) <- paste0(c("Class ", "Class"), rep(2:5, each = 2), ": ", prob_class)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.08547205 0.9145280 0.6566529 0.34334707 0.7036284 0.2963716
## 14 0.01202274 0.9879773 0.9280069 0.07199312 0.7053650 0.2946350
## 16 0.01387237 0.9861276 0.9000704 0.09992963 0.7898209 0.2101791
## 26 0.02378320 0.9762168 0.8781510 0.12184900 0.6606955 0.3393045
## 28 0.03266428 0.9673357 0.8677411 0.13225894 0.7018990 0.2981010
## 29 0.01441988 0.9855801 0.8456658 0.15433422 0.6010457 0.3989543
## Class 5: 0 Class5: 1
## 5 0.9152238 0.08477624
## 14 0.3813637 0.61863629
## 16 0.6074497 0.39255031
## 26 0.6867812 0.31321883
## 28 0.3855213 0.61447875
## 29 0.6277253 0.37227473
# Keep only the positive-class ("1") probability of each classifier.
#
# The positions are located by column name instead of the fixed c(1,4,6,8):
# the order of the "0"/"1" columns in each probability matrix depends on the
# training data, so fixed positions could silently select the negative-class
# probability for some classifier.
prob_class <- c(colnames(Prob2), colnames(Prob3), colnames(Prob4), colnames(Prob5))
SEQ <- which(prob_class == "1")
# Exactly one positive column per classifier, in classifier order
# (columns 1-2 belong to class 2, 3-4 to class 3, and so on).
stopifnot(length(SEQ) == 4L, all(ceiling(SEQ / 2) == 1:4))
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.08547205 0.34334707 0.2963716 0.08477624
## 14 0.01202274 0.07199312 0.2946350 0.61863629
## 16 0.01387237 0.09992963 0.2101791 0.39255031
## 26 0.02378320 0.12184900 0.3393045 0.31321883
## 28 0.03266428 0.13225894 0.2981010 0.61447875
## 29 0.01441988 0.15433422 0.3989543 0.37227473
# Voting: for every validation row pick the classifier with the highest
# positive-class probability. Columns 1..4 correspond to classes 2..5, hence
# the +1 shift from column position to class label.
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max)) + 1

# Evaluation table: the four probabilities, the voted class and the true class.
Evaluation <- Transformed.Voting.df
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.085472045 0.34334707 0.2963716 0.08477624 3 4
## 14 0.012022744 0.07199312 0.2946350 0.61863629 5 5
## 16 0.013872369 0.09992963 0.2101791 0.39255031 5 5
## 26 0.023783204 0.12184900 0.3393045 0.31321883 4 4
## 28 0.032664282 0.13225894 0.2981010 0.61447875 5 4
## 29 0.014419876 0.15433422 0.3989543 0.37227473 4 4
## 39 0.214897836 0.16504593 0.3281248 0.07753896 4 5
## 40 0.006580473 0.29452482 0.5508210 0.30724048 4 3
## 60 0.008640354 0.05206319 0.2942697 0.45646939 5 5
## 61 0.181904126 0.20897992 0.4296970 0.16616735 4 3
## 72 0.007192594 0.08413272 0.3415113 0.62850788 5 4
## 81 0.020484066 0.10690055 0.3557239 0.41802948 5 3
## 86 0.020489219 0.11915166 0.2501746 0.34274197 5 5
## 90 0.051132109 0.12971481 0.2931121 0.37581182 5 4
## 92 0.033347181 0.13386216 0.3315232 0.22201250 4 4
## 113 0.038705935 0.06405057 0.3793537 0.35486170 4 5
## 116 0.041550333 0.13351331 0.2389954 0.27463601 5 4
## 117 0.023782370 0.10184144 0.2966695 0.36489499 5 5
## 122 0.054083770 0.13918421 0.3212767 0.18444421 4 4
## 123 0.018347076 0.08137488 0.3604422 0.39340657 5 2
## 124 0.014972835 0.13617127 0.3744226 0.26322526 4 4
## 131 0.008802068 0.12162820 0.3152965 0.43855962 5 4
## 135 0.456954819 0.19349504 0.4184925 0.06130111 2 3
## 137 0.009984332 0.08056198 0.3418051 0.33971073 4 5
## 140 0.016824634 0.15344805 0.3150152 0.32915780 5 4
## 142 0.016991974 0.08716409 0.3396473 0.55313462 5 5
## 149 0.021124401 0.07403776 0.3573193 0.60758237 5 4
## 154 0.035826296 0.10181066 0.2641018 0.55244277 5 5
## 156 0.157193100 0.08268927 0.4175070 0.22495676 4 3
## 158 0.404103541 0.21255899 0.5132562 0.01203972 4 3
## 169 0.014200585 0.07587525 0.2725372 0.60387709 5 5
## 185 0.013430051 0.10210540 0.2606621 0.46893370 5 5
## 187 0.008486077 0.08990863 0.3910884 0.50772123 5 5
## 192 0.020639217 0.16750610 0.4932324 0.15891544 4 3
## 194 0.022757812 0.19589223 0.3544082 0.40517911 5 4
## 195 0.024349472 0.14849541 0.2686432 0.46940476 5 4
## 196 0.222374807 0.12140614 0.5000000 0.13323739 4 5
## 197 0.447163601 0.09992311 0.3276895 0.06341907 2 3
## 199 0.007279882 0.10661845 0.2388612 0.72779372 5 5
## 210 0.147868566 0.12432114 0.3433263 0.19398922 4 3
## 216 0.018691633 0.06856213 0.1834996 0.75393596 5 5
## 220 0.020611712 0.20758960 0.3498181 0.25168890 4 4
## 227 0.403401285 0.03497746 0.5758734 0.07332662 4 5
## 234 0.026176803 0.13390870 0.4459418 0.35496653 4 3
## 240 0.028718439 0.09162028 0.3969969 0.39106503 4 5
## 245 0.028457220 0.10816393 0.3099416 0.54065736 5 4
## 249 0.020881628 0.12349432 0.3191613 0.50738096 5 5
## 261 0.023632432 0.12858040 0.3036343 0.50588311 5 3
## 277 0.011655698 0.07808109 0.2589843 0.80054827 5 5
## 283 0.017940068 0.11118687 0.2976560 0.45984296 5 5
## 290 0.013224233 0.09171076 0.2293738 0.78215469 5 4
## 293 0.013691091 0.09653867 0.3698898 0.42306715 5 5
## 302 0.014213975 0.13850332 0.3130927 0.29482417 4 4
## 305 0.020591923 0.09737659 0.3312859 0.61719458 5 4
## 308 0.031162989 0.11281071 0.2507979 0.59036412 5 4
## 311 0.013283258 0.08225982 0.2589666 0.61431153 5 5
## 320 0.016871752 0.09188037 0.2647275 0.72838496 5 2
## 322 0.017526514 0.07016695 0.2257918 0.78080316 5 5
## 330 0.012678855 0.08034923 0.2253681 0.77579685 5 4
## 332 0.020404921 0.11747009 0.3858219 0.38671948 5 4
## 333 0.018411626 0.08455076 0.2884456 0.74975231 5 5
## 339 0.018166760 0.10293504 0.2612546 0.61411164 5 5
## 341 0.017392644 0.10705352 0.3921894 0.47259762 5 4
## 344 0.020577260 0.07478555 0.3177792 0.71941995 5 5
## 349 0.014134195 0.08531594 0.1962472 0.74499679 5 5
## 355 0.014832226 0.07320404 0.2361999 0.82367480 5 5
## 356 0.027591983 0.08814719 0.2924420 0.63883561 5 3
## 365 0.012503572 0.13257245 0.2766179 0.51344852 5 3
## 366 0.012408447 0.11315536 0.2942943 0.41119021 5 4
## 369 0.011160990 0.09329207 0.3155697 0.38887314 5 4
## 371 0.014854994 0.09580901 0.2333518 0.69477685 5 5
## 373 0.014225151 0.08380653 0.3143158 0.65700304 5 5
## 389 0.028696589 0.10175898 0.3028918 0.56769017 5 2
## 390 0.028359136 0.11905881 0.2837544 0.60391241 5 4
## 396 0.014792359 0.07861171 0.3540121 0.50000000 5 4
## 412 0.007107856 0.10089574 0.3292915 0.51500789 5 5
## 413 0.028524740 0.10718546 0.3224879 0.15729573 4 3
## 415 0.012351525 0.10009868 0.3509467 0.51021872 5 4
## 422 0.105956950 0.11175781 0.4160585 0.36857117 4 5
## 425 0.015550719 0.07331124 0.2221505 0.85193774 5 5
## 434 0.018481696 0.06169364 0.3220067 0.28948831 4 5
## 438 0.008219456 0.09034836 0.2825791 0.67686970 5 4
## 441 0.220532445 0.06914151 0.4446927 0.08077281 4 5
## 442 0.028671085 0.08850723 0.2300474 0.69419592 5 5
## 445 0.021316453 0.09140980 0.3933320 0.31816095 4 5
## 447 0.046022293 0.14359846 0.3658527 0.13529684 4 3
## 453 0.031732155 0.15349707 0.4385595 0.45959327 5 4
## 454 0.067018163 0.12013318 0.3076758 0.09432540 4 5
## 462 0.006810181 0.07227828 0.2422851 0.65816547 5 5
## 474 0.033052234 0.10405238 0.3206793 0.51684040 5 3
## 476 0.019980161 0.13880975 0.2807088 0.44191476 5 3
## 493 0.011559209 0.10624582 0.2877772 0.53667794 5 5
## 502 0.011417896 0.17389026 0.4517280 0.35053653 4 4
## 503 0.091765073 0.04655090 0.3390371 0.41392192 5 5
## 506 0.018319346 0.10784108 0.2702484 0.62165168 5 5
## 508 0.015247765 0.06820652 0.3392760 0.74783150 5 5
## 512 0.024480434 0.09995428 0.2619933 0.68803012 5 5
## 513 0.016651752 0.11204315 0.2530864 0.42771423 5 5
## 521 0.052057617 0.08933317 0.2802501 0.66251229 5 2
## 524 0.028316410 0.12164892 0.2989574 0.34116661 5 5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# Both sides are forced onto the same levels 2..5 so the matrix stays square
# (and diag(CM) stays aligned) even when a class is never predicted or never
# occurs in the validation set.
class_levels <- 2:5
CM <- table(
  factor(Evaluation$Actual, levels = class_levels),
  factor(Evaluation$Vote, levels = class_levels)
)
CM
##
## 2 3 4 5
## 2 0 0 1 6
## 3 2 0 11 12
## 4 1 1 18 42
## 5 0 0 16 98
#Proportions
# Number of evaluated rows and per-class support (count of actual members).
# These names are kept for compatibility; the weighted metrics below take the
# same counts from the row sums of CM.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# CM has the actual classes in rows and the voted classes in columns. The
# diagonal is only meaningful when both dimensions list the same classes.
stopifnot(
  length(dim(CM)) == 2L,
  identical(rownames(CM), colnames(CM))
)
true_positives <- diag(CM)
class_support <- rowSums(CM)

#Accuracy
Accuracy <- sum(true_positives) / sum(CM)

#Precision
# Precision = correct predictions of a class / all predictions of that class,
# i.e. the diagonal divided by the COLUMN sums. A class that was never
# predicted yields 0/0; it is counted as precision 0.
per_class_precision <- true_positives / colSums(CM)
per_class_precision[is.nan(per_class_precision)] <- 0
# Support-weighted average; the denominator is the table total rather than a
# hard-coded 208.
Precision <- sum(per_class_precision * class_support) / sum(class_support)

#Recall
# Recall = correct predictions of a class / actual members of that class,
# i.e. the diagonal divided by the ROW sums. Its support-weighted average is
# equal to the accuracy by construction.
per_class_recall <- true_positives / class_support
per_class_recall[is.nan(per_class_recall)] <- 0
Recall <- sum(per_class_recall * class_support) / sum(class_support)

Accuracy
## [1] 0.5576923
Precision
## [1] 0.4565852
Recall
## [1] 0.5576923