PREPARATION
# Make this experiment's folder (SVM / Feature Set 1 / TF / 70) the working directory.
# NOTE(review): absolute, machine-specific path. The input files loaded in this
# script are also given as absolute paths, so this call may be unnecessary;
# confirm nothing else relies on relative paths before removing it.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/70")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
# Labels: read the source workbook and keep its Score column as the raw label vector.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels$Score
# Features: read the feature table, then discard its first column.
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/70/Feature Set 1 70th Percentile.csv"
)
Features[1] <- NULL
RECODE LABELS FOR ONE-VS-ALL
#Class 2
# One-vs-all target for class 2: 1 when the score is 3 or 4, 0 otherwise.
# Computed over the whole score vector at once, so it no longer depends on
# there being exactly 1000 labels (the previous loop was fixed to 1:1000).
# A missing score made the previous scalar `if` fail; keep failing loudly
# instead of silently coding it as 0.
stopifnot(!anyNA(Label))
#As Factor
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
#Class 3
# One-vs-all target for class 3: 1 when the score is 5 or 6, 0 otherwise.
# Computed over the whole score vector at once, so it no longer depends on
# there being exactly 1000 labels (the previous loop was fixed to 1:1000).
# A missing score made the previous scalar `if` fail; keep failing loudly
# instead of silently coding it as 0.
stopifnot(!anyNA(Label))
#As Factor
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
#Class 4
# One-vs-all target for class 4: 1 when the score is 7 or 8, 0 otherwise.
# Computed over the whole score vector at once, so it no longer depends on
# there being exactly 1000 labels (the previous loop was fixed to 1:1000).
# A missing score made the previous scalar `if` fail; keep failing loudly
# instead of silently coding it as 0.
stopifnot(!anyNA(Label))
#As Factor
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
#Class 5
# One-vs-all target for class 5: 1 when the score is 9 or 10, 0 otherwise.
# Computed over the whole score vector at once, so it no longer depends on
# there being exactly 1000 labels (the previous loop was fixed to 1:1000).
# A missing score made the previous scalar `if` fail; keep failing loudly
# instead of silently coding it as 0.
stopifnot(!anyNA(Label))
#As Factor
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))
#All Labels
# Multi-class target: scores 9-10 -> 5, 7-8 -> 4, 5-6 -> 3, everything else -> 2.
# Computed over the whole score vector at once, so it no longer depends on
# there being exactly 1000 labels (the previous loop was fixed to 1:1000).
# NOTE(review): "everything else" includes any score outside 3-10, whereas the
# binary class-2 target only flags scores 3 and 4 -- confirm that no such
# scores occur in the data.
# A missing score made the previous scalar `if` fail; keep failing loudly
# instead of silently assigning it to class 2.
stopifnot(!anyNA(Label))
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)
PARTITIONING TRAINING & VALIDATION
#Features
# Randomly assign every feature row to the training set (1) or the validation
# set (2), drawn with probabilities 0.8 / 0.2. The fixed seed makes the split
# reproducible. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]
Labels
# Split every label vector with the same row assignment (`ind`) that was used
# for the feature rows, so labels and features stay aligned.
# Logical indexing silently recycles a shorter index and pads with NA for a
# longer one, so refuse to continue if any label vector does not have exactly
# one entry per assigned row.
stopifnot(
  length(Label2) == length(ind),
  length(Label3) == length(ind),
  length(Label4) == length(ind),
  length(Label5) == length(ind),
  length(All) == length(ind)
)
# Binary one-vs-all targets, classes 2 to 5.
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]
# Multi-class target used for the final evaluation.
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
SVM MODEL
# Fit one binary SVM per class (one-vs-all). Each model is trained on the
# training features plus a `Score` column holding that class's 0/1 target,
# without feature scaling and with probability estimates enabled.
#SVM2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
#SVM3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
#SVM4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
#SVM5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict the validation rows with each model, asking for class probabilities.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# The probability matrices are attached to the predictions as an attribute.
Prob2 <- attr(P2, which = "probabilities")
Prob3 <- attr(P3, which = "probabilities")
Prob4 <- attr(P4, which = "probabilities")
Prob5 <- attr(P5, which = "probabilities")
VOTING
# Put the four two-column probability matrices side by side: columns 1-2 come
# from the class-2 model, 3-4 from class 3, 5-6 from class 4, 7-8 from class 5.
stopifnot(
  ncol(Prob2) == 2, ncol(Prob3) == 2, ncol(Prob4) == 2, ncol(Prob5) == 2
)
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label each column with the level (0 or 1) it really holds. The order of the
# probability columns is decided by the fitted model, not by this script, so
# it is read from each matrix rather than typed in by hand. The label layout
# ("Class K: x" for the first column of a pair, "ClassK: x" for the second)
# is kept exactly as before so existing column names do not change.
colnames(Voting.df) <- unlist(
  Map(
    function(k, prob) {
      lvl <- colnames(prob)
      c(paste0("Class ", k, ": ", lvl[1]), paste0("Class", k, ": ", lvl[2]))
    },
    2:5,
    list(Prob2, Prob3, Prob4, Prob5)
  ),
  use.names = FALSE
)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.06999499 0.9300050 0.6367846 0.36321541 0.7164152 0.2835848
## 14 0.01165801 0.9883420 0.9313712 0.06862882 0.7138358 0.2861642
## 16 0.01419389 0.9858061 0.8987670 0.10123295 0.7956412 0.2043588
## 26 0.02217467 0.9778253 0.8770113 0.12298871 0.6477620 0.3522380
## 28 0.03409561 0.9659044 0.8614346 0.13856539 0.7020682 0.2979318
## 29 0.01374107 0.9862589 0.8554391 0.14456091 0.6069858 0.3930142
## Class 5: 0 Class5: 1
## 5 0.9228503 0.07714967
## 14 0.3517208 0.64827916
## 16 0.5643600 0.43564001
## 26 0.6652175 0.33478249
## 28 0.4002016 0.59979844
## 29 0.6346555 0.36534451
# Positions, within Voting.df, of the "class is present" (level "1")
# probability of each model. Voting.df holds two columns per model in the
# order class 2, 3, 4, 5, so each model's pair starts at offset 0, 2, 4, 6.
# Which of the two columns is level "1" depends on the fitted model, so it is
# looked up by name instead of being hard-coded.
stopifnot(ncol(Voting.df) == 8)
SEQ <- vapply(
  list(Prob2, Prob3, Prob4, Prob5),
  function(prob) match("1", colnames(prob)),
  integer(1)
) + c(0, 2, 4, 6)
# A model without a "1" column has no positive-class probability to vote with.
stopifnot(!anyNA(SEQ))
Transformed.Voting.df <- Voting.df[SEQ]
# Name each column after the class it votes for.
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.06999499 0.36321541 0.2835848 0.07714967
## 14 0.01165801 0.06862882 0.2861642 0.64827916
## 16 0.01419389 0.10123295 0.2043588 0.43564001
## 26 0.02217467 0.12298871 0.3522380 0.33478249
## 28 0.03409561 0.13856539 0.2979318 0.59979844
## 29 0.01374107 0.14456091 0.3930142 0.36534451
# Vote: every validation row is assigned the class whose model gave the
# highest positive-class probability (the first one wins on a tie).
Evaluation <- Transformed.Voting.df
Index <- as.numeric(apply(as.matrix(Transformed.Voting.df), MARGIN = 1, which.max))
# Translate the winning column position into its class label using the column
# names ("2".."5") rather than a fixed +1 offset, so the mapping stays correct
# if the set or order of columns ever changes.
Index <- as.numeric(colnames(Transformed.Voting.df)[Index])
Evaluation$Vote <- Index
# True multi-class labels of the validation rows, for comparison with the vote.
Evaluation$Actual <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.069994992 0.36321541 0.2835848 0.07714967 3 4
## 14 0.011658015 0.06862882 0.2861642 0.64827916 5 5
## 16 0.014193890 0.10123295 0.2043588 0.43564001 5 5
## 26 0.022174671 0.12298871 0.3522380 0.33478249 4 4
## 28 0.034095607 0.13856539 0.2979318 0.59979844 5 4
## 29 0.013741067 0.14456091 0.3930142 0.36534451 4 4
## 39 0.204126478 0.16471406 0.3167844 0.10371014 4 5
## 40 0.006050409 0.31515901 0.5596942 0.31466461 4 3
## 60 0.010280800 0.04335960 0.2968872 0.52773411 5 5
## 61 0.196407201 0.21896918 0.4395709 0.17830099 4 3
## 72 0.007460476 0.07996050 0.3582897 0.63256615 5 4
## 81 0.019262784 0.10284807 0.3531934 0.42399661 5 3
## 86 0.019482843 0.13078752 0.2367841 0.34672830 5 5
## 90 0.048124938 0.14037571 0.2915932 0.34698593 5 4
## 92 0.028457834 0.12823037 0.3291250 0.19655180 4 4
## 113 0.037522489 0.06323598 0.3760399 0.36547054 4 5
## 116 0.035619455 0.13858182 0.2397979 0.26709700 5 4
## 117 0.026476547 0.10075506 0.2948418 0.33279109 5 5
## 122 0.056859608 0.14288887 0.3210111 0.14750743 4 4
## 123 0.017267073 0.08037583 0.3606677 0.37477311 5 2
## 124 0.012434948 0.12754823 0.3699227 0.27335178 4 4
## 131 0.009008990 0.12087409 0.3348890 0.42448373 5 4
## 135 0.409329577 0.19130556 0.3846603 0.06182095 2 3
## 137 0.009747096 0.07831564 0.3474767 0.31565793 4 5
## 140 0.017672244 0.15716387 0.3167418 0.30895274 4 4
## 142 0.018392604 0.07926924 0.3407340 0.54303571 5 5
## 149 0.022937001 0.06452194 0.3503481 0.61261292 5 4
## 154 0.035010012 0.10267228 0.2509320 0.54591845 5 5
## 156 0.134534895 0.07189891 0.4067825 0.22792561 4 3
## 158 0.402518015 0.21360700 0.5000000 0.01549377 4 3
## 169 0.015216907 0.07049969 0.2671130 0.62409863 5 5
## 185 0.014076176 0.10748574 0.2549915 0.49228426 5 5
## 187 0.007841698 0.08491622 0.3913813 0.52173337 5 5
## 192 0.019576265 0.15346085 0.5165931 0.14069945 4 3
## 194 0.026080537 0.21039953 0.3571186 0.36411130 5 4
## 195 0.027130399 0.15673692 0.2633267 0.45151401 5 4
## 196 0.213725086 0.12329714 0.4834687 0.12236951 4 5
## 197 0.429821415 0.09910777 0.3114916 0.05368269 2 3
## 199 0.007424905 0.10567049 0.2360916 0.77681454 5 5
## 210 0.133072970 0.15205098 0.3172857 0.16181556 4 3
## 216 0.018905481 0.06447273 0.1794497 0.79788484 5 5
## 220 0.017038119 0.21620228 0.3401354 0.24227891 4 4
## 227 0.446111574 0.02659893 0.5368909 0.08319245 4 5
## 234 0.021627610 0.13576278 0.4444042 0.34347769 4 3
## 240 0.027916781 0.08903561 0.3999866 0.36246483 4 5
## 245 0.031060530 0.10732669 0.3042077 0.51383341 5 4
## 249 0.022161313 0.13256756 0.3160356 0.48330000 5 5
## 261 0.023983218 0.13377398 0.3095487 0.47939074 5 3
## 277 0.012405668 0.07357440 0.2603720 0.82744624 5 5
## 283 0.015957736 0.11300873 0.3003923 0.45806827 5 5
## 290 0.014814624 0.08810028 0.2264873 0.79772816 5 4
## 293 0.014514729 0.09394313 0.3658231 0.39464802 5 5
## 302 0.013400040 0.14030055 0.3106059 0.27847473 4 4
## 305 0.024658965 0.09107746 0.3308436 0.59343570 5 4
## 308 0.035030755 0.11285099 0.2478932 0.56420178 5 4
## 311 0.013437465 0.08066596 0.2557949 0.63299476 5 5
## 320 0.019241492 0.09053756 0.2635032 0.73229005 5 2
## 322 0.020965903 0.06668342 0.2235069 0.80496536 5 5
## 330 0.014786803 0.07686905 0.2212658 0.79037539 5 4
## 332 0.019621549 0.11889274 0.3847489 0.34199680 4 4
## 333 0.020220775 0.08133113 0.2904181 0.76055312 5 5
## 339 0.018815538 0.10582934 0.2610308 0.60874737 5 5
## 341 0.020089461 0.10547993 0.3952009 0.42340598 5 4
## 344 0.022726903 0.07013328 0.3177385 0.72946175 5 5
## 349 0.014477726 0.08300157 0.1911309 0.77583570 5 5
## 355 0.016845404 0.06870197 0.2378757 0.84369222 5 5
## 356 0.030636227 0.08414457 0.2919971 0.63557004 5 3
## 365 0.013110195 0.14182714 0.2749469 0.48682680 5 3
## 366 0.012767098 0.11292928 0.2999308 0.40230982 5 4
## 369 0.010683395 0.09335571 0.3186219 0.35948457 5 4
## 371 0.015786528 0.09464426 0.2331255 0.71269014 5 5
## 373 0.015412129 0.08217783 0.3142281 0.67347461 5 5
## 389 0.032190206 0.09844358 0.3019977 0.53393844 5 2
## 390 0.030585098 0.12287148 0.2827149 0.57259883 5 4
## 396 0.016843121 0.07685740 0.3558283 0.48268151 5 4
## 412 0.007320686 0.10160173 0.3342611 0.51651296 5 5
## 413 0.031951379 0.10003945 0.3190404 0.15594497 4 3
## 415 0.012763777 0.10020763 0.3667104 0.50670098 5 4
## 422 0.119271539 0.09197578 0.4202823 0.39202640 4 5
## 425 0.017671245 0.07162814 0.2171302 0.88248206 5 5
## 434 0.018012424 0.05610614 0.3293198 0.28679035 4 5
## 438 0.008710180 0.08648518 0.2850454 0.70227998 5 4
## 441 0.268050936 0.06461175 0.4392119 0.08039005 4 5
## 442 0.028613155 0.08689779 0.2265606 0.71640132 5 5
## 445 0.018761855 0.09519705 0.4107271 0.31342205 4 5
## 447 0.046439840 0.14613217 0.3450755 0.12689532 4 3
## 453 0.024624374 0.17380595 0.4461621 0.43238962 4 4
## 454 0.059333167 0.11922567 0.3027591 0.09125061 4 5
## 462 0.006670241 0.06813814 0.2458830 0.70626101 5 5
## 474 0.033481223 0.09661368 0.3185650 0.50847595 5 3
## 476 0.017746754 0.13612634 0.2865060 0.44210539 5 3
## 493 0.010995404 0.10305047 0.2979367 0.52056037 5 5
## 502 0.012434657 0.18897850 0.4480706 0.33709141 4 4
## 503 0.079936868 0.03820556 0.3360126 0.45595297 5 5
## 506 0.018764855 0.10801290 0.2664851 0.63270568 5 5
## 508 0.017208107 0.06119975 0.3443292 0.76460171 5 5
## 512 0.028420355 0.09172148 0.2544290 0.69796150 5 5
## 513 0.014673908 0.12203173 0.2501608 0.42982699 5 5
## 521 0.061117952 0.08442360 0.2760865 0.66600252 5 2
## 524 0.028864234 0.11815615 0.2856029 0.31091367 5 5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# Both axes are forced onto the same set of classes so the table is always
# square and its diagonal always pairs a class with itself, even when some
# class is never predicted (a plain table() would drop that column).
classes <- union(
  levels(Evaluation$Actual),
  as.character(sort(unique(Evaluation$Vote)))
)
CM <- table(
  factor(Evaluation$Actual, levels = classes),
  factor(Evaluation$Vote, levels = classes)
)
CM
##
## 2 3 4 5
## 2 0 0 1 6
## 3 2 0 11 12
## 4 1 1 21 39
## 5 0 0 16 98
#Proportions
# Number of validation rows overall and per actual class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# Accuracy plus support-weighted precision and recall from a confusion matrix
# whose rows are the actual classes and whose columns are the predicted ones.
#   cm: two-way table or matrix with class names as dimnames.
# Returns a list with elements `accuracy`, `precision` and `recall`.
weighted_scores <- function(cm) {
  classes <- rownames(cm)
  # Classes are matched by name, so a missing or reordered column cannot
  # misalign the diagonal.
  shared <- intersect(classes, colnames(cm))
  hits <- setNames(numeric(length(classes)), classes)
  hits[shared] <- vapply(shared, function(k) cm[k, k], numeric(1))
  predicted <- setNames(numeric(length(classes)), classes)
  predicted[shared] <- colSums(cm)[shared]
  support <- rowSums(cm)
  # Precision = correct / predicted as the class (column totals);
  # recall = correct / actually in the class (row totals).
  # A class with an empty denominator scores 0 instead of NaN.
  precision <- ifelse(predicted > 0, hits / predicted, 0)
  recall <- ifelse(support > 0, hits / support, 0)
  list(
    accuracy = sum(hits) / sum(cm),
    # Weight each class by its number of actual rows; the total is taken from
    # the table instead of a hard-coded row count.
    precision = sum(precision * support) / sum(support),
    recall = sum(recall * support) / sum(support)
  )
}
Scores <- weighted_scores(CM)
#Accuracy
Accuracy <- Scores$accuracy
#Precision
Precision <- Scores$precision
#Recall
# Support-weighted recall is, by construction, equal to the accuracy.
Recall <- Scores$recall
Accuracy
## [1] 0.5721154
Precision
## [1] 0.4742733
Recall
## [1] 0.5721154