PREPARATION
# Set the working directory to the "Combined" folder of feature set 5.
# NOTE(review): every file read in the visible part of this script uses an
# absolute path, so this call may only matter for code outside this section
# (relative reads/writes) - confirm before removing.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/6.Feature Set 5/Combined")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
# Target variable: the Score column of the labelled source workbook.
Labels <- read_excel(path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")
Label <- Labels$Score

# Feature block 1: every column of the CSV except the first one.
Features1 <- read.csv(file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/6.Feature Set 5/Directives/Feature Set 4 TP.csv")[, -1, drop = FALSE]

# Feature block 2: columns 4 to 10 of the directives file, first 1000 rows.
Features2 <- read.csv(file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/6.Feature Set 5/Directives/Directives.csv")
Features2 <- Features2[seq_len(1000), 4:10]

# Feature block 3: every column of the CSV except the first one.
Features3 <- read.csv(file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/6.Feature Set 5/Combined/Feature Set 1 10th TP.csv")[, -1, drop = FALSE]

# Combined feature matrix: the three blocks side by side, one row per document.
Features <- cbind(Features1, Features2, Features3)
RECODE LABELS FOR ONE-VS-ALL
# Recode the raw scores into the targets used further down:
#   Label2..Label5 - binary one-vs-all targets (1 = score belongs to the class)
#   All            - the multi-class target with values 2, 3, 4, 5
# Score-to-class mapping: 3-4 -> class 2, 5-6 -> class 3, 7-8 -> class 4,
# 9-10 -> class 5. Any other score falls into class 2 of `All` (the original
# "else" branch) and is 0 in every binary target except where listed above.
#
# The recoding is vectorised instead of filling lists one element at a time,
# and the number of observations is taken from the feature table rather than
# being typed in as 1000 five times.
n_obs <- nrow(Features)
# The element-wise `if` used previously stopped with an error on a missing
# score; keep that failure explicit instead of silently recoding NA as 0.
stopifnot(length(Label) >= n_obs, !anyNA(Label[seq_len(n_obs)]))
score_head <- Label[seq_len(n_obs)]

# Class 2
Label2 <- as.factor(as.numeric(score_head %in% c(3, 4)))
# Class 3
Label3 <- as.factor(as.numeric(score_head %in% c(5, 6)))
# Class 4
Label4 <- as.factor(as.numeric(score_head %in% c(7, 8)))
# Class 5
Label5 <- as.factor(as.numeric(score_head %in% c(9, 10)))

# All labels: start from the fallback class 2, then overwrite the known bands.
All <- rep(2, n_obs)
All[score_head %in% c(5, 6)] <- 3
All[score_head %in% c(7, 8)] <- 4
All[score_head %in% c(9, 10)] <- 5
# As factor
All <- as.factor(All)
PARTITIONING TRAINING & VALIDATION
# Features
# Draw a random group id (1 = training, 2 = validation) for every row with
# probabilities 0.8 / 0.2. The split is random per row, so the realised sizes
# are only approximately 80/20. The seed makes the draw reproducible.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]
# Labels
# The same index vector is applied to every target so rows stay aligned with
# the feature tables.
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
SVM MODEL
# One-vs-all SVMs: one binary model per class (2, 3, 4, 5). Each model gets
# its own copy of the training features with the matching binary target
# attached as column `Score`. Features are passed unscaled and the models are
# fitted with probability estimates enabled.

# Class 2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(formula = Score ~ ., data = train2, scale = FALSE, probability = TRUE)

# Class 3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(formula = Score ~ ., data = train3, scale = FALSE, probability = TRUE)

# Class 4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(formula = Score ~ ., data = train4, scale = FALSE, probability = TRUE)

# Class 5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(formula = Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Score the validation rows with every model, requesting class probabilities.
P2 <- predict(object = SVM2, newdata = test, probability = TRUE)
P3 <- predict(object = SVM3, newdata = test, probability = TRUE)
P4 <- predict(object = SVM4, newdata = test, probability = TRUE)
P5 <- predict(object = SVM5, newdata = test, probability = TRUE)

# The probability matrices are carried as an attribute of the predictions.
Prob2 <- attr(P2, which = "probabilities")
Prob3 <- attr(P3, which = "probabilities")
Prob4 <- attr(P4, which = "probabilities")
Prob5 <- attr(P5, which = "probabilities")
VOTING
# Put the probability matrices of the four one-vs-all models side by side,
# two columns per model.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label every column with the factor level it really holds. The level of each
# column is read from the probability matrix itself instead of being typed in
# by hand, so the labels stay correct if a model returns its two columns in a
# different order. The first label of each pair has a space after "Class" and
# the second has none; that irregularity is kept on purpose so the names match
# the ones used so far.
voting_names <- function(class_id, prob) {
  level_names <- colnames(prob)
  c(
    paste0("Class ", class_id, ": ", level_names[1]),
    paste0("Class", class_id, ": ", level_names[2])
  )
}
colnames(Voting.df) <- c(
  voting_names(2, Prob2),
  voting_names(3, Prob3),
  voting_names(4, Prob4),
  voting_names(5, Prob5)
)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.033267895 0.9667321 0.7701351 0.22986494 0.6295299 0.3704701
## 14 0.007046284 0.9929537 0.9364755 0.06352449 0.6510205 0.3489795
## 16 0.016207328 0.9837927 0.9127025 0.08729747 0.7642258 0.2357742
## 26 0.041707585 0.9582924 0.8810822 0.11891777 0.6779024 0.3220976
## 28 0.036899831 0.9631002 0.8742241 0.12577590 0.6866571 0.3133429
## 29 0.018580648 0.9814194 0.7803499 0.21965005 0.6167349 0.3832651
## Class 5: 0 Class5: 1
## 5 0.8940293 0.1059707
## 14 0.4799104 0.5200896
## 16 0.4841181 0.5158819
## 26 0.6564912 0.3435088
## 28 0.5360040 0.4639960
## 29 0.8977058 0.1022942
# Keep only the positive-class probability ("1") of each one-vs-all model.
# SEQ holds the position of that column inside Voting.df. It is located by
# name in each probability matrix rather than hard-coded, because picking a
# fixed position would silently select the "0" probability if a model returned
# its columns in the other order.
prob_list <- list(Prob2, Prob3, Prob4, Prob5)
prob_widths <- vapply(prob_list, ncol, integer(1))
# Number of Voting.df columns that precede each model's block.
prob_offsets <- cumsum(c(0, prob_widths))[seq_along(prob_widths)]
positive_col <- vapply(
  prob_list,
  function(prob) match("1", colnames(prob)),
  integer(1)
)
# Fail loudly if a model has no positive-class column at all.
stopifnot(!anyNA(positive_col))
SEQ <- prob_offsets + positive_col
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.033267895 0.22986494 0.3704701 0.1059707
## 14 0.007046284 0.06352449 0.3489795 0.5200896
## 16 0.016207328 0.08729747 0.2357742 0.5158819
## 26 0.041707585 0.11891777 0.3220976 0.3435088
## 28 0.036899831 0.12577590 0.3133429 0.4639960
## 29 0.018580648 0.21965005 0.3832651 0.1022942
# Voting: every validation row is assigned the class whose one-vs-all model
# reports the highest positive-class probability (first maximum wins on ties).
Evaluation <- Transformed.Voting.df
# The columns are named after the class ids, so the winning class is read from
# the column names instead of relying on a "position + 1" offset.
class_ids <- as.numeric(colnames(Transformed.Voting.df))
# Work on a numeric matrix explicitly rather than letting apply() coerce the
# data frame.
Index <- as.numeric(apply(as.matrix(Transformed.Voting.df), MARGIN = 1, which.max))
Index <- class_ids[Index]
Evaluation$Vote <- Index
# True class of every validation row, for comparison with the vote.
Evaluation$Actual <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.033267895 0.22986494 0.3704701 0.10597071 4 4
## 14 0.007046284 0.06352449 0.3489795 0.52008960 5 5
## 16 0.016207328 0.08729747 0.2357742 0.51588193 5 5
## 26 0.041707585 0.11891777 0.3220976 0.34350879 5 4
## 28 0.036899831 0.12577590 0.3133429 0.46399601 5 4
## 29 0.018580648 0.21965005 0.3832651 0.10229423 4 4
## 39 0.246838934 0.08845296 0.3175243 0.12997166 4 5
## 40 0.010268879 0.20974317 0.6167814 0.08010712 4 3
## 60 0.011278903 0.03912085 0.2639434 0.71118072 5 5
## 61 0.070475507 0.22956819 0.2465999 0.29065439 5 3
## 72 0.006857182 0.07570470 0.3385350 0.65842551 5 4
## 81 0.014425186 0.11430731 0.3464016 0.34075985 4 3
## 86 0.024253203 0.11833838 0.2545710 0.62535484 5 5
## 90 0.061166821 0.17082528 0.3337738 0.11782279 4 4
## 92 0.044421012 0.13340066 0.3417338 0.11161602 4 4
## 113 0.051200262 0.09493488 0.3791105 0.17917310 4 5
## 116 0.019995411 0.13544740 0.2921960 0.37570592 5 4
## 117 0.021538001 0.08407803 0.3293934 0.37427313 5 5
## 122 0.047749255 0.12163997 0.3625535 0.09073309 4 4
## 123 0.019392682 0.07285956 0.3362575 0.26998463 4 2
## 124 0.036105185 0.14872024 0.3427491 0.08240051 4 4
## 131 0.005335071 0.14116585 0.2944579 0.57225068 5 4
## 135 0.303322371 0.21197766 0.4937574 0.03698463 4 3
## 137 0.005111162 0.04288481 0.3550835 0.52499133 5 5
## 140 0.021478291 0.15836425 0.3428721 0.25044534 4 4
## 142 0.018254480 0.06647287 0.3362945 0.51958873 5 5
## 149 0.012492293 0.08375389 0.3580355 0.54222001 5 4
## 154 0.028386317 0.13429846 0.2410511 0.37366298 5 5
## 156 0.151573286 0.07603637 0.3645300 0.15707232 4 3
## 158 0.319035984 0.23345338 0.4798679 0.01683784 4 3
## 169 0.009131563 0.07501636 0.2134527 0.74638191 5 5
## 185 0.009450654 0.11474220 0.1827547 0.63966955 5 5
## 187 0.004517574 0.07284008 0.4569466 0.34127914 4 5
## 192 0.027066343 0.18446225 0.5289220 0.02686208 4 3
## 194 0.019310121 0.16619024 0.4244824 0.19946698 4 4
## 195 0.016855708 0.20779608 0.2655812 0.28199559 5 4
## 196 0.109511659 0.25942885 0.4331641 0.02816314 4 5
## 197 0.342122254 0.22371486 0.2433400 0.04633762 2 3
## 199 0.007653984 0.09376028 0.2349792 0.77432916 5 5
## 210 0.111575385 0.16087217 0.4233720 0.02720181 4 3
## 216 0.016789192 0.06508396 0.1522983 0.88511958 5 5
## 220 0.008316982 0.26279908 0.2934770 0.10769836 4 4
## 227 0.171272421 0.03492257 0.4367985 0.14752257 4 5
## 234 0.029753125 0.12533733 0.4205840 0.20080794 4 3
## 240 0.017561296 0.10676460 0.4167785 0.17927590 4 5
## 245 0.056410198 0.10966879 0.3184389 0.36872987 5 4
## 249 0.015571061 0.13469231 0.3092670 0.40734536 5 5
## 261 0.021687909 0.15491413 0.3320233 0.34147074 5 3
## 277 0.010178784 0.07224125 0.2486351 0.90428341 5 5
## 283 0.019030318 0.09150699 0.2717528 0.57882521 5 5
## 290 0.010486320 0.08234748 0.2100014 0.86576321 5 4
## 293 0.014024234 0.07610174 0.3805381 0.20379828 4 5
## 302 0.008800356 0.12830259 0.2875159 0.43734287 5 4
## 305 0.022944057 0.08428531 0.3553281 0.55901129 5 4
## 308 0.023274547 0.13440225 0.2440147 0.41424432 5 4
## 311 0.008430076 0.07618812 0.2263460 0.77887294 5 5
## 320 0.015575303 0.08864748 0.2482555 0.74196970 5 2
## 322 0.022630034 0.06498186 0.2338307 0.86512986 5 5
## 330 0.012147331 0.07517736 0.1982960 0.88604975 5 4
## 332 0.035183980 0.11406298 0.4279327 0.17691128 4 4
## 333 0.024206440 0.08671714 0.2771593 0.74225004 5 5
## 339 0.009955665 0.09437638 0.2711950 0.59187220 5 5
## 341 0.021726711 0.09104037 0.4686544 0.16496620 4 4
## 344 0.037699073 0.06158840 0.3314145 0.75340891 5 5
## 349 0.010473201 0.08310644 0.1864154 0.86622944 5 5
## 355 0.016233929 0.07260807 0.2098375 0.90332970 5 5
## 356 0.027201592 0.08754045 0.2913860 0.53318193 5 3
## 365 0.012354838 0.14301311 0.2808465 0.37115170 5 3
## 366 0.010238775 0.13207526 0.3151428 0.48098655 5 4
## 369 0.007218669 0.09298494 0.3180546 0.40438852 5 4
## 371 0.009940091 0.08717532 0.2200316 0.81636219 5 5
## 373 0.012996721 0.06732117 0.3129316 0.71406584 5 5
## 389 0.043171527 0.10342477 0.2828768 0.48954717 5 2
## 390 0.032886505 0.13440071 0.2671518 0.49349123 5 4
## 396 0.015539380 0.06797890 0.4271963 0.28700819 4 4
## 412 0.004229798 0.08014581 0.3889156 0.36856425 4 5
## 413 0.013426748 0.10687917 0.3743012 0.30231140 4 3
## 415 0.010074684 0.10446410 0.3531298 0.44234824 5 4
## 422 0.041032909 0.13035896 0.3502483 0.29688660 4 5
## 425 0.015739298 0.07321179 0.1985252 0.94092100 5 5
## 434 0.014392050 0.06003812 0.3305199 0.47406946 5 5
## 438 0.005963988 0.10348076 0.3086695 0.70839998 5 4
## 441 0.109989312 0.18248481 0.3232859 0.15799043 4 5
## 442 0.021882603 0.09538805 0.1898538 0.68039756 5 5
## 445 0.010633041 0.12165124 0.3924755 0.47376338 5 5
## 447 0.034267404 0.15383532 0.3552458 0.08185476 4 3
## 453 0.051574206 0.13353782 0.4164864 0.24030809 4 4
## 454 0.161531919 0.06494027 0.2455613 0.13845241 4 5
## 462 0.006099191 0.06192108 0.2159038 0.83165252 5 5
## 474 0.025462300 0.10113731 0.3062843 0.47938362 5 3
## 476 0.014820178 0.18021560 0.2776666 0.30909575 5 3
## 493 0.014266076 0.11671003 0.2441808 0.44636963 5 5
## 502 0.014527904 0.20653048 0.4144212 0.20241364 4 4
## 503 0.142664661 0.03579311 0.3110127 0.24726522 4 5
## 506 0.014939105 0.13888521 0.2676077 0.52156793 5 5
## 508 0.012183357 0.04868685 0.4002587 0.74539545 5 5
## 512 0.023506221 0.12145280 0.1988512 0.71591127 5 5
## 513 0.022295737 0.11032412 0.2035339 0.41953415 5 5
## 521 0.184631190 0.07426878 0.2746467 0.55262663 5 2
## 524 0.025289828 0.13825312 0.3234257 0.29291149 4 5
# Confusion matrix: rows = actual class, columns = voted class.
# Both margins are built on the same set of class levels so the matrix is
# always square with matching row/column order. Without this, a class that is
# never predicted would be missing from the columns and diag(CM) would no
# longer line up with the true-positive counts.
class_levels <- as.character(sort(unique(as.numeric(
  c(levels(Evaluation$Actual), Evaluation$Vote)
))))
CM <- table(
  factor(Evaluation$Actual, levels = class_levels),
  factor(Evaluation$Vote, levels = class_levels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 0 15 9
## 4 0 0 27 35
## 5 0 1 20 93
# Proportions: number of validation rows overall and per actual class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)
class_support <- c(Length2, Length3, Length4, Length5)
# diag(CM) is only the vector of true positives when rows and columns list the
# same classes in the same order; stop instead of computing wrong metrics.
stopifnot(
  identical(rownames(CM), colnames(CM)),
  length(class_support) == nrow(CM)
)
# Accuracy
Accuracy <- sum(diag(CM)) / sum(CM)
# CM has the actual classes in its rows and the votes in its columns, so
#   precision = true positives / column total (everything voted as the class)
#   recall    = true positives / row total    (everything that is the class)
# These two were previously computed the other way round.
precision_by_class <- diag(CM) / colSums(CM)
recall_by_class <- diag(CM) / rowSums(CM)
# A class that is never voted for (or never occurs) gives 0/0; count it as 0
# so one empty class does not turn the whole average into NaN.
precision_by_class[is.nan(precision_by_class)] <- 0
recall_by_class[is.nan(recall_by_class)] <- 0
# Precision: average over classes, weighted by the number of actual rows per
# class. The total comes from the data instead of a hard-coded 208.
Precision <- sum(precision_by_class * class_support) / Overall
# Recall: same weighting. Support-weighted recall equals accuracy by
# construction, so the identical value below is expected.
Recall <- sum(recall_by_class * class_support) / Overall
Accuracy
## [1] 0.5769231
Precision
## [1] 0.4853141
Recall
## [1] 0.5769231