PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/70")
#install.packages("naivebayes")
library(naivebayes)
library(dplyr)
library(psych)
library(ggplot2)
library(e1071)
library(readxl)
#Import Labels
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")

Label <- Labels$Score
#Import Features
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/70/Feature Set 1 70th Percentile.csv")

#Drop the first (index) column, keeping the 784 term-frequency features
Features <- Features[-1]
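
A quick shape check (optional, not part of the original script) confirms that the features line up with the labels; dim and length are base R:

#Optional sanity check: expect 1000 reviews and 784 term features
dim(Features)
length(Label)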

RECODE LABELS FOR ONE-VS-ALL

The raw review scores are collapsed into four classes: scores 9-10 become class 5, 7-8 class 4, 5-6 class 3, and everything below 5 class 2. For each class a binary indicator (1 = review belongs to the class, 0 = otherwise) is built, so that one SVM per class can be trained one-vs-all.

#Class 2: scores 3-4
Label2 <- as.factor(ifelse(Label %in% c(3, 4), 1, 0))

#Class 3: scores 5-6
Label3 <- as.factor(ifelse(Label %in% c(5, 6), 1, 0))

#Class 4: scores 7-8
Label4 <- as.factor(ifelse(Label %in% c(7, 8), 1, 0))

#Class 5: scores 9-10
Label5 <- as.factor(ifelse(Label %in% c(9, 10), 1, 0))

#All classes combined: collapse the raw scores to the class labels 2-5
All <- ifelse(Label %in% c(9, 10), 5,
       ifelse(Label %in% c(7, 8), 4,
       ifelse(Label %in% c(5, 6), 3, 2)))
All <- as.factor(All)
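
A cross-tabulation of raw scores against the collapsed classes (an optional check, not in the original run) makes the recoding easy to verify:

#Optional check: every raw score should map to exactly one class
table(Label, All)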

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Convert all 784 integer term counts to numeric, as svm() expects numeric inputs
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  784 variables:
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Random 80/20 split of the features into training and test sets
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

The label vectors are split with the same indices.

train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]

train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]

train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]

train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]

train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
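
The realised split proportions and the class balance in each partition can be inspected with base R; a minimal optional check:

#Optional check: ~80/20 split and class balance per partition
prop.table(table(ind))
table(train.labels)
table(test.labels)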

SVM MODEL

#Train one binary SVM per class on the training features, with the class
#indicator as target (e1071 defaults: radial kernel, cost = 1)
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

#Score the test set; probability = TRUE attaches class probabilities
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

#Extract the probability matrices
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")

VOTING

#Combine the four probability matrices; note that the positive-class ("1")
#column comes first for SVM2 but second for SVM3, SVM4 and SVM5
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
colnames(Voting.df) <- c("Class 2: 1","Class 2: 0","Class 3: 0","Class 3: 1","Class 4: 0","Class 4: 1","Class 5: 0","Class 5: 1")

head(Voting.df)
##    Class 2: 1 Class 2: 0 Class 3: 0 Class 3: 1 Class 4: 0 Class 4: 1
## 5  0.06999499 0.9300050  0.6367846 0.36321541  0.7164152 0.2835848
## 14 0.01165801 0.9883420  0.9313712 0.06862882  0.7138358 0.2861642
## 16 0.01419389 0.9858061  0.8987670 0.10123295  0.7956412 0.2043588
## 26 0.02217467 0.9778253  0.8770113 0.12298871  0.6477620 0.3522380
## 28 0.03409561 0.9659044  0.8614346 0.13856539  0.7020682 0.2979318
## 29 0.01374107 0.9862589  0.8554391 0.14456091  0.6069858 0.3930142
##    Class 5: 0  Class 5: 1
## 5   0.9228503 0.07714967
## 14  0.3517208 0.64827916
## 16  0.5643600 0.43564001
## 26  0.6652175 0.33478249
## 28  0.4002016 0.59979844
## 29  0.6346555 0.36534451
#Keep only the positive-class ("1") column of each model, selected by position
SEQ <- c(1, 4, 6, 8)
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##             2          3         4          5
## 5  0.06999499 0.36321541 0.2835848 0.07714967
## 14 0.01165801 0.06862882 0.2861642 0.64827916
## 16 0.01419389 0.10123295 0.2043588 0.43564001
## 26 0.02217467 0.12298871 0.3522380 0.33478249
## 28 0.03409561 0.13856539 0.2979318 0.59979844
## 29 0.01374107 0.14456091 0.3930142 0.36534451
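Because the positive-class column sits in a different position for SVM2 than for the other three models, positional selection with SEQ is fragile. An equivalent, name-based construction (a sketch using a hypothetical variable Transformed.Voting.byname, assuming the probability columns are named "0" and "1" as above) avoids that:

#Safer alternative: pick the positive-class ("1") column of each model by name
Transformed.Voting.byname <- data.frame(
  "2" = Prob2[, "1"],
  "3" = Prob3[, "1"],
  "4" = Prob4[, "1"],
  "5" = Prob5[, "1"],
  check.names = FALSE
)
#Should match the positional selection above
all.equal(Transformed.Voting.byname, Transformed.Voting.df)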
Evaluation <- Transformed.Voting.df
#Vote for the class with the highest probability; which.max returns a column
#position 1-4, so add 1 to map back to the class labels 2-5
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max))
Index <- Index + 1
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, 100)
##               2          3         4          5 Vote Actual
## 5   0.069994992 0.36321541 0.2835848 0.07714967    3      4
## 14  0.011658015 0.06862882 0.2861642 0.64827916    5      5
## 16  0.014193890 0.10123295 0.2043588 0.43564001    5      5
## 26  0.022174671 0.12298871 0.3522380 0.33478249    4      4
## 28  0.034095607 0.13856539 0.2979318 0.59979844    5      4
## 29  0.013741067 0.14456091 0.3930142 0.36534451    4      4
## 39  0.204126478 0.16471406 0.3167844 0.10371014    4      5
## 40  0.006050409 0.31515901 0.5596942 0.31466461    4      3
##   [output truncated: 92 further rows omitted]
#Confusion matrix: rows hold the actual classes, columns the predicted votes
CM <- table(Evaluation$Actual, Evaluation$Vote)
CM
##    
##      2  3  4  5
##   2  0  0  1  6
##   3  2  0 11 12
##   4  1  1 21 39
##   5  0  0 16 98
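As a cross-check (assuming the caret package is available; the original script does not use it), confusionMatrix reproduces this table together with per-class sensitivity and positive predictive value:

#Optional cross-check with caret (assumed installed)
library(caret)
confusionMatrix(factor(Evaluation$Vote, levels = 2:5),
                factor(Evaluation$Actual, levels = 2:5))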
#Class frequencies in the test set (used as weights below)
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2)
Length3 <- sum(Evaluation$Actual == 3)
Length4 <- sum(Evaluation$Actual == 4)
Length5 <- sum(Evaluation$Actual == 5)


#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)

#Precision per class is TP / predicted class size (column sums, since the
#columns of CM hold the votes), averaged with class-frequency weights
Precision <- diag(CM)/colSums(CM)
Precision <- unname(Precision[1]*Length2 + Precision[2]*Length3 + Precision[3]*Length4 + Precision[4]*Length5)/Overall

#Recall per class is TP / actual class size (row sums); the class-frequency
#weighted average of recall reduces to overall accuracy
Recall <- diag(CM)/rowSums(CM)
Recall <- unname(Recall[1]*Length2 + Recall[2]*Length3 + Recall[3]*Length4 + Recall[4]*Length5)/Overall


Accuracy
## [1] 0.5721154
Precision
## [1] 0.4742733
Recall
## [1] 0.5721154
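
The same confusion matrix also yields macro-averaged scores, which weight every class equally instead of by frequency; a minimal sketch:

#Hypothetical extension: macro-averaged precision, recall and F1
prec <- diag(CM)/colSums(CM)
rec <- diag(CM)/rowSums(CM)
f1 <- 2*prec*rec/(prec + rec)
c(MacroPrecision = mean(prec), MacroRecall = mean(rec), MacroF1 = mean(f1, na.rm = TRUE))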