PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/90")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

# Load the labelled source workbook and keep its `Score` column as a plain
# vector; the recoding section below works on `Label`.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)

Label <- Labels[["Score"]]

Import the TF feature set with a 90th percentile cut-off.

# Read the feature table from disk.
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/90/Feature Set 1 90th Percentile.csv"
)

# Discard the first column (presumably an exported row index -- confirm
# against the CSV); every remaining column is used as a feature.
Features <- Features[, -1, drop = FALSE]

RECODE LABELS FOR ONE-VS-ALL

# Recode the raw scores into the targets used by the one-vs-all classifiers.
# The recoding is vectorised over the whole of `Label`, so it no longer
# depends on the data having exactly 1000 rows.

# A missing score cannot be assigned to a class; fail loudly instead of
# silently filing it under a class.
stopifnot(!anyNA(Label))

# Build a binary one-vs-all target.
#   scores   : vector of raw scores.
#   positive : the raw score values that belong to the class of interest.
# Returns a factor with value 1 where `scores` is one of `positive`, else 0.
recode_one_vs_all <- function(scores, positive) {
  as.factor(as.numeric(scores %in% positive))
}

#Class 2 (scores 3 and 4)
Label2 <- recode_one_vs_all(Label, c(3, 4))
#Class 3 (scores 5 and 6)
Label3 <- recode_one_vs_all(Label, c(5, 6))
#Class 4 (scores 7 and 8)
Label4 <- recode_one_vs_all(Label, c(7, 8))
#Class 5 (scores 9 and 10)
Label5 <- recode_one_vs_all(Label, c(9, 10))

#All Labels
# Multi-class target used for evaluation. Every score outside 5-10 falls into
# class 2.
# NOTE(review): Label2 only flags scores 3 and 4, whereas class 2 here also
# absorbs any other score below 5 -- confirm that no such scores occur.
All <- as.factor(
  ifelse(
    Label %in% c(9, 10), 5,
    ifelse(
      Label %in% c(7, 8), 4,
      ifelse(Label %in% c(5, 6), 3, 2)
    )
  )
)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform Integer to Numeric
# Coerce every feature column to numeric. Assigning into `Features[]` keeps
# the data.frame structure and column names, and covers however many columns
# the feature file contains rather than a fixed count.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  264 variables:
##  $ access     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ also       : num  0 0 0 3 0 0 1 0 1 0 ...
##  $ although   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ amsterdam  : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ area       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ around     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arriv      : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ ask        : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ avail      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ away       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ back       : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ bad        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ bar        : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ bath       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathroom   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beauti     : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ bed        : num  0 0 0 2 0 0 1 0 0 1 ...
##  $ bedroom    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ better     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ big        : num  2 0 0 0 0 0 0 0 0 2 ...
##  $ bit        : num  0 2 1 0 0 0 0 0 0 0 ...
##  $ book       : num  6 0 0 0 3 0 0 0 0 0 ...
##  $ breakfast  : num  0 0 1 0 0 0 0 1 0 1 ...
##  $ buffet     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ build      : num  0 0 0 2 1 0 0 0 0 0 ...
##  $ busi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ but        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ can        : num  3 1 0 0 0 0 0 0 0 0 ...
##  $ center     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ centr      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ central    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ chang      : num  2 0 0 0 0 0 2 0 0 0 ...
##  $ charg      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ check      : num  2 1 0 0 0 0 0 0 1 0 ...
##  $ choic      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ citi       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ clean      : num  0 0 0 2 0 0 1 0 1 0 ...
##  $ close      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ coff       : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ cold       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ come       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ comfi      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ comfort    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ complet    : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ condit     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ construct  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ conveni    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cool       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ couldn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ court      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ day        : num  3 0 0 1 0 0 0 0 0 0 ...
##  $ decor      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ definit    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ design     : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ desk       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ didn       : num  0 0 0 0 1 0 1 0 0 0 ...
##  $ differ     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ direct     : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ dirti      : num  0 0 0 3 0 0 0 0 1 0 ...
##  $ don        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ door       : num  0 0 0 2 0 0 0 0 0 0 ...
##  $ doubl      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ drink      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ due        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ earl       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ easi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ english    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enough     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ especi     : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ etc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ even       : num  2 1 0 0 0 0 0 0 1 1 ...
##  $ everi      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ everyth    : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ excel      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ expect     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expen      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ experi     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extra      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extrem     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ facil      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fantast    : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ far        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ feel       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ find       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ first      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ floor      : num  1 0 0 3 0 0 1 0 0 0 ...
##  $ food       : num  0 1 0 0 0 0 0 1 0 1 ...
##  $ free       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fresh      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ friend     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ front      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ garden     : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ get        : num  1 0 0 0 2 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Randomly assign each row to partition 1 (training, probability 0.8) or
# partition 2 (validation, probability 0.2). The seed makes the split
# reproducible; `ind` is reused below to split the labels the same way.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same partition indicator that was used
# for the feature rows, so features and labels stay aligned.
train.rows <- ind == 1
test.rows <- ind == 2

# Binary one-vs-all targets.
train.labels.2 <- Label2[train.rows]
test.labels.2 <- Label2[test.rows]

train.labels.3 <- Label3[train.rows]
test.labels.3 <- Label3[test.rows]

train.labels.4 <- Label4[train.rows]
test.labels.4 <- Label4[test.rows]

train.labels.5 <- Label5[train.rows]
test.labels.5 <- Label5[test.rows]

# Multi-class target used for the final evaluation.
train.labels <- All[train.rows]
test.labels <- All[test.rows]

SVM MODEL

# Fit one binary SVM per class (one-vs-all). Each training frame is the shared
# feature frame plus that class's 0/1 target in a `Score` column. Features are
# passed unscaled, and probability = TRUE makes the fitted model able to
# return class probabilities at prediction time.

#SVM2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

#SVM3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

#SVM4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

#SVM5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Score the validation rows with each model and pull out the matrix of class
# probabilities that predict() attaches as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")

VOTING

Use probabilities as an input for the voting procedure. Choose the class with the highest probability.

# Put the probability matrices of the four classifiers side by side
# (two columns per classifier).
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)

# The descriptive labels assigned below hard-code which column of each matrix
# holds class "1" and which holds class "0", and that order is not the same
# for every model (class 2 has "1" first, the others have "0" first). Verify
# the assumption so that a different column order stops the script instead of
# silently mislabelling the probabilities.
stopifnot(
  identical(colnames(Prob2), c("1", "0")),
  identical(colnames(Prob3), c("0", "1")),
  identical(colnames(Prob4), c("0", "1")),
  identical(colnames(Prob5), c("0", "1"))
)
colnames(Voting.df) <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")

head(Voting.df)
##     Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.113170438 0.8868296  0.7480734 0.25192659  0.7457255 0.2542745
## 14 0.009789896 0.9902101  0.9167770 0.08322297  0.6781690 0.3218310
## 16 0.014183265 0.9858167  0.8991730 0.10082698  0.7964142 0.2035858
## 26 0.012109176 0.9878908  0.8423571 0.15764290  0.5995988 0.4004012
## 28 0.031180622 0.9688194  0.8621126 0.13788744  0.6611635 0.3388365
## 29 0.010007803 0.9899922  0.8725593 0.12744072  0.5696900 0.4303100
##    Class 5: 0  Class5: 1
## 5   0.9245541 0.07544593
## 14  0.2875220 0.71247804
## 16  0.3435467 0.65645333
## 26  0.6086922 0.39130777
## 28  0.4586563 0.54134366
## 29  0.6652376 0.33476237
# Keep, for every classifier, only the probability of its positive class
# (the column named "1" in that classifier's probability matrix).
# The positions are derived from the column names rather than hard-coded,
# because the order of the "0"/"1" columns differs between the models.
prob.matrices <- list(Prob2, Prob3, Prob4, Prob5)
positive.pos <- vapply(
  prob.matrices,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Every classifier must expose a positive-class column.
stopifnot(!anyNA(positive.pos))

# Voting.df holds the matrices side by side, so the position of a column in
# Voting.df is its position within its own matrix plus the total width of the
# matrices before it.
prob.widths <- vapply(prob.matrices, ncol, integer(1))
SEQ <- cumsum(c(0L, head(prob.widths, -1))) + positive.pos

Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##              2          3         4          5
## 5  0.113170438 0.25192659 0.2542745 0.07544593
## 14 0.009789896 0.08322297 0.3218310 0.71247804
## 16 0.014183265 0.10082698 0.2035858 0.65645333
## 26 0.012109176 0.15764290 0.4004012 0.39130777
## 28 0.031180622 0.13788744 0.3388365 0.54134366
## 29 0.010007803 0.12744072 0.4303100 0.33476237
# Voting: for every validation row, pick the classifier that reported the
# highest positive-class probability.
Evaluation <- Transformed.Voting.df

# which.max gives the winning column position (1-4); the columns stand for
# classes 2-5, so adding 1 turns the position into the class label.
winning.column <- apply(Transformed.Voting.df, MARGIN = 1, FUN = which.max)
Index <- as.numeric(winning.column) + 1

# Store the vote next to the true class for comparison.
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.113170438 0.25192659 0.2542745 0.07544593    4      4
## 14  0.009789896 0.08322297 0.3218310 0.71247804    5      5
## 16  0.014183265 0.10082698 0.2035858 0.65645333    5      5
## 26  0.012109176 0.15764290 0.4004012 0.39130777    4      4
## 28  0.031180622 0.13788744 0.3388365 0.54134366    5      4
## 29  0.010007803 0.12744072 0.4303100 0.33476237    4      4
## 39  0.241878932 0.16370742 0.2514697 0.33935773    5      5
## 40  0.008413410 0.22860597 0.4822876 0.28289385    4      3
## 60  0.020625701 0.05202337 0.2816641 0.75723124    5      5
## 61  0.148797129 0.20908105 0.4049671 0.32066732    4      3
## 72  0.009606560 0.09833616 0.3072906 0.67423842    5      4
## 81  0.016474651 0.13002225 0.3171480 0.47217314    5      3
## 86  0.020221420 0.10268421 0.2646464 0.43438540    5      5
## 90  0.062906031 0.13213743 0.2886418 0.30947576    5      4
## 92  0.024602670 0.13026500 0.3072023 0.21198057    4      4
## 113 0.031654863 0.09287719 0.3431116 0.50615358    5      5
## 116 0.042153127 0.10735871 0.2790056 0.31159952    5      4
## 117 0.028630537 0.10907168 0.3070955 0.27114281    4      5
## 122 0.068393950 0.12206120 0.3092223 0.11286104    4      4
## 123 0.011468180 0.08641007 0.3575354 0.34142198    4      2
## 124 0.015010984 0.10840569 0.3493938 0.29558940    4      4
## 131 0.009038744 0.13246864 0.3008578 0.46047391    5      4
## 135 0.452759356 0.21973388 0.3167478 0.12134530    2      3
## 137 0.011282924 0.06748215 0.3925076 0.36122185    4      5
## 140 0.020631395 0.12358237 0.3590458 0.30861344    4      4
## 142 0.024182118 0.07902405 0.3105592 0.57138189    5      5
## 149 0.030160163 0.07972370 0.3622406 0.60852716    5      4
## 154 0.024518130 0.13484791 0.2495132 0.57600593    5      5
## 156 0.080739620 0.09202794 0.4142885 0.28465041    4      3
## 158 0.433178373 0.14977337 0.4325710 0.07189432    2      3
## 169 0.012623473 0.07050800 0.2841396 0.71958150    5      5
## 185 0.015162617 0.09015942 0.2732526 0.60545167    5      5
## 187 0.007704957 0.10424796 0.3831986 0.47831657    5      5
## 192 0.018325512 0.15515682 0.4743232 0.10678654    4      3
## 194 0.020626050 0.12273361 0.3678547 0.35765882    4      4
## 195 0.033396145 0.12581853 0.2675053 0.43846311    5      4
## 196 0.129157939 0.10983130 0.4577573 0.11550438    4      5
## 197 0.381285291 0.11557996 0.3302205 0.06434016    2      3
## 199 0.006512442 0.08893339 0.2632540 0.86513243    5      5
## 210 0.145519938 0.14282460 0.3587123 0.08553419    4      3
## 216 0.022006846 0.07063608 0.1712971 0.91089538    5      5
## 220 0.017269804 0.26537101 0.3181311 0.21492135    4      4
## 227 0.494397574 0.04494076 0.4645560 0.18116299    2      5
## 234 0.022859216 0.13397300 0.4438000 0.29441186    4      3
## 240 0.033141908 0.09300550 0.4083179 0.30403149    4      5
## 245 0.038768787 0.12599070 0.2954860 0.44598502    5      4
## 249 0.025324578 0.15083695 0.2925160 0.44866005    5      5
## 261 0.022987774 0.11374161 0.3197474 0.44463004    5      3
## 277 0.013567376 0.08102328 0.2581137 0.89502496    5      5
## 283 0.013831409 0.10775923 0.3150257 0.50926936    5      5
## 290 0.016213489 0.08664562 0.2300812 0.84455978    5      4
## 293 0.017168325 0.08964044 0.3410672 0.34258825    5      5
## 302 0.010147233 0.12932714 0.3481176 0.26566368    4      4
## 305 0.027018739 0.10525669 0.3366461 0.55358451    5      4
## 308 0.042925792 0.13745095 0.2364864 0.47187317    5      4
## 311 0.013526588 0.07941031 0.2590556 0.73686889    5      5
## 320 0.022271543 0.09163739 0.2684940 0.75455200    5      2
## 322 0.028245289 0.07188773 0.2041613 0.87865609    5      5
## 330 0.020565693 0.08958694 0.2312469 0.81797459    5      4
## 332 0.021862063 0.11886413 0.3973145 0.23689155    4      4
## 333 0.021581336 0.09070330 0.2918220 0.78347610    5      5
## 339 0.016724466 0.10029994 0.2869407 0.64812160    5      5
## 341 0.027313866 0.11330489 0.4017760 0.28844971    4      4
## 344 0.026414252 0.06906322 0.3021777 0.75393508    5      5
## 349 0.015654505 0.07925264 0.1920992 0.86555628    5      5
## 355 0.020589640 0.07002848 0.2567093 0.89738862    5      5
## 356 0.026246080 0.10743559 0.2915719 0.65047992    5      3
## 365 0.013743625 0.14889443 0.2702726 0.43545542    5      3
## 366 0.014513083 0.12168403 0.2964476 0.40531340    5      4
## 369 0.010952813 0.08613811 0.3594500 0.31214071    4      4
## 371 0.018050049 0.09345333 0.2407280 0.78560197    5      5
## 373 0.018935662 0.08494225 0.2980909 0.75649622    5      5
## 389 0.033374281 0.10787750 0.3053841 0.45457241    5      2
## 390 0.037161486 0.12821473 0.2867049 0.45684491    5      4
## 396 0.018031342 0.07608178 0.3414594 0.46605596    5      4
## 412 0.005086065 0.11252881 0.3593600 0.55850997    5      5
## 413 0.047278862 0.08495538 0.3063810 0.24506359    4      3
## 415 0.015473537 0.11593695 0.3344147 0.52197022    5      4
## 422 0.056486658 0.13706301 0.3612162 0.53147341    5      5
## 425 0.020580467 0.07798217 0.2357031 0.93184994    5      5
## 434 0.013551128 0.06425978 0.3613202 0.35546680    4      5
## 438 0.014087369 0.09607202 0.2906828 0.75888035    5      4
## 441 0.329202120 0.07088744 0.4606228 0.21299261    4      5
## 442 0.024840436 0.08230166 0.2127657 0.80653158    5      5
## 445 0.017717562 0.10060897 0.3969824 0.36253722    4      5
## 447 0.046689793 0.12062728 0.4006074 0.13257374    4      3
## 453 0.018985587 0.15417332 0.4174495 0.36843391    4      4
## 454 0.079529953 0.11668334 0.3151638 0.14303737    4      5
## 462 0.008178182 0.05946554 0.2605300 0.83865248    5      5
## 474 0.037370727 0.09336980 0.3304422 0.52150907    5      3
## 476 0.020974643 0.12691815 0.3055403 0.40531458    5      3
## 493 0.012767754 0.10405709 0.3207434 0.52028446    5      5
## 502 0.019181168 0.18616267 0.4061576 0.28327543    4      4
## 503 0.084353560 0.05107822 0.3692054 0.58035082    5      5
## 506 0.014049317 0.08983796 0.2918080 0.68355532    5      5
## 508 0.017080589 0.06936369 0.3201356 0.83423243    5      5
## 512 0.043999105 0.09309036 0.2703433 0.73097937    5      5
## 513 0.012821053 0.14623361 0.2495338 0.44466364    5      5
## 521 0.067495545 0.06885453 0.3291777 0.69127087    5      2
## 524 0.030849169 0.10805180 0.2912233 0.29626207    5      5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# Both dimensions are forced onto the same set of class levels so the table is
# always square. Otherwise a class that never receives a vote has no column,
# and diag(CM) no longer pairs each class with itself.
class.levels <- sort(unique(c(
  as.character(Evaluation$Actual),
  as.character(Evaluation$Vote)
)))
CM <- table(
  factor(Evaluation$Actual, levels = class.levels),
  factor(Evaluation$Vote, levels = class.levels)
)
CM
##    
##       2   4   5
##   2   0   2   5
##   3   3  10  12
##   4   1  27  34
##   5   1  12 101
#Proportions
# Number of validation rows overall and per actual class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# Build a square actual-by-vote table for the metrics. Using the same class
# levels on both dimensions guarantees that the diagonal holds the correctly
# classified rows of each class, even when a class never receives a vote.
actual.chr <- as.character(Evaluation$Actual)
vote.chr <- as.character(Evaluation$Vote)
metric.classes <- sort(unique(c(actual.chr, vote.chr)))
metric.cm <- table(
  factor(actual.chr, levels = metric.classes),
  factor(vote.chr, levels = metric.classes)
)
true.pos <- diag(metric.cm)        # correctly classified rows per class
support <- rowSums(metric.cm)      # rows that actually belong to each class
predicted <- colSums(metric.cm)    # rows that were voted into each class

#Accuracy
Accuracy <- sum(true.pos) / sum(metric.cm)

#Precision
# Rows are actual and columns are voted classes, so precision divides by the
# column totals. A class that was never voted has no defined precision and is
# counted as 0. The per-class values are averaged with the class supports as
# weights.
class.precision <- ifelse(predicted > 0, true.pos / predicted, 0)
Precision <- sum(class.precision * support) / sum(support)

#Recall
# Recall divides by the row totals (actual class sizes). With support weights
# the average is equal to the accuracy by construction.
class.recall <- ifelse(support > 0, true.pos / support, 0)
Recall <- sum(class.recall * support) / sum(support)


# Print the evaluation metrics.
# NOTE: the values recorded below come from a run in which the confusion
# matrix had four rows but only three columns (no row was voted class 3).
# diag() of that table does not pair each class with itself, the division by
# rowSums() recycled a shorter vector (see the warning above), and the NA in
# Recall comes from indexing a fourth element that does not exist. The
# recorded numbers should therefore not be read as the model's performance.
Accuracy
## [1] 0.2115385
Precision
##         2 
## 0.2115385
Recall
##  2 
## NA