PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/90")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import the actual labels (the Score column of the source data).

#Import Labels
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")

Label <- Labels$Score

Import the TP feature set with a 90th percentile cut-off.

#Import Features
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/90/Feature Set 1 90th TP.csv")

#Drop the first column (the row-index column written into the CSV)
Features <- Features[-1]

RECODE LABELS FOR ONE-VS-ALL

Bucket each score into one of four classes, two score points per class (3-4 → 2, 5-6 → 3, 7-8 → 4, 9-10 → 5), and build a binary 0/1 indicator per class for the one-vs-all SVMs, plus a single multi-class factor (All) for evaluation.

#Class 2
Label2 <- list()
for(i in 1:1000){
  if(Label[i]==3| Label[i]==4){
    Label2[i] <- 1
  }else{
    Label2[i] <- 0
  }
}
#As Factor
Label2 <- as.factor(unlist(Label2))
#Class 3
Label3 <- list()
for(i in 1:1000){
  if(Label[i]==5| Label[i]==6){
    Label3[i] <- 1
  }else{
    Label3[i] <- 0
  }
}
#As Factor
Label3 <- as.factor(unlist(Label3))
#Class 4
Label4 <- list()
for(i in 1:1000){
  if(Label[i]==7| Label[i]==8){
    Label4[i] <- 1
  }else{
    Label4[i] <- 0
  }
}
#As Factor
Label4 <- as.factor(unlist(Label4))
#Class 5
Label5 <- list()
for(i in 1:1000){
  if(Label[i]==9| Label[i]==10){
    Label5[i] <- 1
  }else{
    Label5[i] <- 0
  }
}
#As Factor
Label5 <- as.factor(unlist(Label5))
#All Labels
All <- list()
for(i in 1:1000){
  if(Label[i]==9| Label[i]==10){
    All[i] <- 5
  }else if(Label[i]==7| Label[i]==8){
    All[i] <- 4
  }else if(Label[i]==5| Label[i]==6){
    All[i] <- 3
  }else{
    All[i] <- 2
  }
}
#As Factor
All <- as.factor(unlist(All))
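
The five loops above can be collapsed into vectorized one-liners. A minimal sketch, equivalent under the assumption that Label holds integer scores on a 1-10 scale:

#Hypothetical vectorized equivalent of the recoding loops
Label2 <- as.factor(as.integer(Label %in% c(3, 4)))
Label3 <- as.factor(as.integer(Label %in% c(5, 6)))
Label4 <- as.factor(as.integer(Label %in% c(7, 8)))
Label5 <- as.factor(as.integer(Label %in% c(9, 10)))
#findInterval buckets scores <5, 5-6, 7-8 and 9-10 into classes 2-5, matching the else branch above
All <- as.factor(findInterval(Label, c(0, 5, 7, 9)) + 1)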

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform Integer to Numeric
for(i in 1:262){
  Features[,i] <- as.numeric(Features[,i])
}
str(Features)
## 'data.frame':    1000 obs. of  262 variables:
##  $ access     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ also       : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ although   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ amsterdam  : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ area       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ around     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arriv      : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ ask        : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ avail      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ away       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ back       : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ bad        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ bar        : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ bath       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathroom   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beauti     : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ bed        : num  0 0 0 1 0 0 1 0 0 1 ...
##  $ bedroom    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ better     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ big        : num  1 0 0 0 0 0 0 0 0 1 ...
##  $ bit        : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ book       : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ breakfast  : num  0 0 1 0 0 0 0 1 0 1 ...
##  $ buffet     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ build      : num  0 0 0 1 1 0 0 0 0 0 ...
##  $ busi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ but        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ can        : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ center     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ centr      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ central    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ chang      : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ charg      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ check      : num  1 1 0 0 0 0 0 0 1 0 ...
##  $ choic      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ citi       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ clean      : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ close      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ coff       : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ cold       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ come       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ comfi      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ comfort    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ complet    : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ condit     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ construct  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ conveni    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cool       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ couldn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ court      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ day        : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ decor      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ definit    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ design     : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ desk       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ didn       : num  0 0 0 0 1 0 1 0 0 0 ...
##  $ differ     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ direct     : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ don        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ door       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ doubl      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ drink      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ due        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ earl       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ easi       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ english    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enjoy      : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ enough     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ especi     : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ etc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ even       : num  1 1 0 0 0 0 0 0 1 1 ...
##  $ everi      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ everyth    : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ excel      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ except     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expect     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expen      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ experi     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extra      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extrem     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ facil      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fantast    : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ far        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ feel       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ find       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ first      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ floor      : num  1 0 0 1 0 0 1 0 0 0 ...
##  $ food       : num  0 1 0 0 0 0 0 1 0 1 ...
##  $ free       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fresh      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ friend     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ front      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ garden     : num  1 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
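
The column-wise loop can also be written without explicit indexing. An equivalent sketch:

#Hypothetical one-step conversion of every column to numeric
Features[] <- lapply(Features, as.numeric)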

PARTITIONING TRAINING & VALIDATION

Randomly assign roughly 80% of the observations to the training set and 20% to the test set.

#Features
set.seed(1234)
ind <- sample(2,nrow(Features),replace = T, prob =c(0.8,0.2))
train <- Features[ind == 1,]
test <- Features[ind ==2,]

Apply the same split to the labels.

train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind ==2]

train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind ==2]

train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind ==2]

train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind ==2]

train.labels <- All[ind == 1]
test.labels <- All[ind ==2]
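
Note that sample() does not stratify, so the class proportions in train and test can drift from those in the full data. A stratified alternative, sketched under the assumption that the caret package is installed (not used for the results below):

#Hypothetical stratified 80/20 split on the multi-class label
library(caret)
idx <- createDataPartition(All, p = 0.8, list = FALSE)
strat.train <- Features[idx, ]
strat.test <- Features[-idx, ]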

SVM MODEL

Fit one binary SVM per class on the training features (one-vs-all), with probability estimates enabled so the predictions can feed the voting step.

#SVM2
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score~.,data = train2,scale = FALSE,probability=TRUE)

train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score~.,data = train3,scale = FALSE,probability=TRUE)

train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score~.,data = train4,scale = FALSE,probability=TRUE)

train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score~.,data = train5,scale = FALSE,probability=TRUE)
P2 <- predict(SVM2,newdata = test,probability = TRUE)
P3 <- predict(SVM3,newdata = test,probability = TRUE)
P4 <- predict(SVM4,newdata = test,probability = TRUE)
P5 <- predict(SVM5,newdata = test,probability = TRUE)
Prob2 <- attr(P2,"probabilities")
Prob3 <- attr(P3,"probabilities")
Prob4 <- attr(P4,"probabilities")
Prob5 <- attr(P5,"probabilities")
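
The four fit/predict blocks differ only in the label vector. A compact refactor sketch (fit_ova is a hypothetical helper, not part of the original pipeline):

#Hypothetical helper: fit one one-vs-all SVM per binary label vector
fit_ova <- function(features, labels){
  d <- features
  d$Score <- labels
  svm(Score~., data = d, scale = FALSE, probability = TRUE)
}
models <- lapply(list(train.labels.2, train.labels.3, train.labels.4, train.labels.5),
                 function(l) fit_ova(train, l))
probs <- lapply(models, function(m)
  attr(predict(m, newdata = test, probability = TRUE), "probabilities"))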

VOTING

The predicted probabilities are the input for the voting procedure: for each test review, the class whose one-vs-all model assigns the highest positive-class probability wins the vote.

Voting.df <- data.frame(Prob2, Prob3,Prob4,Prob5)
#Column order follows attr(., "probabilities"): SVM2 happens to list its positive class first, the other models list it second
colnames(Voting.df) <- c("Class 2: 1","Class 2: 0","Class 3: 0","Class 3: 1","Class 4: 0","Class 4: 1","Class 5: 0","Class 5: 1")

head(Voting.df)
##     Class 2: 1 Class 2: 0 Class 3: 0 Class 3: 1 Class 4: 0 Class 4: 1
## 5  0.064345578 0.9356544  0.8824737 0.11752628  0.7478428 0.2521572
## 14 0.003140714 0.9968593  0.9067159 0.09328414  0.6059613 0.3940387
## 16 0.011047721 0.9889523  0.9147052 0.08529480  0.7694530 0.2305470
## 26 0.019067343 0.9809327  0.8693469 0.13065312  0.5985427 0.4014573
## 28 0.050646282 0.9493537  0.8605175 0.13948247  0.6567651 0.3432349
## 29 0.007679017 0.9923210  0.8491336 0.15086641  0.5143996 0.4856004
##    Class 5: 0 Class 5: 1
## 5   0.8217617 0.1782383
## 14  0.3497058 0.6502942
## 16  0.2345536 0.7654464
## 26  0.5807305 0.4192695
## 28  0.5852482 0.4147518
## 29  0.8479913 0.1520087
#Keep only the positive-class ("1") probability column of each model
SEQ <- c(1,4,6,8)
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##              2          3         4         5
## 5  0.064345578 0.11752628 0.2521572 0.1782383
## 14 0.003140714 0.09328414 0.3940387 0.6502942
## 16 0.011047721 0.08529480 0.2305470 0.7654464
## 26 0.019067343 0.13065312 0.4014573 0.4192695
## 28 0.050646282 0.13948247 0.3432349 0.4147518
## 29 0.007679017 0.15086641 0.4856004 0.1520087
Evaluation <- Transformed.Voting.df
#which.max gives the column (1-4) with the highest probability; +1 maps it to class 2-5
Index <- as.numeric(apply(Transformed.Voting.df,MARGIN = 1,which.max))
Index <- Index+1
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation,100)
##                2          3         4          5 Vote Actual
## 5   6.434558e-02 0.11752628 0.2521572 0.17823829    4      4
## 14  3.140714e-03 0.09328414 0.3940387 0.65029423    5      5
## 16  1.104772e-02 0.08529480 0.2305470 0.76544642    5      5
## 26  1.906734e-02 0.13065312 0.4014573 0.41926951    5      4
## 28  5.064628e-02 0.13948247 0.3432349 0.41475184    5      4
## 29  7.679017e-03 0.15086641 0.4856004 0.15200872    4      4
## 39  3.143766e-01 0.10624041 0.1843009 0.36686858    5      5
## 40  1.394859e-02 0.10914323 0.5797749 0.13861024    4      3
## 60  1.095387e-02 0.05171991 0.2437510 0.88509316    5      5
## 61  2.905198e-02 0.17082158 0.2728018 0.55248164    5      3
## 72  6.894445e-06 0.11041464 0.3082627 0.66388564    5      4
## 81  5.049476e-03 0.12709565 0.3438308 0.44249915    5      3
## 86  6.373419e-02 0.11663115 0.2640885 0.68405481    5      5
## 90  1.329941e-01 0.13455773 0.3276392 0.17031717    4      4
## 92  4.023990e-02 0.11258798 0.3407654 0.20589777    4      4
## 113 1.034119e-01 0.11530881 0.2952860 0.49180861    5      5
## 116 3.084220e-02 0.10590475 0.2668665 0.50000000    5      4
## 117 2.614230e-02 0.09476127 0.3351825 0.34843488    5      5
## 122 5.201759e-02 0.11894136 0.3385569 0.09462056    4      4
## 123 1.219155e-02 0.09563621 0.3474495 0.26415009    4      2
## 124 3.900060e-02 0.12276789 0.3533404 0.10816492    4      4
## 131 3.716960e-03 0.14622326 0.2734960 0.63056942    5      4
## 135 1.843947e-01 0.16108118 0.3525966 0.11057667    4      3
## 137 2.879472e-03 0.06591619 0.3895492 0.51954155    5      5
## 140 8.129223e-03 0.12141892 0.3935291 0.24600361    4      4
## 142 1.316361e-02 0.08724861 0.2805907 0.56726149    5      5
## 149 8.862847e-03 0.10356001 0.3508577 0.54595382    5      4
## 154 2.506550e-02 0.16517912 0.2250569 0.38857288    5      5
## 156 3.867297e-02 0.10739135 0.2959658 0.33238097    5      3
## 158 6.495675e-01 0.11681685 0.3880595 0.11792258    2      3
## 169 2.911855e-03 0.09841891 0.2224985 0.80030930    5      5
## 185 6.176478e-03 0.10892171 0.2172351 0.62406916    5      5
## 187 3.355239e-03 0.09790061 0.4650272 0.34700973    4      5
## 192 2.615147e-02 0.13008427 0.5568990 0.03821665    4      3
## 194 2.013992e-02 0.12510830 0.4286132 0.24193512    4      4
## 195 1.414909e-02 0.14887441 0.2923213 0.24660259    4      4
## 196 1.000022e-01 0.14211186 0.3923096 0.06986487    4      5
## 197 7.677740e-02 0.16776507 0.2416225 0.08935651    4      3
## 199 8.269897e-06 0.09295268 0.2870651 0.90346058    5      5
## 210 1.474080e-01 0.15381881 0.4213175 0.03284862    4      3
## 216 1.969575e-02 0.08161874 0.1403844 0.94722109    5      5
## 220 1.396573e-02 0.24489623 0.2842126 0.14003148    4      4
## 227 1.752587e-01 0.06167314 0.4398098 0.17356627    4      5
## 234 4.070017e-02 0.11956006 0.4619118 0.21754467    4      3
## 240 3.006334e-02 0.09350269 0.4343373 0.23347643    4      5
## 245 7.573167e-02 0.12694671 0.3135291 0.32239918    5      4
## 249 1.464371e-02 0.14163176 0.3061407 0.40469376    5      5
## 261 2.601235e-02 0.12091497 0.3411455 0.35523177    5      3
## 277 7.665763e-03 0.08815058 0.2487138 0.93732754    5      5
## 283 9.536890e-03 0.10280992 0.2875084 0.72243735    5      5
## 290 6.074725e-03 0.09039409 0.2147304 0.90230944    5      4
## 293 1.417258e-02 0.09211249 0.3416283 0.23891072    4      5
## 302 5.827514e-03 0.12726795 0.3075887 0.39753419    5      4
## 305 1.683449e-02 0.10902146 0.3913637 0.51949656    5      4
## 308 2.554649e-02 0.14778721 0.2229979 0.34842207    5      4
## 311 7.662605e-03 0.08679717 0.2473303 0.83505517    5      5
## 320 9.938439e-03 0.09937463 0.2552621 0.80551473    5      2
## 322 1.577391e-02 0.08202979 0.1842364 0.92055182    5      5
## 330 1.238635e-02 0.10288393 0.2092315 0.84773179    5      4
## 332 5.826713e-02 0.11134454 0.4241504 0.16970175    4      4
## 333 1.868893e-02 0.09863607 0.2898077 0.82328639    5      5
## 339 6.146743e-03 0.10102642 0.2928187 0.69963390    5      5
## 341 3.023034e-02 0.10555934 0.4653307 0.16461911    4      4
## 344 2.909234e-02 0.07930089 0.2970706 0.77080424    5      5
## 349 7.122446e-03 0.08665113 0.1602857 0.92872943    5      5
## 355 9.334903e-03 0.07264405 0.2303190 0.94986307    5      5
## 356 1.815696e-02 0.10093731 0.2890656 0.67908958    5      3
## 365 1.079369e-02 0.14358711 0.2940487 0.35424798    5      3
## 366 5.178081e-03 0.13157487 0.3353260 0.49062049    5      4
## 369 5.546381e-03 0.08892608 0.4041705 0.42344980    5      4
## 371 8.071242e-03 0.10077718 0.2208245 0.87411148    5      5
## 373 1.613584e-02 0.08775244 0.2733257 0.81306648    5      5
## 389 3.509779e-02 0.11018153 0.3173099 0.37742754    5      2
## 390 3.512564e-02 0.13640850 0.2824723 0.36525984    5      4
## 396 7.293403e-03 0.08077494 0.3725835 0.36968643    4      4
## 412 3.993373e-03 0.10900422 0.4473064 0.40164896    4      5
## 413 7.651270e-03 0.08257739 0.3483247 0.42963666    5      3
## 415 8.432408e-03 0.12344330 0.3577052 0.49052940    5      4
## 422 1.685935e-02 0.12452610 0.3632660 0.40816459    5      5
## 425 1.265866e-02 0.08873525 0.2162444 0.96093354    5      5
## 434 7.660618e-03 0.07996161 0.2915099 0.54259669    5      5
## 438 4.472700e-03 0.10493778 0.2865419 0.77416456    5      4
## 441 3.969360e-02 0.10216197 0.3305820 0.33414174    5      5
## 442 1.089806e-02 0.08729440 0.1819261 0.81368704    5      5
## 445 1.228262e-02 0.09167825 0.3869560 0.54605313    5      5
## 447 2.023141e-02 0.12180284 0.4244067 0.14463667    4      3
## 453 4.451156e-02 0.13824693 0.3767113 0.28730308    4      4
## 454 2.169805e-01 0.09338169 0.2158372 0.23618541    5      5
## 462 4.982413e-03 0.06375749 0.2403289 0.88722064    5      5
## 474 6.214756e-02 0.07769308 0.3276354 0.46186681    5      3
## 476 1.327708e-02 0.13749798 0.3136843 0.31291309    4      3
## 493 1.463813e-02 0.12535188 0.2550065 0.52119612    5      5
## 502 4.336261e-02 0.21443593 0.4649346 0.19465123    4      4
## 503 2.180776e-02 0.05726446 0.2955252 0.42344585    5      5
## 506 7.924923e-03 0.09942034 0.2977777 0.65799909    5      5
## 508 9.584197e-03 0.07548826 0.3352238 0.84463529    5      5
## 512 2.768218e-02 0.09998153 0.2271349 0.72925542    5      5
## 513 5.601449e-02 0.12231975 0.2165934 0.44299130    5      5
## 521 2.075592e-01 0.08213464 0.3211687 0.56738059    5      2
## 524 3.742517e-02 0.11903718 0.3198005 0.30522678    4      5
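
On ties, which.max returns the first maximum, so equal probabilities favour the lower class. A vectorized equivalent (a sketch; base R's max.col defaults to random tie-breaking, so ties.method = "first" is needed to reproduce which.max):

#Hypothetical vectorized vote, identical to the apply/which.max version above
Index <- max.col(as.matrix(Transformed.Voting.df), ties.method = "first") + 1
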
CM <- table(Evaluation$Actual,Evaluation$Vote)
CM
##    
##       2   4   5
##   2   0   2   5
##   3   1  14  10
##   4   0  29  33
##   5   0  14 100
#Class supports (number of test cases per actual class)
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))


#Accuracy: correct votes over all votes (class 3 never receives a vote, so CM has no "3" column)
Accuracy <- mean(Evaluation$Vote == as.numeric(as.character(Evaluation$Actual)))

#Precision: per class, correct predictions over all predictions of that class (column sums of CM),
#weighted by class support; class 3 is skipped because it is never predicted
Col <- colSums(CM)
Precision2 <- CM[1,1]/Col[1]
Precision4 <- CM[3,2]/Col[2]
Precision5 <- CM[4,3]/Col[3]

Precision <- (Precision2*Length2+Precision4*Length4+Precision5*Length5)/Overall

#Recall: per class, correct predictions over all actual cases of that class (row sums of CM),
#weighted by class support
Rows <- rowSums(CM)
Recall2 <- CM[1,1]/Rows[1]
Recall4 <- CM[3,2]/Rows[3]
Recall5 <- CM[4,3]/Rows[4]

Recall <- (Recall2*Length2+Recall4*Length4+Recall5*Length5)/Overall
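
The hard-coded cell indices above are tied to this particular confusion matrix. A more general sketch that works for any table(Actual, Vote) by matching the class labels present in both margins:

#Hypothetical general form: per-class precision and recall by class label
classes <- intersect(rownames(CM), colnames(CM))
prec <- sapply(classes, function(k) CM[k, k] / sum(CM[, k]))
rec <- sapply(classes, function(k) CM[k, k] / sum(CM[k, ]))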


Accuracy
## [1] 0.6201923
Precision
##         2 
## 0.5168346
Recall
##         2 
## 0.6201923