PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/70")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

# Read the spreadsheet that holds the ground-truth scores.
labels.file <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
Labels <- read_excel(path = labels.file)

# Keep only the Score column; the one-vs-all recoding below works on it.
Label <- Labels$Score

Import the TP feature set with a 70th percentile cut-off.

# Read the feature matrix (TP, 70th percentile cut-off) and drop its first
# column in the same step.
# NOTE(review): the first column is presumably a row index written on export
# -- confirm against the CSV.
features.file <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/70/Feature Set 1 70th TP.csv"
Features <- read.csv(file = features.file)[-1]

RECODE LABELS FOR ONE-VS-ALL

#Class 2
# One-vs-all target for class 2: 1 where the raw score is 3 or 4, else 0.
# Vectorised over all of `Label`, so the row count is no longer hard-coded
# to 1000, and %in% maps a missing score to 0 instead of aborting an `if`.
#As Factor
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
#Class 3
# One-vs-all target for class 3: 1 where the raw score is 5 or 6, else 0.
# Vectorised over all of `Label`, so the row count is no longer hard-coded
# to 1000, and %in% maps a missing score to 0 instead of aborting an `if`.
#As Factor
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
#Class 4
# One-vs-all target for class 4: 1 where the raw score is 7 or 8, else 0.
# Vectorised over all of `Label`, so the row count is no longer hard-coded
# to 1000, and %in% maps a missing score to 0 instead of aborting an `if`.
#As Factor
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
#Class 5
# One-vs-all target for class 5: 1 where the raw score is 9 or 10, else 0.
# Vectorised over all of `Label`, so the row count is no longer hard-coded
# to 1000, and %in% maps a missing score to 0 instead of aborting an `if`.
#As Factor
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))
#All Labels
# Multi-class target: scores 9-10 -> 5, 7-8 -> 4, 5-6 -> 3, and every other
# score falls through to class 2 (same catch-all as the original else-branch).
# Built with vectorised assignments so it covers every element of `Label`
# instead of a hard-coded 1:1000 range.
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform every feature column to numeric
# `Features[] <-` keeps the data.frame structure while replacing each column,
# so the conversion no longer depends on a hard-coded column count of 761.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  761 variables:
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ air          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ also         : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ although     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ amen         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ amsterdam    : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ annoy        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon        : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anyth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart        : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ appear       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arena        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arriv        : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ ask          : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ aspect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ averag       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awar         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ away         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awesom       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ back         : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ bacon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bad          : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ bag          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bang         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bank         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bar          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ bare         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basement     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basic        : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ bath         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathroom     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beauti       : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ bed          : num  0 0 0 1 0 0 1 0 0 1 ...
##  $ bedroom      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beer         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ believ       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best         : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ better       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ big          : num  1 0 0 0 0 0 0 0 0 1 ...
##  $ bigger       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bike         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ birthday     : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ bit          : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ black        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ block        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ board        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bonus        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ book         : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ boutiqu      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ box          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bread        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ breakfast    : num  0 0 1 0 0 0 0 1 0 1 ...
##  $ bright       : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ brilliant    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ broken       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ brought      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ buffet       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ build        : num  0 0 0 1 1 0 0 0 0 0 ...
##  $ busi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ but          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ buy          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cafe         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ call         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ came         : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ can          : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ car          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ card         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ care         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carpet       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ case         : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Randomly assign each row to partition 1 (training, p = 0.8) or
# partition 2 (validation, p = 0.2). The seed makes the split reproducible.
set.seed(1234)
# TRUE instead of T: `T` is an ordinary variable that can be reassigned.
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same partition index `ind` used for the
# feature rows (1 = training, 2 = validation).
in.train <- ind == 1
in.test <- ind == 2

# One-vs-all targets, training part.
train.labels.2 <- Label2[in.train]
train.labels.3 <- Label3[in.train]
train.labels.4 <- Label4[in.train]
train.labels.5 <- Label5[in.train]

# One-vs-all targets, validation part.
test.labels.2 <- Label2[in.test]
test.labels.3 <- Label3[in.test]
test.labels.4 <- Label4[in.test]
test.labels.5 <- Label5[in.test]

# Multi-class target (classes 2..5).
train.labels <- All[in.train]
test.labels <- All[in.test]

SVM MODEL

# One binary SVM per class (one-vs-all). For each class: attach that class's
# 0/1 target to the training features as `Score`, fit with probability
# estimates enabled, predict on the validation features, and pull the
# per-row probability matrix from the prediction's "probabilities" attribute.

#SVM2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

#SVM3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

#SVM4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

#SVM5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")

VOTING

Use each one-vs-all model's probability for its own (positive) class as the input to the voting procedure. For every review, the class whose model reports the highest probability is chosen.

# Put the probability matrices of the four one-vs-all models side by side
# (two columns per model), then label each column by model and outcome.
# NOTE(review): the labels assume the column order each model happened to
# return in this run -- confirm against colnames(Prob2) ... colnames(Prob5).
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
names(Voting.df) <- c(
  "Class 2: 1", "Class2: 0",
  "Class 3: 0", "Class3: 1",
  "Class 4: 0", "Class4: 1",
  "Class 5: 0", "Class5: 1"
)

head(Voting.df, n = 6)
##     Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.050198745 0.9498013  0.7912596 0.20874038  0.6506548 0.3493452
## 14 0.006137467 0.9938625  0.9262031 0.07379692  0.6999023 0.3000977
## 16 0.011076573 0.9889234  0.9279990 0.07200104  0.7930228 0.2069772
## 26 0.053168843 0.9468312  0.9270521 0.07294790  0.6098518 0.3901482
## 28 0.050742577 0.9492574  0.8603031 0.13969686  0.6802491 0.3197509
## 29 0.021487153 0.9785128  0.8334554 0.16654460  0.5869645 0.4130355
##    Class 5: 0  Class5: 1
## 5   0.9192999 0.08070014
## 14  0.3888130 0.61118703
## 16  0.3739708 0.62602924
## 26  0.7076255 0.29237448
## 28  0.6362310 0.36376898
## 29  0.8813972 0.11860281
# Keep only the positive-class ("1") probability of each one-vs-all model.
# The position of the "1" column inside each probability matrix depends on
# the order in which the labels occur in the training data, so it is looked
# up by column name instead of being hard-coded as c(1, 4, 6, 8).
Prob.list <- list(Prob2, Prob3, Prob4, Prob5)
Positive.col <- vapply(
  Prob.list,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Fail loudly if a model has no "1" column (e.g. class absent from training).
stopifnot(!anyNA(Positive.col))
# Offset of each model's first column inside Voting.df, which holds the
# matrices side by side in the order Prob2, Prob3, Prob4, Prob5.
Col.offset <- cumsum(c(0, head(vapply(Prob.list, ncol, integer(1)), -1)))
SEQ <- Positive.col + Col.offset
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##              2          3         4          5
## 5  0.050198745 0.20874038 0.3493452 0.08070014
## 14 0.006137467 0.07379692 0.3000977 0.61118703
## 16 0.011076573 0.07200104 0.2069772 0.62602924
## 26 0.053168843 0.07294790 0.3901482 0.29237448
## 28 0.050742577 0.13969686 0.3197509 0.36376898
## 29 0.021487153 0.16654460 0.4130355 0.11860281
# Vote: for each validation row take the position (1..4) of the largest
# probability; adding 1 turns that position into the class code (2..5),
# matching the column order "2","3","4","5".
Index <- as.numeric(apply(Transformed.Voting.df, 1, which.max)) + 1

# Evaluation table: the four probabilities, the voted class and the actual
# class of every validation row.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.050198745 0.20874038 0.3493452 0.08070014    4      4
## 14  0.006137467 0.07379692 0.3000977 0.61118703    5      5
## 16  0.011076573 0.07200104 0.2069772 0.62602924    5      5
## 26  0.053168843 0.07294790 0.3901482 0.29237448    4      4
## 28  0.050742577 0.13969686 0.3197509 0.36376898    5      4
## 29  0.021487153 0.16654460 0.4130355 0.11860281    4      4
## 39  0.168626076 0.07396024 0.3090875 0.12079657    4      5
## 40  0.014448429 0.20684732 0.6817530 0.05090089    4      3
## 60  0.010357915 0.04181923 0.2458085 0.77772015    5      5
## 61  0.031473386 0.27819943 0.3051827 0.31082314    5      3
## 72  0.003056225 0.08073608 0.3420918 0.59025396    5      4
## 81  0.011847641 0.10354846 0.3858845 0.32949379    4      3
## 86  0.028066354 0.13713380 0.2054629 0.63157921    5      5
## 90  0.081136390 0.17727033 0.3424094 0.11127708    4      4
## 92  0.029478535 0.15031042 0.3313006 0.13391321    4      4
## 113 0.064267643 0.07144724 0.3308216 0.28609695    4      5
## 116 0.030524084 0.13231247 0.2136585 0.39977453    5      4
## 117 0.028865129 0.07738160 0.3122476 0.37684643    5      5
## 122 0.043542123 0.14543686 0.3603615 0.07020024    4      4
## 123 0.018052530 0.08304918 0.3654577 0.24692148    4      2
## 124 0.027133870 0.18127525 0.3423944 0.06650750    4      4
## 131 0.004400150 0.11900219 0.3447399 0.56191308    5      4
## 135 0.159253483 0.20666579 0.4459900 0.04119318    4      3
## 137 0.002710185 0.06067866 0.3619807 0.50000000    5      5
## 140 0.015168784 0.16779732 0.3469577 0.21981968    4      4
## 142 0.011715036 0.07486885 0.3204670 0.49132878    5      5
## 149 0.015667191 0.06254969 0.3445750 0.51204164    5      4
## 154 0.035445885 0.13207655 0.2333623 0.34620470    5      5
## 156 0.150723556 0.07854195 0.3387122 0.18557229    4      3
## 158 0.396546257 0.22029512 0.4386574 0.02205837    4      3
## 169 0.006034343 0.07841304 0.2398375 0.75279896    5      5
## 185 0.008180010 0.13547951 0.1892871 0.52439115    5      5
## 187 0.003936755 0.06677733 0.4573341 0.33951549    4      5
## 192 0.025224635 0.18009246 0.5954588 0.02565033    4      3
## 194 0.021966644 0.18667631 0.4102105 0.18962034    4      4
## 195 0.014201540 0.25573718 0.2889290 0.22339302    4      4
## 196 0.135281025 0.14874912 0.4565876 0.03669117    4      5
## 197 0.368076412 0.18724325 0.2650021 0.05280078    2      3
## 199 0.005561729 0.08955217 0.2441305 0.84199173    5      5
## 210 0.107085853 0.16672663 0.3573707 0.02778787    4      3
## 216 0.016865921 0.06150811 0.1461455 0.91004898    5      5
## 220 0.012206240 0.26917141 0.3083345 0.10149769    4      4
## 227 0.122204139 0.04937165 0.5093372 0.08659952    4      5
## 234 0.029320159 0.11369993 0.4791119 0.19059053    4      3
## 240 0.019531896 0.07883463 0.4448295 0.19950609    4      5
## 245 0.048634574 0.11204877 0.3059943 0.30610136    5      4
## 249 0.015765564 0.13228289 0.3334327 0.38386579    5      5
## 261 0.026592191 0.18127966 0.3322931 0.31167848    4      3
## 277 0.009729815 0.07359786 0.2488677 0.91648487    5      5
## 283 0.013830882 0.09807656 0.2688636 0.68580928    5      5
## 290 0.009909005 0.07547284 0.2079586 0.88726743    5      4
## 293 0.013710398 0.07168669 0.3862232 0.21398310    4      5
## 302 0.008970155 0.12079481 0.2870276 0.38412595    5      4
## 305 0.020423727 0.09201249 0.3881990 0.50000000    5      4
## 308 0.027518619 0.14567372 0.2402644 0.35303899    5      4
## 311 0.008242433 0.07613708 0.2295368 0.80338534    5      5
## 320 0.015285975 0.08730620 0.2411326 0.79255875    5      2
## 322 0.022275895 0.07195280 0.2051618 0.89285073    5      5
## 330 0.013340034 0.07612833 0.1936597 0.84320721    5      4
## 332 0.031752461 0.11026949 0.4031462 0.17612703    4      4
## 333 0.022186863 0.08461833 0.2802216 0.79821162    5      5
## 339 0.009032803 0.09966750 0.2598537 0.67589206    5      5
## 341 0.021650891 0.10214553 0.4376644 0.17525968    4      4
## 344 0.026017606 0.07409970 0.3230614 0.73220414    5      5
## 349 0.009666410 0.06983383 0.1631300 0.91159963    5      5
## 355 0.012874843 0.05971465 0.2051916 0.93805389    5      5
## 356 0.030971367 0.08263984 0.2887584 0.59916964    5      3
## 365 0.012083859 0.14537126 0.2851206 0.36030776    5      3
## 366 0.007219614 0.12237835 0.3307716 0.47652923    5      4
## 369 0.007605116 0.08335856 0.3387505 0.42480819    5      4
## 371 0.010750562 0.08328880 0.2175404 0.85238561    5      5
## 373 0.013249449 0.07059719 0.3057463 0.78006846    5      5
## 389 0.048953483 0.10674804 0.2902172 0.38591078    5      2
## 390 0.034028726 0.13484410 0.2709804 0.41181560    5      4
## 396 0.011274034 0.06924667 0.3793796 0.36975263    4      4
## 412 0.005799538 0.07806326 0.4071543 0.31900546    4      5
## 413 0.007429899 0.10571032 0.3098268 0.30213440    4      3
## 415 0.007899690 0.13131778 0.3592732 0.40994287    5      4
## 422 0.019433376 0.13165150 0.3987179 0.19988167    4      5
## 425 0.017004081 0.05987807 0.1846781 0.95042828    5      5
## 434 0.014328643 0.05215296 0.3056037 0.48364281    5      5
## 438 0.004424970 0.08337654 0.2869318 0.72971015    5      4
## 441 0.061061644 0.17567664 0.3137047 0.14597876    4      5
## 442 0.026916043 0.07972993 0.1954781 0.71074065    5      5
## 445 0.009438971 0.09574750 0.4150343 0.46773924    5      5
## 447 0.034879376 0.14867734 0.2973834 0.09817384    4      3
## 453 0.039027959 0.16239374 0.4542491 0.18808054    4      4
## 454 0.122231150 0.09870721 0.2138307 0.13872214    4      5
## 462 0.005518763 0.05332820 0.2237572 0.83750875    5      5
## 474 0.047402821 0.08704512 0.3188089 0.41881481    5      3
## 476 0.011660994 0.16870266 0.3072160 0.28496362    4      3
## 493 0.008782093 0.12360494 0.2640248 0.45378642    5      5
## 502 0.015272914 0.21608205 0.4743104 0.15075794    4      4
## 503 0.153199095 0.04112550 0.2382604 0.20559390    4      5
## 506 0.016125125 0.13374001 0.2585934 0.59174023    5      5
## 508 0.013172311 0.06102079 0.3673410 0.77864044    5      5
## 512 0.023386386 0.09792667 0.2188649 0.69878722    5      5
## 513 0.021024112 0.12532958 0.1939403 0.37760604    5      5
## 521 0.203227542 0.09024426 0.2533483 0.48934188    5      2
## 524 0.054576764 0.11612636 0.2925148 0.28111587    4      5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# A class that never receives a vote has no column in this table.
CM <- table(Evaluation[["Actual"]], Evaluation[["Vote"]])
print(CM)
##    
##      2  4  5
##   2  0  3  4
##   3  1 17  7
##   4  0 29 33
##   5  0 21 93
#Proportions
# Number of validation rows overall and per actual class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# Square confusion matrix over all four classes (rows = actual, columns =
# voted). Fixing the levels keeps a column for a class that never receives a
# vote, so row i and column i always refer to the same class and the diagonal
# holds the correctly classified counts.
Classes <- 2:5
CM.square <- table(
  factor(Evaluation$Actual, levels = Classes),
  factor(Evaluation$Vote, levels = Classes)
)
Hits <- diag(CM.square)

#Accuracy
# Computed from the diagonal instead of counts copied by hand from one run.
Accuracy <- sum(Hits) / sum(CM.square)

#Precision
# Precision of a class = correct votes / all votes for that class (column
# sum). A class that was never voted for gets precision 0 rather than NaN.
Rows <- rowSums(CM.square)
Col <- colSums(CM.square)
Precision.by.class <- ifelse(Col > 0, Hits / Col, 0)
Precision2 <- Precision.by.class[["2"]]
Precision3 <- Precision.by.class[["3"]]
Precision4 <- Precision.by.class[["4"]]
Precision5 <- Precision.by.class[["5"]]

# Support-weighted average: each class weighted by its own number of actual
# rows, divided by the real total instead of a hard-coded 208.
Precision <- sum(Precision.by.class * Rows) / sum(Rows)

#Recall
# Recall of a class = correct votes / all actual rows of that class (row sum).
Recall.by.class <- ifelse(Rows > 0, Hits / Rows, 0)
Recall2 <- Recall.by.class[["2"]]
Recall3 <- Recall.by.class[["3"]]
Recall4 <- Recall.by.class[["4"]]
Recall5 <- Recall.by.class[["5"]]

Recall <- sum(Recall.by.class * Rows) / sum(Rows)


Accuracy
## [1] 0.5865385
Precision
##         2 
## 0.5865385
Recall
##         2 
## 0.4955412