PREPARATION

# Make the feature-set folder the working directory for this session.
# NOTE(review): the two data files read further down are addressed with full
# "~/..." paths, so the visible code does not seem to rely on this call --
# confirm before removing it.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/Full")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

#Import Labels
# Read the labelled source sheet and keep its `Score` column as the raw
# label vector used for all later recoding.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)

Label <- Labels[["Score"]]

Import the TF feature set with no cut-off.

#Import Features
# Load the feature table and discard its first column in the same step.
# NOTE(review): the first column is presumably a row index written by the
# export -- confirm against the CSV.
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/Full/Feature Set 1 Full TF.csv"
)[-1]

RECODE LABELS FOR ONE-VS-ALL

# Recode the raw scores in `Label` for one-vs-all classification.
#
# Label2..Label5 are 0/1 factors marking membership of one score pair
# (3-4, 5-6, 7-8, 9-10). `All` is the multi-class factor with levels 2..5.
# The recoding is vectorised over the whole of `Label`, so it works for any
# number of observations instead of exactly 1000.

# A missing score cannot be assigned to a class; fail early and clearly.
if (anyNA(Label)) {
  stop("`Label` contains missing scores; cannot recode.", call. = FALSE)
}

#Class 2
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
#Class 3
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
#Class 4
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
#Class 5
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))

#All Labels
# Every score that is not in 5..10 falls into class 2.
# NOTE(review): this means a score outside 3..10 is class 2 in `All` but 0 in
# `Label2` -- confirm that such scores cannot occur in the data.
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform every feature column to numeric
# `Features[] <-` replaces the columns in place, so the object stays a data
# frame with its column names, and no column count has to be hard-coded.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  2672 variables:
##  $ abil          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abit          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abl           : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ abnorm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ about         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abov          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abrupt        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ absolut       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accent        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accid         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accomplish    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accur         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accustom      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acess         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ach           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acomod        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ activ         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual        : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ adaptor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ add           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ addit         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjust        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ador          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advantag      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affair        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afford        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afraid        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ africa        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ afterdinn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afternoon     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afterward     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ago           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ agr           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ agreeabl      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ahead         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airless       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albeit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albert        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albrt         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alcohol       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aldo          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alittl        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allevi        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ almost        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ along         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alongsid      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alot          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ alright       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ also          : num  0 0 0 3 0 0 1 0 1 0 ...
##  $ altern        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz          : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ ambianc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ambienc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amen          : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ amend         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ america       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ american      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amongst       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amount        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ampl          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam     : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and           : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ angl          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ angri         : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ ann           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anna          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annex         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ announc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annoy         : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ansterdam     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer        : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Draw a 1/2 marker per row (about 80% / 20%) with a fixed seed so the split
# is reproducible; 1 = training row, 2 = held-out row.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
# drop = FALSE keeps both parts as data frames even with a single column.
train <- Features[ind == 1, , drop = FALSE]
test <- Features[ind == 2, , drop = FALSE]

Split the labels with the same index, so they stay aligned with the feature rows.

# Apply the row markers in `ind` to every label vector: 1 selects the
# training part, 2 the held-out part.
in.train <- ind == 1
in.test <- ind == 2

# One-vs-all labels, class 2 to class 5
train.labels.2 <- Label2[in.train]
train.labels.3 <- Label3[in.train]
train.labels.4 <- Label4[in.train]
train.labels.5 <- Label5[in.train]

test.labels.2 <- Label2[in.test]
test.labels.3 <- Label3[in.test]
test.labels.4 <- Label4[in.test]
test.labels.5 <- Label5[in.test]

# Multi-class labels
train.labels <- All[in.train]
test.labels <- All[in.test]

SVM MODEL

# One binary SVM per class. For each class: copy the training features,
# attach that class's 0/1 label as `Score`, fit without feature scaling and
# with probability estimates enabled, score the held-out rows, and keep the
# matrix of class probabilities attached to the prediction.

#SVM2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

#SVM3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

#SVM4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

#SVM5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")

VOTING

Use probabilities as an input for the voting procedure. Pick the class with the highest probability.

# Put the four probability matrices side by side, two columns per model.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)

# Build the two column labels of one model from the outcome names ("0"/"1")
# that its probability matrix actually carries. The order of those two columns
# is not the same for every model (in the recorded output "1" comes first for
# class 2 and second for the others), so fixed labels could mislabel columns.
# The second label of a pair is spelled without a space after "Class"; that
# spelling is kept so the existing column names do not change.
label.pair <- function(class.id, prob) {
  outcome <- colnames(prob)
  stopifnot(length(outcome) == 2)
  c(
    paste0("Class ", class.id, ": ", outcome[1]),
    paste0("Class", class.id, ": ", outcome[2])
  )
}
colnames(Voting.df) <- c(
  label.pair(2, Prob2), label.pair(3, Prob3),
  label.pair(4, Prob4), label.pair(5, Prob5)
)

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.08694051 0.9130595  0.6716414 0.32835864  0.7026847 0.2973153
## 14 0.01239997 0.9876000  0.9290774 0.07092256  0.7065480 0.2934520
## 16 0.01455813 0.9854419  0.9008911 0.09910893  0.7892088 0.2107912
## 26 0.02507028 0.9749297  0.8787459 0.12125413  0.6625758 0.3374242
## 28 0.03310362 0.9668964  0.8702781 0.12972188  0.7012732 0.2987268
## 29 0.01575621 0.9842438  0.8512280 0.14877197  0.6034119 0.3965881
##    Class 5: 0  Class5: 1
## 5   0.9101134 0.08988664
## 14  0.4122357 0.58776433
## 16  0.6469491 0.35305089
## 26  0.6910642 0.30893580
## 28  0.3856022 0.61439780
## 29  0.6228235 0.37717645
# Keep, for every model, only the probability of the positive outcome ("1").
# The position of that column is looked up by name in each probability matrix
# instead of being typed in, because the 0/1 column order differs between the
# models. For the recorded run this gives the same positions: 1, 4, 6, 8.
prob.list <- list(Prob2, Prob3, Prob4, Prob5)
# Number of Voting.df columns that come before each model's own columns.
cols.before <- cumsum(c(0, vapply(prob.list, ncol, integer(1))))
cols.before <- cols.before[seq_along(prob.list)]
positive.col <- vapply(
  prob.list,
  function(prob) match("1", colnames(prob)),
  integer(1)
)
if (anyNA(positive.col)) {
  stop("A probability matrix has no column for outcome \"1\".", call. = FALSE)
}
SEQ <- cols.before + positive.col
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##             2          3         4          5
## 5  0.08694051 0.32835864 0.2973153 0.08988664
## 14 0.01239997 0.07092256 0.2934520 0.58776433
## 16 0.01455813 0.09910893 0.2107912 0.35305089
## 26 0.02507028 0.12125413 0.3374242 0.30893580
## 28 0.03310362 0.12972188 0.2987268 0.61439780
## 29 0.01575621 0.14877197 0.3965881 0.37717645
# Vote: for each held-out row pick the class whose model reports the highest
# positive probability (the first one wins on a tie, as which.max does).
Evaluation <- Transformed.Voting.df
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max))
# Translate the winning column position into its class label through the
# column names ("2".."5") rather than by adding a fixed offset.
Index <- as.numeric(colnames(Transformed.Voting.df))[Index]
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, 100)
##               2          3         4           5 Vote Actual
## 5   0.086940510 0.32835864 0.2973153 0.089886640    3      4
## 14  0.012399970 0.07092256 0.2934520 0.587764329    5      5
## 16  0.014558128 0.09910893 0.2107912 0.353050893    5      5
## 26  0.025070281 0.12125413 0.3374242 0.308935804    4      4
## 28  0.033103623 0.12972188 0.2987268 0.614397799    5      4
## 29  0.015756211 0.14877197 0.3965881 0.377176453    4      4
## 39  0.229577920 0.16188607 0.3365076 0.070392325    4      5
## 40  0.006462898 0.28751582 0.5480526 0.328179863    4      3
## 60  0.008230984 0.05432301 0.2961475 0.416567449    5      5
## 61  0.184641733 0.20687264 0.4283169 0.171486709    4      3
## 72  0.006955233 0.08662178 0.3397333 0.628878774    5      4
## 81  0.021967440 0.10694981 0.3545018 0.400447628    5      3
## 86  0.022193260 0.11800233 0.2534818 0.316853233    5      5
## 90  0.049979647 0.12530086 0.2912858 0.386943048    5      4
## 92  0.038145441 0.13137290 0.3315263 0.218643009    4      4
## 113 0.039622946 0.06408997 0.3773787 0.345534696    4      5
## 116 0.047629073 0.12894798 0.2443308 0.248334646    5      4
## 117 0.024678199 0.10293582 0.2974848 0.363529929    5      5
## 122 0.056726112 0.13778066 0.3273737 0.195587385    4      4
## 123 0.019385640 0.08366641 0.3599978 0.392848638    5      2
## 124 0.015835863 0.13724397 0.3812706 0.262330670    4      4
## 131 0.008895730 0.12067008 0.3124993 0.424995598    5      4
## 135 0.480209497 0.19751228 0.4235206 0.054498322    2      3
## 137 0.010515492 0.08079868 0.3408840 0.329560625    4      5
## 140 0.017300863 0.14991540 0.3170162 0.322268035    5      4
## 142 0.016913674 0.08866492 0.3375518 0.550234757    5      5
## 149 0.020757591 0.07748940 0.3540549 0.590361051    5      4
## 154 0.038783762 0.09941656 0.2662082 0.529432992    5      5
## 156 0.169452318 0.08488903 0.4192179 0.227250553    4      3
## 158 0.441887765 0.22358297 0.5216105 0.009696804    4      3
## 169 0.014469776 0.07602864 0.2753475 0.571332741    5      5
## 185 0.013404406 0.10353271 0.2596159 0.448573270    5      5
## 187 0.009219711 0.09088777 0.3900873 0.494876466    5      5
## 192 0.022464264 0.16566748 0.5000000 0.156175452    4      3
## 194 0.022063210 0.19418242 0.3499731 0.414161435    5      4
## 195 0.023750371 0.14466847 0.2680618 0.468575436    5      4
## 196 0.232491978 0.11855377 0.5000000 0.138135635    4      5
## 197 0.472399553 0.09880548 0.3328065 0.059703982    2      3
## 199 0.007654366 0.10911791 0.2410748 0.698183631    5      5
## 210 0.147413796 0.12377984 0.3427034 0.219939778    4      3
## 216 0.018994198 0.06856292 0.1866222 0.715783190    5      5
## 220 0.021649659 0.19566908 0.3592783 0.259511342    4      4
## 227 0.357119455 0.03526190 0.5816884 0.062628649    4      5
## 234 0.027754083 0.13447042 0.4412684 0.363747555    4      3
## 240 0.030323241 0.09155744 0.3921001 0.400391410    5      5
## 245 0.027392543 0.10759779 0.3105125 0.557092987    5      4
## 249 0.020734441 0.12212821 0.3171203 0.506975949    5      5
## 261 0.024145997 0.12520104 0.2996175 0.500000000    5      3
## 277 0.011877007 0.07841295 0.2587126 0.775997068    5      5
## 283 0.018420466 0.11021844 0.2982501 0.448538109    5      5
## 290 0.012460434 0.09322760 0.2281088 0.770084458    5      4
## 293 0.013830231 0.09505036 0.3710383 0.425448165    5      5
## 302 0.015121438 0.13928041 0.3141717 0.292235562    4      4
## 305 0.019234360 0.09734574 0.3281293 0.625300036    5      4
## 308 0.030657838 0.11238541 0.2498363 0.600419884    5      4
## 311 0.013145489 0.08362076 0.2585749 0.591920114    5      5
## 320 0.016054003 0.09157723 0.2630082 0.723294321    5      2
## 322 0.016601879 0.07039034 0.2271105 0.763962945    5      5
## 330 0.011921075 0.08095461 0.2256267 0.762629173    5      4
## 332 0.020989043 0.11548799 0.3829447 0.395042373    5      4
## 333 0.018042148 0.08504056 0.2862222 0.736490785    5      5
## 339 0.018100891 0.10109586 0.2607225 0.601873194    5      5
## 341 0.016799704 0.10680815 0.3900855 0.500000000    5      4
## 344 0.019489025 0.07528312 0.3155477 0.709811280    5      5
## 349 0.013953259 0.08618980 0.1970090 0.718833954    5      5
## 355 0.014296736 0.07330175 0.2341953 0.807365108    5      5
## 356 0.027833212 0.08826295 0.2925701 0.628094878    5      3
## 365 0.012661705 0.13149777 0.2759019 0.511220596    5      3
## 366 0.012420942 0.11322217 0.2949670 0.403925585    5      4
## 369 0.011137406 0.09286081 0.3145138 0.384247992    5      4
## 371 0.014988827 0.09578691 0.2341783 0.673419961    5      5
## 373 0.014130597 0.08341162 0.3137938 0.638028812    5      5
## 389 0.028076056 0.10248056 0.3007548 0.577544363    5      2
## 390 0.026812244 0.11918994 0.2817845 0.613877307    5      4
## 396 0.014207113 0.07812149 0.3531741 0.500000000    5      4
## 412 0.007350118 0.10005782 0.3286508 0.500000000    5      5
## 413 0.026380357 0.10671107 0.3116486 0.167452158    4      3
## 415 0.012073324 0.10029845 0.3518261 0.522715938    5      4
## 422 0.100700517 0.11044995 0.4136088 0.366112842    4      5
## 425 0.015237259 0.07305156 0.2206195 0.835599843    5      5
## 434 0.019640736 0.06100430 0.3281691 0.270978817    4      5
## 438 0.008168731 0.09181730 0.2869682 0.668249925    5      4
## 441 0.193542435 0.07112976 0.4410169 0.079094578    4      5
## 442 0.029374468 0.08842695 0.2304181 0.673073848    5      5
## 445 0.024002963 0.09182330 0.3942862 0.295252101    4      5
## 447 0.047923813 0.14725302 0.3661489 0.133170945    4      3
## 453 0.032303483 0.15687734 0.4357367 0.479469152    5      4
## 454 0.070222771 0.11879860 0.3110680 0.095840345    4      5
## 462 0.007026066 0.07478535 0.2426266 0.629084327    5      5
## 474 0.032198782 0.10473829 0.3184858 0.516561034    5      3
## 476 0.020955494 0.14029497 0.2813054 0.436854528    5      3
## 493 0.012407288 0.10506877 0.2847986 0.535930143    5      5
## 502 0.010846087 0.17290988 0.4541608 0.367603760    4      4
## 503 0.088846150 0.04728997 0.3449779 0.399114583    5      5
## 506 0.018093256 0.10471117 0.2722241 0.598231188    5      5
## 508 0.015200326 0.06916729 0.3361952 0.735128620    5      5
## 512 0.023200654 0.10212518 0.2614552 0.682321576    5      5
## 513 0.018535885 0.11137171 0.2589496 0.420982831    5      5
## 521 0.050932598 0.09094555 0.2786562 0.654316377    5      2
## 524 0.029487506 0.12121962 0.3007481 0.336774090    5      5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# The votes are given the same levels as the actual labels so the table stays
# square (a class that is never voted for gets an all-zero column) and its
# diagonal always pairs a class with itself.
CM <- table(
  Evaluation$Actual,
  factor(Evaluation$Vote, levels = levels(Evaluation$Actual))
)
CM
##    
##       2   3   4   5
##   2   0   0   1   6
##   3   2   0  10  13
##   4   1   1  18  42
##   5   0   0  14 100
#Proportions
# Number of held-out observations overall and per actual class (the support).
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))

Classes <- c("2", "3", "4", "5")
Support <- c(Length2, Length3, Length4, Length5)

# Per-class counts are looked up by class label, so a class that is missing
# from the rows or columns of CM counts as 0 instead of shifting the diagonal.
TruePos <- vapply(Classes, function(k) {
  if (k %in% rownames(CM) && k %in% colnames(CM)) CM[k, k] else 0
}, numeric(1))
PredictedTotal <- vapply(Classes, function(k) {
  if (k %in% colnames(CM)) sum(CM[, k]) else 0
}, numeric(1))

#Accuracy
Accuracy <- sum(TruePos)/sum(CM)

#Precision
# CM has the actual classes in its rows and the votes in its columns, so
# precision divides by the column (voted) totals. A class that was never voted
# for has no defined precision and is counted as 0.
ClassPrecision <- TruePos/PredictedTotal
ClassPrecision[is.nan(ClassPrecision)] <- 0
# Average over classes, weighted by the number of actual cases per class.
Precision <- sum(ClassPrecision*Support)/Overall

#Recall
# Recall divides by the row (actual) totals. A class without any actual case
# has weight 0 and is counted as 0.
ClassRecall <- TruePos/Support
ClassRecall[is.nan(ClassRecall)] <- 0
Recall <- sum(ClassRecall*Support)/Overall


Accuracy
## [1] 0.5673077
Precision
## [1] 0.4651968
Recall
## [1] 0.5673077