PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/10")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

# Read the spreadsheet that holds the actual labels.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)

# Pull out the Score column as the raw label vector used below.
Label <- Labels[["Score"]]

Import the TP feature set with a 10-30th percentile cut-off.

# Read the feature table from CSV.
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/10/Feature Set 1 10th TP.csv"
)

# Discard the first column; every remaining column is kept as a feature.
Features <- Features[, -1, drop = FALSE]

RECODE LABELS FOR ONE-VS-ALL

#Class 2
# One-vs-all target for class 2: 1 when the raw score is 3 or 4, otherwise 0.
# Vectorised over all of Label instead of looping over a hard-coded 1:1000
# and growing a list one element at a time.
# NOTE(review): assumes Label has exactly one entry per row of Features
# (1000 in the recorded run) -- confirm.
# The scalar loop stopped with an error on a missing score; fail loudly here
# too rather than silently coding NA as 0.
stopifnot(!anyNA(Label))
#As Factor
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
#Class 3
# One-vs-all target for class 3: 1 when the raw score is 5 or 6, otherwise 0.
# Vectorised over all of Label instead of looping over a hard-coded 1:1000
# and growing a list one element at a time.
# NOTE(review): assumes Label has exactly one entry per row of Features
# (1000 in the recorded run) -- confirm.
# The scalar loop stopped with an error on a missing score; fail loudly here
# too rather than silently coding NA as 0.
stopifnot(!anyNA(Label))
#As Factor
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
#Class 4
# One-vs-all target for class 4: 1 when the raw score is 7 or 8, otherwise 0.
# Vectorised over all of Label instead of looping over a hard-coded 1:1000
# and growing a list one element at a time.
# NOTE(review): assumes Label has exactly one entry per row of Features
# (1000 in the recorded run) -- confirm.
# The scalar loop stopped with an error on a missing score; fail loudly here
# too rather than silently coding NA as 0.
stopifnot(!anyNA(Label))
#As Factor
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
#Class 5
# One-vs-all target for class 5: 1 when the raw score is 9 or 10, otherwise 0.
# Vectorised over all of Label instead of looping over a hard-coded 1:1000
# and growing a list one element at a time.
# NOTE(review): assumes Label has exactly one entry per row of Features
# (1000 in the recorded run) -- confirm.
# The scalar loop stopped with an error on a missing score; fail loudly here
# too rather than silently coding NA as 0.
stopifnot(!anyNA(Label))
#As Factor
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))
#All Labels
# Multi-class target: raw scores 9-10 -> 5, 7-8 -> 4, 5-6 -> 3, and every
# other score -> 2 (the catch-all branch of the original if/else chain).
# Vectorised over all of Label instead of looping over a hard-coded 1:1000
# and growing a list one element at a time.
# NOTE(review): assumes Label has exactly one entry per row of Features
# (1000 in the recorded run) -- confirm.
# The scalar loop stopped with an error on a missing score; fail loudly here
# too rather than silently assigning NA to class 2.
stopifnot(!anyNA(Label))
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform Integer to Numeric
# Convert every feature column to numeric. Iterating over the columns that
# are actually present replaces the hard-coded 1:1462, which errors when the
# table has fewer columns and leaves the extra ones unconverted when it has
# more. `Features[] <-` keeps the data.frame structure and column names.
Features[] <- lapply(Features, as.numeric)
# Print the resulting structure as a sanity check.
str(Features)
## 'data.frame':    1000 obs. of  1462 variables:
##  $ abit         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ach          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ add          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ addit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afford       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afraid       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ after        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ afternoon    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ago          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircon       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit    : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albert       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ almost       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ along        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ also         : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ altern       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ ambienc      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amen         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ american     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amount       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam    : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ angri        : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ anna         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annoy        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon        : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anyth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anywh        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart        : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ apolog       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appal        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appar        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appeal       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appear       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appl         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appoint      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ approach     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apt          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ aren         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arena        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arrang       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arriv        : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ art          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ asid         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ ask          : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ asleep       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ aspect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assist       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assum        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assur        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atm          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attend       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attitud      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ averag       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avoid        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awar         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ away         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awesom       : num  0 0 0 0 0 1 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Fix the RNG seed so the random split below is reproducible.
set.seed(1234)
# Draw a group id (1 = training, 2 = validation) independently for each row
# with probabilities 0.8 / 0.2, so the realised split is only approximately
# 80/20. `TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Training-side labels: entries whose split id in `ind` is 1.
train.labels.2 <- Label2[ind == 1]
train.labels.3 <- Label3[ind == 1]
train.labels.4 <- Label4[ind == 1]
train.labels.5 <- Label5[ind == 1]
train.labels <- All[ind == 1]

# Validation-side labels: entries whose split id in `ind` is 2.
test.labels.2 <- Label2[ind == 2]
test.labels.3 <- Label3[ind == 2]
test.labels.4 <- Label4[ind == 2]
test.labels.5 <- Label5[ind == 2]
test.labels <- All[ind == 2]

SVM MODEL

# One binary SVM per class (one-vs-all). Each training frame is the shared
# feature table plus that class's 0/1 target in a `Score` column. The models
# are fitted in the order 2, 3, 4, 5.

# Class 2 vs. rest
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(
  Score ~ .,
  data = train2,
  scale = FALSE,
  probability = TRUE
)

# Class 3 vs. rest
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(
  Score ~ .,
  data = train3,
  scale = FALSE,
  probability = TRUE
)

# Class 4 vs. rest
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(
  Score ~ .,
  data = train4,
  scale = FALSE,
  probability = TRUE
)

# Class 5 vs. rest
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(
  Score ~ .,
  data = train5,
  scale = FALSE,
  probability = TRUE
)

# Predict on the validation rows, asking for class probabilities as well.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# The probability matrices are attached to the predictions as an attribute.
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")

VOTING

Use probabilities as an input for the voting procedure.

# Combine the per-model probability matrices into one table.
# The column order of each probability matrix comes from the fitted model and
# is not guaranteed to be "0", "1" (in the recorded run SVM2 returned "1"
# first while the other models returned "0" first). Select the columns by
# class name so that they always match the hard-coded labels assigned below,
# and so that positional picks of the ": 1" columns (1, 4, 6, 8) stay valid
# for any seed or data set.
Voting.df <- data.frame(
  Prob2[, c("1", "0"), drop = FALSE],
  Prob3[, c("0", "1"), drop = FALSE],
  Prob4[, c("0", "1"), drop = FALSE],
  Prob5[, c("0", "1"), drop = FALSE]
)
colnames(Voting.df) <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")

head(Voting.df)
##     Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.060719120 0.9392809  0.7713557 0.22864428  0.6275775 0.3724225
## 14 0.006332847 0.9936672  0.9334676 0.06653235  0.6964144 0.3035856
## 16 0.010132759 0.9898672  0.9264671 0.07353291  0.7902200 0.2097800
## 26 0.068531945 0.9314681  0.9188981 0.08110194  0.6331179 0.3668821
## 28 0.049043421 0.9509566  0.8624833 0.13751674  0.6835997 0.3164003
## 29 0.017917369 0.9820826  0.8052007 0.19479926  0.5810587 0.4189413
##    Class 5: 0  Class5: 1
## 5   0.9227214 0.07727862
## 14  0.3734984 0.62650156
## 16  0.3685950 0.63140500
## 26  0.7005314 0.29946859
## 28  0.6276657 0.37233427
## 29  0.8874327 0.11256734
# Positions in Voting.df of the columns labelled as the ": 1" (positive)
# probability of each one-vs-all model.
SEQ <- c(1, 4, 6, 8)
# Keep those four columns and name each after the class it stands for.
Transformed.Voting.df <- setNames(Voting.df[, SEQ], c("2","3","4","5"))
head(Transformed.Voting.df)
##              2          3         4          5
## 5  0.060719120 0.22864428 0.3724225 0.07727862
## 14 0.006332847 0.06653235 0.3035856 0.62650156
## 16 0.010132759 0.07353291 0.2097800 0.63140500
## 26 0.068531945 0.08110194 0.3668821 0.29946859
## 28 0.049043421 0.13751674 0.3164003 0.37233427
## 29 0.017917369 0.19479926 0.4189413 0.11256734
# For every row find the column with the largest probability. Columns 1..4
# stand for classes 2..5, hence the +1 offset to turn a position into a class.
Index <- as.numeric(apply(Transformed.Voting.df, 1, which.max)) + 1

# Evaluation table: the four probabilities, the voted class, the actual class.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.060719120 0.22864428 0.3724225 0.07727862    4      4
## 14  0.006332847 0.06653235 0.3035856 0.62650156    5      5
## 16  0.010132759 0.07353291 0.2097800 0.63140500    5      5
## 26  0.068531945 0.08110194 0.3668821 0.29946859    4      4
## 28  0.049043421 0.13751674 0.3164003 0.37233427    5      4
## 29  0.017917369 0.19479926 0.4189413 0.11256734    4      4
## 39  0.226342515 0.11433187 0.3456262 0.12051710    4      5
## 40  0.017749073 0.22209291 0.6678924 0.04949230    4      3
## 60  0.011693348 0.04159902 0.2620006 0.78093919    5      5
## 61  0.039318476 0.30737606 0.2975770 0.30612058    3      3
## 72  0.004469306 0.07829862 0.3293939 0.58174245    5      4
## 81  0.013485745 0.10062743 0.3875409 0.32235260    4      3
## 86  0.025173853 0.12467652 0.2208405 0.63129873    5      5
## 90  0.079688152 0.16764719 0.3396656 0.11109406    4      4
## 92  0.031358774 0.16839193 0.3209157 0.12988879    4      4
## 113 0.056425664 0.06324450 0.3457549 0.29536740    4      5
## 116 0.029047482 0.13562505 0.2164791 0.40184236    5      4
## 117 0.022722076 0.07828635 0.3128063 0.37591822    5      5
## 122 0.037941413 0.14613976 0.3616944 0.07257338    4      4
## 123 0.018243466 0.07643456 0.3629782 0.24827252    4      2
## 124 0.029313522 0.21166707 0.3455539 0.06301302    4      4
## 131 0.004269490 0.13263824 0.3115432 0.56026315    5      4
## 135 0.176813923 0.22504112 0.4932368 0.03614343    4      3
## 137 0.002941211 0.06051015 0.3510127 0.50000000    5      5
## 140 0.015115373 0.17076920 0.3372815 0.22389708    4      4
## 142 0.011290302 0.08260615 0.3117485 0.48748413    5      5
## 149 0.016651423 0.06776552 0.3599204 0.50603014    5      4
## 154 0.030418911 0.12882384 0.2485209 0.35029694    5      5
## 156 0.215798253 0.08881103 0.3515711 0.16750402    4      3
## 158 0.329360216 0.25657426 0.4573377 0.02103792    4      3
## 169 0.006054609 0.07864171 0.2433113 0.74806974    5      5
## 185 0.009340180 0.12911276 0.1942001 0.52752693    5      5
## 187 0.004402003 0.06726239 0.4684700 0.33651861    4      5
## 192 0.019783195 0.23533567 0.5614581 0.02531717    4      3
## 194 0.015901540 0.18783591 0.4144194 0.18765891    4      4
## 195 0.013964384 0.23640444 0.2897635 0.22537757    4      4
## 196 0.150094180 0.16763864 0.4920486 0.03508336    4      5
## 197 0.388348204 0.19290411 0.2737666 0.04996482    2      3
## 199 0.005280535 0.08621173 0.2431860 0.84620152    5      5
## 210 0.099445969 0.14197552 0.4122735 0.02521855    4      3
## 216 0.016903820 0.06187738 0.1493248 0.91139099    5      5
## 220 0.012341666 0.29484706 0.3191344 0.09537202    4      4
## 227 0.105123717 0.06048995 0.5375615 0.08843665    4      5
## 234 0.030660956 0.12861921 0.4659388 0.17999524    4      3
## 240 0.019570433 0.08063124 0.4573038 0.19815581    4      5
## 245 0.045430699 0.11332974 0.3114897 0.30907821    4      4
## 249 0.015296104 0.12311512 0.3354832 0.38327979    5      5
## 261 0.024834312 0.16498748 0.3173907 0.31444332    4      3
## 277 0.010302206 0.07226569 0.2444348 0.91516205    5      5
## 283 0.017201635 0.09453889 0.2629123 0.67669898    5      5
## 290 0.009293342 0.07145557 0.2066114 0.88659642    5      4
## 293 0.014709553 0.06701530 0.3947714 0.20800119    4      5
## 302 0.009047436 0.12155207 0.2816959 0.38434127    5      4
## 305 0.018408029 0.09372087 0.3776387 0.50000000    5      4
## 308 0.024151760 0.13814472 0.2419496 0.35903463    5      4
## 311 0.008198472 0.07020128 0.2309308 0.80538692    5      5
## 320 0.014644361 0.08135214 0.2396642 0.79198194    5      2
## 322 0.020698047 0.06717452 0.2061664 0.89324217    5      5
## 330 0.012660500 0.07264244 0.1976266 0.84578430    5      4
## 332 0.030270027 0.10719641 0.4060413 0.17998966    4      4
## 333 0.020029206 0.07946882 0.2748141 0.79696074    5      5
## 339 0.009308403 0.09490192 0.2581795 0.67981961    5      5
## 341 0.019927286 0.09791916 0.4259198 0.17985423    4      4
## 344 0.025086695 0.07010483 0.3190742 0.73149307    5      5
## 349 0.009698833 0.06714504 0.1653868 0.91087586    5      5
## 355 0.012440108 0.05720770 0.2025275 0.93677900    5      5
## 356 0.027905673 0.08601825 0.2882003 0.60221850    5      3
## 365 0.011277395 0.13375912 0.2824516 0.36689528    5      3
## 366 0.008325038 0.12266957 0.3121282 0.47491721    5      4
## 369 0.007398590 0.07433960 0.3222977 0.43403714    5      4
## 371 0.009651876 0.07981347 0.2168568 0.85122560    5      5
## 373 0.013285824 0.06913943 0.3076453 0.78181093    5      5
## 389 0.043015673 0.10230682 0.2857322 0.38967938    5      2
## 390 0.035177997 0.12293714 0.2685294 0.41095283    5      4
## 396 0.010892282 0.06387390 0.3729171 0.37706690    5      4
## 412 0.005900847 0.07849760 0.4010352 0.33587574    4      5
## 413 0.007814346 0.10614232 0.3326566 0.30850152    4      3
## 415 0.009471610 0.12050424 0.3331822 0.41111998    5      4
## 422 0.020155630 0.15441490 0.3997204 0.19224287    4      5
## 425 0.017927333 0.05878618 0.1900615 0.95016327    5      5
## 434 0.012675364 0.05299373 0.2955342 0.48590569    5      5
## 438 0.005002268 0.08441154 0.2819459 0.73110061    5      4
## 441 0.065923243 0.16877365 0.3577106 0.14407533    4      5
## 442 0.023377313 0.08165659 0.1998990 0.71289101    5      5
## 445 0.010033707 0.09518258 0.4023962 0.47480945    5      5
## 447 0.033344309 0.14750803 0.3416606 0.09594671    4      3
## 453 0.083893816 0.12859772 0.4604940 0.17039171    4      4
## 454 0.122788265 0.10677410 0.2331023 0.13939656    4      5
## 462 0.006101032 0.05596089 0.2173711 0.83974507    5      5
## 474 0.040958983 0.10150962 0.3405297 0.38847884    5      3
## 476 0.011657052 0.16420354 0.2909361 0.28074862    4      3
## 493 0.008911849 0.12240648 0.2592187 0.46251680    5      5
## 502 0.016337894 0.21298737 0.4876286 0.15275984    4      4
## 503 0.183244099 0.04913485 0.2509922 0.20226163    4      5
## 506 0.015934542 0.12677168 0.2612792 0.60460471    5      5
## 508 0.011700285 0.05994788 0.3713655 0.78560140    5      5
## 512 0.020163185 0.10692991 0.2173027 0.69767460    5      5
## 513 0.022383389 0.12166685 0.1966970 0.38040046    5      5
## 521 0.177948288 0.09604614 0.2630601 0.50000000    5      2
## 524 0.044141182 0.12202923 0.3091491 0.27538217    4      5
# Confusion matrix: actual classes in rows, voted classes in columns.
# Vote is numeric, so a class that never wins a vote would have no column;
# the table would then be non-square and diag(CM) would pair the wrong cells.
# Turning Vote into a factor on the levels of Actual keeps the matrix square
# and aligned.
CM <- table(
  Evaluation$Actual,
  factor(Evaluation$Vote, levels = levels(Evaluation$Actual))
)
CM
##    
##      2  3  4  5
##   2  0  0  3  4
##   3  1  1 17  6
##   4  0  0 30 32
##   5  0  0 21 93
#Proportions
# Number of evaluated observations and number of actual members per class.
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))

# CM was built as table(Actual, Vote): rows are actual classes, columns are
# voted classes. Row sums are therefore the class supports (recall
# denominator) and column sums are the number of votes per class (precision
# denominator). The earlier version had the two swapped.
Classes <- rownames(CM)
# Correct votes per class, looked up by name so that a class without a column
# (it never received a vote) counts as 0 instead of misaligning the diagonal.
TruePos <- vapply(
  Classes,
  function(k) if (k %in% colnames(CM)) as.numeric(CM[k, k]) else 0,
  numeric(1)
)
Support <- rowSums(CM)
Predicted <- colSums(CM)[Classes]
Predicted[is.na(Predicted)] <- 0

#Accuracy
Accuracy <- sum(TruePos)/sum(CM)

#Precision
# Per class: correct votes / all votes for that class (0 when the class was
# never voted, instead of 0/0 = NaN). Averaged with the class supports as
# weights; the total replaces the hard-coded 208.
PerClassPrecision <- ifelse(Predicted > 0, TruePos/Predicted, 0)
Precision <- sum(PerClassPrecision*Support)/sum(Support)

#Recall
# Per class: correct votes / actual members of that class, support-weighted.
# With these weights the result equals the accuracy by construction.
PerClassRecall <- ifelse(Support > 0, TruePos/Support, 0)
Recall <- sum(PerClassRecall*Support)/sum(Support)


Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538