PREPARATION

# Make this experiment's folder the working directory for the rest of the script.
# NOTE(review): the data files below are read via absolute paths, so this call
# may be unnecessary -- confirm nothing later relies on relative paths.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/Full")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import Feature Set “Full” - the feature set with no cut-off.

# Read the labelled source data; the Score column is the label vector.
Labels <- read_excel(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels[["Score"]]

# Read the feature table and discard its first column.
Features <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/Full/Feature Set 1 Full TP.csv"
)[-1]

RECODE LABELS FOR ONE-VS-ALL

# One-vs-all recoding of `Label` (the Score column) into binary targets and a
# combined multi-class target.
#
#   Label2: 1 when the score is 3 or 4,  else 0
#   Label3: 1 when the score is 5 or 6,  else 0
#   Label4: 1 when the score is 7 or 8,  else 0
#   Label5: 1 when the score is 9 or 10, else 0
#   All   : 5 / 4 / 3 for scores 9-10 / 7-8 / 5-6, and 2 for every other score
#
# The recoding is vectorised over the whole of `Label`, so it no longer depends
# on a hard-coded row count of 1000.
#
# NOTE(review): Label2 flags only scores 3-4, whereas `All` assigns class 2 to
# every score outside 5-10 -- confirm that no scores below 3 occur in the data.

# The original element-wise `if` failed on missing scores; keep failing loudly
# instead of silently recoding NA as 0 / class 2.
stopifnot(!anyNA(Label))

#Class 2
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
#Class 3
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
#Class 4
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
#Class 5
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))

#All Labels
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

# Convert every feature column to numeric.
# `Features[] <- lapply(...)` keeps the data.frame structure and covers all
# columns, whatever their number, instead of a hard-coded 1:2672.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  2672 variables:
##  $ abil          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abit          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abl           : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ abnorm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ about         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abov          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abrupt        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ absolut       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accent        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accid         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accomplish    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accur         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accustom      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acess         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ach           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acomod        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ activ         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual        : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ adaptor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ add           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ addit         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjust        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ador          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advantag      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affair        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afford        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afraid        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ africa        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ afterdinn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afternoon     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afterward     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ago           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ agr           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ agreeabl      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ahead         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airless       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albeit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albert        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albrt         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alcohol       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aldo          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alittl        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allevi        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ almost        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ along         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alongsid      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alot          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ alright       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ also          : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ altern        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz          : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ ambianc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ambienc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amen          : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ amend         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ america       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ american      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amongst       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amount        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ampl          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam     : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and           : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ angl          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ angri         : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ ann           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anna          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annex         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ announc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annoy         : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ansterdam     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer        : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

# Randomly assign each row to partition 1 (train, p = 0.8) or 2 (test, p = 0.2).
# The seed is fixed so the split is reproducible; `ind` is reused below to split
# the label vectors in exactly the same way.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same assignment vector `ind` that was used
# for the features, so rows and labels stay aligned.
in.train <- ind == 1
in.test <- ind == 2

# Binary one-vs-all labels
train.labels.2 <- Label2[in.train]
test.labels.2 <- Label2[in.test]

train.labels.3 <- Label3[in.train]
test.labels.3 <- Label3[in.test]

train.labels.4 <- Label4[in.train]
test.labels.4 <- Label4[in.test]

train.labels.5 <- Label5[in.train]
test.labels.5 <- Label5[in.test]

# Combined multi-class labels
train.labels <- All[in.train]
test.labels <- All[in.test]

SVM MODEL

# Fit one binary SVM per class (one-vs-all). Each training frame is the shared
# feature frame plus a `Score` column holding that class's 0/1 labels.
# The fits are kept in their original order (2, 3, 4, 5).

# Class 2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

# Class 3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

# Class 4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

# Class 5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict on the held-out rows, requesting class probabilities.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# The probability matrices are attached to the predictions as an attribute.
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")

VOTING

Use the probabilities as an input for the voting procedure. Choose the class with the highest probability.

# Put the four probability matrices side by side and label the eight columns.
# NOTE(review): the labels are hard-coded; the real order of the 0/1 columns in
# each matrix is given by its own colnames() -- confirm they match.
Voting.df <- setNames(
  data.frame(Prob2, Prob3, Prob4, Prob5),
  c(
    "Class 2: 1", "Class2: 0",
    "Class 3: 0", "Class3: 1",
    "Class 4: 0", "Class4: 1",
    "Class 5: 0", "Class5: 1"
  )
)

head(Voting.df)
##     Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.059325858 0.9406741  0.7700948 0.22990517  0.6194502 0.3805498
## 14 0.006354669 0.9936453  0.9345275 0.06547254  0.6970027 0.3029973
## 16 0.010897097 0.9891029  0.9274546 0.07254541  0.7921826 0.2078174
## 26 0.064600556 0.9353994  0.9162288 0.08377122  0.6320345 0.3679655
## 28 0.046912467 0.9530875  0.8645258 0.13547417  0.6795935 0.3204065
## 29 0.019217561 0.9807824  0.8068216 0.19317842  0.5771262 0.4228738
##    Class 5: 0 Class5: 1
## 5   0.9220958 0.0779042
## 14  0.3667004 0.6332996
## 16  0.3637557 0.6362443
## 26  0.6943750 0.3056250
## 28  0.6244933 0.3755067
## 29  0.8882729 0.1117271
# Keep, for each one-vs-all model, the column holding the probability of the
# positive outcome ("1").
# The column order of each probability matrix is decided by the model, not by
# this script, so the positive column is located by its name rather than by a
# fixed position. `SEQ` holds the resulting positions inside Voting.df, which
# is the four matrices bound side by side in the order 2, 3, 4, 5.
prob.list <- list(Prob2, Prob3, Prob4, Prob5)
prob.offsets <- cumsum(c(0, vapply(prob.list[-length(prob.list)], ncol, integer(1))))
SEQ <- prob.offsets +
  vapply(prob.list, function(p) match("1", colnames(p)), integer(1))
# Every model must expose a "1" column; stop instead of voting on wrong columns.
stopifnot(!anyNA(SEQ))
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##              2          3         4         5
## 5  0.059325858 0.22990517 0.3805498 0.0779042
## 14 0.006354669 0.06547254 0.3029973 0.6332996
## 16 0.010897097 0.07254541 0.2078174 0.6362443
## 26 0.064600556 0.08377122 0.3679655 0.3056250
## 28 0.046912467 0.13547417 0.3204065 0.3755067
## 29 0.019217561 0.19317842 0.4228738 0.1117271
# Vote: for each test row take the position of the largest class probability.
# Columns 1..4 stand for classes 2..5, hence the shift by one.
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, FUN = which.max)) + 1

# Probabilities, predicted class and true class side by side.
Evaluation <- cbind(Transformed.Voting.df, Vote = Index, Actual = test.labels)
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.059325858 0.22990517 0.3805498 0.07790420    4      4
## 14  0.006354669 0.06547254 0.3029973 0.63329962    5      5
## 16  0.010897097 0.07254541 0.2078174 0.63624425    5      5
## 26  0.064600556 0.08377122 0.3679655 0.30562498    4      4
## 28  0.046912467 0.13547417 0.3204065 0.37550674    5      4
## 29  0.019217561 0.19317842 0.4228738 0.11172715    4      4
## 39  0.272512002 0.12850545 0.3663182 0.12824322    4      5
## 40  0.018545222 0.22653838 0.6740571 0.05007697    4      3
## 60  0.012632899 0.04120022 0.2661603 0.78576299    5      5
## 61  0.044779507 0.31313805 0.3005885 0.31083666    3      3
## 72  0.004672894 0.07918454 0.3315088 0.58196656    5      4
## 81  0.013290352 0.10000418 0.3826273 0.32063088    4      3
## 86  0.024525374 0.12327212 0.2276819 0.63359750    5      5
## 90  0.075871600 0.16849081 0.3338143 0.11128996    4      4
## 92  0.030338523 0.16910858 0.3207941 0.12850672    4      4
## 113 0.058825241 0.06453796 0.3406573 0.30764457    4      5
## 116 0.029721084 0.13065323 0.2179422 0.40272728    5      4
## 117 0.023561928 0.07864363 0.3150070 0.37217963    5      5
## 122 0.038610787 0.15027816 0.3629138 0.07163905    4      4
## 123 0.017662819 0.08045601 0.3611065 0.24640388    4      2
## 124 0.028115152 0.21818711 0.3564957 0.06372599    4      4
## 131 0.004200636 0.13179201 0.3055440 0.56463678    5      4
## 135 0.190719986 0.23354435 0.5000000 0.03526430    4      3
## 137 0.002869735 0.05937267 0.3478987 0.50000000    5      5
## 140 0.014853885 0.16637399 0.3375522 0.22184128    4      4
## 142 0.010873556 0.08231967 0.3093147 0.48526807    5      5
## 149 0.015964792 0.06717236 0.3546831 0.50550901    5      4
## 154 0.031740671 0.13253397 0.2479786 0.34972630    5      5
## 156 0.211044934 0.09165400 0.3560818 0.16720736    4      3
## 158 0.371902880 0.28150098 0.4597658 0.02142464    4      3
## 169 0.006200316 0.07553956 0.2418227 0.74631875    5      5
## 185 0.009887789 0.13094611 0.1979845 0.52836920    5      5
## 187 0.004682547 0.06415782 0.4609615 0.33017053    4      5
## 192 0.020835027 0.23143091 0.5534388 0.02499704    4      3
## 194 0.016519042 0.19124418 0.4090025 0.18695622    4      4
## 195 0.013522396 0.22357761 0.2883438 0.22303105    4      4
## 196 0.164979171 0.16741927 0.5000000 0.03466019    4      5
## 197 0.370850637 0.19965007 0.2785941 0.04937581    2      3
## 199 0.005449677 0.08194214 0.2449273 0.84897821    5      5
## 210 0.097546510 0.15006168 0.4158526 0.02502458    4      3
## 216 0.015185060 0.06662203 0.1524800 0.91245925    5      5
## 220 0.014014042 0.29796455 0.3130246 0.09473492    4      4
## 227 0.110743515 0.06062925 0.5289363 0.09233281    4      5
## 234 0.031126931 0.12841604 0.4587588 0.17803205    4      3
## 240 0.019187936 0.07999859 0.4493222 0.19559540    4      5
## 245 0.043781404 0.11299057 0.3149894 0.30896815    4      4
## 249 0.014657140 0.12358970 0.3339082 0.37875820    5      5
## 261 0.023515058 0.16404980 0.3147852 0.31096906    4      3
## 277 0.009961137 0.07169570 0.2418350 0.91441119    5      5
## 283 0.017808583 0.09433438 0.2642887 0.67668912    5      5
## 290 0.009189587 0.07021753 0.2063569 0.88608166    5      4
## 293 0.014243032 0.06742994 0.3948653 0.20602138    4      5
## 302 0.008718099 0.12063653 0.2836645 0.38228508    5      4
## 305 0.018660461 0.08997736 0.3718278 0.50000000    5      4
## 308 0.023504239 0.13800375 0.2409229 0.35836908    5      4
## 311 0.007957456 0.07030196 0.2305856 0.80535638    5      5
## 320 0.014435085 0.07965039 0.2398128 0.79000362    5      2
## 322 0.019763452 0.06560852 0.2074372 0.89360102    5      5
## 330 0.012249899 0.07225188 0.1998089 0.84632792    5      4
## 332 0.027888763 0.10706910 0.4006138 0.17864641    4      4
## 333 0.019442797 0.07898515 0.2738407 0.79517886    5      5
## 339 0.009140503 0.09384334 0.2579765 0.67890531    5      5
## 341 0.019947251 0.09218453 0.4233523 0.17817311    4      4
## 344 0.024994684 0.06792967 0.3182269 0.73180903    5      5
## 349 0.009532052 0.06444804 0.1663806 0.91021258    5      5
## 355 0.012068832 0.05497200 0.2006331 0.93662992    5      5
## 356 0.028095036 0.08425981 0.2869127 0.60002628    5      3
## 365 0.011546114 0.12969776 0.2830908 0.36269053    5      3
## 366 0.008633088 0.12119562 0.3122069 0.47376828    5      4
## 369 0.007649108 0.07444844 0.3214340 0.43648353    5      4
## 371 0.009637870 0.07596169 0.2168805 0.84973708    5      5
## 373 0.013568699 0.06734426 0.3039575 0.78289276    5      5
## 389 0.040270357 0.10330167 0.2843236 0.38591983    5      2
## 390 0.033173062 0.12325444 0.2700131 0.40851678    5      4
## 396 0.011096723 0.06136803 0.3729068 0.37599345    5      4
## 412 0.005822502 0.07852685 0.4041251 0.33833892    4      5
## 413 0.007953292 0.10396231 0.3232255 0.31854369    4      3
## 415 0.009493377 0.11998726 0.3384452 0.41191791    5      4
## 422 0.024026956 0.15813401 0.4129313 0.19837992    4      5
## 425 0.017661162 0.05752743 0.1913810 0.95119745    5      5
## 434 0.012644308 0.05569189 0.2926453 0.48341125    5      5
## 438 0.004975260 0.08397950 0.2866411 0.72875268    5      4
## 441 0.065576532 0.17515936 0.3670258 0.15062911    4      5
## 442 0.024188008 0.08304819 0.2043371 0.71408928    5      5
## 445 0.010731841 0.09579384 0.3998345 0.47148658    5      5
## 447 0.032233026 0.14908243 0.3442012 0.09573800    4      3
## 453 0.083043108 0.13139310 0.4609986 0.16772013    4      4
## 454 0.135064245 0.11283651 0.2402330 0.13935669    4      5
## 462 0.006097432 0.05435568 0.2142035 0.83949891    5      5
## 474 0.038500599 0.10407685 0.3429597 0.39211686    5      3
## 476 0.010962992 0.15977908 0.2908361 0.27789219    4      3
## 493 0.009186388 0.12450798 0.2584795 0.46376558    5      5
## 502 0.017351106 0.20331736 0.4895393 0.15253238    4      4
## 503 0.185962403 0.05047058 0.2642774 0.21112020    4      5
## 506 0.016522463 0.12484893 0.2644950 0.61113805    5      5
## 508 0.012280471 0.05655856 0.3735849 0.78871511    5      5
## 512 0.019792351 0.10665004 0.2190826 0.69643024    5      5
## 513 0.024443744 0.12794950 0.2008131 0.38010857    5      5
## 521 0.159807171 0.09561297 0.2657449 0.50000000    5      2
## 524 0.043229468 0.12150807 0.3117975 0.27520466    4      5
# Confusion matrix: rows = actual class, columns = predicted class.
# Both sides are put on the same fixed set of class levels so the table is
# always square and diag(CM) lines up, even when a class is never predicted
# or never occurs among the test rows.
cm.levels <- c("2", "3", "4", "5")
CM <- table(
  factor(Evaluation$Actual, levels = cm.levels),
  factor(Evaluation$Vote, levels = cm.levels)
)
CM
##    
##      2  3  4  5
##   2  0  0  3  4
##   3  1  1 17  6
##   4  0  0 30 32
##   5  0  0 21 93
#Proportions
# Number of evaluated rows and the number of rows per actual class (support).
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))
Support <- c(Length2, Length3, Length4, Length5)

#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)

# CM has the actual classes in its rows and the predicted classes in its
# columns, therefore
#   precision = diag / colSums  (share of each predicted class that is right)
#   recall    = diag / rowSums  (share of each actual class that is found)
# Both are averaged over the classes, weighted by class support, and divided
# by the number of evaluated rows (`Overall`) rather than a hard-coded 208.
# A class with an empty row/column gives 0/0; it is counted as 0.

#Precision
ClassPrecision <- diag(CM)/colSums(CM)
ClassPrecision[is.nan(ClassPrecision)] <- 0
Precision <- sum(ClassPrecision*Support)/Overall

#Recall
ClassRecall <- diag(CM)/rowSums(CM)
ClassRecall[is.nan(ClassRecall)] <- 0
Recall <- sum(ClassRecall*Support)/Overall


Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538