PREPARATION

# NOTE(review): setwd() ties the script to one machine's directory layout.
# The file reads visible in this script use absolute paths, so this call may
# be unnecessary -- confirm nothing else relies on the working directory.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/50")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

# Read the labelled source workbook and keep only its Score column,
# which is the raw rating that gets recoded into classes further down.
Labels <- read_excel(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)

Label <- Labels[["Score"]]

Import the TP feature set with a 50th percentile cut-off.

# Load the term-presence feature matrix from CSV.
Features <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/50/Feature Set 1 50th TP.csv"
)

# Discard the first column and keep everything else as a data frame.
Features <- Features[, -1, drop = FALSE]

RECODE LABELS FOR ONE-VS-ALL

# Recode the raw scores into one-vs-all binary targets (Label2..Label5) and a
# single multi-class target (All).
#
# The scores are aligned to the feature matrix by taking one score per feature
# row, instead of a hard-coded 1:1000 loop, so the label vectors always have
# the same length as the partition index built from nrow(Features).
scores <- Label[seq_len(nrow(Features))]

# Fail fast on missing scores (too few labels, or NA in the source data);
# silently coding them as "not in class" would corrupt every target.
stopifnot(!anyNA(scores))

#Class 2: scores 3-4 -> 1, everything else -> 0
Label2 <- as.factor(as.numeric(scores %in% c(3, 4)))

#Class 3: scores 5-6 -> 1, everything else -> 0
Label3 <- as.factor(as.numeric(scores %in% c(5, 6)))

#Class 4: scores 7-8 -> 1, everything else -> 0
Label4 <- as.factor(as.numeric(scores %in% c(7, 8)))

#Class 5: scores 9-10 -> 1, everything else -> 0
Label5 <- as.factor(as.numeric(scores %in% c(9, 10)))

#All Labels: 9-10 -> 5, 7-8 -> 4, 5-6 -> 3, any other score -> 2
All <- as.factor(
  ifelse(scores %in% c(9, 10), 5,
    ifelse(scores %in% c(7, 8), 4,
      ifelse(scores %in% c(5, 6), 3, 2)
    )
  )
)

TRANSFORM FEATURES TO NUMERIC VARIABLES

# Coerce every feature column to numeric (double).
# `Features[] <-` keeps the data-frame structure and column names, and
# covers all columns rather than relying on a hard-coded column count.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  1101 variables:
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ addit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ air          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit    : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ also         : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ altern       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ amen         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ american     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam    : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ annoy        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon        : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anyth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anywh        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart        : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ apolog       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appear       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appoint      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ approach     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arena        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arrang       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arriv        : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ ask          : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ aspect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assum        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atm          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attend       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attitud      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ averag       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avoid        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awar         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ away         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awesom       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ back         : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ bacon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bad          : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ bag          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bake         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bang         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bank         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bar          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ bare         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ base         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basement     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basic        : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ bath         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathroom     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathtub      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beauti       : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ bed          : num  0 0 0 1 0 0 1 0 0 1 ...
##  $ bedroom      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beer         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ begin        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ behind       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ believ       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ benefit      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ besid        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best         : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ better       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ big          : num  1 0 0 0 0 0 0 0 0 1 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Draw a group id per row (1 = training, 2 = validation) with probabilities
# 0.8 / 0.2. Each row is assigned independently, so the split is only
# approximately 80/20; the seed makes the assignment reproducible.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Partition the labels with the same index used for the features.

# Split every label vector with the partition index `ind`
# (1 = training rows, 2 = validation rows).

# One-vs-all targets, training part
train.labels.2 <- Label2[which(ind == 1)]
train.labels.3 <- Label3[which(ind == 1)]
train.labels.4 <- Label4[which(ind == 1)]
train.labels.5 <- Label5[which(ind == 1)]

# One-vs-all targets, validation part
test.labels.2 <- Label2[which(ind == 2)]
test.labels.3 <- Label3[which(ind == 2)]
test.labels.4 <- Label4[which(ind == 2)]
test.labels.5 <- Label5[which(ind == 2)]

# Multi-class target
train.labels <- All[which(ind == 1)]
test.labels <- All[which(ind == 2)]

SVM MODEL

# One binary SVM per class (one-vs-all). For each class: attach the binary
# target to a copy of the training features, fit an unscaled SVM with
# probability estimates enabled, predict on the validation features and pull
# out the per-class probability matrix. Models are fitted in the order
# 2, 3, 4, 5.

# Class 2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

# Class 3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

# Class 4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

# Class 5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")

VOTING

Use probabilities as an input for the voting procedure.

# Put the four probability matrices side by side: two columns per model,
# one row per validation observation.
Voting.df <- as.data.frame(cbind(Prob2, Prob3, Prob4, Prob5))

# NOTE(review): these labels are fixed by hand and presume the column order
# each probability matrix happened to have -- confirm against colnames(Prob*).
names(Voting.df) <- c(
  "Class 2: 1", "Class2: 0",
  "Class 3: 0", "Class3: 1",
  "Class 4: 0", "Class4: 1",
  "Class 5: 0", "Class5: 1"
)

head(Voting.df)
##     Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.065601271 0.9343987  0.7675769 0.23242309  0.6385660 0.3614340
## 14 0.005602181 0.9943978  0.9330662 0.06693376  0.6995650 0.3004350
## 16 0.009361841 0.9906382  0.9276978 0.07230217  0.7964899 0.2035101
## 26 0.059216050 0.9407840  0.9201148 0.07988517  0.6339025 0.3660975
## 28 0.047756100 0.9522439  0.8650292 0.13497080  0.6831120 0.3168880
## 29 0.019548217 0.9804518  0.8148213 0.18517870  0.5858900 0.4141100
##    Class 5: 0  Class5: 1
## 5   0.9211599 0.07884014
## 14  0.3811052 0.61889477
## 16  0.3686065 0.63139350
## 26  0.7018917 0.29810826
## 28  0.6296034 0.37039659
## 29  0.8837346 0.11626541
# Keep, for every one-vs-all model, only the probability of the positive
# outcome ("1"). The position of that column inside each probability matrix
# is looked up by name instead of being hard-coded, because the column order
# is decided by the fitted model and is not the same for every model.
prob.list <- list(Prob2, Prob3, Prob4, Prob5)
positive.pos <- vapply(
  prob.list,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Every model must expose a "1" column, otherwise voting is meaningless.
stopifnot(!anyNA(positive.pos))

# Offset of each model's first column inside Voting.df (matrices are bound
# side by side in the order Prob2, Prob3, Prob4, Prob5).
block.offset <- cumsum(c(0, vapply(prob.list, ncol, integer(1))))[seq_along(prob.list)]
SEQ <- block.offset + positive.pos

Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##              2          3         4          5
## 5  0.065601271 0.23242309 0.3614340 0.07884014
## 14 0.005602181 0.06693376 0.3004350 0.61889477
## 16 0.009361841 0.07230217 0.2035101 0.63139350
## 26 0.059216050 0.07988517 0.3660975 0.29810826
## 28 0.047756100 0.13497080 0.3168880 0.37039659
## 29 0.019548217 0.18517870 0.4141100 0.11626541
# Voting: each observation is assigned the class whose one-vs-all model gave
# the highest positive probability (first column wins on ties).
Evaluation <- Transformed.Voting.df

# Column positions run 1..4 while the classes are labelled 2..5, hence the +1.
Index <- as.numeric(
  max.col(as.matrix(Transformed.Voting.df), ties.method = "first")
) + 1

Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, 100)
##               2          3         4          5 Vote Actual
## 5   0.065601271 0.23242309 0.3614340 0.07884014    4      4
## 14  0.005602181 0.06693376 0.3004350 0.61889477    5      5
## 16  0.009361841 0.07230217 0.2035101 0.63139350    5      5
## 26  0.059216050 0.07988517 0.3660975 0.29810826    4      4
## 28  0.047756100 0.13497080 0.3168880 0.37039659    5      4
## 29  0.019548217 0.18517870 0.4141100 0.11626541    4      4
## 39  0.164007448 0.10157794 0.3465062 0.12075390    4      5
## 40  0.016210058 0.22490915 0.6621418 0.05063282    4      3
## 60  0.010361800 0.03937324 0.2549049 0.77568430    5      5
## 61  0.032643751 0.31140815 0.2855201 0.30885069    3      3
## 72  0.004156537 0.08190410 0.3331915 0.57841896    5      4
## 81  0.011141455 0.10255066 0.3934008 0.32886621    4      3
## 86  0.026327778 0.13395248 0.2215864 0.62983145    5      5
## 90  0.074854182 0.17562022 0.3437507 0.11232926    4      4
## 92  0.030657452 0.15748748 0.3282574 0.13226185    4      4
## 113 0.060322764 0.06702227 0.3410302 0.28695466    4      5
## 116 0.028574009 0.13287576 0.2227370 0.40510662    5      4
## 117 0.024010719 0.07800817 0.3101777 0.37402745    5      5
## 122 0.041958856 0.15093623 0.3591797 0.07235008    4      4
## 123 0.019362095 0.07686879 0.3633148 0.24736326    4      2
## 124 0.029507629 0.20634981 0.3440499 0.06251467    4      4
## 131 0.004088441 0.13170141 0.3186188 0.55368832    5      4
## 135 0.157151876 0.22426545 0.4728317 0.03847118    4      3
## 137 0.002733994 0.05963790 0.3555119 0.50000000    5      5
## 140 0.015001965 0.17232748 0.3362932 0.22382989    4      4
## 142 0.011432009 0.07760878 0.3116876 0.48754929    5      5
## 149 0.015428060 0.06463795 0.3434278 0.51481683    5      4
## 154 0.030784614 0.12960578 0.2413049 0.35187569    5      5
## 156 0.174179121 0.08922331 0.3535987 0.17185320    4      3
## 158 0.317430830 0.27835904 0.4497158 0.02137248    4      3
## 169 0.005951224 0.07836386 0.2451005 0.74791391    5      5
## 185 0.009580262 0.12719490 0.1949168 0.52361776    5      5
## 187 0.004053913 0.06400352 0.4590230 0.33691299    4      5
## 192 0.021519319 0.22035629 0.5693126 0.02579768    4      3
## 194 0.017372678 0.19653051 0.4150956 0.18689782    4      4
## 195 0.013534903 0.24141518 0.2812008 0.22835057    4      4
## 196 0.146262984 0.16273240 0.4775080 0.03580206    4      5
## 197 0.425794532 0.17893357 0.2765371 0.05090354    2      3
## 199 0.005327095 0.08763072 0.2420727 0.84405277    5      5
## 210 0.117968492 0.15321369 0.3962645 0.02571694    4      3
## 216 0.016026390 0.06176495 0.1497539 0.91033972    5      5
## 220 0.012714952 0.30157519 0.3192776 0.09521295    4      4
## 227 0.115383737 0.05567640 0.5465399 0.08599267    4      5
## 234 0.031984994 0.13141538 0.4658474 0.18353771    4      3
## 240 0.020057004 0.07855071 0.4549698 0.19872207    4      5
## 245 0.044355642 0.11328518 0.3145619 0.30786572    4      4
## 249 0.015138160 0.12574747 0.3384071 0.38207798    5      5
## 261 0.025451494 0.16929537 0.3275426 0.31504790    4      3
## 277 0.009636257 0.07028297 0.2460693 0.91527206    5      5
## 283 0.017644902 0.09650251 0.2617475 0.67468323    5      5
## 290 0.009366834 0.07205544 0.2080436 0.88651935    5      4
## 293 0.012504854 0.06849038 0.4025123 0.21026093    4      5
## 302 0.009178425 0.11540087 0.2830814 0.38834448    5      4
## 305 0.018764390 0.09157890 0.3808060 0.49215437    5      4
## 308 0.025632468 0.14121282 0.2418988 0.35805351    5      4
## 311 0.007925030 0.07077653 0.2271392 0.80425891    5      5
## 320 0.014301658 0.08217342 0.2405749 0.79203658    5      2
## 322 0.020846050 0.06701951 0.2082092 0.89265863    5      5
## 330 0.012135715 0.07447777 0.1969410 0.84388733    5      4
## 332 0.032627377 0.10922919 0.4053880 0.17926420    4      4
## 333 0.020310499 0.07848261 0.2751302 0.79735405    5      5
## 339 0.009021323 0.09402719 0.2589371 0.67862294    5      5
## 341 0.021463689 0.09796297 0.4319255 0.17971315    4      4
## 344 0.025455943 0.06661961 0.3287234 0.73067749    5      5
## 349 0.009391533 0.06761823 0.1645056 0.91085596    5      5
## 355 0.012141932 0.05652093 0.2032807 0.93703876    5      5
## 356 0.028826169 0.08447198 0.2890631 0.60138585    5      3
## 365 0.011687758 0.13767487 0.2808409 0.36595339    5      3
## 366 0.008183069 0.11748070 0.3216713 0.48006138    5      4
## 369 0.007731737 0.07374461 0.3288342 0.42916884    5      4
## 371 0.009560912 0.07963605 0.2187417 0.85143776    5      5
## 373 0.013199130 0.06692234 0.3079821 0.77981711    5      5
## 389 0.046665101 0.10471227 0.2871548 0.38972585    5      2
## 390 0.037075001 0.12668332 0.2700089 0.41049955    5      4
## 396 0.011081879 0.06371549 0.3748828 0.37499418    5      4
## 412 0.005419143 0.07273130 0.4045119 0.33027242    4      5
## 413 0.006918447 0.11061131 0.3192161 0.30562455    4      3
## 415 0.007745250 0.13066093 0.3386417 0.41445867    5      4
## 422 0.018067535 0.14511107 0.3995792 0.19312475    4      5
## 425 0.016659309 0.05854865 0.1909779 0.94926307    5      5
## 434 0.012888516 0.05192523 0.2998809 0.48627736    5      5
## 438 0.004448969 0.08095203 0.2863108 0.72996856    5      4
## 441 0.067347113 0.17366548 0.3469161 0.14250019    4      5
## 442 0.023577459 0.08233886 0.2011293 0.71268568    5      5
## 445 0.009628341 0.09361407 0.4092050 0.47168075    5      5
## 447 0.036049107 0.14769153 0.3389257 0.09356921    4      3
## 453 0.089237281 0.12744136 0.4521446 0.17329824    4      4
## 454 0.125413958 0.10176170 0.2309437 0.13893078    4      5
## 462 0.005555445 0.05383906 0.2198852 0.83858855    5      5
## 474 0.046658397 0.10231819 0.3183467 0.39444134    5      3
## 476 0.012122992 0.16343337 0.2997408 0.28304400    4      3
## 493 0.008668906 0.12177729 0.2629375 0.46007434    5      5
## 502 0.015688649 0.21770241 0.4806482 0.14992814    4      4
## 503 0.171914800 0.04266066 0.2324954 0.20795206    4      5
## 506 0.015872601 0.13046940 0.2628529 0.59907342    5      5
## 508 0.011968136 0.05884374 0.3673096 0.78243650    5      5
## 512 0.020365992 0.09426126 0.2171769 0.70349134    5      5
## 513 0.019901958 0.12679680 0.1924247 0.38743758    5      5
## 521 0.186497500 0.09241020 0.2584039 0.49410256    5      2
## 524 0.048886562 0.12223018 0.3099854 0.27418604    4      5
# Confusion matrix: rows = actual class, columns = voted class.
# Vote is numeric, so it is converted to a factor with the same levels as
# Actual; otherwise a class that is never predicted would drop its column,
# the table would stop being square and diag(CM) would no longer line up
# with the classes.
CM <- table(
  Evaluation$Actual,
  factor(Evaluation$Vote, levels = levels(Evaluation$Actual))
)
CM
##    
##      2  3  4  5
##   2  0  0  3  4
##   3  1  1 17  6
##   4  0  0 30 32
##   5  0  0 21 93
#Proportions
# Number of validation observations overall and per actual class; the
# per-class counts are the weights for the averaged precision and recall.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2)
Length3 <- sum(Evaluation$Actual == 3)
Length4 <- sum(Evaluation$Actual == 4)
Length5 <- sum(Evaluation$Actual == 5)
Support <- c(Length2, Length3, Length4, Length5)

#Accuracy
Accuracy <- sum(diag(CM)) / sum(CM)

#Precision
# CM has actual classes in rows and voted classes in columns, so per-class
# precision is correct / all predicted as that class = diag / column sums.
# The per-class values are averaged with the actual class counts as weights.
Precision <- diag(CM) / colSums(CM)
Precision <- sum(Precision * Support) / Overall

#Recall
# Per-class recall is correct / all actual members of that class
# = diag / row sums. Weighted by class counts this equals the accuracy.
Recall <- diag(CM) / rowSums(CM)
Recall <- sum(Recall * Support) / Overall


Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538