PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/10-30")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)
# Import labels: read the source workbook and keep its Score column as the
# raw target vector.
labels_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
Labels <- read_excel(path = labels_path)
Label <- Labels$Score

# Import features: read the feature CSV and discard its first column.
# NOTE(review): the first column is presumably a row index written on export;
# confirm against the CSV.
features_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/10-30/Feature Set 1: 10 to 30th Percentile.csv"
Features <- read.csv(file = features_path)[-1]


RECODE LABELS FOR ONE-VS-ALL

# Recode the raw scores into one-vs-all binary targets (Label2..Label5) and a
# single multi-class target (All).
#
# The number of rows is taken from the feature table instead of a hard-coded
# 1000, so the labels always stay row-aligned with Features. Only the first
# n_obs scores are used, exactly as the original index loops did.
n_obs <- nrow(Features)
stopifnot(length(Label) >= n_obs)
scores <- Label[seq_len(n_obs)]
# A missing score cannot be assigned to a class; fail loudly rather than
# silently recoding it (the original loops also stopped on NA).
stopifnot(!anyNA(scores))

# Returns a factor that is 1 where `scores` is one of `positive`, 0 otherwise.
recode_one_vs_all <- function(scores, positive) {
  as.factor(as.numeric(scores %in% positive))
}

#Class 2: scores 3 or 4
Label2 <- recode_one_vs_all(scores, c(3, 4))
#Class 3: scores 5 or 6
Label3 <- recode_one_vs_all(scores, c(5, 6))
#Class 4: scores 7 or 8
Label4 <- recode_one_vs_all(scores, c(7, 8))
#Class 5: scores 9 or 10
Label5 <- recode_one_vs_all(scores, c(9, 10))

#All Labels
# Every score that is not 5..10 falls into class 2 (this is the original
# "else" branch). NOTE(review): that also covers scores other than 3 and 4,
# which Label2 codes as 0 -- confirm such scores do not occur in the data.
All <- rep(2, n_obs)
All[scores %in% c(5, 6)] <- 3
All[scores %in% c(7, 8)] <- 4
All[scores %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform Integer to Numeric
# Coerce every feature column to numeric. Assigning into Features[] keeps the
# object a data frame (same column names and row names), and iterating over
# the columns themselves avoids hard-coding the column count (1494).
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  1494 variables:
##  $ abit         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ach          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ add          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ addit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afford       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ afraid       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ africa       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ afternoon    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ago          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircon       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit    : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albert       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alcohol      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ almost       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ along        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ also         : num  0 0 0 3 0 0 1 0 1 0 ...
##  $ altern       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ ambienc      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amen         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ american     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amount       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam    : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ angri        : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ anna         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annoy        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon        : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anyth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anywh        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart        : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ apolog       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appal        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appar        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appeal       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appear       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appl         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appoint      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ approach     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apt          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ aren         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arena        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arrang       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arriv        : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ art          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ asid         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ ask          : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ asleep       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ aspect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assign       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assist       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assum        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assur        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ate          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atm          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attend       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attitud      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ averag       : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Randomly assign each row to partition 1 (training) or 2 (validation) with
# probabilities 0.8 / 0.2. The draw is per row, so the realised split is only
# approximately 80/20. The fixed seed makes the assignment reproducible.
set.seed(1234)
# TRUE rather than T: T is an ordinary variable that can be reassigned.
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same partition index used for the
# features, so labels and feature rows stay aligned.
in_train <- ind == 1
in_test <- ind == 2

# One-vs-all targets, training part
train.labels.2 <- Label2[in_train]
train.labels.3 <- Label3[in_train]
train.labels.4 <- Label4[in_train]
train.labels.5 <- Label5[in_train]

# One-vs-all targets, validation part
test.labels.2 <- Label2[in_test]
test.labels.3 <- Label3[in_test]
test.labels.4 <- Label4[in_test]
test.labels.5 <- Label5[in_test]

# Multi-class target
train.labels <- All[in_train]
test.labels <- All[in_test]

SVM MODEL (ONE-VS-ALL)

# One-vs-all SVMs: one binary classifier per class (2, 3, 4, 5).

# Training frames: the shared training features plus the class-specific
# binary target in a Score column.
train2 <- train
train2[["Score"]] <- train.labels.2
train3 <- train
train3[["Score"]] <- train.labels.3
train4 <- train
train4[["Score"]] <- train.labels.4
train5 <- train
train5[["Score"]] <- train.labels.5

# Fit each model on the unscaled features with probability estimates enabled.
SVM2 <- svm(formula = Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(formula = Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(formula = Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(formula = Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict the validation rows, requesting class probabilities.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# The probabilities are attached to each prediction as an attribute.
Prob2 <- attr(P2, which = "probabilities")
Prob3 <- attr(P3, which = "probabilities")
Prob4 <- attr(P4, which = "probabilities")
Prob5 <- attr(P5, which = "probabilities")

VOTING

# Put the probability matrices of the four models side by side and label
# the eight resulting columns.
# NOTE(review): the labels are fixed by position; they assume each matrix
# orders its two class columns as written here -- confirm after refitting.
voting_headers <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")
Voting.df <- setNames(data.frame(Prob2, Prob3, Prob4, Prob5), voting_headers)

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.08547205 0.9145280  0.6566529 0.34334707  0.7036284 0.2963716
## 14 0.01202274 0.9879773  0.9280069 0.07199312  0.7053650 0.2946350
## 16 0.01387237 0.9861276  0.9000704 0.09992963  0.7898209 0.2101791
## 26 0.02378320 0.9762168  0.8781510 0.12184900  0.6606955 0.3393045
## 28 0.03266428 0.9673357  0.8677411 0.13225894  0.7018990 0.2981010
## 29 0.01441988 0.9855801  0.8456658 0.15433422  0.6010457 0.3989543
##    Class 5: 0  Class5: 1
## 5   0.9152238 0.08477624
## 14  0.3813637 0.61863629
## 16  0.6074497 0.39255031
## 26  0.6867812 0.31321883
## 28  0.3855213 0.61447875
## 29  0.6277253 0.37227473
# Keep, for every one-vs-all model, only the probability of its positive
# class ("1").
#
# The position of the "1" column differs between models (first for class 2,
# second for the others in the table printed above), so it is looked up by
# name in each probability matrix instead of being hard-coded as c(1,4,6,8).
# SEQ holds the resulting column positions inside Voting.df, which places the
# four matrices side by side in the order Prob2, Prob3, Prob4, Prob5.
prob_list <- list(Prob2, Prob3, Prob4, Prob5)
prob_widths <- vapply(prob_list, ncol, integer(1))
positive_col <- vapply(
  prob_list,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Stop if a matrix has no column named "1" or Voting.df has another layout.
stopifnot(!anyNA(positive_col), ncol(Voting.df) == sum(prob_widths))
SEQ <- cumsum(c(0, prob_widths[-length(prob_widths)])) + positive_col
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##             2          3         4          5
## 5  0.08547205 0.34334707 0.2963716 0.08477624
## 14 0.01202274 0.07199312 0.2946350 0.61863629
## 16 0.01387237 0.09992963 0.2101791 0.39255031
## 26 0.02378320 0.12184900 0.3393045 0.31321883
## 28 0.03266428 0.13225894 0.2981010 0.61447875
## 29 0.01441988 0.15433422 0.3989543 0.37227473
# Voting: each row is assigned the class whose one-vs-all model reports the
# highest positive-class probability.
Evaluation <- Transformed.Voting.df
# which.max returns the column position (1..4); the columns stand for
# classes 2..5, hence the shift by one.
Index <- 1 + as.numeric(apply(Evaluation, MARGIN = 1, FUN = which.max))
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.085472045 0.34334707 0.2963716 0.08477624    3      4
## 14  0.012022744 0.07199312 0.2946350 0.61863629    5      5
## 16  0.013872369 0.09992963 0.2101791 0.39255031    5      5
## 26  0.023783204 0.12184900 0.3393045 0.31321883    4      4
## 28  0.032664282 0.13225894 0.2981010 0.61447875    5      4
## 29  0.014419876 0.15433422 0.3989543 0.37227473    4      4
## 39  0.214897836 0.16504593 0.3281248 0.07753896    4      5
## 40  0.006580473 0.29452482 0.5508210 0.30724048    4      3
## 60  0.008640354 0.05206319 0.2942697 0.45646939    5      5
## 61  0.181904126 0.20897992 0.4296970 0.16616735    4      3
## 72  0.007192594 0.08413272 0.3415113 0.62850788    5      4
## 81  0.020484066 0.10690055 0.3557239 0.41802948    5      3
## 86  0.020489219 0.11915166 0.2501746 0.34274197    5      5
## 90  0.051132109 0.12971481 0.2931121 0.37581182    5      4
## 92  0.033347181 0.13386216 0.3315232 0.22201250    4      4
## 113 0.038705935 0.06405057 0.3793537 0.35486170    4      5
## 116 0.041550333 0.13351331 0.2389954 0.27463601    5      4
## 117 0.023782370 0.10184144 0.2966695 0.36489499    5      5
## 122 0.054083770 0.13918421 0.3212767 0.18444421    4      4
## 123 0.018347076 0.08137488 0.3604422 0.39340657    5      2
## 124 0.014972835 0.13617127 0.3744226 0.26322526    4      4
## 131 0.008802068 0.12162820 0.3152965 0.43855962    5      4
## 135 0.456954819 0.19349504 0.4184925 0.06130111    2      3
## 137 0.009984332 0.08056198 0.3418051 0.33971073    4      5
## 140 0.016824634 0.15344805 0.3150152 0.32915780    5      4
## 142 0.016991974 0.08716409 0.3396473 0.55313462    5      5
## 149 0.021124401 0.07403776 0.3573193 0.60758237    5      4
## 154 0.035826296 0.10181066 0.2641018 0.55244277    5      5
## 156 0.157193100 0.08268927 0.4175070 0.22495676    4      3
## 158 0.404103541 0.21255899 0.5132562 0.01203972    4      3
## 169 0.014200585 0.07587525 0.2725372 0.60387709    5      5
## 185 0.013430051 0.10210540 0.2606621 0.46893370    5      5
## 187 0.008486077 0.08990863 0.3910884 0.50772123    5      5
## 192 0.020639217 0.16750610 0.4932324 0.15891544    4      3
## 194 0.022757812 0.19589223 0.3544082 0.40517911    5      4
## 195 0.024349472 0.14849541 0.2686432 0.46940476    5      4
## 196 0.222374807 0.12140614 0.5000000 0.13323739    4      5
## 197 0.447163601 0.09992311 0.3276895 0.06341907    2      3
## 199 0.007279882 0.10661845 0.2388612 0.72779372    5      5
## 210 0.147868566 0.12432114 0.3433263 0.19398922    4      3
## 216 0.018691633 0.06856213 0.1834996 0.75393596    5      5
## 220 0.020611712 0.20758960 0.3498181 0.25168890    4      4
## 227 0.403401285 0.03497746 0.5758734 0.07332662    4      5
## 234 0.026176803 0.13390870 0.4459418 0.35496653    4      3
## 240 0.028718439 0.09162028 0.3969969 0.39106503    4      5
## 245 0.028457220 0.10816393 0.3099416 0.54065736    5      4
## 249 0.020881628 0.12349432 0.3191613 0.50738096    5      5
## 261 0.023632432 0.12858040 0.3036343 0.50588311    5      3
## 277 0.011655698 0.07808109 0.2589843 0.80054827    5      5
## 283 0.017940068 0.11118687 0.2976560 0.45984296    5      5
## 290 0.013224233 0.09171076 0.2293738 0.78215469    5      4
## 293 0.013691091 0.09653867 0.3698898 0.42306715    5      5
## 302 0.014213975 0.13850332 0.3130927 0.29482417    4      4
## 305 0.020591923 0.09737659 0.3312859 0.61719458    5      4
## 308 0.031162989 0.11281071 0.2507979 0.59036412    5      4
## 311 0.013283258 0.08225982 0.2589666 0.61431153    5      5
## 320 0.016871752 0.09188037 0.2647275 0.72838496    5      2
## 322 0.017526514 0.07016695 0.2257918 0.78080316    5      5
## 330 0.012678855 0.08034923 0.2253681 0.77579685    5      4
## 332 0.020404921 0.11747009 0.3858219 0.38671948    5      4
## 333 0.018411626 0.08455076 0.2884456 0.74975231    5      5
## 339 0.018166760 0.10293504 0.2612546 0.61411164    5      5
## 341 0.017392644 0.10705352 0.3921894 0.47259762    5      4
## 344 0.020577260 0.07478555 0.3177792 0.71941995    5      5
## 349 0.014134195 0.08531594 0.1962472 0.74499679    5      5
## 355 0.014832226 0.07320404 0.2361999 0.82367480    5      5
## 356 0.027591983 0.08814719 0.2924420 0.63883561    5      3
## 365 0.012503572 0.13257245 0.2766179 0.51344852    5      3
## 366 0.012408447 0.11315536 0.2942943 0.41119021    5      4
## 369 0.011160990 0.09329207 0.3155697 0.38887314    5      4
## 371 0.014854994 0.09580901 0.2333518 0.69477685    5      5
## 373 0.014225151 0.08380653 0.3143158 0.65700304    5      5
## 389 0.028696589 0.10175898 0.3028918 0.56769017    5      2
## 390 0.028359136 0.11905881 0.2837544 0.60391241    5      4
## 396 0.014792359 0.07861171 0.3540121 0.50000000    5      4
## 412 0.007107856 0.10089574 0.3292915 0.51500789    5      5
## 413 0.028524740 0.10718546 0.3224879 0.15729573    4      3
## 415 0.012351525 0.10009868 0.3509467 0.51021872    5      4
## 422 0.105956950 0.11175781 0.4160585 0.36857117    4      5
## 425 0.015550719 0.07331124 0.2221505 0.85193774    5      5
## 434 0.018481696 0.06169364 0.3220067 0.28948831    4      5
## 438 0.008219456 0.09034836 0.2825791 0.67686970    5      4
## 441 0.220532445 0.06914151 0.4446927 0.08077281    4      5
## 442 0.028671085 0.08850723 0.2300474 0.69419592    5      5
## 445 0.021316453 0.09140980 0.3933320 0.31816095    4      5
## 447 0.046022293 0.14359846 0.3658527 0.13529684    4      3
## 453 0.031732155 0.15349707 0.4385595 0.45959327    5      4
## 454 0.067018163 0.12013318 0.3076758 0.09432540    4      5
## 462 0.006810181 0.07227828 0.2422851 0.65816547    5      5
## 474 0.033052234 0.10405238 0.3206793 0.51684040    5      3
## 476 0.019980161 0.13880975 0.2807088 0.44191476    5      3
## 493 0.011559209 0.10624582 0.2877772 0.53667794    5      5
## 502 0.011417896 0.17389026 0.4517280 0.35053653    4      4
## 503 0.091765073 0.04655090 0.3390371 0.41392192    5      5
## 506 0.018319346 0.10784108 0.2702484 0.62165168    5      5
## 508 0.015247765 0.06820652 0.3392760 0.74783150    5      5
## 512 0.024480434 0.09995428 0.2619933 0.68803012    5      5
## 513 0.016651752 0.11204315 0.2530864 0.42771423    5      5
## 521 0.052057617 0.08933317 0.2802501 0.66251229    5      2
## 524 0.028316410 0.12164892 0.2989574 0.34116661    5      5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# The vote is converted to a factor with the same levels as the actual
# labels so that a class which is never predicted still gets a (zero)
# column. Otherwise the table is not square and diag(CM) no longer lines up
# with the correct predictions.
CM <- table(
  Evaluation$Actual,
  factor(Evaluation$Vote, levels = levels(Evaluation$Actual))
)
CM
##    
##      2  3  4  5
##   2  0  0  1  6
##   3  2  0 11 12
##   4  1  1 18 42
##   5  0  0 16 98
#Proportions
# Number of validation observations overall and per actual class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# CM has the actual classes in rows and the predicted classes in columns.
# diag(CM) only holds the correct predictions if both dimensions list the
# same classes in the same order, so check that before using it.
stopifnot(identical(rownames(CM), colnames(CM)))
true_positives <- diag(CM)
actual_totals <- rowSums(CM)     # support of each actual class
predicted_totals <- colSums(CM)  # how often each class was predicted

#Accuracy
Accuracy <- sum(true_positives) / sum(CM)

#Precision
# Per class: correct predictions / all predictions of that class (column
# sums). A class that is never predicted gets precision 0 instead of NaN.
# The overall figure is the support-weighted average over the classes.
class_precision <- ifelse(
  predicted_totals > 0, true_positives / predicted_totals, 0
)
Precision <- sum(class_precision * actual_totals) / sum(CM)

#Recall
# Per class: correct predictions / all observations of that class (row
# sums). Weighted by support this necessarily equals the accuracy.
class_recall <- ifelse(
  actual_totals > 0, true_positives / actual_totals, 0
)
Recall <- sum(class_recall * actual_totals) / sum(CM)


Accuracy
## [1] 0.5576923
Precision
## [1] 0.4565852
Recall
## [1] 0.5576923