PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/50")
#install.packages("naivebayes")
library(naivebayes)
library(dplyr)
library(psych)
library(ggplot2)
library(e1071)
library(readxl)
#Import Labels
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")

Label <- Labels$Score
#Import Features
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TF/50/Feature Set 1: 50th Percentile.csv")

#Drop the first column of the imported feature matrix
Features <- Features[-1]
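
A quick consistency check, not part of the original script, that the imported features and labels describe the same documents:

#Sanity check: one label per feature row
stopifnot(nrow(Features) == length(Label))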

RECODE LABELS FOR ONE-VS-ALL

#Class 2: scores 3 and 4
Label2 <- as.factor(ifelse(Label %in% c(3, 4), 1, 0))

#Class 3: scores 5 and 6
Label3 <- as.factor(ifelse(Label %in% c(5, 6), 1, 0))

#Class 4: scores 7 and 8
Label4 <- as.factor(ifelse(Label %in% c(7, 8), 1, 0))

#Class 5: scores 9 and 10
Label5 <- as.factor(ifelse(Label %in% c(9, 10), 1, 0))

#All classes: collapse the 10-point score into classes 2 to 5
All <- ifelse(Label %in% c(9, 10), 5,
       ifelse(Label %in% c(7, 8), 4,
       ifelse(Label %in% c(5, 6), 3, 2)))
All <- as.factor(All)
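
A cross-tabulation of the raw scores against the collapsed classes confirms the recoding; this check is not part of the original script:

#Rows: raw scores, columns: collapsed classes
table(Label, All)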

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform the integer term counts to numeric; svm() expects numeric predictors
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  1126 variables:
##  $ abit         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ abl          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ absolut      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ acknowledg   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ addit        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adequ        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ advi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ air          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit    : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ airi         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ also         : num  0 0 0 3 0 0 1 0 1 0 ...
##  $ altern       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz         : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ amen         : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ american     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam    : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ and          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ annoy        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ anoth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon        : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ anyth        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anywh        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart        : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ apolog       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appear       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appoint      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ approach     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area         : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arena        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ arrang       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arriv        : num  1 0 0 0 1 0 0 0 0 0 ...
##  $ ask          : num  1 0 1 0 0 0 0 0 0 0 ...
##  $ aspect       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ assum        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atm          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attend       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attitud      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ averag       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avoid        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awar         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ away         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awesom       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ back         : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ bacon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bad          : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ bag          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bake         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bang         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bank         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bar          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ bare         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ base         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basement     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ basic        : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ bath         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathroom     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bathtub      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beauti       : num  1 0 0 0 0 0 1 0 0 0 ...
##  $ bed          : num  0 0 0 2 0 0 1 0 0 1 ...
##  $ bedroom      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ beer         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ begin        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ behind       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ believ       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ benefit      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ besid        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best         : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ better       : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Split the features 80/20 into training and validation sets
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1,]
test <- Features[ind == 2,]

The labels are split with the same indicator so they stay aligned with the feature rows:

train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]

train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]

train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]

train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]

train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
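
A brief check of the split, not part of the original script:

#Proportion of documents assigned to training (1) and validation (2)
prop.table(table(ind))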

SVM MODELS

#Train one binary SVM per class (one-vs-all)
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
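
#The four fits rely on e1071 defaults; spelled out, each is equivalent to the
#following sketch (assuming e1071::svm's documented defaults):
#  svm(Score ~ ., data = train2, type = "C-classification", kernel = "radial",
#      cost = 1, gamma = 1/ncol(train), scale = FALSE, probability = TRUE)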
#Predict validation-set class probabilities with each one-vs-all model
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
#Extract the probability matrices attached by predict()
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")
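
With probability = TRUE, predict() attaches the class-probability matrix as an attribute. Its columns are named after the factor levels, and their order can differ between models: in the Voting.df header below, the positive class "1" appears first for the class-2 model but second for the others. A quick inspection, not part of the original script:

#Column naming and order of the probability matrices
colnames(Prob2)
colnames(Prob3)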

VOTING

Voting.df <- data.frame(Prob2, Prob3,Prob4,Prob5)
colnames(Voting.df) <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.08719072 0.9128093  0.6532595 0.34674053  0.7062136 0.2937864
## 14 0.01180700 0.9881930  0.9280888 0.07191119  0.7086065 0.2913935
## 16 0.01375576 0.9862442  0.8973203 0.10267970  0.7945674 0.2054326
## 26 0.02268871 0.9773113  0.8764225 0.12357754  0.6575025 0.3424975
## 28 0.03345721 0.9665428  0.8645264 0.13547356  0.7017126 0.2982874
## 29 0.01339542 0.9866046  0.8479731 0.15202688  0.6016552 0.3983448
##    Class 5: 0  Class5: 1
## 5   0.9267292 0.07327081
## 14  0.3743564 0.62564361
## 16  0.6028495 0.39715053
## 26  0.6906187 0.30938125
## 28  0.3981108 0.60188923
## 29  0.6404162 0.35958385
#Keep only the positive-class ("1") probability columns: positions 1, 4, 6, 8
SEQ <- c(1,4,6,8)
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##             2          3         4          5
## 5  0.08719072 0.34674053 0.2937864 0.07327081
## 14 0.01180700 0.07191119 0.2913935 0.62564361
## 16 0.01375576 0.10267970 0.2054326 0.39715053
## 26 0.02268871 0.12357754 0.3424975 0.30938125
## 28 0.03345721 0.13547356 0.2982874 0.60188923
## 29 0.01339542 0.15202688 0.3983448 0.35958385
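The positional indices in SEQ depend on each model's column order. A more robust equivalent, sketched here with the illustrative name ByName.df, selects the positive-class column of each probability matrix by name:

#Order-robust selection: predict.svm names probability columns after the
#factor levels, so "1" is the positive class in each one-vs-all model
ByName.df <- data.frame("2" = Prob2[,"1"], "3" = Prob3[,"1"],
                        "4" = Prob4[,"1"], "5" = Prob5[,"1"],
                        check.names = FALSE)
#Reproduces Transformed.Voting.df regardless of the models' column order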
Evaluation <- Transformed.Voting.df
#Vote: assign each document the class with the highest positive-class probability
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max))
Index <- Index + 1 #column 1 corresponds to class 2, so shift by one
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation,100)
##               2          3         4          5 Vote Actual
## 5   0.087190723 0.34674053 0.2937864 0.07327081    3      4
## 14  0.011807000 0.07191119 0.2913935 0.62564361    5      5
## 16  0.013755763 0.10267970 0.2054326 0.39715053    5      5
## 26  0.022688712 0.12357754 0.3424975 0.30938125    4      4
## 28  0.033457210 0.13547356 0.2982874 0.60188923    5      4
## 29  0.013395425 0.15202688 0.3983448 0.35958385    4      4
## 39  0.196424619 0.16682350 0.3305483 0.07171274    4      5
## 40  0.006311111 0.30569632 0.5451396 0.29860299    4      3
## 60  0.009279821 0.05138992 0.2935639 0.46618495    5      5
## 61  0.179890313 0.22045583 0.4306961 0.15076684    4      3
## 72  0.007454634 0.08264296 0.3406109 0.62751420    5      4
## 81  0.018878480 0.10829177 0.3599779 0.41379739    5      3
## 86  0.019742049 0.12449562 0.2473895 0.33521459    5      5
## 90  0.048318096 0.13455068 0.2927701 0.35427070    5      4
## 92  0.030525581 0.13163501 0.3294593 0.20216185    4      4
## 113 0.039657019 0.06546208 0.3793242 0.33748725    4      5
## 116 0.039035417 0.13822583 0.2411203 0.26170062    5      4
## 117 0.024185155 0.10315757 0.2945958 0.34320474    5      5
## 122 0.055870951 0.14284028 0.3208412 0.16039479    4      4
## 123 0.018546251 0.07987575 0.3593784 0.37782153    5      2
## 124 0.014624122 0.13188837 0.3685614 0.25046495    4      4
## 131 0.008747729 0.12314381 0.3189686 0.42790283    5      4
## 135 0.424681902 0.20665430 0.4081185 0.05310929    2      3
## 137 0.009892244 0.07982538 0.3424585 0.32289456    4      5
## 140 0.016888840 0.15473249 0.3119504 0.30996126    4      4
## 142 0.017254054 0.08464047 0.3407115 0.54746410    5      5
## 149 0.021690042 0.07130429 0.3475125 0.60975894    5      4
## 154 0.034984139 0.10419262 0.2561026 0.54465964    5      5
## 156 0.143248797 0.08069949 0.4185158 0.21076247    4      3
## 158 0.394893526 0.22903289 0.5081642 0.01038736    4      3
## 169 0.014495903 0.07554127 0.2713229 0.61153067    5      5
## 185 0.013712189 0.10453185 0.2588369 0.46834796    5      5
## 187 0.008148973 0.08842511 0.3911020 0.50000000    5      5
## 192 0.020031116 0.16494116 0.5000000 0.13787716    4      3
## 194 0.023643895 0.20241996 0.3556112 0.37850745    5      4
## 195 0.024737539 0.14734805 0.2626312 0.45965253    5      4
## 196 0.222638752 0.12634305 0.4905218 0.11358779    4      5
## 197 0.452086773 0.09770692 0.3192206 0.05439899    2      3
## 199 0.007260424 0.10777315 0.2342020 0.74997509    5      5
## 210 0.160304495 0.12829783 0.3357897 0.16630417    4      3
## 216 0.018609392 0.06880404 0.1825177 0.77329708    5      5
## 220 0.020241747 0.21491396 0.3521250 0.23026153    4      4
## 227 0.419086669 0.03236850 0.5773028 0.06430521    4      5
## 234 0.025178301 0.13803935 0.4461902 0.33750258    4      3
## 240 0.028699226 0.09175029 0.3993332 0.37079990    4      5
## 245 0.029194343 0.10769732 0.3095234 0.52019818    5      4
## 249 0.021154075 0.12665268 0.3205186 0.49414718    5      5
## 261 0.023766742 0.13196847 0.3058559 0.49070577    5      3
## 277 0.011715167 0.07709940 0.2596686 0.81414540    5      5
## 283 0.017841680 0.11310118 0.2965839 0.45184630    5      5
## 290 0.013574748 0.09101754 0.2289714 0.79085941    5      4
## 293 0.012712981 0.09598305 0.3746080 0.40104324    5      5
## 302 0.013689316 0.13656948 0.3092763 0.28380868    4      4
## 305 0.021805079 0.09685378 0.3312502 0.60366342    5      4
## 308 0.032633038 0.11528220 0.2498620 0.57709748    5      4
## 311 0.013165131 0.08159990 0.2569642 0.61955179    5      5
## 320 0.017465563 0.09121525 0.2647106 0.73048706    5      2
## 322 0.018638586 0.06814609 0.2253928 0.79188459    5      5
## 330 0.013275525 0.07981217 0.2234993 0.78605074    5      4
## 332 0.020166744 0.11849290 0.3867359 0.36192044    4      4
## 333 0.018759835 0.08359774 0.2896525 0.75434196    5      5
## 339 0.018179899 0.10296821 0.2619691 0.60926468    5      5
## 341 0.018292240 0.10612312 0.3934318 0.44579202    5      4
## 344 0.021341740 0.07212949 0.3203790 0.72006837    5      5
## 349 0.014001532 0.08492201 0.1947592 0.75826662    5      5
## 355 0.015265601 0.07198040 0.2363252 0.83537514    5      5
## 356 0.028123219 0.08848581 0.2922649 0.63285523    5      3
## 365 0.012619262 0.13584438 0.2759820 0.50000000    5      3
## 366 0.012319246 0.11096464 0.2965364 0.40054229    5      4
## 369 0.010979868 0.09340020 0.3176367 0.36998478    5      4
## 371 0.014943784 0.09588605 0.2338813 0.70444867    5      5
## 373 0.014496675 0.08194890 0.3158659 0.66348992    5      5
## 389 0.029987100 0.10151516 0.3030418 0.55199328    5      2
## 390 0.030132556 0.12109115 0.2842531 0.58952905    5      4
## 396 0.015658229 0.07676180 0.3559792 0.48390414    5      4
## 412 0.007122523 0.09890038 0.3332388 0.51537732    5      5
## 413 0.027048481 0.10407992 0.3182199 0.14091527    4      3
## 415 0.011719684 0.10329810 0.3534772 0.50000000    5      4
## 422 0.109850671 0.10581676 0.4182783 0.35684234    4      5
## 425 0.016338304 0.07310734 0.2219466 0.86707020    5      5
## 434 0.018452671 0.06076072 0.3235348 0.28344823    4      5
## 438 0.008467501 0.08938608 0.2847070 0.68898377    5      4
## 441 0.234282919 0.06752392 0.4403016 0.07220685    4      5
## 442 0.027822548 0.08958690 0.2286234 0.70003086    5      5
## 445 0.020369822 0.09381332 0.3947092 0.31026705    4      5
## 447 0.047790350 0.14192253 0.3643009 0.12249222    4      3
## 453 0.032865653 0.15616542 0.4310806 0.43529885    5      4
## 454 0.066676011 0.11678189 0.3075200 0.08321992    4      5
## 462 0.006720221 0.07100283 0.2426965 0.67700698    5      5
## 474 0.034877260 0.10099860 0.3127425 0.50839483    5      3
## 476 0.020068810 0.13604011 0.2818025 0.43678694    5      3
## 493 0.011175347 0.10555166 0.2930956 0.52683188    5      5
## 502 0.012279677 0.17788151 0.4513695 0.32824283    4      4
## 503 0.087956366 0.04518004 0.3202322 0.41206991    5      5
## 506 0.018260901 0.10556564 0.2703494 0.61870380    5      5
## 508 0.015700025 0.06528105 0.3405290 0.75318844    5      5
## 512 0.024807770 0.09440417 0.2564118 0.69445518    5      5
## 513 0.014695971 0.11700314 0.2505463 0.42398297    5      5
## 521 0.054324723 0.08809108 0.2774292 0.65850736    5      2
## 524 0.028208828 0.12365440 0.2973006 0.31540082    5      5
#Confusion matrix: rows = actual class, columns = predicted class
CM <- table(Evaluation$Actual, Evaluation$Vote)
CM
##    
##      2  3  4  5
##   2  0  0  1  6
##   3  2  0 11 12
##   4  1  1 20 40
##   5  0  0 16 98
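Row-normalising the matrix shows how each actual class is distributed over the predicted classes; this sketch is not part of the original script:

#Share of each actual class (rows) assigned to each predicted class
round(prop.table(CM, margin = 1), 2)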
#Class supports: number of validation documents per actual class
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))


#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)

#Precision: correct predictions over all predictions per class (column sums),
#then weighted by class support
Precision <- diag(CM)/colSums(CM)
Precision <- unname((Precision[1]*Length2 + Precision[2]*Length3 + Precision[3]*Length4 + Precision[4]*Length5)/Overall)

#Recall: correct predictions over all actual members per class (row sums),
#then weighted by class support; note that support-weighted recall reduces to
#overall accuracy, since the per-class denominators cancel against the weights
Recall <- diag(CM)/rowSums(CM)
Recall <- unname((Recall[1]*Length2 + Recall[2]*Length3 + Recall[3]*Length4 + Recall[4]*Length5)/Overall)


Accuracy
## [1] 0.5673077
Precision
## [1] 0.4685035
Recall
## [1] 0.5673077
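
For reference, the unweighted per-class figures behind these averages can be read directly off the confusion matrix; this sketch is not part of the original script:

#Per-class precision (column-wise) and recall (row-wise)
round(rbind(Precision = diag(CM)/colSums(CM),
            Recall = diag(CM)/rowSums(CM)), 3)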