PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TFIDF/50")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)
# Import Labels
# Read the labelled source spreadsheet; its Score column is the raw label
# that the recoding steps below work from.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels$Score

# Import Features
# Read the TF-IDF feature table (one column per term).
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TFIDF/50/Feature Set 1 50th TFIDF.csv"
)
# Drop the first column -- presumably a row index stored in the CSV; verify.
Features <- Features[, -1, drop = FALSE]

RECODE LABELS FOR ONE-VS-ALL

#Class 2
# One-vs-all target for class 2: 1 when the score is 3 or 4, otherwise 0.
# The recode is vectorised and uses one score per feature row, so it no longer
# depends on a hard-coded 1:1000 loop that grows a list element by element.
label.scores <- Label[seq_len(nrow(Features))]
# Fail fast on missing scores, as the element-wise `if` did before.
stopifnot(!anyNA(label.scores))
#As Factor
Label2 <- as.factor(as.numeric(label.scores %in% c(3, 4)))
#Class 3
# One-vs-all target for class 3: 1 when the score is 5 or 6, otherwise 0.
# The recode is vectorised and uses one score per feature row, so it no longer
# depends on a hard-coded 1:1000 loop that grows a list element by element.
label.scores <- Label[seq_len(nrow(Features))]
# Fail fast on missing scores, as the element-wise `if` did before.
stopifnot(!anyNA(label.scores))
#As Factor
Label3 <- as.factor(as.numeric(label.scores %in% c(5, 6)))
#Class 4
# One-vs-all target for class 4: 1 when the score is 7 or 8, otherwise 0.
# The recode is vectorised and uses one score per feature row, so it no longer
# depends on a hard-coded 1:1000 loop that grows a list element by element.
label.scores <- Label[seq_len(nrow(Features))]
# Fail fast on missing scores, as the element-wise `if` did before.
stopifnot(!anyNA(label.scores))
#As Factor
Label4 <- as.factor(as.numeric(label.scores %in% c(7, 8)))
#Class 5
# One-vs-all target for class 5: 1 when the score is 9 or 10, otherwise 0.
# The recode is vectorised and uses one score per feature row, so it no longer
# depends on a hard-coded 1:1000 loop that grows a list element by element.
label.scores <- Label[seq_len(nrow(Features))]
# Fail fast on missing scores, as the element-wise `if` did before.
stopifnot(!anyNA(label.scores))
#As Factor
Label5 <- as.factor(as.numeric(label.scores %in% c(9, 10)))
#All Labels
# Multi-class target used for evaluation: scores 9-10 -> 5, 7-8 -> 4,
# 5-6 -> 3 and every remaining score -> 2.
# The recode is vectorised and uses one score per feature row, so it no longer
# depends on a hard-coded 1:1000 loop that grows a list element by element.
label.scores <- Label[seq_len(nrow(Features))]
# Fail fast on missing scores, as the element-wise `if` did before.
stopifnot(!anyNA(label.scores))
# NOTE(review): class 2 is the catch-all here, so any score outside 5-10 lands
# in it, whereas the binary class-2 target only marks scores 3 and 4 --
# confirm that scores below 3 cannot occur in the data.
All <- rep(2, length(label.scores))
All[label.scores %in% c(5, 6)] <- 3
All[label.scores %in% c(7, 8)] <- 4
All[label.scores %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform every feature column to numeric
# Converts all columns, however many the feature file contains, instead of a
# hard-coded 1:1336. Assigning into `Features[]` keeps the data frame and its
# column names intact.
# NOTE: as.numeric() on a factor column returns the level codes, not the
# printed values -- the columns are expected to be integer/numeric already.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  1336 variables:
##  $ abl           : num  0.0351 0 0 0 0 ...
##  $ about         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ absolut       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accent        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accept        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ access        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accommod      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ accomplish    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ across        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ actual        : num  0 0 0 0 0 ...
##  $ adequ         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adjac         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ador          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adult         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advanc        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ adverti       : num  0 0 0 0 0.105 ...
##  $ advi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ advic         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ affect        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ after         : num  0 0 0 0 0.112 ...
##  $ ago           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ahead         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ air           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircon        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aircondit     : num  0 0 0 0 0 ...
##  $ airi          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ airport       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alarm         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albert        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ albrt         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aldo          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alittl        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ all           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alloc         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ allow         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alreadi       : num  0 0 0 0.062 0 ...
##  $ also          : num  0 0 0 0.0933 0 ...
##  $ altern        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ although      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alway         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amaz          : num  0 0 0 0 0.0653 ...
##  $ ambianc       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amen          : num  0 0.107 0 0 0 ...
##  $ american      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amount        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ amsterdam     : num  0 0.0847 0 0.0463 0 ...
##  $ and           : num  0.031 0 0 0 0 ...
##  $ anna          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ annoy         : num  0.0357 0 0 0 0 ...
##  $ anoth         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ answer        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ant           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anymor        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyon         : num  0 0.102 0 0 0 ...
##  $ anyth         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anyway        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ anywh         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ apart         : num  0 0 0 0 0 ...
##  $ apolog        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appal         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appeal        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appear        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ applic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appoint       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ appreci       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ approach      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ april         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ architectur   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ area          : num  0 0 0 0.0285 0 ...
##  $ arena         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ aroom         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ around        : num  0 0 0 0.0371 0 ...
##  $ arrang        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ arriv         : num  0.0239 0 0 0 0.064 ...
##  $ art           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ artwork       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ask           : num  0.0239 0 0.1319 0 0 ...
##  $ aspect        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ athmosph      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atm           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atmosph       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attend        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attent        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attic         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attitud       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ attract       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ atttent       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ avail         : num  0.031 0 0 0 0 ...
##  $ averag        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awar          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ away          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ awesom        : num  0 0 0 0 0 ...
##  $ back          : num  0 0.0698 0.1376 0 0 ...
##  $ backyard      : num  0 0 0 0 0 ...
##  $ bacon         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bad           : num  0 0 0 0.0417 0 ...
##  $ bag           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bake          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bang          : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Assign every feature row independently to partition 1 (training) or
# partition 2 (test) with probabilities 0.8 and 0.2; the fixed seed makes the
# assignment reproducible. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same row assignment (`ind`) that was used
# for the features. Labels and feature rows are paired purely by position, so
# a length mismatch would silently recycle the logical index or produce NA
# labels; stop early instead.
stopifnot(
  length(Label2) == length(ind),
  length(Label3) == length(ind),
  length(Label4) == length(ind),
  length(Label5) == length(ind),
  length(All) == length(ind)
)

is.train <- ind == 1
is.test <- ind == 2

# One-vs-all targets, one pair per class
train.labels.2 <- Label2[is.train]
test.labels.2 <- Label2[is.test]

train.labels.3 <- Label3[is.train]
test.labels.3 <- Label3[is.test]

train.labels.4 <- Label4[is.train]
test.labels.4 <- Label4[is.test]

train.labels.5 <- Label5[is.train]
test.labels.5 <- Label5[is.test]

# Multi-class target (classes 2-5) used to score the final vote
train.labels <- All[is.train]
test.labels <- All[is.test]

SVM MODEL

# One binary SVM per class (one-vs-all).
# Each training frame is the shared training partition plus that class's 0/1
# target stored in a `Score` column.
train2 <- train
train2[["Score"]] <- train.labels.2

train3 <- train
train3[["Score"]] <- train.labels.3

train4 <- train
train4[["Score"]] <- train.labels.4

train5 <- train
train5[["Score"]] <- train.labels.5

# Fit the four models in class order, on the unscaled features and with
# probability estimates enabled so that predict() can return them.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Score the test partition with each model and pull out the matrix of class
# probabilities that predict() attaches as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")

VOTING

Use the predicted probabilities as input for the voting procedure: pick the class with the highest probability.

# Put the four probability matrices side by side (two columns per model, in
# the order class 2, 3, 4, 5).
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label each column from the matrix it came from. The order of the "0" and "1"
# columns is decided by the fitted model and is not the same for every model,
# so hand-written labels can end up attached to the wrong column.
colnames(Voting.df) <- c(
  paste0("Class 2: ", colnames(Prob2)),
  paste0("Class 3: ", colnames(Prob3)),
  paste0("Class 4: ", colnames(Prob4)),
  paste0("Class 5: ", colnames(Prob5))
)

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.04484328 0.9551567  0.8420299 0.15797008  0.7090221 0.2909779
## 14 0.02329448 0.9767055  0.9195713 0.08042869  0.6792998 0.3207002
## 16 0.01854669 0.9814533  0.9205178 0.07948224  0.7575615 0.2424385
## 26 0.03711053 0.9628895  0.9030260 0.09697404  0.7237737 0.2762263
## 28 0.03493567 0.9650643  0.7268440 0.27315599  0.7379065 0.2620935
## 29 0.02592096 0.9740790  0.8839534 0.11604661  0.6974833 0.3025167
##    Class 5: 0 Class5: 1
## 5   0.7015602 0.2984398
## 14  0.5545259 0.4454741
## 16  0.3928097 0.6071903
## 26  0.5436832 0.4563168
## 28  0.6837344 0.3162656
## 29  0.6166213 0.3833787
# Keep, for every model, only the probability of the positive outcome ("1").
# Voting.df holds the columns of Prob2, Prob3, Prob4 and Prob5 in that order,
# so the positions are located by the matrices' own column names rather than
# by hard-coded indices that depend on how each model happened to order its
# classes.
is.positive <- c(
  colnames(Prob2), colnames(Prob3), colnames(Prob4), colnames(Prob5)
) == "1"
SEQ <- which(is.positive)
# Exactly one positive-class column per model, and the positions must refer to
# the columns of Voting.df.
stopifnot(length(SEQ) == 4, length(is.positive) == ncol(Voting.df))
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##             2          3         4         5
## 5  0.04484328 0.15797008 0.2909779 0.2984398
## 14 0.02329448 0.08042869 0.3207002 0.4454741
## 16 0.01854669 0.07948224 0.2424385 0.6071903
## 26 0.03711053 0.09697404 0.2762263 0.4563168
## 28 0.03493567 0.27315599 0.2620935 0.3162656
## 29 0.02592096 0.11604661 0.3025167 0.3833787
# Vote: for every test row pick the class whose model reports the highest
# positive-class probability (the first one wins on ties, as with which.max).
Evaluation <- Transformed.Voting.df
# Work on an explicit numeric matrix instead of letting apply() coerce the
# data frame implicitly.
vote.probabilities <- as.matrix(Transformed.Voting.df)
winning.column <- apply(vote.probabilities, MARGIN = 1, which.max)
# Decode the winning column through its name ("2".."5") instead of adding a
# fixed offset to its position.
Index <- as.numeric(colnames(vote.probabilities)[winning.column])
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, 100)
##               2          3         4            5 Vote Actual
## 5   0.044843277 0.15797008 0.2909779 0.2984397677    5      4
## 14  0.023294478 0.08042869 0.3207002 0.4454741000    5      5
## 16  0.018546687 0.07948224 0.2424385 0.6071903350    5      5
## 26  0.037110531 0.09697404 0.2762263 0.4563168401    5      4
## 28  0.034935668 0.27315599 0.2620935 0.3162656140    5      4
## 29  0.025920956 0.11604661 0.3025167 0.3833786867    5      4
## 39  0.034445041 0.08783595 0.2853168 0.5000000000    5      5
## 40  0.025011598 0.14731906 0.3387985 0.3195846343    4      3
## 60  0.021556690 0.10523305 0.2561904 0.5800867423    5      5
## 61  0.036826420 0.19943335 0.2834452 0.3705419045    5      3
## 72  0.017692520 0.05025872 0.2789928 0.5483896054    5      4
## 81  0.025929730 0.12610579 0.3179549 0.3780857888    5      3
## 86  0.033691095 0.12372089 0.3027465 0.4919849910    5      5
## 90  0.083393297 0.12223073 0.3012856 0.2157613107    4      4
## 92  0.025362784 0.15394049 0.2972471 0.2564524018    4      4
## 113 0.033505632 0.07474724 0.3204596 0.3729722568    5      5
## 116 0.051635397 0.17330145 0.2533199 0.3648648446    5      4
## 117 0.032876886 0.07933628 0.3765655 0.4476159385    5      5
## 122 0.045705296 0.14021964 0.3524011 0.1230021687    4      4
## 123 0.026776282 0.06784839 0.4057345 0.3393499452    4      2
## 124 0.022576459 0.10680457 0.3196773 0.3592469139    5      4
## 131 0.029327622 0.11955361 0.3203576 0.4061352394    5      4
## 135 0.041586590 0.13346915 0.3062325 0.2791259144    4      3
## 137 0.012752184 0.08377862 0.2921319 0.5819487502    5      5
## 140 0.037006442 0.14975757 0.3067204 0.3081576635    5      4
## 142 0.021267996 0.06359000 0.3479308 0.4475342488    5      5
## 149 0.027478775 0.08776605 0.3385043 0.4659088421    5      4
## 154 0.021239138 0.10635658 0.2376051 0.6198451327    5      5
## 156 0.041952186 0.09709812 0.3441434 0.3293036668    4      3
## 158 0.044048580 0.16968360 0.2742555 0.3459777451    5      3
## 169 0.026656860 0.06655193 0.2152478 0.7610319068    5      5
## 185 0.028691593 0.08193249 0.2544690 0.5399131768    5      5
## 187 0.016154122 0.10460300 0.3365687 0.4614469242    5      5
## 192 0.028752804 0.17967179 0.3206408 0.2665901050    4      3
## 194 0.192278808 0.63808789 0.2513519 0.1337142564    3      4
## 195 0.019649677 0.27884510 0.2770518 0.2326310516    3      4
## 196 0.042180610 0.13502026 0.3090300 0.3042031473    4      5
## 197 0.076347837 0.16768984 0.2483006 0.2406451824    4      3
## 199 0.023230455 0.06329765 0.2499905 0.7418951206    5      5
## 210 0.086090931 0.13525136 0.3084900 0.1709104942    4      3
## 216 0.040003454 0.07279322 0.1835453 0.7922669812    5      5
## 220 0.025458903 0.17020207 0.2663765 0.3734043382    5      4
## 227 0.040989328 0.07589788 0.2926312 0.4739886434    5      5
## 234 0.025371093 0.11749097 0.3668373 0.3022107418    4      3
## 240 0.024163102 0.07187264 0.5487187 0.1598821975    4      5
## 245 0.114409546 0.16134609 0.4015219 0.1660770761    4      4
## 249 0.037307996 0.11506568 0.3570669 0.3338219241    4      5
## 261 0.044735235 0.34891100 0.3975316 0.1935281394    4      3
## 277 0.022553592 0.04912727 0.2029803 0.8898157573    5      5
## 283 0.038169270 0.11556694 0.2818945 0.5123777143    5      5
## 290 0.009140046 0.02575375 0.1540439 0.9715000230    5      4
## 293 0.036308669 0.06629986 0.3855401 0.2809980358    4      5
## 302 0.025682323 0.18399418 0.2503642 0.4691492793    5      4
## 305 0.046908211 0.11262462 0.3728527 0.3103478231    4      4
## 308 0.024746170 0.11954208 0.3011358 0.1707038424    4      4
## 311 0.030405864 0.05648742 0.2531105 0.7407365254    5      5
## 320 0.025313179 0.04896257 0.1955893 0.9552963563    5      2
## 322 0.027284490 0.04798359 0.2305270 0.8197997138    5      5
## 330 0.015829428 0.04546294 0.1637183 0.7963694520    5      4
## 332 0.040932149 0.10938490 0.8249230 0.0300783356    4      4
## 333 0.035628877 0.02806733 0.3199496 0.8230636432    5      5
## 339 0.016812132 0.13033850 0.2365946 0.5745127650    5      5
## 341 0.036510081 0.08322201 0.4761905 0.1288393992    4      4
## 344 0.031039791 0.07372983 0.3151945 0.6478638245    5      5
## 349 0.020655759 0.05597628 0.2082354 0.9015559291    5      5
## 355 0.024109186 0.02246100 0.1574725 0.9712647522    5      5
## 356 0.052070444 0.10487416 0.3020308 0.3927775201    5      3
## 365 0.055362828 0.19365189 0.3196933 0.2172368087    4      3
## 366 0.046065309 0.08945879 0.2431204 0.5521433141    5      4
## 369 0.019491612 0.05945159 0.3175882 0.4753637500    5      4
## 371 0.013887435 0.08627243 0.2565713 0.8349696311    5      5
## 373 0.037660094 0.05202472 0.3490168 0.5693459206    5      5
## 389 0.225639433 0.06157733 0.8619585 0.0002290760    4      2
## 390 0.186231567 0.52690288 0.7715212 0.0001802053    4      4
## 396 0.021269460 0.04887285 0.2967983 0.6389128022    5      4
## 412 0.014336337 0.07427548 0.3474837 0.4896260781    5      5
## 413 0.023900242 0.10043002 0.2775893 0.4545048022    5      3
## 415 0.019448387 0.08844494 0.3807358 0.4501459432    5      4
## 422 0.027057554 0.13724690 0.2664378 0.4029691379    5      5
## 425 0.019923749 0.06073585 0.2791036 0.6340446793    5      5
## 434 0.041466073 0.08994411 0.3136761 0.3845290163    5      5
## 438 0.014082896 0.07663476 0.2879811 0.6856191414    5      4
## 441 0.035105016 0.11735036 0.2909235 0.4275498312    5      5
## 442 0.029463411 0.09472811 0.2254451 0.6421508951    5      5
## 445 0.024822767 0.09609986 0.3387221 0.3920191404    5      5
## 447 0.031178452 0.10571820 0.2723423 0.3750032411    5      3
## 453 0.078493857 0.26947859 0.3104531 0.2256228901    4      4
## 454 0.024346528 0.10861382 0.2747973 0.4556753947    5      5
## 462 0.014957269 0.05324530 0.2525274 0.7485326641    5      5
## 474 0.062824380 0.15270043 0.3099880 0.4006963640    5      3
## 476 0.024871589 0.12903631 0.2550365 0.4613581351    5      3
## 493 0.014704760 0.07590691 0.5000000 0.2272418504    4      5
## 502 0.020219822 0.11225153 0.4030491 0.3196949215    4      4
## 503 0.030334854 0.09345805 0.2596635 0.5272908677    5      5
## 506 0.023360859 0.16785164 0.2546083 0.4432451875    5      5
## 508 0.027736107 0.05733879 0.3560391 0.5669470421    5      5
## 512 0.030303688 0.12756247 0.3162203 0.4839702275    5      5
## 513 0.025573997 0.12939597 0.2897328 0.4834765188    5      5
## 521 0.054791527 0.11372946 0.2642512 0.3630869582    5      2
## 524 0.086410156 0.12558859 0.3645862 0.1930278752    4      5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# A class that never receives a vote has no column, so the table is not
# necessarily square.
CM <- table(Evaluation[["Actual"]], Evaluation[["Vote"]])
CM
##    
##       3   4   5
##   2   0   4   3
##   3   0  14  11
##   4   2  25  35
##   5   1  13 100
#Proportions
# Number of test rows overall and per actual class (the class supports).
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual == 2))
Length3 <- length(which(Evaluation$Actual == 3))
Length4 <- length(which(Evaluation$Actual == 4))
Length5 <- length(which(Evaluation$Actual == 5))

# Square confusion matrix over all four classes (rows = actual, columns =
# vote). Fixing the levels on both sides keeps a column for a class that never
# receives a vote, so classes are addressed by name and not by a position that
# shifts when a column is missing.
classes <- as.character(2:5)
CM.full <- table(
  Actual = factor(Evaluation$Actual, levels = classes),
  Vote = factor(Evaluation$Vote, levels = classes)
)
# Every test row must fall into one of the four classes.
stopifnot(sum(CM.full) == Overall)

Correct <- diag(CM.full)   # correctly voted rows per class
Rows <- rowSums(CM.full)   # actual rows per class
Col <- colSums(CM.full)    # votes per class

#Accuracy
# Computed from the matrix diagonal instead of hand-typed counts.
Accuracy <- sum(Correct) / Overall

#Precision
# Correct votes divided by all votes for the class (column sums). A class that
# was never voted for gets precision 0 instead of 0/0.
Precision.by.class <- ifelse(Col > 0, Correct / Col, 0)
Precision2 <- Precision.by.class[["2"]]
Precision3 <- Precision.by.class[["3"]]
Precision4 <- Precision.by.class[["4"]]
Precision5 <- Precision.by.class[["5"]]

# Average weighted by the number of actual rows per class.
Precision <- sum(Precision.by.class * Rows) / Overall

#Recall
# Correct votes divided by all actual rows of the class (row sums).
Recall.by.class <- ifelse(Rows > 0, Correct / Rows, 0)
Recall2 <- Recall.by.class[["2"]]
Recall3 <- Recall.by.class[["3"]]
Recall4 <- Recall.by.class[["4"]]
Recall5 <- Recall.by.class[["5"]]

# Weighted by class support, so this equals the accuracy by construction.
Recall <- sum(Recall.by.class * Rows) / Overall


# Echoed values below are recomputed from the confusion matrix printed above.
Accuracy
## [1] 0.6009615
Precision
## [1] 0.5009069
Recall
## [1] 0.6009615