PREPARATION

# Set the working directory to the project folder for this experiment.
# NOTE(review): the data files read below are addressed by absolute paths, so
# this call only matters for relative paths used elsewhere — TODO confirm it is
# still needed, since it makes the script machine-specific.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/4.Feature Set 3/Negations")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)
# Import labels: read the source workbook and keep its `Score` column as the
# raw label vector that the one-vs-all recoding below is built from.
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")

Label <- Labels$Score

Import Feature Set 3.

# Import features from the CSV export of the feature matrix.
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/4.Feature Set 3/Negations/Feature Set 2 TP.csv")

# Drop the first column (presumably a row-index column written by the export —
# TODO confirm); every remaining column is treated as a feature.
Features <- Features[-1]

RECODE LABELS FOR ONE-VS-ALL

# Class 2 (one-vs-all): 1 when the raw score is 3 or 4, otherwise 0.
# Vectorised over the whole label vector rather than looping over a hard-coded
# 1:1000, so the recoding follows the data if the number of rows changes.
stopifnot(!anyNA(Label))  # a missing score cannot be assigned to a class
# As factor (levels "0"/"1")
Label2 <- as.factor(as.numeric(Label == 3 | Label == 4))
# Class 3 (one-vs-all): 1 when the raw score is 5 or 6, otherwise 0.
# Vectorised over the whole label vector rather than looping over a hard-coded
# 1:1000, so the recoding follows the data if the number of rows changes.
stopifnot(!anyNA(Label))  # a missing score cannot be assigned to a class
# As factor (levels "0"/"1")
Label3 <- as.factor(as.numeric(Label == 5 | Label == 6))
# Class 4 (one-vs-all): 1 when the raw score is 7 or 8, otherwise 0.
# Vectorised over the whole label vector rather than looping over a hard-coded
# 1:1000, so the recoding follows the data if the number of rows changes.
stopifnot(!anyNA(Label))  # a missing score cannot be assigned to a class
# As factor (levels "0"/"1")
Label4 <- as.factor(as.numeric(Label == 7 | Label == 8))
# Class 5 (one-vs-all): 1 when the raw score is 9 or 10, otherwise 0.
# Vectorised over the whole label vector rather than looping over a hard-coded
# 1:1000, so the recoding follows the data if the number of rows changes.
stopifnot(!anyNA(Label))  # a missing score cannot be assigned to a class
# As factor (levels "0"/"1")
Label5 <- as.factor(as.numeric(Label == 9 | Label == 10))
# All labels: collapse the raw score into the multi-class target
#   9-10 -> 5, 7-8 -> 4, 5-6 -> 3, everything else -> 2.
# Vectorised over the whole label vector rather than looping over a hard-coded
# 1:1000, so the recoding follows the data if the number of rows changes.
# NOTE(review): class 2 is the catch-all here (any score outside 5..10), whereas
# the binary Label2 only flags scores 3 and 4 — confirm no other scores occur.
stopifnot(!anyNA(Label))  # a missing score cannot be assigned to a class
All <- rep(2, length(Label))
All[Label == 5 | Label == 6] <- 3
All[Label == 7 | Label == 8] <- 4
All[Label == 9 | Label == 10] <- 5
# As factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

# Transform every feature column to numeric.
# Applied to all columns that are present instead of a hard-coded 1:417, so the
# step keeps working if the feature set grows or shrinks. `Features[] <-` keeps
# the data-frame structure and column names.
# Note: as.numeric() on a factor returns its level codes, so this assumes the
# columns were read as numbers.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  417 variables:
##  $ amaz_jj       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ arriv_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ bad_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ basic_jj      : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ beauti_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ befor_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best_jjs      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ big_jj        : num  1 0 0 0 0 0 0 0 0 1 ...
##  $ build_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ central_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ clean_jj      : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ clear_jj      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ close_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ cold_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ difficult_jj  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ due_jj        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ earl_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ easi_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ english_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enough_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ excel_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extra_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ first_jj      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ free_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fresh_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ friend_jj     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ front_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ full_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ general_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ good_jj       : num  0 0 1 0 0 1 0 1 0 1 ...
##  $ great_jj      : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ guest_jjs     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ high_jj       : num  1 0 0 0 0 0 0 1 0 0 ...
##  $ hot_jj        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ huge_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ littl_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ locat_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ london_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ loud_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ main_jj       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ major_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ modern_jj     : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ much_jj       : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ new_jj        : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ next_jj       : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ nice_jj       : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ nois_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ noisi_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ ok_jj         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ old_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ onli_jj       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ open_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ overal_jj     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ particular_jj : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ perfect_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pillow_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pleasant_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ poor_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ public_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ quiet_jj      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ realli_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ recept_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ safe_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ second_jj     : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ select_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servic_jj     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ short_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ shower_jjr    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sleep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ small_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ spacious_jj   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ special_jj    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ standard_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ stay_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ steep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ super_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sure_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ underground_jj: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ upgrad_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ veri_jj       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ warm_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ whole_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ask_vb        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bed_vbd       : num  0 0 0 1 0 0 0 0 0 1 ...
##  $ build_vb      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ came_vbd      : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ check_vb      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ definit_vb    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ done_vbn      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expens_vbz    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ gave_vbd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ get_vb        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ given_vbn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ go_vb         : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ go_vbp        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ got_vbd       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ like_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ love_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ made_vbd      : num  1 0 0 0 0 0 1 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

# Features: assign every row to the training set (1) or the validation set (2).
# Each row is drawn independently with probability 0.8 / 0.2, so the split sizes
# are only approximately 80/20. The seed is fixed so the split is reproducible.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same row assignment (`ind`) that was used
# for the features, so features and labels stay aligned row by row.

# Training portion (ind == 1)
train.labels.2 <- Label2[which(ind == 1)]
train.labels.3 <- Label3[which(ind == 1)]
train.labels.4 <- Label4[which(ind == 1)]
train.labels.5 <- Label5[which(ind == 1)]
train.labels <- All[which(ind == 1)]

# Validation portion (ind == 2)
test.labels.2 <- Label2[which(ind == 2)]
test.labels.3 <- Label3[which(ind == 2)]
test.labels.4 <- Label4[which(ind == 2)]
test.labels.5 <- Label5[which(ind == 2)]
test.labels <- All[which(ind == 2)]

SVM MODEL

# One-vs-all SVMs, one per class (2, 3, 4, 5).
# For each class: copy the training features, attach that class's binary label
# as the `Score` column, and fit an SVM of Score on all feature columns.
# scale = FALSE leaves the features unscaled; probability = TRUE fits the model
# so that class probabilities can be requested at prediction time.

# Class 2 vs rest
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score~.,data = train2,scale = FALSE,probability=TRUE)

# Class 3 vs rest
train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score~.,data = train3,scale = FALSE,probability=TRUE)

# Class 4 vs rest
train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score~.,data = train4,scale = FALSE,probability=TRUE)

# Class 5 vs rest
train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score~.,data = train5,scale = FALSE,probability=TRUE)
# Predict on the validation rows, asking each model for class probabilities.
P2 <- predict(SVM2,newdata = test,probability = TRUE)
P3 <- predict(SVM3,newdata = test,probability = TRUE)
P4 <- predict(SVM4,newdata = test,probability = TRUE)
P5 <- predict(SVM5,newdata = test,probability = TRUE)
# Pull out the probability matrices (one row per validation observation, one
# column per level of the binary label).
# NOTE(review): the column order is not the same for every model (see the
# column labels given to Voting.df below, where class 2 has "1" first and the
# others have "0" first) — check colnames() before indexing by position.
Prob2 <- attr(P2,"probabilities")
Prob3 <- attr(P3,"probabilities")
Prob4 <- attr(P4,"probabilities")
Prob5 <- attr(P5,"probabilities")

VOTING

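Use the predicted class probabilities as the input for voting: each observation is assigned to the class whose one-vs-all model gives the highest probability for its positive label.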

# Combine the four probability matrices side by side (two columns per model).
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label every column "Class <k>: <level>" using the level names carried by each
# probability matrix. The order of the "0"/"1" columns differs between models,
# so hard-coded labels could silently mislabel a column; deriving them from
# colnames() keeps label and content in sync.
colnames(Voting.df) <- c(
  paste0("Class 2: ", colnames(Prob2)),
  paste0("Class 3: ", colnames(Prob3)),
  paste0("Class 4: ", colnames(Prob4)),
  paste0("Class 5: ", colnames(Prob5))
)

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.00879753 0.9912025  0.7377722 0.26222781  0.6243724 0.3756276
## 14 0.01341807 0.9865819  0.9344075 0.06559246  0.6308911 0.3691089
## 16 0.02622755 0.9737724  0.9061110 0.09388895  0.7308060 0.2691940
## 26 0.01664195 0.9833580  0.8396728 0.16032723  0.7128343 0.2871657
## 28 0.02326517 0.9767348  0.8765291 0.12347085  0.6902587 0.3097413
## 29 0.02091930 0.9790807  0.7083602 0.29163978  0.6646333 0.3353667
##    Class 5: 0 Class5: 1
## 5   0.7659940 0.2340060
## 14  0.6258244 0.3741756
## 16  0.6409522 0.3590478
## 26  0.6143458 0.3856542
## 28  0.3628743 0.6371257
## 29  0.8595322 0.1404678
# Positions, within Voting.df, of the positive-class ("1") probability of each
# one-vs-all model. The "0"/"1" column order of a probability matrix is not the
# same for every model, so the position is looked up by column name instead of
# being hard-coded; a fixed c(1, 4, 6, 8) would silently pick the wrong column
# if the order changed (e.g. with a different train/validation split).
SEQ <- local({
  prob_list <- list(Prob2, Prob3, Prob4, Prob5)
  # Column index of the "1" level inside each matrix.
  positive_col <- vapply(
    prob_list,
    function(p) match("1", colnames(p)),
    integer(1)
  )
  # Every model must expose a probability for its positive label.
  stopifnot(!anyNA(positive_col))
  # Number of Voting.df columns that precede each matrix.
  widths <- vapply(prob_list, ncol, integer(1))
  col_offset <- cumsum(c(0L, widths))[seq_along(prob_list)]
  col_offset + positive_col
})
Transformed.Voting.df <- Voting.df[SEQ]
# Name each column after the class its model votes for.
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##             2          3         4         5
## 5  0.00879753 0.26222781 0.3756276 0.2340060
## 14 0.01341807 0.06559246 0.3691089 0.3741756
## 16 0.02622755 0.09388895 0.2691940 0.3590478
## 26 0.01664195 0.16032723 0.2871657 0.3856542
## 28 0.02326517 0.12347085 0.3097413 0.6371257
## 29 0.02091930 0.29163978 0.3353667 0.1404678
# Vote: for every validation row take the position of the largest probability
# (first one on ties) and shift it by one, because column 1 holds class 2,
# column 2 holds class 3, and so on.
Index <- 1 + as.numeric(apply(Transformed.Voting.df, MARGIN = 1, FUN = which.max))

# Evaluation table: the four class probabilities plus the voted and the actual
# class of each validation row.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.008797530 0.26222781 0.3756276 0.23400604    4      4
## 14  0.013418067 0.06559246 0.3691089 0.37417563    5      5
## 16  0.026227553 0.09388895 0.2691940 0.35904779    5      5
## 26  0.016641954 0.16032723 0.2871657 0.38565417    5      4
## 28  0.023265166 0.12347085 0.3097413 0.63712575    5      4
## 29  0.020919297 0.29163978 0.3353667 0.14046781    4      4
## 39  0.102655174 0.04676430 0.2612437 0.19425235    4      5
## 40  0.007750257 0.19925076 0.4609293 0.22502865    4      3
## 60  0.012736901 0.04471888 0.3082842 0.58468822    5      5
## 61  0.126501293 0.14211277 0.2840692 0.25084508    4      3
## 72  0.014790550 0.05830250 0.2998829 0.75548394    5      4
## 81  0.021451240 0.15142979 0.2977117 0.39713606    5      3
## 86  0.028195641 0.11032743 0.3057142 0.60342651    5      5
## 90  0.039951738 0.12866046 0.3143934 0.19541855    4      4
## 92  0.093227052 0.10852811 0.3301207 0.14220076    4      4
## 113 0.040207346 0.16818333 0.3869415 0.11161318    4      5
## 116 0.015810547 0.10773660 0.3811139 0.37415235    4      4
## 117 0.020205002 0.10593639 0.3079025 0.43465363    5      5
## 122 0.069336014 0.11805604 0.3636753 0.20049244    4      4
## 123 0.017537922 0.08183826 0.3298523 0.35893049    5      2
## 124 0.052064225 0.11333626 0.2931891 0.17767067    4      4
## 131 0.006803645 0.13221851 0.2779668 0.61608286    5      4
## 135 0.314225502 0.13251202 0.4478279 0.07448621    4      3
## 137 0.014518836 0.03264246 0.3333292 0.57115666    5      5
## 140 0.034231197 0.13169689 0.3592979 0.37725321    5      4
## 142 0.038844061 0.05444274 0.3596247 0.60423802    5      5
## 149 0.014164141 0.11019988 0.3341014 0.62428100    5      4
## 154 0.022111498 0.11075142 0.2489088 0.47452451    5      5
## 156 0.054479506 0.09185114 0.3656499 0.20424309    4      3
## 158 0.146077486 0.14721107 0.4210122 0.02936685    4      3
## 169 0.019263315 0.06636823 0.2349821 0.67551736    5      5
## 185 0.008532301 0.08910406 0.2085943 0.74144753    5      5
## 187 0.006883922 0.10076526 0.3711825 0.38918973    5      5
## 192 0.050667894 0.16342260 0.4112146 0.06185446    4      3
## 194 0.023338726 0.10183684 0.3807097 0.28257905    4      4
## 195 0.028040528 0.14917001 0.2764980 0.40822652    5      4
## 196 0.050988391 0.38351701 0.3507117 0.05229399    3      5
## 197 0.202518288 0.26861880 0.2396609 0.08036052    3      3
## 199 0.014196151 0.09105081 0.2423181 0.54992739    5      5
## 210 0.142472364 0.17400831 0.3841712 0.06320002    4      3
## 216 0.013793673 0.07038832 0.2047479 0.78402047    5      5
## 220 0.006074434 0.21346551 0.2971165 0.18467846    4      4
## 227 0.194042283 0.01626865 0.3047750 0.29841790    4      5
## 234 0.029323259 0.11468842 0.3336515 0.32219531    4      3
## 240 0.020434141 0.13525670 0.3839319 0.22735244    4      5
## 245 0.074697310 0.08831651 0.3500420 0.51353880    5      4
## 249 0.018254120 0.14441811 0.2789638 0.49488181    5      5
## 261 0.020495623 0.11529686 0.3329845 0.42593628    5      3
## 277 0.012576921 0.07563544 0.2721718 0.85262780    5      5
## 283 0.021577717 0.09695364 0.3198932 0.44258819    5      5
## 290 0.014575173 0.09251260 0.2513317 0.77786587    5      4
## 293 0.012739261 0.08936153 0.3505295 0.26825617    4      5
## 302 0.011593644 0.12957242 0.2777206 0.57533398    5      4
## 305 0.029364498 0.08088828 0.2948822 0.64976458    5      4
## 308 0.020281066 0.12257792 0.2915586 0.56044957    5      4
## 311 0.011762550 0.08710777 0.2449212 0.67398181    5      5
## 320 0.021714839 0.10381970 0.2934730 0.63886639    5      2
## 322 0.032062413 0.07649058 0.2843190 0.76233880    5      5
## 330 0.011700446 0.06895954 0.2207559 0.90575777    5      4
## 332 0.048599029 0.12269718 0.4033067 0.25465198    4      4
## 333 0.034506809 0.09357968 0.3051406 0.62903122    5      5
## 339 0.016184899 0.09766187 0.3072187 0.47005435    5      5
## 341 0.027211937 0.09706938 0.4357107 0.21631603    4      4
## 344 0.053428293 0.06059660 0.2951321 0.75774884    5      5
## 349 0.012931406 0.10961684 0.2325078 0.68355226    5      5
## 355 0.026187187 0.10626634 0.2678101 0.77527138    5      5
## 356 0.030379086 0.10191276 0.3066171 0.45806581    5      3
## 365 0.016103226 0.15636421 0.2790149 0.41721285    5      3
## 366 0.018265841 0.11572108 0.2824956 0.51397906    5      4
## 369 0.009683084 0.12857913 0.3145304 0.40924980    5      4
## 371 0.013067332 0.09245925 0.2595128 0.70240614    5      5
## 373 0.013250864 0.07118052 0.3391139 0.58198846    5      5
## 389 0.038788941 0.09643736 0.2789605 0.67688479    5      2
## 390 0.024591459 0.13253653 0.2734244 0.65020517    5      4
## 396 0.034405422 0.08503450 0.4095975 0.25552997    4      4
## 412 0.004777593 0.09142432 0.3566647 0.45915192    5      5
## 413 0.026691684 0.11224673 0.3923225 0.33390117    4      3
## 415 0.016318769 0.09076430 0.3333482 0.53918532    5      4
## 422 0.063684409 0.10075961 0.3132516 0.53411543    5      5
## 425 0.011425814 0.07189119 0.2527750 0.88876367    5      5
## 434 0.018040258 0.07455554 0.3280573 0.47793629    5      5
## 438 0.010466182 0.10076015 0.3046442 0.67955094    5      4
## 441 0.157860650 0.17252437 0.2770982 0.22757365    4      5
## 442 0.025851047 0.10886325 0.2623718 0.59367502    5      5
## 445 0.016153844 0.14625934 0.3365875 0.50536613    5      5
## 447 0.044804741 0.14914639 0.3472974 0.08606242    4      3
## 453 0.022250649 0.15714425 0.3393725 0.44749373    5      4
## 454 0.143023793 0.05135636 0.2616426 0.15842786    4      5
## 462 0.006866118 0.06936382 0.2355875 0.77915857    5      5
## 474 0.013615740 0.09624327 0.2721188 0.62878380    5      3
## 476 0.028329012 0.19633199 0.2521633 0.41502028    5      3
## 493 0.022465708 0.10977881 0.2776805 0.47209292    5      5
## 502 0.014805224 0.15962931 0.3315774 0.31412622    4      4
## 503 0.042667444 0.03232222 0.3452314 0.38457594    5      5
## 506 0.012499426 0.13302378 0.3146762 0.40854532    5      5
## 508 0.016779263 0.06167772 0.3676027 0.60590278    5      5
## 512 0.024384836 0.14440441 0.2276198 0.72007239    5      5
## 513 0.022396214 0.11033355 0.2500834 0.51892649    5      5
## 521 0.231277564 0.04713657 0.3100033 0.64772023    5      2
## 524 0.010863214 0.14502855 0.3220096 0.35986068    5      5
# Confusion matrix: rows = actual class, columns = voted class.
# Both sides are converted to factors with the same four class levels so the
# table is always 4 x 4 with matching row/column order. Without this, a class
# that is never predicted (or never occurs in the validation set) is dropped
# from one dimension and diag(CM) no longer lines up with the correct cells.
CM <- table(
  factor(Evaluation$Actual, levels = 2:5),
  factor(Evaluation$Vote, levels = 2:5)
)
CM
##    
##       2   3   4   5
##   2   0   0   1   6
##   3   0   1  14  10
##   4   1   0  22  39
##   5   0   2  12 100
# Proportions: number of validation rows overall and per actual class.
# Length2..Length5 are not needed by the calculations below (the class sizes
# are taken from the confusion matrix); they are kept in case later code uses them.
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))

# The per-class metrics read the diagonal, so the confusion matrix must be
# square with rows and columns in the same class order.
stopifnot(
  nrow(CM) == ncol(CM),
  identical(rownames(CM), colnames(CM))
)

correct <- diag(CM)              # correctly classified rows per class
actual_totals <- rowSums(CM)     # rows of CM are the actual classes
predicted_totals <- colSums(CM)  # columns of CM are the voted classes

# Accuracy
Accuracy <- sum(correct) / sum(CM)

# Precision: share of the rows voted into a class that really belong to it
# (diagonal / column totals). A class that was never predicted gets 0 instead
# of 0/0 = NaN. Averaged over classes, weighted by the number of actual rows,
# and divided by the observed number of rows rather than a hard-coded 208.
class_precision <- ifelse(predicted_totals > 0, correct / predicted_totals, 0)
Precision <- sum(class_precision * actual_totals) / Overall

# Recall: share of the rows of a class that were voted into it
# (diagonal / row totals), with the same weighting. Weighted this way, recall
# equals the overall accuracy by construction.
class_recall <- ifelse(actual_totals > 0, correct / actual_totals, 0)
Recall <- sum(class_recall * actual_totals) / Overall


Accuracy
## [1] 0.5913462
Precision
## [1] 0.5274926
Recall
## [1] 0.5913462