PREPARATION

# Point the R session at the folder for this feature set (Feature Set 2,
# Valence), so that relative paths resolve there.
# NOTE(review): the data files read further down are given with full "~/..."
# paths, so this call may only matter for relative paths used elsewhere in the
# script - confirm before removing it. The path is specific to one user's
# Google Drive layout.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/3.Feature Set 2/Valence")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import actual labels.

#Import Features
# Term-level feature table stored as CSV. The first column is discarded right
# after loading (presumably an exported row index - TODO confirm).
Features <- read.csv(file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/3.Feature Set 2/Valence/Feature Set 2 TP.csv")
Features <- Features[, -1, drop = FALSE]

#Import Labels
# Workbook holding the ground-truth ratings; only its Score column is used.
Labels <- read_excel(path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")
Label <- Labels$Score

RECODE LABELS FOR ONE-VS-ALL

#Class 2
# One-vs-all target for class 2: 1 when the raw score is 3 or 4, otherwise 0.
# The recode is vectorised over the whole of Label, so it yields exactly one
# entry per label instead of assuming there are exactly 1000 of them.
# Missing scores are rejected up front; a silent recode to 0 would hide them.
stopifnot(!anyNA(Label))
Label2 <- as.numeric(Label %in% c(3, 4))
#As Factor
Label2 <- as.factor(Label2)
#Class 3
# One-vs-all target for class 3: 1 when the raw score is 5 or 6, otherwise 0.
# The recode is vectorised over the whole of Label, so it yields exactly one
# entry per label instead of assuming there are exactly 1000 of them.
# Missing scores are rejected up front; a silent recode to 0 would hide them.
stopifnot(!anyNA(Label))
Label3 <- as.numeric(Label %in% c(5, 6))
#As Factor
Label3 <- as.factor(Label3)
#Class 4
# One-vs-all target for class 4: 1 when the raw score is 7 or 8, otherwise 0.
# The recode is vectorised over the whole of Label, so it yields exactly one
# entry per label instead of assuming there are exactly 1000 of them.
# Missing scores are rejected up front; a silent recode to 0 would hide them.
stopifnot(!anyNA(Label))
Label4 <- as.numeric(Label %in% c(7, 8))
#As Factor
Label4 <- as.factor(Label4)
#Class 5
# One-vs-all target for class 5: 1 when the raw score is 9 or 10, otherwise 0.
# The recode is vectorised over the whole of Label, so it yields exactly one
# entry per label instead of assuming there are exactly 1000 of them.
# Missing scores are rejected up front; a silent recode to 0 would hide them.
stopifnot(!anyNA(Label))
Label5 <- as.numeric(Label %in% c(9, 10))
#As Factor
Label5 <- as.factor(Label5)
#All Labels
# Multi-class target used for the final evaluation. Raw scores are binned as
#   9-10 -> 5,  7-8 -> 4,  5-6 -> 3,  anything else -> 2.
# The recode is vectorised over the whole of Label, so it yields exactly one
# entry per label instead of assuming there are exactly 1000 of them.
# Missing scores are rejected up front; otherwise they would fall into the
# catch-all class 2 unnoticed.
stopifnot(!anyNA(Label))
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform every feature column to numeric
# All columns are converted, however many the feature file contains, rather
# than a fixed count of 356. `Features[] <-` keeps the data frame (and its
# column and row names) while replacing each column.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  356 variables:
##  $ amaz_jj       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ arriv_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ bad_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ basic_jj      : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ beauti_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ befor_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best_jjs      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ big_jj        : num  1 0 0 0 0 0 0 0 0 1 ...
##  $ build_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ central_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ clean_jj      : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ clear_jj      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ close_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ cold_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ difficult_jj  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ due_jj        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ earl_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ easi_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ english_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enough_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ excel_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extra_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ first_jj      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ free_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fresh_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ friend_jj     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ front_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ full_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ general_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ good_jj       : num  0 0 1 0 0 1 0 1 0 1 ...
##  $ great_jj      : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ guest_jjs     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ high_jj       : num  1 0 0 0 0 0 0 1 0 0 ...
##  $ hot_jj        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ huge_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ littl_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ locat_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ london_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ loud_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ main_jj       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ major_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ modern_jj     : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ much_jj       : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ new_jj        : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ next_jj       : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ nice_jj       : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ nois_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ noisi_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ ok_jj         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ old_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ onli_jj       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ open_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ overal_jj     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ particular_jj : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ perfect_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pillow_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pleasant_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ poor_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ public_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ quiet_jj      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ realli_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ recept_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ safe_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ second_jj     : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ select_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servic_jj     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ short_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ shower_jjr    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sleep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ small_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ spacious_jj   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ special_jj    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ standard_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ stay_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ steep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ super_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sure_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ underground_jj: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ upgrad_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ veri_jj       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ warm_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ whole_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ask_vb        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bed_vbd       : num  0 0 0 1 0 0 0 0 0 1 ...
##  $ build_vb      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ came_vbd      : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ check_vb      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ definit_vb    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ done_vbn      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expens_vbz    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ gave_vbd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ get_vb        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ given_vbn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ go_vb         : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ go_vbp        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ got_vbd       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ like_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ love_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ made_vbd      : num  1 0 0 0 0 0 1 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features
# Reproducible random split: every row is independently assigned to group 1
# (training, probability 0.8) or group 2 (validation, probability 0.2).
# The seed is fixed so that `ind` - which is also used to split the label
# vectors - is identical on every run.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

Labels

# Split every label vector with the same assignment vector `ind` that was used
# for the features: group 1 is training, group 2 is validation.
train_rows <- which(ind == 1)
test_rows <- which(ind == 2)

# One-vs-all targets, training part
train.labels.2 <- Label2[train_rows]
train.labels.3 <- Label3[train_rows]
train.labels.4 <- Label4[train_rows]
train.labels.5 <- Label5[train_rows]

# One-vs-all targets, validation part
test.labels.2 <- Label2[test_rows]
test.labels.3 <- Label3[test_rows]
test.labels.4 <- Label4[test_rows]
test.labels.5 <- Label5[test_rows]

# Multi-class target
train.labels <- All[train_rows]
test.labels <- All[test_rows]

SVM MODEL

# Training frames for the one-vs-all models: the shared training features plus
# the binary target of the class in question in a `Score` column.
train2 <- train
train2[["Score"]] <- train.labels.2
train3 <- train
train3[["Score"]] <- train.labels.3
train4 <- train
train4[["Score"]] <- train.labels.4
train5 <- train
train5[["Score"]] <- train.labels.5

# One SVM per class on unscaled features, with probability estimates enabled.
# The fits are run in the order SVM2, SVM3, SVM4, SVM5.
# NOTE(review): keep this order unless it is confirmed that the probability
# models do not depend on shared random state between fits.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict the validation rows, asking for class probabilities as well.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)

# The probability matrices travel as an attribute of each prediction vector.
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")

VOTING

The probabilities were used as an input for the voting procedure. The class yielding the highest probability was chosen.

# Put the probability matrices of the four one-vs-all models side by side:
# the columns of Prob2 first, then Prob3, Prob4 and Prob5.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label every column from the matrices' own column names (the 0/1 class each
# column refers to) instead of typing the labels by hand. The column order of
# a probability matrix is not the same for every model, so hand-written labels
# can end up attached to the wrong column.
colnames(Voting.df) <- c(
  paste0("Class 2: ", colnames(Prob2)),
  paste0("Class 3: ", colnames(Prob3)),
  paste0("Class 4: ", colnames(Prob4)),
  paste0("Class 5: ", colnames(Prob5))
)

head(Voting.df)
##    Class 2: 1 Class2: 0 Class 3: 0  Class3: 1 Class 4: 0 Class4: 1
## 5  0.00967875 0.9903212  0.7407303 0.25926970  0.6234476 0.3765524
## 14 0.01543320 0.9845668  0.9356191 0.06438087  0.6282264 0.3717736
## 16 0.02707129 0.9729287  0.9077881 0.09221194  0.7319714 0.2680286
## 26 0.01768531 0.9823147  0.8405854 0.15941459  0.7197727 0.2802273
## 28 0.02315176 0.9768482  0.8673229 0.13267712  0.6896086 0.3103914
## 29 0.02092721 0.9790728  0.7080381 0.29196188  0.6706295 0.3293705
##    Class 5: 0 Class5: 1
## 5   0.7651958 0.2348042
## 14  0.6261038 0.3738962
## 16  0.6404897 0.3595103
## 26  0.6079863 0.3920137
## 28  0.3635236 0.6364764
## 29  0.8555406 0.1444594
# For each one-vs-all model keep only the probability of the positive outcome
# (the column named "1" in that model's probability matrix).
# The positions are looked up by column name rather than hard-coded, because
# the "0"/"1" column order of a probability matrix can differ between models.
ProbMatrices <- list(Prob2, Prob3, Prob4, Prob5)
PositiveColumn <- vapply(ProbMatrices,
                         function(p) match("1", colnames(p)),
                         integer(1))
# Voting.df holds the matrices side by side, so a model's columns start after
# all columns of the models before it.
ColumnOffset <- cumsum(c(0L, vapply(ProbMatrices, ncol, integer(1))))
ColumnOffset <- ColumnOffset[seq_along(ProbMatrices)]
SEQ <- ColumnOffset + PositiveColumn
# Fail loudly if some model has no column for the positive outcome.
stopifnot(!anyNA(SEQ))
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
##             2          3         4         5
## 5  0.00967875 0.25926970 0.3765524 0.2348042
## 14 0.01543320 0.06438087 0.3717736 0.3738962
## 16 0.02707129 0.09221194 0.2680286 0.3595103
## 26 0.01768531 0.15941459 0.2802273 0.3920137
## 28 0.02315176 0.13267712 0.3103914 0.6364764
## 29 0.02092721 0.29196188 0.3293705 0.1444594
# Vote: per row, the position of the largest probability. The columns are
# named "2", "3", "4", "5", so position k stands for class k + 1.
Index <- 1 + as.numeric(apply(Transformed.Voting.df, 1, which.max))

# Evaluation table: the four probabilities, the voted class and the true class.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
##               2          3         4          5 Vote Actual
## 5   0.009678750 0.25926970 0.3765524 0.23480421    4      4
## 14  0.015433196 0.06438087 0.3717736 0.37389624    5      5
## 16  0.027071293 0.09221194 0.2680286 0.35951030    5      5
## 26  0.017685315 0.15941459 0.2802273 0.39201374    5      4
## 28  0.023151761 0.13267712 0.3103914 0.63647636    5      4
## 29  0.020927213 0.29196188 0.3293705 0.14445937    4      4
## 39  0.072561358 0.05277748 0.2493513 0.20967706    4      5
## 40  0.008568965 0.19796372 0.4697741 0.22516400    4      3
## 60  0.013507360 0.04562536 0.3073965 0.58315444    5      5
## 61  0.141282256 0.13396419 0.2824113 0.25131036    4      3
## 72  0.015262255 0.06112688 0.3010791 0.75501994    5      4
## 81  0.022506947 0.14731964 0.2983113 0.40055856    5      3
## 86  0.029358069 0.11239344 0.3058505 0.60151578    5      5
## 90  0.042555984 0.13371234 0.3138610 0.19588701    4      4
## 92  0.064211726 0.11554099 0.3195823 0.15522930    4      4
## 113 0.040555751 0.17563058 0.3946159 0.11215257    4      5
## 116 0.017617079 0.10662143 0.3826560 0.37320767    4      4
## 117 0.021229604 0.10601310 0.3103428 0.43398029    5      5
## 122 0.069827399 0.12114657 0.3701611 0.20066696    4      4
## 123 0.017571688 0.08582790 0.3123074 0.36823204    5      2
## 124 0.049499761 0.11257573 0.2991197 0.17832463    4      4
## 131 0.007240849 0.12945562 0.2775472 0.61420534    5      4
## 135 0.321232138 0.13779100 0.4404871 0.07817342    4      3
## 137 0.012786306 0.03169135 0.3306186 0.58609813    5      5
## 140 0.033249673 0.13128620 0.3641368 0.37596078    5      4
## 142 0.037325471 0.05568896 0.3656838 0.60305092    5      5
## 149 0.014251222 0.10852447 0.3337304 0.62259402    5      4
## 154 0.022995640 0.11602440 0.2468248 0.47476443    5      5
## 156 0.055010880 0.09228777 0.3690160 0.20503416    4      3
## 158 0.118843156 0.13256830 0.4390458 0.03289577    4      3
## 169 0.020831181 0.06586095 0.2312800 0.67521877    5      5
## 185 0.009388808 0.08833820 0.2025331 0.73936003    5      5
## 187 0.008189057 0.10069022 0.3645053 0.38883141    5      5
## 192 0.049681642 0.15769725 0.4158370 0.05964246    4      3
## 194 0.025955566 0.10401016 0.3845614 0.28288618    4      4
## 195 0.029314738 0.14613465 0.2769567 0.40763879    5      4
## 196 0.050272269 0.38939160 0.3598824 0.05331882    3      5
## 197 0.204497380 0.21933828 0.2457030 0.08516028    4      3
## 199 0.015527230 0.09394990 0.2377920 0.54928977    5      5
## 210 0.149809042 0.16782023 0.3889646 0.06392325    4      3
## 216 0.016736848 0.06861630 0.1979394 0.78280410    5      5
## 220 0.006768780 0.21411982 0.2925048 0.19040274    4      4
## 227 0.187797618 0.01417078 0.3172280 0.30395350    4      5
## 234 0.029994888 0.11979616 0.3358560 0.32242334    4      3
## 240 0.017523085 0.14019658 0.3807466 0.23887371    4      5
## 245 0.076871881 0.09145333 0.3510266 0.51227225    5      4
## 249 0.019450930 0.14623250 0.2766288 0.49413986    5      5
## 261 0.021648647 0.11501222 0.3326516 0.42510170    5      3
## 277 0.013659521 0.07609210 0.2710334 0.85282360    5      5
## 283 0.022714775 0.09665276 0.3210336 0.44145897    5      5
## 290 0.015066374 0.09435285 0.2494450 0.77746046    5      4
## 293 0.013756956 0.09272440 0.3505471 0.26763296    4      5
## 302 0.011844529 0.13152010 0.2747941 0.57454290    5      4
## 305 0.029573352 0.08351868 0.2940562 0.64916300    5      4
## 308 0.021902202 0.12766160 0.2900555 0.56040987    5      4
## 311 0.012736619 0.08943196 0.2420247 0.67301106    5      5
## 320 0.022220384 0.10622339 0.2935644 0.63894661    5      2
## 322 0.030984180 0.07642610 0.2833732 0.76118164    5      5
## 330 0.012095631 0.06966259 0.2173868 0.90543786    5      4
## 332 0.049362875 0.12090388 0.4066793 0.25382554    4      4
## 333 0.035500165 0.09386047 0.3053484 0.62925276    5      5
## 339 0.017017275 0.09930729 0.3073977 0.46935368    5      5
## 341 0.027344392 0.09798088 0.4452108 0.21655641    4      4
## 344 0.051693089 0.06270520 0.2916685 0.75680979    5      5
## 349 0.014335686 0.11065840 0.2267046 0.68300009    5      5
## 355 0.027420503 0.09572565 0.2509218 0.79067272    5      5
## 356 0.030253796 0.10364392 0.3053247 0.45799169    5      3
## 365 0.016608121 0.15769460 0.2760141 0.41645062    5      3
## 366 0.019573259 0.11521570 0.2824862 0.51299314    5      4
## 369 0.010211421 0.12674830 0.3146724 0.40746679    5      4
## 371 0.013329722 0.09341356 0.2544949 0.70167631    5      5
## 373 0.013278076 0.06902598 0.3425837 0.58067562    5      5
## 389 0.039647652 0.09806279 0.2777402 0.67683632    5      2
## 390 0.025456451 0.13599533 0.2722322 0.65030580    5      4
## 396 0.034910325 0.08298479 0.4138067 0.25449771    4      4
## 412 0.005091713 0.08855757 0.3616707 0.45789305    5      5
## 413 0.029502748 0.11326724 0.3996389 0.33426946    4      3
## 415 0.016922278 0.09128535 0.3345398 0.53836954    5      4
## 422 0.066918713 0.10366944 0.3087043 0.53194989    5      5
## 425 0.011668181 0.07332659 0.2515710 0.88832664    5      5
## 434 0.020110171 0.07506389 0.3194183 0.47677588    5      5
## 438 0.010601075 0.09921586 0.3045724 0.67794655    5      4
## 441 0.159933364 0.17585385 0.2869703 0.21770616    4      5
## 442 0.027590075 0.10905322 0.2625891 0.59375351    5      5
## 445 0.018157458 0.14441412 0.3370149 0.50000000    5      5
## 447 0.039589716 0.15421838 0.3514492 0.08874892    4      3
## 453 0.023016094 0.16825672 0.3398655 0.44745778    5      4
## 454 0.139205114 0.05386497 0.2680766 0.15997158    4      5
## 462 0.007461855 0.06444471 0.2337827 0.77826694    5      5
## 474 0.014580992 0.09930797 0.2684016 0.62733150    5      3
## 476 0.032000886 0.19506945 0.2472423 0.41452522    5      3
## 493 0.023182918 0.10157845 0.2742198 0.46988513    5      5
## 502 0.016048758 0.15893833 0.3291024 0.32005546    4      4
## 503 0.038273029 0.03210373 0.3579953 0.38721852    5      5
## 506 0.013279694 0.13452197 0.3129695 0.40736855    5      5
## 508 0.018061672 0.06159911 0.3744498 0.60408646    5      5
## 512 0.027841799 0.15052513 0.2238038 0.71825344    5      5
## 513 0.025128264 0.10813438 0.2485841 0.51829954    5      5
## 521 0.222797099 0.04700589 0.3061033 0.64636928    5      2
## 524 0.012345895 0.14527110 0.3216109 0.35911010    5      5
# Confusion matrix: actual classes in rows, voted classes in columns.
# Both sides are turned into factors over the same set of classes, so the table
# stays square (with an all-zero column or row where needed) even when some
# class is never voted for or never occurs. Otherwise diag(CM) would pair up
# the wrong cells.
Classes <- union(levels(factor(Evaluation$Actual)),
                 as.character(sort(unique(Evaluation$Vote))))
CM <- table(factor(Evaluation$Actual, levels = Classes),
            factor(Evaluation$Vote, levels = Classes))
CM
##    
##       2   3   4   5
##   2   0   0   1   6
##   3   0   0  15  10
##   4   1   0  22  39
##   5   0   2  12 100
#Proportions
# Number of validation observations overall and per actual class; the per-class
# counts are the weights of the weighted averages below.
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))
Support <- c(Length2, Length3, Length4, Length5)

# diag(CM) only lines up with the classes when rows and columns list the same
# classes in the same order, one per entry of Support.
stopifnot(identical(rownames(CM), colnames(CM)),
          nrow(CM) == length(Support))

# CM has the actual classes in its rows and the voted classes in its columns.
Hits <- diag(CM)
ActualTotals <- rowSums(CM)
PredictedTotals <- colSums(CM)

#Accuracy
Accuracy <- sum(Hits)/sum(CM)

#Precision
# Correct votes for a class divided by all votes for that class (column
# total). A class that was never voted for contributes 0 instead of NaN.
Precision <- ifelse(PredictedTotals > 0, Hits/PredictedTotals, 0)
# Average weighted by the number of actual observations per class; the
# denominator is the real validation-set size, not a fixed 208.
Precision <- sum(Precision*Support)/Overall

#Recall
# Correct votes for a class divided by all actual members of that class (row
# total). A class without any actual members contributes 0 instead of NaN.
Recall <- ifelse(ActualTotals > 0, Hits/ActualTotals, 0)
Recall <- sum(Recall*Support)/Overall


Accuracy
## [1] 0.5865385
Precision
##         2 
## 0.5865385
Recall
##         2 
## 0.4847519