PREPARATION

setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/5.Feature Set 4/Activation")
#install.packages("naivebayes")
library(naivebayes)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(e1071)
library(readxl)

Import the actual labels.

#Import Labels
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")

Label <- Labels$Score

Import Feature Set F4.

#Import Features
Features <- read.csv("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/5.Feature Set 4/Activation/Feature Set 4 TP.csv")

#Drop the first (index) column
Features <- Features[-1]

RECODE LABELS FOR ONE-VS-ALL

Recode the labels for one-vs-all classification. A binary indicator is created per class (1 = the review belongs to that class, 0 = otherwise): class 2 covers scores 3-4, class 3 scores 5-6, class 4 scores 7-8, and class 5 scores 9-10. The combined label All assigns any remaining score to class 2.

#Class 2 (scores 3-4)
Label2 <- as.factor(ifelse(Label %in% c(3, 4), 1, 0))
#Class 3 (scores 5-6)
Label3 <- as.factor(ifelse(Label %in% c(5, 6), 1, 0))
#Class 4 (scores 7-8)
Label4 <- as.factor(ifelse(Label %in% c(7, 8), 1, 0))
#Class 5 (scores 9-10)
Label5 <- as.factor(ifelse(Label %in% c(9, 10), 1, 0))
#All classes: collapse the 10-point score into classes 2-5
All <- ifelse(Label %in% c(9, 10), 5,
       ifelse(Label %in% c(7, 8), 4,
       ifelse(Label %in% c(5, 6), 3, 2)))
All <- as.factor(All)
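
As a quick sanity check (an optional aside, not part of the original pipeline, assuming all scores lie in 1-10), the recoded class sizes can be compared against the raw scores collapsed into the same bins with cut; both tables should agree:

#Hypothetical sanity check of the recoding
table(All)
table(cut(Label, breaks = c(0, 4, 6, 8, 10), labels = c(2, 3, 4, 5)))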

TRANSFORM FEATURES TO NUMERIC VARIABLES

#Transform integer columns to numeric
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame':    1000 obs. of  438 variables:
##  $ amaz_jj       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ arriv_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ bad_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ basic_jj      : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ beauti_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ befor_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ best_jjs      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ big_jj        : num  1 0 0 0 0 0 0 0 0 1 ...
##  $ build_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ central_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ clean_jj      : num  0 0 0 1 0 0 1 0 1 0 ...
##  $ clear_jj      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ close_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ cold_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ difficult_jj  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ due_jj        : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ earl_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ easi_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ english_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ enough_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ excel_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ extra_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ first_jj      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ free_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fresh_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ friend_jj     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ front_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ full_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ general_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ good_jj       : num  0 0 1 0 0 1 0 1 0 1 ...
##  $ great_jj      : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ guest_jjs     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ high_jj       : num  1 0 0 0 0 0 0 1 0 0 ...
##  $ hot_jj        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ huge_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ littl_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ locat_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ london_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ loud_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ main_jj       : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ major_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ modern_jj     : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ much_jj       : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ new_jj        : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ next_jj       : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ nice_jj       : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ nois_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ noisi_jj      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ ok_jj         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ old_jj        : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ onli_jj       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ open_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ overal_jj     : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ particular_jj : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ perfect_jj    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pillow_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pleasant_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ poor_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ public_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ quiet_jj      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ realli_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ recept_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ safe_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ second_jj     : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ select_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servic_jj     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ short_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ shower_jjr    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sleep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ small_jj      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ spacious_jj   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ special_jj    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ standard_jj   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ stay_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ steep_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ super_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ sure_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ underground_jj: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ upgrad_jj     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ veri_jj       : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ warm_jj       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ whole_jj      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ask_vb        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bed_vbd       : num  0 0 0 1 0 0 0 0 0 1 ...
##  $ build_vb      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ came_vbd      : num  1 0 0 1 0 0 0 0 0 0 ...
##  $ check_vb      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ definit_vb    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ done_vbn      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expens_vbz    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ gave_vbd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ get_vb        : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ given_vbn     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ go_vb         : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ go_vbp        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ got_vbd       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ like_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ love_vb       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ made_vbd      : num  1 0 0 0 0 0 1 0 0 0 ...
##   [list output truncated]

PARTITIONING TRAINING & VALIDATION

#Features: random 80/20 split into training and validation sets
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1,]
test <- Features[ind == 2,]
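
The realised split can be inspected (a hedged aside: with sample, the 80/20 proportions are only approximate):

#Share of observations in each partition
prop.table(table(ind))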

Split the labels with the same partition index.

train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]

train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]

train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]

train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]

train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
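
Because the split is random rather than stratified, it is worth confirming (an optional check, not in the original script) that every class is represented in both partitions:

table(train.labels)
table(test.labels)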

SVM MODEL

#One-vs-all SVMs: one binary model per class
train2 <- train
train2$Score <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

train3 <- train
train3$Score <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

train4 <- train
train4$Score <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

train5 <- train
train5$Score <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
#Predict on the test set, keeping class-membership probabilities
P2 <- predict(SVM2, newdata = test, probability = TRUE)
P3 <- predict(SVM3, newdata = test, probability = TRUE)
P4 <- predict(SVM4, newdata = test, probability = TRUE)
P5 <- predict(SVM5, newdata = test, probability = TRUE)
#Extract the probability matrices attached by e1071
Prob2 <- attr(P2, "probabilities")
Prob3 <- attr(P3, "probabilities")
Prob4 <- attr(P4, "probabilities")
Prob5 <- attr(P5, "probabilities")

VOTING

The predicted class-membership probabilities serve as input for the voting: for each test review, the class whose one-vs-all model assigns the highest probability is picked.

#Combine the probability matrices of the four binary models
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
colnames(Voting.df) <- c("Class 2: 1","Class 2: 0","Class 3: 0","Class 3: 1","Class 4: 0","Class 4: 1","Class 5: 0","Class 5: 1")

head(Voting.df)
##     Class 2: 1 Class 2: 0 Class 3: 0  Class 3: 1 Class 4: 0 Class 4: 1
## 5  0.009034948 0.9909651  0.7393968 0.26060320  0.6208179 0.3791821
## 14 0.013688778 0.9863112  0.9357992 0.06420078  0.6305946 0.3694054
## 16 0.026910040 0.9730900  0.9067345 0.09326550  0.7299959 0.2700041
## 26 0.017346926 0.9826531  0.8427476 0.15725241  0.7115276 0.2884724
## 28 0.023402370 0.9765976  0.8764472 0.12355284  0.6896215 0.3103785
## 29 0.021795000 0.9782050  0.7108836 0.28911637  0.6654715 0.3345285
##    Class 5: 0 Class 5: 1
## 5   0.7648084 0.2351916
## 14  0.6254732 0.3745268
## 16  0.6396284 0.3603716
## 26  0.6124282 0.3875718
## 28  0.3635601 0.6364399
## 29  0.8595517 0.1404483
#Keep only the positive-class ("1") probability column of each model
SEQ <- c(1, 4, 6, 8)
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
##              2          3         4         5
## 5  0.009034948 0.26060320 0.3791821 0.2351916
## 14 0.013688778 0.06420078 0.3694054 0.3745268
## 16 0.026910040 0.09326550 0.2700041 0.3603716
## 26 0.017346926 0.15725241 0.2884724 0.3875718
## 28 0.023402370 0.12355284 0.3103785 0.6364399
## 29 0.021795000 0.28911637 0.3345285 0.1404483
Evaluation <- Transformed.Voting.df
#which.max returns the column position (1-4); adding 1 maps it back to classes 2-5
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max))
Index <- Index + 1
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation,100)
##               2          3         4          5 Vote Actual
## 5   0.009034948 0.26060320 0.3791821 0.23519163    4      4
## 14  0.013688778 0.06420078 0.3694054 0.37452678    5      5
## 16  0.026910040 0.09326550 0.2700041 0.36037164    5      5
## 26  0.017346926 0.15725241 0.2884724 0.38757184    5      4
## 28  0.023402370 0.12355284 0.3103785 0.63643988    5      4
## 29  0.021795000 0.28911637 0.3345285 0.14044831    4      4
## 39  0.095900156 0.05066081 0.2836497 0.18333539    4      5
## 40  0.007619576 0.19615957 0.4587297 0.22691287    4      3
## 60  0.012551605 0.04408588 0.3087160 0.58597302    5      5
## 61  0.124308737 0.14043500 0.2880888 0.25170102    4      3
## 72  0.014805506 0.05907056 0.3001805 0.75609557    5      4
## 81  0.022179498 0.15126601 0.2980090 0.39746959    5      3
## 86  0.028564702 0.10786686 0.3056660 0.60396962    5      5
## 90  0.040741293 0.12948722 0.3139904 0.19513915    4      4
## 92  0.096462678 0.10661236 0.3301838 0.14208673    4      4
## 113 0.040077672 0.16869318 0.3852452 0.11144870    4      5
## 116 0.015756731 0.10735407 0.3816636 0.37520636    4      4
## 117 0.020393326 0.10659496 0.3069531 0.43443313    5      5
## 122 0.069939161 0.12088643 0.3631201 0.20082202    4      4
## 123 0.017777158 0.08099152 0.3215967 0.36435548    5      2
## 124 0.053721863 0.10835263 0.2999630 0.17443552    4      4
## 131 0.006833058 0.13128711 0.2789506 0.61737515    5      4
## 135 0.303534021 0.13415832 0.4498397 0.07471032    4      3
## 137 0.013860703 0.03271669 0.3351240 0.57292424    5      5
## 140 0.034151308 0.13031552 0.3583638 0.37793348    5      4
## 142 0.038630844 0.05476850 0.3576751 0.60504307    5      5
## 149 0.013724026 0.11009605 0.3339537 0.62489023    5      4
## 154 0.022668353 0.11133269 0.2498606 0.47388040    5      5
## 156 0.054432395 0.09099991 0.3627451 0.20454850    4      3
## 158 0.147692385 0.14496648 0.4187333 0.02931646    4      3
## 169 0.019405402 0.06560234 0.2375909 0.67522451    5      5
## 185 0.008978097 0.09034450 0.2103810 0.74227081    5      5
## 187 0.006946956 0.10135200 0.3712691 0.39079235    5      5
## 192 0.049760946 0.16597776 0.4112546 0.06195634    4      3
## 194 0.023799500 0.10128081 0.3782101 0.28215002    4      4
## 195 0.028382046 0.14939023 0.2768290 0.40890759    5      4
## 196 0.050826393 0.37179029 0.3523417 0.05207476    3      5
## 197 0.202810131 0.27197697 0.2419202 0.08039710    3      3
## 199 0.014322764 0.09060664 0.2430436 0.55094240    5      5
## 210 0.143062273 0.17025954 0.3830150 0.06326269    4      3
## 216 0.013824471 0.06944755 0.2067769 0.78433246    5      5
## 220 0.006337688 0.21302708 0.2978570 0.18558461    4      4
## 227 0.188166856 0.01619527 0.3058952 0.30042515    4      5
## 234 0.030402995 0.11525962 0.3346619 0.32180359    4      3
## 240 0.020328669 0.13952577 0.3812417 0.22828357    4      5
## 245 0.076406715 0.08786471 0.3493664 0.51388052    5      4
## 249 0.018406345 0.14428530 0.2804393 0.49489372    5      5
## 261 0.020967473 0.11610909 0.3335663 0.42585565    5      3
## 277 0.012897199 0.07524297 0.2730905 0.85198750    5      5
## 283 0.021957335 0.09868635 0.3200622 0.44242457    5      5
## 290 0.014811584 0.09270094 0.2512839 0.77745704    5      4
## 293 0.012971240 0.08822543 0.3510625 0.26860762    4      5
## 302 0.011654751 0.13012501 0.2779316 0.57560853    5      4
## 305 0.029454893 0.08243167 0.2941265 0.65015360    5      4
## 308 0.020635469 0.12311266 0.2903725 0.56000761    5      4
## 311 0.012113449 0.08825963 0.2459614 0.67416785    5      5
## 320 0.021796549 0.10417086 0.2932457 0.63806592    5      2
## 322 0.032061724 0.07701008 0.2824325 0.76214362    5      5
## 330 0.011881995 0.06888958 0.2214576 0.90559467    5      4
## 332 0.051026358 0.12341873 0.4024923 0.25492811    4      4
## 333 0.034809228 0.09453816 0.3049029 0.62781691    5      5
## 339 0.016512380 0.09787881 0.3065699 0.46991257    5      5
## 341 0.027786484 0.09707638 0.4335345 0.21601103    4      4
## 344 0.053823740 0.06175166 0.2969660 0.75750137    5      5
## 349 0.013319236 0.11075017 0.2338657 0.68370932    5      5
## 355 0.026593700 0.10620570 0.2683496 0.77483357    5      5
## 356 0.029937129 0.10231110 0.3066041 0.45716452    5      3
## 365 0.016475961 0.15794936 0.2785482 0.41745164    5      3
## 366 0.018731404 0.11427324 0.2830124 0.51456603    5      4
## 369 0.009862828 0.12995830 0.3151450 0.40996813    5      4
## 371 0.013272136 0.09195058 0.2621244 0.70271680    5      5
## 373 0.013331892 0.06997933 0.3371288 0.58207278    5      5
## 389 0.039241412 0.09691994 0.2785913 0.67615929    5      2
## 390 0.024796020 0.13281793 0.2734376 0.64938123    5      4
## 396 0.033631776 0.08358336 0.4147701 0.25373018    4      4
## 412 0.004721256 0.09180254 0.3549954 0.45904879    5      5
## 413 0.027459728 0.11348845 0.3877988 0.33527715    4      3
## 415 0.015971060 0.09193708 0.3308562 0.53999016    5      4
## 422 0.064626850 0.10267181 0.3136802 0.53589555    5      5
## 425 0.011490849 0.07101080 0.2523710 0.88880400    5      5
## 434 0.017997561 0.07532356 0.3234489 0.47867565    5      5
## 438 0.010341863 0.10177915 0.3062645 0.68056211    5      4
## 441 0.159163439 0.17373127 0.2848290 0.22442256    4      5
## 442 0.026607963 0.10772554 0.2602844 0.59273383    5      5
## 445 0.016074501 0.14465955 0.3365650 0.50643720    5      5
## 447 0.044911202 0.14734701 0.3475813 0.08610746    4      3
## 453 0.022208901 0.15761364 0.3379342 0.44686586    5      4
## 454 0.140198291 0.05100286 0.2616495 0.15992473    4      5
## 462 0.006931215 0.07095199 0.2357286 0.78026041    5      5
## 474 0.013877427 0.09723541 0.2731695 0.62874795    5      3
## 476 0.028270712 0.19579218 0.2531605 0.41515596    5      3
## 493 0.022687419 0.11042209 0.2784585 0.47401376    5      5
## 502 0.014778673 0.16217026 0.3333004 0.31519346    4      4
## 503 0.041175153 0.03279780 0.3465036 0.38573314    5      5
## 506 0.012690585 0.13234938 0.3151404 0.40878417    5      5
## 508 0.016390382 0.05949842 0.3676681 0.60580163    5      5
## 512 0.024580975 0.14537517 0.2297324 0.72099470    5      5
## 513 0.022461055 0.10961113 0.2494815 0.52025083    5      5
## 521 0.231454156 0.04814004 0.3099698 0.64765970    5      2
## 524 0.011002847 0.14572301 0.3232345 0.35959900    5      5
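
The row-wise argmax can also be taken in vectorised form (a sketch; Index.alt is a hypothetical name, and ties.method = "first" keeps the behaviour identical to which.max, which returns the first maximum on ties):

Index.alt <- max.col(as.matrix(Transformed.Voting.df), ties.method = "first") + 1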

Finally, a confusion matrix is created (rows are actual classes, columns are predicted votes), and accuracy, precision, and recall are calculated, the latter two as support-weighted averages over the four classes.

#Rows: actual class, columns: predicted vote
CM <- table(Evaluation$Actual, Evaluation$Vote)
CM
##    
##       2   3   4   5
##   2   0   0   1   6
##   3   0   1  14  10
##   4   1   0  22  39
##   5   0   2  12 100
#Class supports (number of test reviews per actual class)
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))


#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)

#Precision per class (correct votes / all votes for that class), support-weighted
Precision <- diag(CM)/colSums(CM)
Precision <- (Precision[1]*Length2+Precision[2]*Length3+Precision[3]*Length4+Precision[4]*Length5)/Overall

#Recall per class (correct votes / all actual cases of that class), support-weighted
Recall <- diag(CM)/rowSums(CM)
Recall <- (Recall[1]*Length2+Recall[2]*Length3+Recall[3]*Length4+Recall[4]*Length5)/Overall


Accuracy
## [1] 0.5913462
Precision
##         2 
## 0.5274926
Recall
##         2 
## 0.5913462

Note that the support-weighted recall coincides with the overall accuracy by construction: weighting each class's recall by its share of the test set makes the class counts cancel, leaving sum(diag(CM))/sum(CM).
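
As an optional cross-check (a sketch assuming the caret package is installed; it is not used in the original script), caret::confusionMatrix reproduces the table along with per-class sensitivity (recall) and positive predictive value (precision):

library(caret)
confusionMatrix(factor(Evaluation$Vote, levels = 2:5),
                factor(Evaluation$Actual, levels = 2:5))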