# Set the working directory to the folder of the combined feature-set SVM experiment.
# NOTE(review): every file read in the visible part of this script uses an absolute
# path, so this call does not appear to influence it - confirm before removing.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/3.Feature Set 2/Combined")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Read the labelled source data from Excel and keep its Score column
# as the raw label vector used to build every class indicator below.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels$Score
Import Feature Set 2.
# Read Feature Set 2 and drop its first column in one step
# (presumably a row index written by the export - TODO confirm).
Features1 <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/3.Feature Set 2/Combined/Feature Set 2 TP.csv"
)[-1]
Import Feature Set 1.
# Read Feature Set 1 (10th TP file) and drop its first column in one step
# (presumably a row index written by the export - TODO confirm).
Features2 <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/3.Feature Set 2/Combined/Feature Set 1 10th TP.csv"
)[-1]
Combine both feature sets into one.
#Combine Features: bind the columns of Feature Set 2 (Features1) and Feature Set 1
#(Features2) side by side into a single data frame; rows are matched by position.
Features <- cbind(Features1,Features2)
# Class 2: one-vs-rest indicator, 1 when the score is 3 or 4, otherwise 0.
# Vectorised over the whole of Label, so it no longer depends on the data
# having exactly 1000 rows and no longer grows a list element by element.
# A missing score yields NA rather than stopping the script.
# The result is a factor with levels "0" and "1".
Label2 <- as.factor(as.numeric(Label == 3 | Label == 4))
# Class 3: one-vs-rest indicator, 1 when the score is 5 or 6, otherwise 0.
# Vectorised over the whole of Label, so it no longer depends on the data
# having exactly 1000 rows and no longer grows a list element by element.
# A missing score yields NA rather than stopping the script.
# The result is a factor with levels "0" and "1".
Label3 <- as.factor(as.numeric(Label == 5 | Label == 6))
# Class 4: one-vs-rest indicator, 1 when the score is 7 or 8, otherwise 0.
# Vectorised over the whole of Label, so it no longer depends on the data
# having exactly 1000 rows and no longer grows a list element by element.
# A missing score yields NA rather than stopping the script.
# The result is a factor with levels "0" and "1".
Label4 <- as.factor(as.numeric(Label == 7 | Label == 8))
# Class 5: one-vs-rest indicator, 1 when the score is 9 or 10, otherwise 0.
# Vectorised over the whole of Label, so it no longer depends on the data
# having exactly 1000 rows and no longer grows a list element by element.
# A missing score yields NA rather than stopping the script.
# The result is a factor with levels "0" and "1".
Label5 <- as.factor(as.numeric(Label == 9 | Label == 10))
# All labels: collapse the raw score into the four classes used for evaluation.
#   9 or 10 -> 5,  7 or 8 -> 4,  5 or 6 -> 3,  any other score -> 2
# Vectorised over the whole of Label, so it no longer depends on the data
# having exactly 1000 rows. The conditions are tested in the same order as
# before; a missing score yields NA rather than stopping the script.
All <- ifelse(
  Label == 9 | Label == 10, 5,
  ifelse(
    Label == 7 | Label == 8, 4,
    ifelse(Label == 5 | Label == 6, 3, 2)
  )
)
# As factor
All <- as.factor(All)
# Coerce every feature column to numeric (the previous comment said "Factor",
# but the code has always called as.numeric). Applying the conversion to all
# columns via lapply removes the hard-coded column count of 1818, which would
# fail with fewer columns and silently skip any beyond that number.
# `Features[] <-` keeps the data frame structure and column names.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 1818 variables:
## $ amaz_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ arriv_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ bad_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ basic_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ beauti_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ befor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ best_jjs : num 1 0 0 0 0 0 0 0 0 0 ...
## $ big_jj : num 1 0 0 0 0 0 0 0 0 1 ...
## $ build_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ central_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ clean_jj : num 0 0 0 1 0 0 1 0 1 0 ...
## $ clear_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ close_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ cold_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ difficult_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ due_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ earl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ easi_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ english_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ enough_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ excel_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ extra_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ first_jj : num 0 1 0 0 0 0 0 0 0 0 ...
## $ free_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fresh_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ friend_jj : num 0 0 0 0 0 0 0 1 0 0 ...
## $ front_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ full_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ general_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ good_jj : num 0 0 1 0 0 1 0 1 0 1 ...
## $ great_jj : num 0 1 0 1 0 1 0 0 0 0 ...
## $ guest_jjs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ high_jj : num 1 0 0 0 0 0 0 1 0 0 ...
## $ hot_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ huge_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ littl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ locat_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ london_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ loud_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ main_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ major_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ modern_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ much_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ new_jj : num 1 1 0 0 0 0 0 0 0 0 ...
## $ next_jj : num 1 0 0 1 0 0 0 0 0 0 ...
## $ nice_jj : num 0 0 1 1 0 0 0 0 0 0 ...
## $ nois_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ noisi_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ ok_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ old_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ onli_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ open_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ overal_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ particular_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ perfect_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pillow_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pleasant_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ poor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ public_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ quiet_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ realli_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ recept_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ safe_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ second_jj : num 0 1 0 1 0 0 0 0 0 0 ...
## $ select_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servic_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ short_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ shower_jjr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sleep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ small_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ spacious_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ special_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ standard_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ stay_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ steep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ super_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sure_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ underground_jj: num 0 0 0 0 0 0 0 0 0 0 ...
## $ upgrad_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ veri_jj : num 0 0 0 0 0 0 0 0 1 0 ...
## $ warm_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ whole_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ask_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bed_vbd : num 0 0 0 1 0 0 0 0 0 1 ...
## $ build_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ came_vbd : num 1 0 0 1 0 0 0 0 0 0 ...
## $ check_vb : num 1 0 0 0 0 0 0 0 0 0 ...
## $ definit_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ done_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ expens_vbz : num 0 0 0 0 0 0 0 0 0 0 ...
## $ gave_vbd : num 0 0 0 0 0 0 0 0 0 0 ...
## $ get_vb : num 0 0 0 0 1 0 0 0 0 0 ...
## $ given_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ go_vb : num 0 0 1 0 0 0 0 0 0 0 ...
## $ go_vbp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ got_vbd : num 1 0 0 0 0 0 0 0 0 0 ...
## $ like_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ love_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ made_vbd : num 1 0 0 0 0 0 1 0 0 0 ...
## [list output truncated]
# Train/test split of the features and of every label vector.
# Each row is independently assigned to group 1 (train, probability 0.8) or
# group 2 (test, probability 0.2); the seed makes the assignment reproducible.

# The label vectors are subset with the same row mask as Features, so they
# must have one entry per feature row; stop early if they do not line up.
stopifnot(
  length(Label2) == nrow(Features),
  length(Label3) == nrow(Features),
  length(Label4) == nrow(Features),
  length(Label5) == nrow(Features),
  length(All) == nrow(Features)
)

set.seed(1234)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
InTrain <- ind == 1
InTest <- ind == 2

train <- Features[InTrain, ]
test <- Features[InTest, ]

# One-vs-rest indicators for classes 2 to 5
train.labels.2 <- Label2[InTrain]
test.labels.2 <- Label2[InTest]
train.labels.3 <- Label3[InTrain]
test.labels.3 <- Label3[InTest]
train.labels.4 <- Label4[InTrain]
test.labels.4 <- Label4[InTest]
train.labels.5 <- Label5[InTrain]
test.labels.5 <- Label5[InTest]

# Four-class labels used for the final evaluation
train.labels <- All[InTrain]
test.labels <- All[InTest]
# One binary (one-vs-rest) SVM per class.

# Training frames: the shared feature rows plus the class-specific indicator
# stored in a Score column.
train2 <- train
train3 <- train
train4 <- train
train5 <- train
train2[["Score"]] <- train.labels.2
train3[["Score"]] <- train.labels.3
train4[["Score"]] <- train.labels.4
train5[["Score"]] <- train.labels.5

# Fit the models in class order 2, 3, 4, 5, without feature scaling and with
# probability estimates enabled.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict on the test rows and pull out each model's probability matrix,
# which predict() attaches as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")
The probabilities were used as input for the voting process. The class with the highest probability was picked.
# Collect the probability matrices of the four binary models side by side.
ProbByClass <- list("2" = Prob2, "3" = Prob3, "4" = Prob4, "5" = Prob5)
# Each binary model is expected to contribute exactly two outcome columns.
stopifnot(all(vapply(ProbByClass, ncol, integer(1)) == 2L))
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label every column from the outcome name carried by its own probability
# matrix instead of a hand-typed list, so a label can never disagree with the
# column it describes if the outcome order inside a matrix differs.
# The "Class k: x" / "Classk: x" spelling of each pair is kept so the headers
# match the ones used so far.
colnames(Voting.df) <- unlist(lapply(names(ProbByClass), function(k) {
  paste0(c("Class ", "Class"), k, ": ", colnames(ProbByClass[[k]]))
}))
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.032606399 0.9673936 0.7655320 0.23446799 0.6325080 0.3674920
## 14 0.007091264 0.9929087 0.9360034 0.06399659 0.6503677 0.3496323
## 16 0.017050214 0.9829498 0.9135978 0.08640221 0.7657297 0.2342703
## 26 0.041796686 0.9582033 0.8787862 0.12121381 0.6842644 0.3157356
## 28 0.036681125 0.9633189 0.8705713 0.12942866 0.6870545 0.3129455
## 29 0.018772442 0.9812276 0.7804926 0.21950738 0.6183516 0.3816484
## Class 5: 0 Class5: 1
## 5 0.8941866 0.1058134
## 14 0.4802947 0.5197053
## 16 0.4851715 0.5148285
## 26 0.6543982 0.3456018
## 28 0.5365838 0.4634162
## 29 0.8969489 0.1030511
# Keep, for each class, only the probability of the positive outcome ("1").
# The position of that column is looked up by name in each model's probability
# matrix rather than hard-coded, because the outcome order is not the same in
# every matrix (the first model lists "1" first, the others list "0" first).
ProbMatrices <- list(Prob2, Prob3, Prob4, Prob5)
PositiveColumn <- vapply(
  ProbMatrices,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Every model must expose a column for outcome "1".
stopifnot(!anyNA(PositiveColumn))
# Voting.df holds the matrices side by side, so shift each position by the
# number of columns contributed by the matrices before it.
ColumnOffset <- cumsum(c(0, vapply(ProbMatrices[-length(ProbMatrices)], ncol, integer(1))))
SEQ <- PositiveColumn + ColumnOffset
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.032606399 0.23446799 0.3674920 0.1058134
## 14 0.007091264 0.06399659 0.3496323 0.5197053
## 16 0.017050214 0.08640221 0.2342703 0.5148285
## 26 0.041796686 0.12121381 0.3157356 0.3456018
## 28 0.036681125 0.12942866 0.3129455 0.4634162
## 29 0.018772442 0.21950738 0.3816484 0.1030511
# Vote: for every test row take the position (1 to 4) of the largest
# probability; adding 1 turns that position into the class number (2 to 5).
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, FUN = which.max)) + 1

# Evaluation table: the four class probabilities, the voted class and the
# actual class of each test row.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.032606399 0.23446799 0.3674920 0.10581341 4 4
## 14 0.007091264 0.06399659 0.3496323 0.51970532 5 5
## 16 0.017050214 0.08640221 0.2342703 0.51482848 5 5
## 26 0.041796686 0.12121381 0.3157356 0.34560178 5 4
## 28 0.036681125 0.12942866 0.3129455 0.46341620 5 4
## 29 0.018772442 0.21950738 0.3816484 0.10305112 4 4
## 39 0.209418853 0.08885445 0.2970308 0.13964807 4 5
## 40 0.010244384 0.21574740 0.6224759 0.07995303 4 3
## 60 0.011150967 0.03874132 0.2646629 0.71067584 5 5
## 61 0.071306783 0.23087797 0.2442283 0.29049646 5 3
## 72 0.006851367 0.07578264 0.3376402 0.65799756 5 4
## 81 0.013805339 0.11326370 0.3473421 0.34165389 4 3
## 86 0.023424199 0.11889265 0.2536982 0.62475531 5 5
## 90 0.060900504 0.17288752 0.3330947 0.11758207 4 4
## 92 0.037649010 0.13955962 0.3369531 0.11608050 4 4
## 113 0.050518104 0.09400428 0.3807871 0.17893788 4 5
## 116 0.020793436 0.13847100 0.2918954 0.37548295 5 4
## 117 0.021556028 0.08413253 0.3323288 0.37363888 5 5
## 122 0.048140342 0.12381778 0.3632241 0.09056707 4 4
## 123 0.019112832 0.07344736 0.3367593 0.26968452 4 2
## 124 0.034318613 0.14768061 0.3420099 0.08327561 4 4
## 131 0.005150139 0.14339867 0.2964182 0.57168645 5 4
## 135 0.317934541 0.21195130 0.4892647 0.03715908 4 3
## 137 0.004774791 0.04260445 0.3582131 0.53201838 5 5
## 140 0.021922183 0.15968793 0.3453923 0.25014746 4 4
## 142 0.018291605 0.06747823 0.3367159 0.51868446 5 5
## 149 0.012709303 0.08312987 0.3549414 0.54168429 5 4
## 154 0.029161309 0.13615018 0.2408427 0.37359171 5 5
## 156 0.154185553 0.07466628 0.3646861 0.15724600 4 3
## 158 0.301087943 0.22850925 0.4854586 0.01748909 4 3
## 169 0.009127897 0.07601771 0.2107797 0.74574982 5 5
## 185 0.009209584 0.11372472 0.1823496 0.63949864 5 5
## 187 0.004892435 0.07155300 0.4545886 0.34069936 4 5
## 192 0.026463493 0.18218031 0.5275966 0.02675882 4 3
## 194 0.019766791 0.16658948 0.4273633 0.19904093 4 4
## 195 0.016788580 0.20780337 0.2652114 0.28148210 5 4
## 196 0.109497039 0.26468466 0.4338441 0.02811541 4 5
## 197 0.350999735 0.20662003 0.2476451 0.04735815 2 3
## 199 0.007479748 0.09481312 0.2349246 0.77400284 5 5
## 210 0.113163687 0.16019816 0.4251082 0.02714038 4 3
## 216 0.016621470 0.06202914 0.1522010 0.88517871 5 5
## 220 0.008772767 0.26442969 0.2886023 0.10884332 4 4
## 227 0.168460902 0.03207103 0.4452559 0.14813515 4 5
## 234 0.030111684 0.12578020 0.4211525 0.20042993 4 3
## 240 0.015747215 0.10538289 0.4229518 0.18319728 4 5
## 245 0.056365554 0.10948983 0.3199625 0.36821189 5 4
## 249 0.015271907 0.13530829 0.3084668 0.40699392 5 5
## 261 0.021484076 0.15494783 0.3310668 0.34096100 5 3
## 277 0.010041986 0.07176750 0.2486279 0.90440948 5 5
## 283 0.019249520 0.09110795 0.2703007 0.57871952 5 5
## 290 0.010587572 0.08258827 0.2094098 0.86574474 5 4
## 293 0.014678006 0.07631392 0.3788308 0.20360093 4 5
## 302 0.008774383 0.12969148 0.2868770 0.43735699 5 4
## 305 0.023044639 0.08527211 0.3569291 0.55838739 5 4
## 308 0.024125081 0.13442204 0.2443141 0.41386578 5 4
## 311 0.008560495 0.07587599 0.2256220 0.77867647 5 5
## 320 0.015695883 0.08871228 0.2478818 0.74196040 5 2
## 322 0.022218242 0.06416225 0.2351916 0.86504481 5 5
## 330 0.012155286 0.07566696 0.1976253 0.88612177 5 4
## 332 0.035834409 0.11152124 0.4295768 0.17657429 4 4
## 333 0.024169947 0.08667697 0.2776611 0.74226306 5 5
## 339 0.009990171 0.09540054 0.2708564 0.59144827 5 5
## 341 0.022255404 0.09117063 0.4701358 0.16467591 4 4
## 344 0.037259399 0.06256462 0.3319810 0.75337246 5 5
## 349 0.010742651 0.08276375 0.1845408 0.86617692 5 5
## 355 0.016007428 0.07055416 0.2016546 0.90676788 5 5
## 356 0.027046900 0.08707521 0.2890443 0.53278459 5 3
## 365 0.012429089 0.14164939 0.2796232 0.37061438 5 3
## 366 0.009883874 0.13385599 0.3172141 0.48020466 5 4
## 369 0.007369483 0.09301260 0.3207309 0.40360596 5 4
## 371 0.010020978 0.08764833 0.2171809 0.81649831 5 5
## 373 0.013084398 0.06656946 0.3143456 0.71399267 5 5
## 389 0.043397696 0.10383520 0.2825576 0.48901900 5 2
## 390 0.032925451 0.13503197 0.2669948 0.49302533 5 4
## 396 0.015626937 0.06741926 0.4246627 0.28765802 4 4
## 412 0.004523162 0.07908335 0.3881505 0.36824381 4 5
## 413 0.013814542 0.10853714 0.3723349 0.30215291 4 3
## 415 0.010149141 0.10363443 0.3543091 0.44189820 5 4
## 422 0.043730152 0.13190479 0.3510138 0.29652186 4 5
## 425 0.015674218 0.07297629 0.1976732 0.94095245 5 5
## 434 0.015359654 0.05913168 0.3303814 0.47266726 5 5
## 438 0.005915523 0.10161784 0.3089868 0.70846787 5 4
## 441 0.108542809 0.18001043 0.3359980 0.15573625 4 5
## 442 0.022348608 0.09572089 0.1895699 0.68017157 5 5
## 445 0.011116547 0.12241480 0.3943433 0.47320410 5 5
## 447 0.032681981 0.15880581 0.3608650 0.08255829 4 3
## 453 0.052578982 0.13357695 0.4161222 0.23995448 4 4
## 454 0.170417899 0.06828422 0.2504082 0.13730463 4 5
## 462 0.006115003 0.06146029 0.2142719 0.83139529 5 5
## 474 0.025622707 0.10220542 0.3070538 0.47872700 5 3
## 476 0.015173593 0.18025155 0.2781339 0.30895262 5 3
## 493 0.014359570 0.11379836 0.2442553 0.44591768 5 5
## 502 0.015131971 0.20422809 0.4099599 0.20374181 4 4
## 503 0.133142075 0.03575333 0.3148697 0.24769091 4 5
## 506 0.015324274 0.13829324 0.2657904 0.52110134 5 5
## 508 0.012934800 0.05020800 0.4031394 0.74450817 5 5
## 512 0.023891480 0.12255654 0.1984804 0.71526307 5 5
## 513 0.022664233 0.11023482 0.2032672 0.41891017 5 5
## 521 0.186357310 0.07327935 0.2726400 0.55242065 5 2
## 524 0.026304197 0.14001703 0.3234707 0.29224615 4 5
# Confusion matrix: rows = actual class, columns = voted class.
# Both sides are put on the same fixed set of levels, so the table is always a
# square 4 x 4 matrix and diag(CM) lines up with the classes even when some
# class is never voted for (or never occurs) in the test rows.
ClassLevels <- c("2", "3", "4", "5")
CM <- table(
  factor(Evaluation$Actual, levels = ClassLevels),
  factor(Evaluation$Vote, levels = ClassLevels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 0 15 9
## 4 0 0 27 35
## 5 0 1 20 93
#Proportions: number of test rows overall and per actual class
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))
# Class sizes used as weights when averaging the per-class scores
Support <- c(Length2, Length3, Length4, Length5)
#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)
#Precision
# CM has the actual class in rows and the voted class in columns, so the
# precision of a class is its diagonal cell divided by its COLUMN total
# (share of the votes for that class that were correct). The row and column
# totals were previously the wrong way round between precision and recall.
Precision <- diag(CM)/colSums(CM)
# A class that was never voted for gives 0/0; count it as 0.
Precision[is.nan(Precision)] <- 0
# Average weighted by class size; Overall replaces the hard-coded 208.
Precision <- sum(Precision*Support)/Overall
#Recall
# Recall of a class is its diagonal cell divided by its ROW total
# (share of the actual members of that class that were found).
Recall <- diag(CM)/rowSums(CM)
# A class with no test rows gives 0/0; count it as 0.
Recall[is.nan(Recall)] <- 0
Recall <- sum(Recall*Support)/Overall
Accuracy
## [1] 0.5769231
Precision
## [1] 0.4853141
Recall
## [1] 0.5769231