setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/4.Feature Set 3/Combined")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Read the labelled source workbook and keep its `Score` column as the raw
# target vector that the one-vs-all recoding works from.
Labels <- read_excel(
  path = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels$Score
Import Feature Set 2 (the first component of the combined Feature Set 3).
# Read the "Feature Set 2 TP" table and drop its first column
# (presumably a row index written on export; confirm against the CSV).
Features1 <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/4.Feature Set 3/Combined/Feature Set 2 TP.csv"
)[-1]
Import Feature Set 1.
# Read the "Feature Set 1 10th TP" table and drop its first column
# (presumably a row index written on export; confirm against the CSV).
Features2 <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/4.Feature Set 3/Combined/Feature Set 1 10th TP.csv"
)[-1]
# Place both feature tables side by side: the columns of Features1 first,
# followed by the columns of Features2.
Features <- cbind(Features1, Features2)
Recode Labels for One-vs-all.
# Recode the raw scores into one-vs-all targets plus a combined class label.
#   Label2..Label5: factor with 1 when the score belongs to that class's
#                   score pair, 0 otherwise.
#   All:            factor holding the class (2..5) of every observation;
#                   any score outside 5..10 falls into class 2.
# The recoding is vectorised over the whole of `Label` instead of looping
# over a hard-coded 1:1000, so it no longer depends on the sample size.

# Fail early and explicitly: a missing score cannot be assigned to a class,
# and the labels must line up one-to-one with the feature rows.
stopifnot(!anyNA(Label))
stopifnot(length(Label) == nrow(Features))

#Class 2
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))

#Class 3
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))

#Class 4
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))

#Class 5
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))

#All Labels
# Start from the catch-all class 2 and overwrite the three explicit classes.
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
#As Factor
All <- as.factor(All)
# Coerce every feature column to numeric (double).
# `Features[] <-` keeps the data frame structure and column names, and
# lapply() visits every column, so no column count has to be hard-coded.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 1879 variables:
## $ amaz_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ arriv_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ bad_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ basic_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ beauti_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ befor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ best_jjs : num 1 0 0 0 0 0 0 0 0 0 ...
## $ big_jj : num 1 0 0 0 0 0 0 0 0 1 ...
## $ build_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ central_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ clean_jj : num 0 0 0 1 0 0 1 0 1 0 ...
## $ clear_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ close_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ cold_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ difficult_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ due_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ earl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ easi_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ english_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ enough_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ excel_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ extra_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ first_jj : num 0 1 0 0 0 0 0 0 0 0 ...
## $ free_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fresh_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ friend_jj : num 0 0 0 0 0 0 0 1 0 0 ...
## $ front_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ full_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ general_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ good_jj : num 0 0 1 0 0 1 0 1 0 1 ...
## $ great_jj : num 0 1 0 1 0 1 0 0 0 0 ...
## $ guest_jjs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ high_jj : num 1 0 0 0 0 0 0 1 0 0 ...
## $ hot_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ huge_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ littl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ locat_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ london_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ loud_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ main_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ major_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ modern_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ much_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ new_jj : num 1 1 0 0 0 0 0 0 0 0 ...
## $ next_jj : num 1 0 0 1 0 0 0 0 0 0 ...
## $ nice_jj : num 0 0 1 1 0 0 0 0 0 0 ...
## $ nois_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ noisi_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ ok_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ old_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ onli_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ open_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ overal_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ particular_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ perfect_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pillow_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pleasant_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ poor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ public_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ quiet_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ realli_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ recept_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ safe_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ second_jj : num 0 1 0 1 0 0 0 0 0 0 ...
## $ select_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servic_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ short_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ shower_jjr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sleep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ small_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ spacious_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ special_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ standard_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ stay_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ steep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ super_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sure_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ underground_jj: num 0 0 0 0 0 0 0 0 0 0 ...
## $ upgrad_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ veri_jj : num 0 0 0 0 0 0 0 0 1 0 ...
## $ warm_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ whole_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ask_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bed_vbd : num 0 0 0 1 0 0 0 0 0 1 ...
## $ build_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ came_vbd : num 1 0 0 1 0 0 0 0 0 0 ...
## $ check_vb : num 1 0 0 0 0 0 0 0 0 0 ...
## $ definit_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ done_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ expens_vbz : num 0 0 0 0 0 0 0 0 0 0 ...
## $ gave_vbd : num 0 0 0 0 0 0 0 0 0 0 ...
## $ get_vb : num 0 0 0 0 1 0 0 0 0 0 ...
## $ given_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ go_vb : num 0 0 1 0 0 0 0 0 0 0 ...
## $ go_vbp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ got_vbd : num 1 0 0 0 0 0 0 0 0 0 ...
## $ like_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ love_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ made_vbd : num 1 0 0 0 0 0 1 0 0 0 ...
## [list output truncated]
# Random train/test split: each row is assigned to partition 1 (train,
# probability 0.8) or partition 2 (test, probability 0.2). The same
# assignment vector `ind` is then applied to the features and to every label
# vector so that rows and labels stay aligned.

# Every label vector is indexed with a mask built from nrow(Features); a
# length mismatch would silently recycle the mask, so check it up front.
stopifnot(
  length(Label2) == nrow(Features),
  length(Label3) == nrow(Features),
  length(Label4) == nrow(Features),
  length(Label5) == nrow(Features),
  length(All) == nrow(Features)
)

set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))

#Features
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

# One-vs-all targets for classes 2 to 5
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]

# Combined class labels, used as ground truth in the evaluation
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
# One-vs-all SVMs: one binary classifier per class (2 to 5).

# Training frames: the shared training features plus the 0/1 target of the
# respective class stored in a `Score` column.
train2 <- train
train2[["Score"]] <- train.labels.2
train3 <- train
train3[["Score"]] <- train.labels.3
train4 <- train
train4[["Score"]] <- train.labels.4
train5 <- train
train5[["Score"]] <- train.labels.5

# Fit the four models on unscaled features with probability estimates
# enabled, in the order class 2, 3, 4, 5.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predict on the test rows and pull the class-probability matrix that
# predict() attaches to its result as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")
Use the predicted probabilities as input for voting: for each observation, choose the class whose classifier reports the highest probability.
# Collect the probability matrices of the four one-vs-all models side by
# side (two columns per model, in the order class 2, 3, 4, 5).
stopifnot(ncol(Prob2) == 2, ncol(Prob3) == 2, ncol(Prob4) == 2, ncol(Prob5) == 2)
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# The 0/1 suffix of each column name is taken from the probability matrix
# itself: the column order of the "probabilities" attribute follows the order
# in which the classes occur in the training data, so it can differ between
# models and between data sets and must not be hard-coded.
# The two prefix spellings ("Class 2: " / "Class2: ") are kept exactly as
# before so the resulting names stay unchanged for existing column orders.
colnames(Voting.df) <- c(
  paste0(c("Class 2: ", "Class2: "), colnames(Prob2)),
  paste0(c("Class 3: ", "Class3: "), colnames(Prob3)),
  paste0(c("Class 4: ", "Class4: "), colnames(Prob4)),
  paste0(c("Class 5: ", "Class5: "), colnames(Prob5))
)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.032550562 0.9674494 0.7673195 0.23268049 0.6310905 0.3689095
## 14 0.007004017 0.9929960 0.9361750 0.06382502 0.6514990 0.3485010
## 16 0.015838913 0.9841611 0.9110876 0.08891237 0.7628006 0.2371994
## 26 0.041522857 0.9584771 0.8796842 0.12031577 0.6792290 0.3207710
## 28 0.035836663 0.9641633 0.8738814 0.12611860 0.6881794 0.3118206
## 29 0.018895675 0.9811043 0.7807466 0.21925345 0.6155611 0.3844389
## Class 5: 0 Class5: 1
## 5 0.8941061 0.1058939
## 14 0.4793192 0.5206808
## 16 0.4841433 0.5158567
## 26 0.6568386 0.3431614
## 28 0.5358614 0.4641386
## 29 0.8978423 0.1021577
# Positions, within Voting.df, of each model's probability for the positive
# outcome (label "1"). Voting.df holds the columns of Prob2, Prob3, Prob4 and
# Prob5 back to back, so the position is the model's column offset plus the
# index of the column named "1" in its probability matrix. Looking the column
# up by name (instead of hard-coding c(1, 4, 6, 8)) keeps the selection
# correct when the column order of a probability matrix differs.
SEQ <- local({
  probs <- list(Prob2, Prob3, Prob4, Prob5)
  widths <- vapply(probs, ncol, integer(1))
  positive <- vapply(probs, function(p) match("1", colnames(p)), integer(1))
  # Every model must expose a "1" column and the layout must match Voting.df.
  stopifnot(!anyNA(positive), sum(widths) == ncol(Voting.df))
  cumsum(c(0L, head(widths, -1))) + positive
})
Transformed.Voting.df <- Voting.df[SEQ]
# Name each column after the class whose membership probability it holds.
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.032550562 0.23268049 0.3689095 0.1058939
## 14 0.007004017 0.06382502 0.3485010 0.5206808
## 16 0.015838913 0.08891237 0.2371994 0.5158567
## 26 0.041522857 0.12031577 0.3207710 0.3431614
## 28 0.035836663 0.12611860 0.3118206 0.4641386
## 29 0.018895675 0.21925345 0.3844389 0.1021577
# Vote: for every test row take the position of the largest probability.
# The columns stand for classes 2 to 5 in positions 1 to 4, hence the + 1
# that turns a column position into a class label.
Index <- as.numeric(apply(Transformed.Voting.df, MARGIN = 1, which.max)) + 1

# Keep the probabilities, the voted class and the true class together.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.032550562 0.23268049 0.3689095 0.10589385 4 4
## 14 0.007004017 0.06382502 0.3485010 0.52068078 5 5
## 16 0.015838913 0.08891237 0.2371994 0.51585673 5 5
## 26 0.041522857 0.12031577 0.3207710 0.34316140 5 4
## 28 0.035836663 0.12611860 0.3118206 0.46413863 5 4
## 29 0.018895675 0.21925345 0.3844389 0.10215766 4 4
## 39 0.246073521 0.08608608 0.3058660 0.13459085 4 5
## 40 0.010332053 0.21082554 0.6178807 0.07994265 4 3
## 60 0.010846452 0.03851740 0.2657178 0.71141036 5 5
## 61 0.071086342 0.23012029 0.2462352 0.29063929 5 3
## 72 0.006675285 0.07550666 0.3373082 0.65823817 5 4
## 81 0.013997219 0.11326034 0.3462024 0.34105034 4 3
## 86 0.024357130 0.11935896 0.2537205 0.62506092 5 5
## 90 0.061167775 0.17215195 0.3327544 0.11786334 4 4
## 92 0.045087920 0.13384530 0.3414363 0.11168607 4 4
## 113 0.052455308 0.09306235 0.3797383 0.17936948 4 5
## 116 0.020254972 0.13633619 0.2912443 0.37570098 5 4
## 117 0.021435263 0.08433783 0.3304387 0.37434007 5 5
## 122 0.048443248 0.12195670 0.3622461 0.09079936 4 4
## 123 0.019235052 0.07308348 0.3401855 0.26781159 4 2
## 124 0.034423440 0.14909550 0.3397862 0.08335382 4 4
## 131 0.005287763 0.14192833 0.2939718 0.57200101 5 4
## 135 0.310196649 0.20521109 0.4921757 0.03699710 4 3
## 137 0.005003248 0.04296714 0.3544315 0.52491196 5 5
## 140 0.021975143 0.15728514 0.3432531 0.25054853 4 4
## 142 0.018187068 0.06725453 0.3359955 0.51905308 5 5
## 149 0.012435937 0.08382654 0.3578403 0.54238464 5 4
## 154 0.028975023 0.13454558 0.2406781 0.37409458 5 5
## 156 0.151300609 0.07594545 0.3650155 0.15751229 4 3
## 158 0.320344826 0.23153542 0.4803940 0.01684115 4 3
## 169 0.008994001 0.07665674 0.2133776 0.74639989 5 5
## 185 0.009239595 0.11469870 0.1826100 0.64002736 5 5
## 187 0.004482111 0.07250963 0.4580091 0.34138676 4 5
## 192 0.026736952 0.18013808 0.5262110 0.02682388 4 3
## 194 0.019218205 0.16590629 0.4260532 0.19942916 4 4
## 195 0.016788508 0.20978710 0.2652889 0.28196475 5 4
## 196 0.110551232 0.26263602 0.4323587 0.02811199 4 5
## 197 0.341871785 0.22087504 0.2419438 0.04635755 2 3
## 199 0.007348658 0.09377933 0.2373930 0.77433902 5 5
## 210 0.114447292 0.16174632 0.4220237 0.02720369 4 3
## 216 0.016529771 0.06460177 0.1527286 0.88519670 5 5
## 220 0.008132184 0.26121812 0.2944043 0.10784683 4 4
## 227 0.173776463 0.03428352 0.4353533 0.14698134 4 5
## 234 0.030108832 0.12581995 0.4201959 0.20096186 4 3
## 240 0.017307318 0.10639089 0.4155558 0.17918504 4 5
## 245 0.056358718 0.10991389 0.3184761 0.36866570 5 4
## 249 0.015402970 0.13646270 0.3089571 0.40749895 5 5
## 261 0.021593676 0.15663638 0.3312962 0.34139224 5 3
## 277 0.009895240 0.07258497 0.2489168 0.90441540 5 5
## 283 0.019227275 0.09150743 0.2717777 0.57921618 5 5
## 290 0.010455049 0.08276155 0.2101070 0.86590398 5 4
## 293 0.014381404 0.07654698 0.3799861 0.20395697 4 5
## 302 0.008748220 0.12921125 0.2864679 0.43780803 5 4
## 305 0.022977748 0.08430635 0.3557554 0.55896227 5 4
## 308 0.023387828 0.13354197 0.2440161 0.41434271 5 4
## 311 0.008339889 0.07577880 0.2266431 0.77913951 5 5
## 320 0.015666404 0.08918620 0.2480184 0.74221259 5 2
## 322 0.022711271 0.06460672 0.2347151 0.86517648 5 5
## 330 0.012062255 0.07604510 0.1985318 0.88623771 5 4
## 332 0.034932718 0.11372799 0.4265310 0.17696722 4 4
## 333 0.024070091 0.08756814 0.2772309 0.74251836 5 5
## 339 0.009854142 0.09496669 0.2713149 0.59207084 5 5
## 341 0.021935800 0.09108508 0.4683688 0.16485910 4 4
## 344 0.037198385 0.06200086 0.3319844 0.75361508 5 5
## 349 0.010315891 0.08307964 0.1864392 0.86631871 5 5
## 355 0.016197906 0.07357972 0.2099958 0.90328258 5 5
## 356 0.027430232 0.08819461 0.2913392 0.53335651 5 3
## 365 0.012276528 0.14160500 0.2806955 0.37108788 5 3
## 366 0.010133360 0.13292298 0.3158287 0.48061224 5 4
## 369 0.007333942 0.09261409 0.3185532 0.40441219 5 4
## 371 0.009884334 0.08769674 0.2200889 0.81661703 5 5
## 373 0.013138648 0.06776095 0.3135537 0.71432581 5 5
## 389 0.043147511 0.10339117 0.2829837 0.48962010 5 2
## 390 0.033023629 0.13395874 0.2669040 0.49354831 5 4
## 396 0.015574651 0.06751296 0.4222426 0.28810329 4 4
## 412 0.004330836 0.07894118 0.3868091 0.36875430 4 5
## 413 0.013076592 0.10680020 0.3736979 0.30230034 4 3
## 415 0.010207170 0.10350553 0.3539418 0.44211351 5 4
## 422 0.040731031 0.12958118 0.3485326 0.29690940 4 5
## 425 0.016013393 0.07345894 0.1980705 0.94095081 5 5
## 434 0.014281470 0.05999869 0.3307957 0.47356061 5 5
## 438 0.005826694 0.10130638 0.3101515 0.70855804 5 4
## 441 0.103954605 0.18089264 0.3186383 0.15943566 4 5
## 442 0.021649950 0.09631189 0.1899239 0.68063516 5 5
## 445 0.010394489 0.12316200 0.3919762 0.47373475 5 5
## 447 0.034515956 0.15466428 0.3540550 0.08178897 4 3
## 453 0.051797745 0.13336283 0.4163475 0.24036597 4 4
## 454 0.162468922 0.06573398 0.2459286 0.13861880 4 5
## 462 0.005875153 0.06203360 0.2149008 0.83141374 5 5
## 474 0.025141850 0.10116051 0.3050142 0.47926309 5 3
## 476 0.014795714 0.18261064 0.2769214 0.30951355 5 3
## 493 0.013956117 0.11586706 0.2432242 0.44631042 5 5
## 502 0.014701484 0.20397582 0.4149309 0.20195727 4 4
## 503 0.139044993 0.03551446 0.3092450 0.24705347 4 5
## 506 0.015032709 0.13912523 0.2661192 0.52205333 5 5
## 508 0.012090469 0.04885160 0.3999136 0.74530293 5 5
## 512 0.023556285 0.12098689 0.1987580 0.71559683 5 5
## 513 0.022104200 0.11021572 0.2044413 0.41942301 5 5
## 521 0.185539093 0.07440826 0.2750607 0.55297875 5 2
## 524 0.025634592 0.13923008 0.3222938 0.29283974 4 5
The confusion matrix was created, and accuracy, precision and recall were computed from it.
# Confusion matrix: rows are the actual classes, columns the voted classes.
# Both sides share one set of levels so the table stays square even when a
# class is never predicted (or never occurs); diag() relies on that.
class_levels <- union(
  levels(factor(Evaluation$Actual)),
  as.character(unique(Evaluation$Vote))
)
CM <- table(
  factor(Evaluation$Actual, levels = class_levels),
  factor(Evaluation$Vote, levels = class_levels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 0 15 9
## 4 0 0 27 35
## 5 0 1 20 93
#Proportions
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual==2))
Length3 <- length(which(Evaluation$Actual==3))
Length4 <- length(which(Evaluation$Actual==4))
Length5 <- length(which(Evaluation$Actual==5))
# Number of actual observations per class; used as weights below.
class_support <- rowSums(CM)
#Accuracy
Accuracy <- sum(diag(CM))/sum(CM)
#Precision
# Precision of a class = correct predictions / all predictions of that class,
# i.e. the diagonal divided by the COLUMN sums (columns are the votes).
class_precision <- diag(CM)/colSums(CM)
# A class that is never predicted gives 0/0; count its precision as 0.
class_precision[is.nan(class_precision)] <- 0
# Support-weighted average over all classes.
Precision <- sum(class_precision*class_support)/sum(class_support)
#Recall
# Recall of a class = correct predictions / all actual members of that class,
# i.e. the diagonal divided by the ROW sums (rows are the actual classes).
class_recall <- diag(CM)/rowSums(CM)
# A class without actual members gives 0/0; it carries zero weight anyway.
class_recall[is.nan(class_recall)] <- 0
# Support-weighted average; by construction this equals the accuracy.
Recall <- sum(class_recall*class_support)/sum(class_support)
Accuracy
## [1] 0.5769231
Precision
## [1] 0.4853141
Recall
## [1] 0.5769231