# Set the working directory to the Activation feature-set folder.
# NOTE(review): every file read visible in this script uses an absolute path,
# so this call may only matter for relative paths used elsewhere - confirm
# before removing it. The hard-coded, user-specific location means the script
# only runs unchanged on a machine with this exact folder layout.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/5.Feature Set 4/Activation")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Read the labelled source spreadsheet and keep its Score column as the raw
# review scores that are recoded into classes further down.
Labels <- read_excel(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels[["Score"]]
Import the Activation Feature Set.
# Load the Activation feature set and discard its first column.
# NOTE(review): the first column is presumably a row index written at export
# time - confirm against the CSV.
Features1 <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/5.Feature Set 4/Activation/Feature Set 4 TP.csv"
)[-1]
Import Feature Set F1.
# Load Feature Set 1 (10th TP) and discard its first column.
# NOTE(review): the first column is presumably a row index written at export
# time - confirm against the CSV.
Features2 <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/5.Feature Set 4/Combined/Feature Set 1 10th TP.csv"
)[-1]
Combine the two feature sets into one.
# Combine both feature sets column-wise: Activation features first, then
# Feature Set 1. Column names are kept exactly as they were read.
Features <- data.frame(Features1, Features2, check.names = FALSE)
Recode Labels for One-vs-all classification.
# Recode the raw scores into classes ----
# Score pairs map to classes: 3-4 -> class 2, 5-6 -> class 3, 7-8 -> class 4,
# 9-10 -> class 5. Each LabelK is a one-vs-all indicator (1 = belongs to
# class K, 0 = any other class); All holds the multi-class label.
#
# The labels are sized from the feature table instead of a hard-coded 1000,
# so they always line up with the rows of Features.
NumObs <- nrow(Features)
Scores <- Label[seq_len(NumObs)]

# Fail fast on missing scores (also triggered when there are fewer labels
# than feature rows); a missing score cannot be assigned to a class.
stopifnot(!anyNA(Scores))

#Class 2
Label2 <- as.factor(as.numeric(Scores %in% c(3, 4)))

#Class 3
Label3 <- as.factor(as.numeric(Scores %in% c(5, 6)))

#Class 4
Label4 <- as.factor(as.numeric(Scores %in% c(7, 8)))

#Class 5
Label5 <- as.factor(as.numeric(Scores %in% c(9, 10)))

#All Labels
# Everything that is not in 5-10 falls into class 2.
# NOTE(review): that includes scores 1 and 2, which Label2 above does NOT
# mark as positive - confirm such scores cannot occur in the data.
All <- rep(2, length(Scores))
All[Scores %in% c(5, 6)] <- 3
All[Scores %in% c(7, 8)] <- 4
All[Scores %in% c(9, 10)] <- 5
All <- as.factor(All)
Transform Features to numeric.
# Coerce every feature column to numeric.
# lapply() walks over whatever columns Features actually has, so the step no
# longer depends on the table being exactly 1900 columns wide. Assigning into
# Features[] keeps the data frame (row names, column names) intact.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 1900 variables:
## $ amaz_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ arriv_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ bad_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ basic_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ beauti_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ befor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ best_jjs : num 1 0 0 0 0 0 0 0 0 0 ...
## $ big_jj : num 1 0 0 0 0 0 0 0 0 1 ...
## $ build_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ central_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ clean_jj : num 0 0 0 1 0 0 1 0 1 0 ...
## $ clear_jj : num 0 0 0 0 1 0 0 0 0 0 ...
## $ close_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ cold_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ difficult_jj : num 0 0 1 0 0 0 0 0 0 0 ...
## $ due_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ earl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ easi_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ english_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ enough_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ excel_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ extra_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ first_jj : num 0 1 0 0 0 0 0 0 0 0 ...
## $ free_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ fresh_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ friend_jj : num 0 0 0 0 0 0 0 1 0 0 ...
## $ front_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ full_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ general_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ good_jj : num 0 0 1 0 0 1 0 1 0 1 ...
## $ great_jj : num 0 1 0 1 0 1 0 0 0 0 ...
## $ guest_jjs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ high_jj : num 1 0 0 0 0 0 0 1 0 0 ...
## $ hot_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ huge_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ littl_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ locat_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ london_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ loud_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ main_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ major_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ modern_jj : num 0 0 0 0 0 1 0 0 0 0 ...
## $ much_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ new_jj : num 1 1 0 0 0 0 0 0 0 0 ...
## $ next_jj : num 1 0 0 1 0 0 0 0 0 0 ...
## $ nice_jj : num 0 0 1 1 0 0 0 0 0 0 ...
## $ nois_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ noisi_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ ok_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ old_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ onli_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ open_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ overal_jj : num 0 0 0 1 0 0 0 0 0 0 ...
## $ particular_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ perfect_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pillow_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pleasant_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ poor_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ public_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ quiet_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ realli_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ recept_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ safe_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ second_jj : num 0 1 0 1 0 0 0 0 0 0 ...
## $ select_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ servic_jj : num 0 0 0 0 0 0 0 0 0 1 ...
## $ short_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ shower_jjr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sleep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ small_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ spacious_jj : num 0 0 0 0 0 0 1 0 0 0 ...
## $ special_jj : num 1 0 0 0 0 0 0 0 0 0 ...
## $ standard_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ stay_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ steep_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ super_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ sure_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ underground_jj: num 0 0 0 0 0 0 0 0 0 0 ...
## $ upgrad_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ veri_jj : num 0 0 0 0 0 0 0 0 1 0 ...
## $ warm_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ whole_jj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ask_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bed_vbd : num 0 0 0 1 0 0 0 0 0 1 ...
## $ build_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ came_vbd : num 1 0 0 1 0 0 0 0 0 0 ...
## $ check_vb : num 1 0 0 0 0 0 0 0 0 0 ...
## $ definit_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ done_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ expens_vbz : num 0 0 0 0 0 0 0 0 0 0 ...
## $ gave_vbd : num 0 0 0 0 0 0 0 0 0 0 ...
## $ get_vb : num 0 0 0 0 1 0 0 0 0 0 ...
## $ given_vbn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ go_vb : num 0 0 1 0 0 0 0 0 0 0 ...
## $ go_vbp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ got_vbd : num 1 0 0 0 0 0 0 0 0 0 ...
## $ like_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ love_vb : num 0 0 0 0 0 0 0 0 0 0 ...
## $ made_vbd : num 1 0 0 0 0 0 1 0 0 0 ...
## [list output truncated]
# Train/test split ----
# Each row is independently assigned to partition 1 (train, p = 0.8) or
# partition 2 (test, p = 0.2). The seed is fixed so the split is reproducible.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
InTrain <- ind == 1
InTest <- ind == 2

# Feature rows
train <- Features[InTrain, ]
test <- Features[InTest, ]

# One-vs-all labels, split with the same row assignment as the features
train.labels.2 <- Label2[InTrain]
test.labels.2 <- Label2[InTest]
train.labels.3 <- Label3[InTrain]
test.labels.3 <- Label3[InTest]
train.labels.4 <- Label4[InTrain]
test.labels.4 <- Label4[InTest]
train.labels.5 <- Label5[InTrain]
test.labels.5 <- Label5[InTest]

# Multi-class labels (used later as the ground truth for the vote)
train.labels <- All[InTrain]
test.labels <- All[InTest]
# One-vs-all SVMs ----
# One binary classifier per class (2 to 5). Every model is fitted on the same
# training features with that class's 0/1 indicator attached as Score.
# Features are passed unscaled, and probability = TRUE is set so that
# predict() can return class probabilities later on.

# Class 2 vs rest
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

# Class 3 vs rest
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

# Class 4 vs rest
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

# Class 5 vs rest
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
# Score the held-out rows with each one-vs-all model, then pull out the
# per-class probability matrix that predict() attaches to its result as the
# "probabilities" attribute.

# Class 2 vs rest
P2 <- predict(SVM2, test, probability = TRUE)
Prob2 <- attributes(P2)[["probabilities"]]

# Class 3 vs rest
P3 <- predict(SVM3, test, probability = TRUE)
Prob3 <- attributes(P3)[["probabilities"]]

# Class 4 vs rest
P4 <- predict(SVM4, test, probability = TRUE)
Prob4 <- attributes(P4)[["probabilities"]]

# Class 5 vs rest
P5 <- predict(SVM5, test, probability = TRUE)
Prob5 <- attributes(P5)[["probabilities"]]
The SVM probability outputs are used for the voting process: for each observation, the class whose one-vs-all classifier assigns the highest positive-class probability is picked.
# Put the four probability matrices side by side (two columns per model) and
# give the columns readable "class: outcome" labels.
# NOTE(review): the labels are written out by hand and assume the column
# order predict() returned for this run - verify against colnames(Prob2),
# ..., colnames(Prob5) if the data or the seed changes.
Voting.df <- setNames(
  data.frame(Prob2, Prob3, Prob4, Prob5),
  c(
    "Class 2: 1", "Class2: 0",
    "Class 3: 0", "Class3: 1",
    "Class 4: 0", "Class4: 1",
    "Class 5: 0", "Class5: 1"
  )
)
head(Voting.df, n = 6L)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.033303970 0.9666960 0.7678272 0.23217284 0.6291194 0.3708806
## 14 0.007056059 0.9929439 0.9359528 0.06404716 0.6506787 0.3493213
## 16 0.016223951 0.9837760 0.9117140 0.08828601 0.7639694 0.2360306
## 26 0.041736180 0.9582638 0.8808376 0.11916242 0.6792852 0.3207148
## 28 0.036936294 0.9630637 0.8738221 0.12617789 0.6868954 0.3131046
## 29 0.018599871 0.9814001 0.7810517 0.21894827 0.6160649 0.3839351
## Class 5: 0 Class5: 1
## 5 0.8939539 0.1060461
## 14 0.4791655 0.5208345
## 16 0.4847509 0.5152491
## 26 0.6561744 0.3438256
## 28 0.5355801 0.4644199
## 29 0.8978492 0.1021508
# Keep, for every one-vs-all model, only the probability of its positive
# outcome ("1"), giving one column per class 2 to 5.
#
# The position of the "1" column inside each probability matrix is decided by
# the fitted model, not by this script, so it is looked up by column name
# rather than hard-coded. Voting.df holds the four matrices side by side in
# the order Prob2, Prob3, Prob4, Prob5, so each matrix's column offset is the
# total width of the matrices before it.
ProbList <- list(Prob2, Prob3, Prob4, Prob5)
ProbWidths <- vapply(ProbList, ncol, integer(1))
PositiveCol <- vapply(
  ProbList,
  function(p) match("1", colnames(p)),
  integer(1)
)
SEQ <- cumsum(c(0, ProbWidths))[seq_along(ProbList)] + PositiveCol

# Every model must expose a "1" column, and Voting.df must still have the
# layout the offsets assume.
stopifnot(!anyNA(SEQ), ncol(Voting.df) == sum(ProbWidths))

Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.033303970 0.23217284 0.3708806 0.1060461
## 14 0.007056059 0.06404716 0.3493213 0.5208345
## 16 0.016223951 0.08828601 0.2360306 0.5152491
## 26 0.041736180 0.11916242 0.3207148 0.3438256
## 28 0.036936294 0.12617789 0.3131046 0.4644199
## 29 0.018599871 0.21894827 0.3839351 0.1021508
# Vote: for every test row take the column with the largest positive-class
# probability. Columns 1 to 4 stand for classes 2 to 5, hence the + 1.
Index <- as.numeric(apply(Transformed.Voting.df, 1, which.max)) + 1

# Evaluation table: the four class probabilities, the voted class and the
# true multi-class label of each test row.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
## 2 3 4 5 Vote Actual
## 5 0.033303970 0.23217284 0.3708806 0.10604608 4 4
## 14 0.007056059 0.06404716 0.3493213 0.52083452 5 5
## 16 0.016223951 0.08828601 0.2360306 0.51524911 5 5
## 26 0.041736180 0.11916242 0.3207148 0.34382560 5 4
## 28 0.036936294 0.12617789 0.3131046 0.46441995 5 4
## 29 0.018599871 0.21894827 0.3839351 0.10215076 4 4
## 39 0.246772675 0.08929656 0.3175293 0.12999570 4 5
## 40 0.010284179 0.20931367 0.6169875 0.07990964 4 3
## 60 0.011293804 0.03966390 0.2641735 0.71153345 5 5
## 61 0.070508384 0.23053184 0.2469306 0.29094113 5 3
## 72 0.006869195 0.07564055 0.3382351 0.65814909 5 4
## 81 0.014440642 0.11395942 0.3466415 0.34128956 4 3
## 86 0.024278529 0.11677592 0.2545300 0.62505310 5 5
## 90 0.061205562 0.17086142 0.3336061 0.11789225 4 4
## 92 0.044456141 0.13408652 0.3418860 0.11168747 4 4
## 113 0.051235090 0.09378322 0.3792471 0.17971553 4 5
## 116 0.020018436 0.13642157 0.2920870 0.37558391 5 4
## 117 0.021560304 0.08428785 0.3304238 0.37433500 5 5
## 122 0.047787615 0.12152986 0.3631324 0.09079826 4 4
## 123 0.019410440 0.07269631 0.3359405 0.27026687 4 2
## 124 0.036128032 0.14892432 0.3430827 0.08243053 4 4
## 131 0.005344224 0.14262620 0.2946443 0.57200179 5 4
## 135 0.303304256 0.20915744 0.5000000 0.03700228 4 3
## 137 0.005119366 0.04300394 0.3556076 0.52435489 5 5
## 140 0.021500725 0.15802947 0.3424626 0.25003341 4 4
## 142 0.018277110 0.06672855 0.3370727 0.51873808 5 5
## 149 0.012509904 0.08352698 0.3580081 0.54182587 5 4
## 154 0.028418806 0.13407878 0.2411333 0.37416727 5 5
## 156 0.151594779 0.07626301 0.3644473 0.15716889 4 3
## 158 0.318966462 0.23310529 0.4791090 0.01684031 4 3
## 169 0.009143826 0.07594445 0.2138236 0.74626001 5 5
## 185 0.009462651 0.11441788 0.1835055 0.63989840 5 5
## 187 0.004525709 0.07257806 0.4561520 0.34122647 4 5
## 192 0.027089342 0.18285736 0.5294428 0.02687675 4 3
## 194 0.019332552 0.16642732 0.4252142 0.19936198 4 4
## 195 0.016870698 0.20760314 0.2653402 0.28195155 5 4
## 196 0.109546872 0.25963743 0.4318205 0.02810534 4 5
## 197 0.342037021 0.22310879 0.2431054 0.04632425 2 3
## 199 0.007668334 0.09434279 0.2353946 0.77421403 5 5
## 210 0.111605332 0.16100451 0.4238772 0.02719948 4 3
## 216 0.016807684 0.06521225 0.1519390 0.88524088 5 5
## 220 0.008330897 0.26367439 0.2932567 0.10778163 4 4
## 227 0.171267895 0.03456918 0.4358244 0.14745261 4 5
## 234 0.029777083 0.12558916 0.4216215 0.20109317 4 3
## 240 0.017580516 0.10584067 0.4163363 0.17888758 4 5
## 245 0.056453116 0.10971994 0.3182563 0.36891169 5 4
## 249 0.015588607 0.13497020 0.3097638 0.40710592 5 5
## 261 0.021711138 0.15409078 0.3312823 0.34091160 5 3
## 277 0.010193221 0.07281647 0.2489131 0.90434043 5 5
## 283 0.019053887 0.09118559 0.2725415 0.57877342 5 5
## 290 0.010500425 0.08233699 0.2103187 0.86580735 5 4
## 293 0.014041408 0.07524279 0.3798596 0.20388760 4 5
## 302 0.008813749 0.12858435 0.2875865 0.43712042 5 4
## 305 0.022969458 0.08460165 0.3560554 0.55904007 5 4
## 308 0.023303759 0.13354464 0.2438321 0.41432766 5 4
## 311 0.008442882 0.07593217 0.2265492 0.77905424 5 5
## 320 0.015594499 0.08865837 0.2484204 0.74203716 5 2
## 322 0.022651478 0.06524167 0.2340957 0.86493843 5 5
## 330 0.012163573 0.07537929 0.1984262 0.88608563 5 4
## 332 0.035214879 0.11410895 0.4286860 0.17701321 4 4
## 333 0.024231482 0.08701141 0.2780419 0.74247966 5 5
## 339 0.009968690 0.09447982 0.2719715 0.59184683 5 5
## 341 0.021750346 0.09115225 0.4694679 0.16483096 4 4
## 344 0.037733571 0.06152891 0.3312103 0.75297361 5 5
## 349 0.010487603 0.08314255 0.1866315 0.86622489 5 5
## 355 0.016253759 0.07299904 0.2096282 0.90318598 5 5
## 356 0.027228639 0.08750635 0.2920757 0.53318682 5 3
## 365 0.012371063 0.14226021 0.2811142 0.37101924 5 3
## 366 0.010252779 0.13244882 0.3151274 0.48038503 5 4
## 369 0.007229544 0.09211705 0.3186334 0.40434077 5 4
## 371 0.009954189 0.08711292 0.2198240 0.81632350 5 5
## 373 0.013013282 0.06740343 0.3120345 0.71383635 5 5
## 389 0.043210579 0.10298358 0.2831975 0.48958709 5 2
## 390 0.032920504 0.13391631 0.2671304 0.49352948 5 4
## 396 0.015556596 0.06755564 0.4263393 0.28721463 4 4
## 412 0.004237077 0.07989220 0.3879760 0.36880881 4 5
## 413 0.013447788 0.10720924 0.3736098 0.30245222 4 3
## 415 0.010087598 0.10473338 0.3536096 0.44193468 5 4
## 422 0.041065789 0.12977479 0.3496221 0.29693751 4 5
## 425 0.015759232 0.07238139 0.1981913 0.94085169 5 5
## 434 0.014414044 0.05949506 0.3290170 0.47330075 5 5
## 438 0.005974022 0.10382974 0.3086364 0.70888421 5 4
## 441 0.109984478 0.18328250 0.3232775 0.15768629 4 5
## 442 0.021904410 0.09576812 0.1904356 0.68052597 5 5
## 445 0.010645691 0.12209476 0.3918464 0.47363459 5 5
## 447 0.034292035 0.15471133 0.3557221 0.08171857 4 3
## 453 0.051609290 0.13413872 0.4176147 0.24029314 4 4
## 454 0.161494094 0.06524101 0.2443413 0.13834204 4 5
## 462 0.006108894 0.06231910 0.2155989 0.83161500 5 5
## 474 0.025483523 0.10227964 0.3064073 0.47941197 5 3
## 476 0.014839942 0.18131808 0.2778684 0.30903863 5 3
## 493 0.014281458 0.11678667 0.2435989 0.44604017 5 5
## 502 0.014547703 0.20557414 0.4139538 0.20225793 4 4
## 503 0.142650846 0.03601647 0.3105615 0.24761164 4 5
## 506 0.014957099 0.13811707 0.2670785 0.52192471 5 5
## 508 0.012197548 0.04959110 0.4006871 0.74546054 5 5
## 512 0.023532462 0.12133994 0.1979834 0.71579769 5 5
## 513 0.022313378 0.11059552 0.2033728 0.41931859 5 5
## 521 0.184671681 0.07430778 0.2750057 0.55265447 5 2
## 524 0.025315749 0.13809010 0.3233416 0.29302854 4 5
# Confusion matrix: actual classes in rows, voted classes in columns.
# Both margins are pinned to the full class set 2 to 5. Without this, a class
# that is never voted for (or never occurs in the test rows) is dropped from
# the table, which then stops being square and makes diag(CM) meaningless.
ClassLevels <- 2:5
CM <- table(
  factor(Evaluation$Actual, levels = ClassLevels),
  factor(Evaluation$Vote, levels = ClassLevels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 0 15 9
## 4 0 0 27 35
## 5 0 1 20 93
# Support-weighted evaluation metrics ----
# CM is built as table(actual, vote): rows are the actual classes and columns
# are the voted classes.

#Proportions
# Number of test rows overall and per actual class (the class supports).
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)
Support <- c(Length2, Length3, Length4, Length5)

# diag(CM) only pairs each actual class with the same voted class when the
# table is square with identical row and column labels.
stopifnot(
  identical(rownames(CM), colnames(CM)),
  nrow(CM) == length(Support)
)
Hits <- diag(CM)

#Accuracy
Accuracy <- sum(Hits) / sum(CM)

#Precision
# Correct votes for a class divided by ALL votes for that class, i.e. the
# column totals. A class that was never voted for contributes 0 instead of
# 0/0 = NaN.
VotedTotals <- colSums(CM)
PerClassPrecision <- ifelse(VotedTotals > 0, Hits / VotedTotals, 0)
Precision <- sum(PerClassPrecision * Support) / Overall

#Recall
# Correct votes for a class divided by all rows that truly belong to that
# class, i.e. the row totals. A class with no test rows contributes 0.
ActualTotals <- rowSums(CM)
PerClassRecall <- ifelse(ActualTotals > 0, Hits / ActualTotals, 0)
Recall <- sum(PerClassRecall * Support) / Overall

# Note: support-weighted recall always equals accuracy, because weighting
# each class's hit rate by its share of rows sums back to the overall hit rate.
Accuracy
## [1] 0.5769231
Precision
## [1] 0.4853141
Recall
## [1] 0.5769231