# Set the working directory for this analysis run.
# NOTE(review): this is an absolute, user-specific path, so the script is not
# portable as written. The file reads visible in this script use absolute
# paths; presumably this only matters for relative paths used elsewhere -
# confirm before removing.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/70")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Import Labels
# Read the source workbook and pull out its `Score` column, which the rest of
# the script bins into classes.
Labels <- read_excel("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx")
# Fail here with a clear message instead of letting a NULL `Label` surface as
# an obscure error in the label-construction step further down.
if (!"Score" %in% names(Labels)) {
  stop("Column 'Score' not found in the labels workbook", call. = FALSE)
}
Label <- Labels$Score
Import the TP feature set with a 70th percentile cut-off.
# Import Features
# Load the feature table from CSV and discard its first column.
# NOTE(review): the first column is presumably a row index written when the
# CSV was exported - confirm against the file.
Features <- read.csv(
  file = "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/70/Feature Set 1 70th TP.csv"
)
Features <- Features[, -1, drop = FALSE]
# Build the class labels from the raw scores.
#
# Score bins used throughout:
#   class 3 = scores 5-6, class 4 = scores 7-8, class 5 = scores 9-10,
#   class 2 = everything else in the multi-class label `All`
#   (the binary `Label2` flags only scores 3-4 as positive).
#
# Label2..Label5 are one-vs-rest indicators (1 = in the class, 0 = not);
# `All` holds the multi-class label. All five are factors.

# Use one label per feature row instead of a hard-coded row count.
n.obs <- nrow(Features)
Scores <- Label[seq_len(n.obs)]
# Missing scores cannot be binned; stop rather than mislabel them silently.
stopifnot(!anyNA(Scores))

# 1 where the score falls in `values`, 0 otherwise, returned as a factor.
binary.label <- function(scores, values) {
  as.factor(as.numeric(scores %in% values))
}

# Class 2
Label2 <- binary.label(Scores, c(3, 4))
# Class 3
Label3 <- binary.label(Scores, c(5, 6))
# Class 4
Label4 <- binary.label(Scores, c(7, 8))
# Class 5
Label5 <- binary.label(Scores, c(9, 10))

# All Labels: start everything at class 2, then overwrite the higher bins.
All <- rep(2, n.obs)
All[Scores %in% c(5, 6)] <- 3
All[Scores %in% c(7, 8)] <- 4
All[Scores %in% c(9, 10)] <- 5
# As Factor
All <- as.factor(All)
# Transform every feature column to numeric.
# `Features[] <-` keeps the data.frame structure and column names while
# replacing each column, and works for any number of columns.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 761 variables:
## $ abl : num 1 0 0 0 0 0 0 0 0 0 ...
## $ absolut : num 0 0 0 0 0 0 0 0 0 0 ...
## $ access : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accommod : num 0 0 0 0 0 0 0 0 0 0 ...
## $ actual : num 0 0 0 0 0 0 0 0 1 0 ...
## $ adequ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advanc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adverti : num 0 0 0 0 1 0 0 0 0 0 ...
## $ advic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ affect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ after : num 0 0 0 0 1 0 0 0 0 0 ...
## $ air : num 0 0 0 0 0 0 0 0 0 0 ...
## $ airport : num 0 0 0 0 0 0 0 0 0 0 ...
## $ all : num 0 0 0 0 0 0 0 0 0 0 ...
## $ allow : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alreadi : num 0 0 0 1 0 0 0 0 0 0 ...
## $ also : num 0 0 0 1 0 0 1 0 1 0 ...
## $ although : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amaz : num 0 0 0 0 1 0 0 0 0 0 ...
## $ amen : num 0 1 0 0 0 0 0 0 0 0 ...
## $ amsterdam : num 0 1 0 1 0 0 0 0 0 0 ...
## $ and : num 1 0 0 0 0 0 0 0 0 0 ...
## $ annoy : num 1 0 0 0 0 0 0 0 0 0 ...
## $ anoth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ answer : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anymor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyon : num 0 1 0 0 0 0 0 0 0 0 ...
## $ anyth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ apart : num 0 0 0 0 0 0 0 1 0 0 ...
## $ appear : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appreci : num 0 0 0 0 0 0 0 0 0 0 ...
## $ architectur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ area : num 0 0 0 1 0 0 0 0 0 0 ...
## $ arena : num 0 0 0 0 0 0 0 0 0 0 ...
## $ around : num 0 0 0 1 0 0 0 0 0 0 ...
## $ arriv : num 1 0 0 0 1 0 0 0 0 0 ...
## $ ask : num 1 0 1 0 0 0 0 0 0 0 ...
## $ aspect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ atmosph : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attract : num 0 0 0 0 0 0 0 0 0 0 ...
## $ avail : num 1 0 0 0 0 0 0 0 0 0 ...
## $ averag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ away : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awesom : num 0 0 0 0 0 1 0 0 0 0 ...
## $ back : num 0 1 1 0 0 0 0 0 0 0 ...
## $ bacon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bad : num 0 0 0 1 0 0 0 0 0 0 ...
## $ bag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bang : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bank : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bar : num 0 0 1 1 0 0 0 0 0 0 ...
## $ bare : num 0 0 0 0 0 0 0 0 0 0 ...
## $ basement : num 0 0 0 0 0 0 0 0 0 0 ...
## $ basic : num 0 0 1 0 0 0 0 0 0 0 ...
## $ bath : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bathroom : num 0 0 0 0 0 0 0 0 0 0 ...
## $ beauti : num 1 0 0 0 0 0 1 0 0 0 ...
## $ bed : num 0 0 0 1 0 0 1 0 0 1 ...
## $ bedroom : num 0 0 0 0 0 0 0 0 0 0 ...
## $ beer : num 0 0 0 0 0 0 0 0 0 0 ...
## $ believ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ best : num 1 0 0 0 0 0 0 0 0 0 ...
## $ better : num 0 0 0 0 0 0 0 0 0 0 ...
## $ big : num 1 0 0 0 0 0 0 0 0 1 ...
## $ bigger : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bike : num 0 0 0 0 0 0 0 0 0 0 ...
## $ birthday : num 0 0 0 0 1 0 0 0 0 0 ...
## $ bit : num 0 1 1 0 0 0 0 0 0 0 ...
## $ black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ block : num 0 0 0 0 0 0 0 0 0 0 ...
## $ board : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bonus : num 0 0 0 0 0 0 0 0 0 0 ...
## $ book : num 1 0 0 0 1 0 0 0 0 0 ...
## $ boutiqu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ box : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bread : num 0 0 0 0 0 0 0 0 0 0 ...
## $ breakfast : num 0 0 1 0 0 0 0 1 0 1 ...
## $ bright : num 0 0 0 0 0 0 1 0 0 0 ...
## $ brilliant : num 0 0 0 0 0 0 0 0 0 0 ...
## $ broken : num 1 0 0 0 0 0 0 0 0 0 ...
## $ brought : num 0 0 0 0 0 0 0 0 0 0 ...
## $ buffet : num 0 0 0 0 0 0 0 0 0 0 ...
## $ build : num 0 0 0 1 1 0 0 0 0 0 ...
## $ busi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ but : num 0 0 0 0 0 0 0 0 0 0 ...
## $ buy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ cafe : num 0 0 0 0 0 0 0 0 0 0 ...
## $ call : num 0 0 0 0 0 0 0 0 0 0 ...
## $ came : num 1 0 0 1 0 0 0 0 0 0 ...
## $ can : num 1 1 0 0 0 0 0 0 0 0 ...
## $ car : num 0 0 0 0 0 0 0 0 0 0 ...
## $ card : num 0 0 0 0 0 0 0 0 0 0 ...
## $ care : num 0 0 0 0 0 0 0 0 0 0 ...
## $ carpet : num 0 0 0 0 0 0 0 0 0 0 ...
## $ case : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
# Features
# Random train/test split: each row is assigned to partition 1 (train) or
# 2 (test) with probabilities 0.8 / 0.2. The seed makes the split reproducible.

# Every label vector is indexed with the same partition vector as the feature
# rows, so they must all have one entry per row.
stopifnot(
  length(Label2) == nrow(Features),
  length(Label3) == nrow(Features),
  length(Label4) == nrow(Features),
  length(Label5) == nrow(Features),
  length(All) == nrow(Features)
)

set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))

# Feature rows
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

# One-vs-rest labels for classes 2-5
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]

# Multi-class labels
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
# One-vs-rest SVMs: one binary classifier per class (2-5).
# Each training frame is the shared feature set plus that class's 0/1 label in
# a `Score` column. Models are fitted without feature scaling and with
# probability estimates enabled so predict() can return class probabilities.

# Training frames
train2 <- train
train2[["Score"]] <- train.labels.2
train3 <- train
train3[["Score"]] <- train.labels.3
train4 <- train
train4[["Score"]] <- train.labels.4
train5 <- train
train5[["Score"]] <- train.labels.5

# Model fits
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)

# Predictions on the held-out rows, followed by the per-class probability
# matrix that predict() attaches as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, which = "probabilities")
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, which = "probabilities")
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, which = "probabilities")
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, which = "probabilities")
Use each one-vs-rest classifier's predicted probability for its positive class as the input to the voting procedure. The class with the highest probability is chosen.
# Combine the four probability matrices side by side (two columns each).
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)

# Label the columns from the probability matrices' own column names ("0"/"1")
# rather than assuming a fixed order: the order of the two columns is not the
# same for every classifier.
# The "Class k: " / "Classk: " spacing of the original labels is kept on
# purpose so existing output and any name-based lookups stay unchanged.
vote.column.labels <- function(class.id, prob) {
  stopifnot(ncol(prob) == 2)
  prefixes <- c(
    paste0("Class ", class.id, ": "),
    paste0("Class", class.id, ": ")
  )
  paste0(prefixes, colnames(prob))
}
colnames(Voting.df) <- c(
  vote.column.labels(2, Prob2),
  vote.column.labels(3, Prob3),
  vote.column.labels(4, Prob4),
  vote.column.labels(5, Prob5)
)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.050198745 0.9498013 0.7912596 0.20874038 0.6506548 0.3493452
## 14 0.006137467 0.9938625 0.9262031 0.07379692 0.6999023 0.3000977
## 16 0.011076573 0.9889234 0.9279990 0.07200104 0.7930228 0.2069772
## 26 0.053168843 0.9468312 0.9270521 0.07294790 0.6098518 0.3901482
## 28 0.050742577 0.9492574 0.8603031 0.13969686 0.6802491 0.3197509
## 29 0.021487153 0.9785128 0.8334554 0.16654460 0.5869645 0.4130355
## Class 5: 0 Class5: 1
## 5 0.9192999 0.08070014
## 14 0.3888130 0.61118703
## 16 0.3739708 0.62602924
## 26 0.7076255 0.29237448
## 28 0.6362310 0.36376898
## 29 0.8813972 0.11860281
# Keep only the positive-class ("1") probability of each one-vs-rest model.
# The position of that column inside each probability matrix is looked up by
# name instead of being hard-coded, because the column order can differ
# between classifiers.
positive.column <- function(prob) {
  pos <- match("1", colnames(prob))
  if (is.na(pos)) {
    stop("No probability column named '1' found", call. = FALSE)
  }
  pos
}
# Voting.df holds Prob2, Prob3, Prob4, Prob5 side by side, so each matrix's
# columns start after the combined width of the matrices before it.
column.offsets <- cumsum(c(0, ncol(Prob2), ncol(Prob3), ncol(Prob4)))
SEQ <- column.offsets + c(
  positive.column(Prob2),
  positive.column(Prob3),
  positive.column(Prob4),
  positive.column(Prob5)
)
Transformed.Voting.df <- Voting.df[SEQ]
# Columns are named after the class each probability votes for.
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.050198745 0.20874038 0.3493452 0.08070014
## 14 0.006137467 0.07379692 0.3000977 0.61118703
## 16 0.011076573 0.07200104 0.2069772 0.62602924
## 26 0.053168843 0.07294790 0.3901482 0.29237448
## 28 0.050742577 0.13969686 0.3197509 0.36376898
## 29 0.021487153 0.16654460 0.4130355 0.11860281
# Voting: for every test row pick the class whose one-vs-rest probability is
# largest (first maximum on ties, as which.max does), then store the vote next
# to the actual class.
Evaluation <- Transformed.Voting.df
winning.column <- apply(
  as.matrix(Transformed.Voting.df),
  MARGIN = 1,
  which.max
)
# Map the winning column to its class via the column name instead of relying
# on a fixed "position + 1" offset.
Index <- as.numeric(colnames(Transformed.Voting.df)[unname(winning.column)])
stopifnot(!anyNA(Index))
Evaluation$Vote <- Index
Evaluation$Actual <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.050198745 0.20874038 0.3493452 0.08070014 4 4
## 14 0.006137467 0.07379692 0.3000977 0.61118703 5 5
## 16 0.011076573 0.07200104 0.2069772 0.62602924 5 5
## 26 0.053168843 0.07294790 0.3901482 0.29237448 4 4
## 28 0.050742577 0.13969686 0.3197509 0.36376898 5 4
## 29 0.021487153 0.16654460 0.4130355 0.11860281 4 4
## 39 0.168626076 0.07396024 0.3090875 0.12079657 4 5
## 40 0.014448429 0.20684732 0.6817530 0.05090089 4 3
## 60 0.010357915 0.04181923 0.2458085 0.77772015 5 5
## 61 0.031473386 0.27819943 0.3051827 0.31082314 5 3
## 72 0.003056225 0.08073608 0.3420918 0.59025396 5 4
## 81 0.011847641 0.10354846 0.3858845 0.32949379 4 3
## 86 0.028066354 0.13713380 0.2054629 0.63157921 5 5
## 90 0.081136390 0.17727033 0.3424094 0.11127708 4 4
## 92 0.029478535 0.15031042 0.3313006 0.13391321 4 4
## 113 0.064267643 0.07144724 0.3308216 0.28609695 4 5
## 116 0.030524084 0.13231247 0.2136585 0.39977453 5 4
## 117 0.028865129 0.07738160 0.3122476 0.37684643 5 5
## 122 0.043542123 0.14543686 0.3603615 0.07020024 4 4
## 123 0.018052530 0.08304918 0.3654577 0.24692148 4 2
## 124 0.027133870 0.18127525 0.3423944 0.06650750 4 4
## 131 0.004400150 0.11900219 0.3447399 0.56191308 5 4
## 135 0.159253483 0.20666579 0.4459900 0.04119318 4 3
## 137 0.002710185 0.06067866 0.3619807 0.50000000 5 5
## 140 0.015168784 0.16779732 0.3469577 0.21981968 4 4
## 142 0.011715036 0.07486885 0.3204670 0.49132878 5 5
## 149 0.015667191 0.06254969 0.3445750 0.51204164 5 4
## 154 0.035445885 0.13207655 0.2333623 0.34620470 5 5
## 156 0.150723556 0.07854195 0.3387122 0.18557229 4 3
## 158 0.396546257 0.22029512 0.4386574 0.02205837 4 3
## 169 0.006034343 0.07841304 0.2398375 0.75279896 5 5
## 185 0.008180010 0.13547951 0.1892871 0.52439115 5 5
## 187 0.003936755 0.06677733 0.4573341 0.33951549 4 5
## 192 0.025224635 0.18009246 0.5954588 0.02565033 4 3
## 194 0.021966644 0.18667631 0.4102105 0.18962034 4 4
## 195 0.014201540 0.25573718 0.2889290 0.22339302 4 4
## 196 0.135281025 0.14874912 0.4565876 0.03669117 4 5
## 197 0.368076412 0.18724325 0.2650021 0.05280078 2 3
## 199 0.005561729 0.08955217 0.2441305 0.84199173 5 5
## 210 0.107085853 0.16672663 0.3573707 0.02778787 4 3
## 216 0.016865921 0.06150811 0.1461455 0.91004898 5 5
## 220 0.012206240 0.26917141 0.3083345 0.10149769 4 4
## 227 0.122204139 0.04937165 0.5093372 0.08659952 4 5
## 234 0.029320159 0.11369993 0.4791119 0.19059053 4 3
## 240 0.019531896 0.07883463 0.4448295 0.19950609 4 5
## 245 0.048634574 0.11204877 0.3059943 0.30610136 5 4
## 249 0.015765564 0.13228289 0.3334327 0.38386579 5 5
## 261 0.026592191 0.18127966 0.3322931 0.31167848 4 3
## 277 0.009729815 0.07359786 0.2488677 0.91648487 5 5
## 283 0.013830882 0.09807656 0.2688636 0.68580928 5 5
## 290 0.009909005 0.07547284 0.2079586 0.88726743 5 4
## 293 0.013710398 0.07168669 0.3862232 0.21398310 4 5
## 302 0.008970155 0.12079481 0.2870276 0.38412595 5 4
## 305 0.020423727 0.09201249 0.3881990 0.50000000 5 4
## 308 0.027518619 0.14567372 0.2402644 0.35303899 5 4
## 311 0.008242433 0.07613708 0.2295368 0.80338534 5 5
## 320 0.015285975 0.08730620 0.2411326 0.79255875 5 2
## 322 0.022275895 0.07195280 0.2051618 0.89285073 5 5
## 330 0.013340034 0.07612833 0.1936597 0.84320721 5 4
## 332 0.031752461 0.11026949 0.4031462 0.17612703 4 4
## 333 0.022186863 0.08461833 0.2802216 0.79821162 5 5
## 339 0.009032803 0.09966750 0.2598537 0.67589206 5 5
## 341 0.021650891 0.10214553 0.4376644 0.17525968 4 4
## 344 0.026017606 0.07409970 0.3230614 0.73220414 5 5
## 349 0.009666410 0.06983383 0.1631300 0.91159963 5 5
## 355 0.012874843 0.05971465 0.2051916 0.93805389 5 5
## 356 0.030971367 0.08263984 0.2887584 0.59916964 5 3
## 365 0.012083859 0.14537126 0.2851206 0.36030776 5 3
## 366 0.007219614 0.12237835 0.3307716 0.47652923 5 4
## 369 0.007605116 0.08335856 0.3387505 0.42480819 5 4
## 371 0.010750562 0.08328880 0.2175404 0.85238561 5 5
## 373 0.013249449 0.07059719 0.3057463 0.78006846 5 5
## 389 0.048953483 0.10674804 0.2902172 0.38591078 5 2
## 390 0.034028726 0.13484410 0.2709804 0.41181560 5 4
## 396 0.011274034 0.06924667 0.3793796 0.36975263 4 4
## 412 0.005799538 0.07806326 0.4071543 0.31900546 4 5
## 413 0.007429899 0.10571032 0.3098268 0.30213440 4 3
## 415 0.007899690 0.13131778 0.3592732 0.40994287 5 4
## 422 0.019433376 0.13165150 0.3987179 0.19988167 4 5
## 425 0.017004081 0.05987807 0.1846781 0.95042828 5 5
## 434 0.014328643 0.05215296 0.3056037 0.48364281 5 5
## 438 0.004424970 0.08337654 0.2869318 0.72971015 5 4
## 441 0.061061644 0.17567664 0.3137047 0.14597876 4 5
## 442 0.026916043 0.07972993 0.1954781 0.71074065 5 5
## 445 0.009438971 0.09574750 0.4150343 0.46773924 5 5
## 447 0.034879376 0.14867734 0.2973834 0.09817384 4 3
## 453 0.039027959 0.16239374 0.4542491 0.18808054 4 4
## 454 0.122231150 0.09870721 0.2138307 0.13872214 4 5
## 462 0.005518763 0.05332820 0.2237572 0.83750875 5 5
## 474 0.047402821 0.08704512 0.3188089 0.41881481 5 3
## 476 0.011660994 0.16870266 0.3072160 0.28496362 4 3
## 493 0.008782093 0.12360494 0.2640248 0.45378642 5 5
## 502 0.015272914 0.21608205 0.4743104 0.15075794 4 4
## 503 0.153199095 0.04112550 0.2382604 0.20559390 4 5
## 506 0.016125125 0.13374001 0.2585934 0.59174023 5 5
## 508 0.013172311 0.06102079 0.3673410 0.77864044 5 5
## 512 0.023386386 0.09792667 0.2188649 0.69878722 5 5
## 513 0.021024112 0.12532958 0.1939403 0.37760604 5 5
## 521 0.203227542 0.09024426 0.2533483 0.48934188 5 2
## 524 0.054576764 0.11612636 0.2925148 0.28111587 4 5
# Confusion matrix: rows are the actual classes, columns are the voted classes.
# Only vote values that actually occur get a column, so the table is not
# necessarily square.
CM <- table(Evaluation[["Actual"]], Evaluation[["Vote"]])
print(CM)
##
## 2 4 5
## 2 0 3 4
## 3 1 17 7
## 4 0 29 33
## 5 0 21 93
# Proportions
Overall <- length(Evaluation$Actual)
Length2 <- length(which(Evaluation$Actual == 2))
Length3 <- length(which(Evaluation$Actual == 3))
Length4 <- length(which(Evaluation$Actual == 4))
Length5 <- length(which(Evaluation$Actual == 5))

# Square confusion matrix (rows = actual, columns = vote) over every class that
# appears as an actual label or as a vote. A class that is never voted for
# still gets an all-zero column, so row i and column i always refer to the
# same class and no positional indexing is needed.
actual.chr <- as.character(Evaluation$Actual)
vote.chr <- as.character(Evaluation$Vote)
class.levels <- sort(unique(c(actual.chr, vote.chr)))
CM.square <- table(
  factor(actual.chr, levels = class.levels),
  factor(vote.chr, levels = class.levels)
)
Hits <- diag(CM.square)   # correctly classified rows per class
Rows <- rowSums(CM.square) # actual observations per class (support)
Col <- colSums(CM.square)  # votes cast per class

# Accuracy
Accuracy <- sum(Hits) / sum(CM.square)

# Precision: correct votes / all votes for the class (column totals).
# A class that never receives a vote has an undefined precision; it is
# counted as 0.
Precision.by.class <- ifelse(Col > 0, Hits / Col, 0)
Precision2 <- unname(Precision.by.class["2"])
Precision3 <- unname(Precision.by.class["3"])
Precision4 <- unname(Precision.by.class["4"])
Precision5 <- unname(Precision.by.class["5"])
# Average over classes, weighted by each class's actual support.
Precision <- sum(Precision.by.class * Rows) / sum(Rows)

# Recall: correct votes / all actual members of the class (row totals).
Recall.by.class <- ifelse(Rows > 0, Hits / Rows, 0)
Recall2 <- unname(Recall.by.class["2"])
Recall3 <- unname(Recall.by.class["3"])
Recall4 <- unname(Recall.by.class["4"])
Recall5 <- unname(Recall.by.class["5"])
# Support-weighted recall is algebraically equal to the accuracy.
Recall <- sum(Recall.by.class * Rows) / sum(Rows)

Accuracy
## [1] 0.5865385
Precision
## [1] 0.4955412
Recall
## [1] 0.5865385