# Switch the working directory to this experiment's folder.
# NOTE(review): absolute, machine-specific path; the reads visible in this
# script use absolute paths of their own, so confirm whether anything still
# relies on this before changing or removing it.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/10")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Import labels: read the spreadsheet with the actual labels and keep its
# Score column as the raw label vector.
labels_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
Labels <- read_excel(path = labels_path)
Label <- Labels$Score
Import the TP feature set with a 10-30th percentile cut-off.
# Import features: read the feature table from CSV and drop its first column.
# NOTE(review): the first column is presumably a row index written by the
# export step -- confirm against the CSV.
features_path <- "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/10/Feature Set 1 10th TP.csv"
Features <- read.csv(file = features_path)[-1]
# Class 2: binary indicator, 1 when the score is 3 or 4 and 0 otherwise,
# stored as a factor.
# One label is built per feature row (instead of a hard-coded 1000), because
# the labels are later subset with an index vector of length nrow(Features).
scores_class2 <- Label[seq_len(nrow(Features))]
# A missing score (or fewer scores than feature rows) stops the script here
# rather than silently turning into a negative example.
stopifnot(!anyNA(scores_class2))
Label2 <- as.factor(as.numeric(scores_class2 %in% c(3, 4)))
# Class 3: binary indicator, 1 when the score is 5 or 6 and 0 otherwise,
# stored as a factor.
# One label is built per feature row (instead of a hard-coded 1000), because
# the labels are later subset with an index vector of length nrow(Features).
scores_class3 <- Label[seq_len(nrow(Features))]
# A missing score (or fewer scores than feature rows) stops the script here
# rather than silently turning into a negative example.
stopifnot(!anyNA(scores_class3))
Label3 <- as.factor(as.numeric(scores_class3 %in% c(5, 6)))
# Class 4: binary indicator, 1 when the score is 7 or 8 and 0 otherwise,
# stored as a factor.
# One label is built per feature row (instead of a hard-coded 1000), because
# the labels are later subset with an index vector of length nrow(Features).
scores_class4 <- Label[seq_len(nrow(Features))]
# A missing score (or fewer scores than feature rows) stops the script here
# rather than silently turning into a negative example.
stopifnot(!anyNA(scores_class4))
Label4 <- as.factor(as.numeric(scores_class4 %in% c(7, 8)))
# Class 5: binary indicator, 1 when the score is 9 or 10 and 0 otherwise,
# stored as a factor.
# One label is built per feature row (instead of a hard-coded 1000), because
# the labels are later subset with an index vector of length nrow(Features).
scores_class5 <- Label[seq_len(nrow(Features))]
# A missing score (or fewer scores than feature rows) stops the script here
# rather than silently turning into a negative example.
stopifnot(!anyNA(scores_class5))
Label5 <- as.factor(as.numeric(scores_class5 %in% c(9, 10)))
# All labels: collapse the raw score into one multi-class label per row.
#   9 or 10 -> 5,  7 or 8 -> 4,  5 or 6 -> 3,  anything else -> 2
# One label is built per feature row (instead of a hard-coded 1000), because
# the labels are later subset with an index vector of length nrow(Features).
scores_all <- Label[seq_len(nrow(Features))]
# A missing score (or fewer scores than feature rows) stops the script here
# rather than silently falling into the catch-all class 2.
stopifnot(!anyNA(scores_all))
All <- rep(2, length(scores_all))
All[scores_all %in% c(5, 6)] <- 3
All[scores_all %in% c(7, 8)] <- 4
All[scores_all %in% c(9, 10)] <- 5
# As factor
All <- as.factor(All)
# Convert every feature column to numeric.
# Iterates over whatever columns are present instead of a hard-coded count, so
# the step keeps working when the feature set has a different width.
# `Features[] <-` keeps the data.frame structure and column names.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 1462 variables:
## $ abit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ abl : num 1 0 0 0 0 0 0 0 0 0 ...
## $ absolut : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accept : num 0 0 0 0 0 0 0 0 0 0 ...
## $ access : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accommod : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ach : num 0 0 0 0 0 0 0 0 0 0 ...
## $ acknowledg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ across : num 0 0 0 0 0 0 0 0 0 0 ...
## $ actual : num 0 0 0 0 0 0 0 0 1 0 ...
## $ add : num 0 0 0 0 0 0 0 0 0 0 ...
## $ addit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adequ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adjac : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adult : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advanc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adverti : num 0 0 0 0 1 0 0 0 0 0 ...
## $ advi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ affect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afford : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afraid : num 0 0 0 1 0 0 0 0 0 0 ...
## $ after : num 0 0 0 0 1 0 0 0 0 0 ...
## $ afternoon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ago : num 0 0 0 0 0 0 0 0 0 0 ...
## $ air : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aircon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aircondit : num 0 0 0 0 0 0 0 0 0 1 ...
## $ airi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ airport : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alarm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ albert : num 0 0 0 0 0 0 0 0 0 0 ...
## $ all : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alloc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ allow : num 0 0 0 0 0 0 0 0 0 0 ...
## $ almost : num 0 0 0 0 0 0 0 0 0 0 ...
## $ along : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alreadi : num 0 0 0 1 0 0 0 0 0 0 ...
## $ also : num 0 0 0 1 0 0 1 0 1 0 ...
## $ altern : num 0 0 0 0 0 0 0 0 0 0 ...
## $ although : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amaz : num 0 0 0 0 1 0 0 0 0 0 ...
## $ ambienc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amen : num 0 1 0 0 0 0 0 0 0 0 ...
## $ american : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amount : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amsterdam : num 0 1 0 1 0 0 0 0 0 0 ...
## $ and : num 1 0 0 0 0 0 0 0 0 0 ...
## $ angri : num 1 0 0 1 0 0 0 0 0 0 ...
## $ anna : num 0 0 0 0 0 0 0 0 0 0 ...
## $ annoy : num 1 0 0 0 0 0 0 0 0 0 ...
## $ anoth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ answer : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anymor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyon : num 0 1 0 0 0 0 0 0 0 0 ...
## $ anyth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anywh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ apart : num 0 0 0 0 0 0 0 1 0 0 ...
## $ apolog : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appal : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appeal : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appear : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appoint : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appreci : num 0 0 0 0 0 0 0 0 0 0 ...
## $ approach : num 0 0 0 0 0 0 0 0 0 0 ...
## $ apt : num 0 0 0 0 0 0 0 0 0 0 ...
## $ architectur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ area : num 0 0 0 1 0 0 0 0 0 0 ...
## $ aren : num 0 0 0 0 0 0 0 0 0 0 ...
## $ arena : num 0 0 0 0 0 0 0 0 0 0 ...
## $ around : num 0 0 0 1 0 0 0 0 0 0 ...
## $ arrang : num 0 0 0 0 0 0 0 0 0 0 ...
## $ arriv : num 1 0 0 0 1 0 0 0 0 0 ...
## $ art : num 0 0 0 0 0 0 0 0 0 0 ...
## $ asid : num 0 1 0 0 0 0 0 0 0 0 ...
## $ ask : num 1 0 1 0 0 0 0 0 0 0 ...
## $ asleep : num 1 0 0 0 0 0 0 0 0 0 ...
## $ aspect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ assist : num 0 0 0 0 0 0 0 0 0 0 ...
## $ assum : num 0 0 0 0 0 0 0 0 0 0 ...
## $ assur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ atm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ atmosph : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attend : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attitud : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attract : num 0 0 0 0 0 0 0 0 0 0 ...
## $ avail : num 1 0 0 0 0 0 0 0 0 0 ...
## $ averag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ avoid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ away : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awesom : num 0 0 0 0 0 1 0 0 0 0 ...
## [list output truncated]
# Random hold-out split of the feature rows and every label vector.
# Each row is independently drawn into partition 1 (probability 0.8) or
# partition 2 (probability 0.2); the seed makes the draw reproducible.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
in_train <- ind == 1
in_test <- ind == 2

# Feature rows
train <- Features[in_train, ]
test <- Features[in_test, ]

# Binary one-vs-rest labels
train.labels.2 <- Label2[in_train]
test.labels.2 <- Label2[in_test]
train.labels.3 <- Label3[in_train]
test.labels.3 <- Label3[in_test]
train.labels.4 <- Label4[in_train]
test.labels.4 <- Label4[in_test]
train.labels.5 <- Label5[in_train]
test.labels.5 <- Label5[in_test]

# Multi-class labels
train.labels <- All[in_train]
test.labels <- All[in_test]
# Fit one binary SVM per class on the training rows.
# Each model gets its own copy of the training features with the matching
# binary label attached as `Score`; features are not rescaled and probability
# estimates are enabled. The models are fitted in the order 2, 3, 4, 5.

# SVM2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(
  Score ~ .,
  data = train2,
  scale = FALSE,
  probability = TRUE
)

# SVM3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(
  Score ~ .,
  data = train3,
  scale = FALSE,
  probability = TRUE
)

# SVM4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(
  Score ~ .,
  data = train4,
  scale = FALSE,
  probability = TRUE
)

# SVM5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(
  Score ~ .,
  data = train5,
  scale = FALSE,
  probability = TRUE
)
# Score the test rows with each binary model and pull out the matrix stored
# in the "probabilities" attribute of every prediction.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")
Use probabilities as an input for the voting procedure.
# Place the four per-class probability matrices side by side.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# The column labels assigned below are fixed strings that assert which column
# is level "1" and which is level "0" for every model. The matrices carry the
# real level names as column names, so check them first: if a different split
# or data set changes the order, fail here instead of mislabelling columns.
labelled_levels <- c("1", "0", "0", "1", "0", "1", "0", "1")
returned_levels <- c(
  colnames(Prob2), colnames(Prob3), colnames(Prob4), colnames(Prob5)
)
stopifnot(identical(returned_levels, labelled_levels))
colnames(Voting.df) <- c("Class 2: 1","Class2: 0","Class 3: 0","Class3: 1","Class 4: 0","Class4: 1","Class 5: 0","Class5: 1")
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.060719120 0.9392809 0.7713557 0.22864428 0.6275775 0.3724225
## 14 0.006332847 0.9936672 0.9334676 0.06653235 0.6964144 0.3035856
## 16 0.010132759 0.9898672 0.9264671 0.07353291 0.7902200 0.2097800
## 26 0.068531945 0.9314681 0.9188981 0.08110194 0.6331179 0.3668821
## 28 0.049043421 0.9509566 0.8624833 0.13751674 0.6835997 0.3164003
## 29 0.017917369 0.9820826 0.8052007 0.19479926 0.5810587 0.4189413
## Class 5: 0 Class5: 1
## 5 0.9227214 0.07727862
## 14 0.3734984 0.62650156
## 16 0.3685950 0.63140500
## 26 0.7005314 0.29946859
## 28 0.6276657 0.37233427
## 29 0.8874327 0.11256734
# Keep, for each class, only the probability of the positive level "1".
# Voting.df holds the columns of Prob2, Prob3, Prob4 and Prob5 in that order,
# so the wanted position is each matrix's starting offset plus the position of
# its column named "1". Looking the column up by name (instead of fixed
# positions) keeps the selection right if the 0/1 column order ever differs.
prob_blocks <- list(Prob2, Prob3, Prob4, Prob5)
block_widths <- vapply(prob_blocks, ncol, integer(1))
block_starts <- cumsum(c(0L, block_widths))[seq_along(prob_blocks)]
positive_col <- vapply(
  prob_blocks,
  function(p) match("1", colnames(p)),
  integer(1)
)
# Every model must expose a "1" column; otherwise there is nothing to vote on.
stopifnot(!anyNA(positive_col))
SEQ <- block_starts + positive_col
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2","3","4","5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.060719120 0.22864428 0.3724225 0.07727862
## 14 0.006332847 0.06653235 0.3035856 0.62650156
## 16 0.010132759 0.07353291 0.2097800 0.63140500
## 26 0.068531945 0.08110194 0.3668821 0.29946859
## 28 0.049043421 0.13751674 0.3164003 0.37233427
## 29 0.017917369 0.19479926 0.4189413 0.11256734
# Voting: for every test row take the position of the largest positive-class
# probability. Columns are ordered "2","3","4","5", so position + 1 is the
# voted class. The vote and the actual multi-class label are stored next to
# the probabilities.
winning_position <- apply(Transformed.Voting.df, MARGIN = 1, FUN = which.max)
Index <- as.numeric(winning_position) + 1
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
## 2 3 4 5 Vote Actual
## 5 0.060719120 0.22864428 0.3724225 0.07727862 4 4
## 14 0.006332847 0.06653235 0.3035856 0.62650156 5 5
## 16 0.010132759 0.07353291 0.2097800 0.63140500 5 5
## 26 0.068531945 0.08110194 0.3668821 0.29946859 4 4
## 28 0.049043421 0.13751674 0.3164003 0.37233427 5 4
## 29 0.017917369 0.19479926 0.4189413 0.11256734 4 4
## 39 0.226342515 0.11433187 0.3456262 0.12051710 4 5
## 40 0.017749073 0.22209291 0.6678924 0.04949230 4 3
## 60 0.011693348 0.04159902 0.2620006 0.78093919 5 5
## 61 0.039318476 0.30737606 0.2975770 0.30612058 3 3
## 72 0.004469306 0.07829862 0.3293939 0.58174245 5 4
## 81 0.013485745 0.10062743 0.3875409 0.32235260 4 3
## 86 0.025173853 0.12467652 0.2208405 0.63129873 5 5
## 90 0.079688152 0.16764719 0.3396656 0.11109406 4 4
## 92 0.031358774 0.16839193 0.3209157 0.12988879 4 4
## 113 0.056425664 0.06324450 0.3457549 0.29536740 4 5
## 116 0.029047482 0.13562505 0.2164791 0.40184236 5 4
## 117 0.022722076 0.07828635 0.3128063 0.37591822 5 5
## 122 0.037941413 0.14613976 0.3616944 0.07257338 4 4
## 123 0.018243466 0.07643456 0.3629782 0.24827252 4 2
## 124 0.029313522 0.21166707 0.3455539 0.06301302 4 4
## 131 0.004269490 0.13263824 0.3115432 0.56026315 5 4
## 135 0.176813923 0.22504112 0.4932368 0.03614343 4 3
## 137 0.002941211 0.06051015 0.3510127 0.50000000 5 5
## 140 0.015115373 0.17076920 0.3372815 0.22389708 4 4
## 142 0.011290302 0.08260615 0.3117485 0.48748413 5 5
## 149 0.016651423 0.06776552 0.3599204 0.50603014 5 4
## 154 0.030418911 0.12882384 0.2485209 0.35029694 5 5
## 156 0.215798253 0.08881103 0.3515711 0.16750402 4 3
## 158 0.329360216 0.25657426 0.4573377 0.02103792 4 3
## 169 0.006054609 0.07864171 0.2433113 0.74806974 5 5
## 185 0.009340180 0.12911276 0.1942001 0.52752693 5 5
## 187 0.004402003 0.06726239 0.4684700 0.33651861 4 5
## 192 0.019783195 0.23533567 0.5614581 0.02531717 4 3
## 194 0.015901540 0.18783591 0.4144194 0.18765891 4 4
## 195 0.013964384 0.23640444 0.2897635 0.22537757 4 4
## 196 0.150094180 0.16763864 0.4920486 0.03508336 4 5
## 197 0.388348204 0.19290411 0.2737666 0.04996482 2 3
## 199 0.005280535 0.08621173 0.2431860 0.84620152 5 5
## 210 0.099445969 0.14197552 0.4122735 0.02521855 4 3
## 216 0.016903820 0.06187738 0.1493248 0.91139099 5 5
## 220 0.012341666 0.29484706 0.3191344 0.09537202 4 4
## 227 0.105123717 0.06048995 0.5375615 0.08843665 4 5
## 234 0.030660956 0.12861921 0.4659388 0.17999524 4 3
## 240 0.019570433 0.08063124 0.4573038 0.19815581 4 5
## 245 0.045430699 0.11332974 0.3114897 0.30907821 4 4
## 249 0.015296104 0.12311512 0.3354832 0.38327979 5 5
## 261 0.024834312 0.16498748 0.3173907 0.31444332 4 3
## 277 0.010302206 0.07226569 0.2444348 0.91516205 5 5
## 283 0.017201635 0.09453889 0.2629123 0.67669898 5 5
## 290 0.009293342 0.07145557 0.2066114 0.88659642 5 4
## 293 0.014709553 0.06701530 0.3947714 0.20800119 4 5
## 302 0.009047436 0.12155207 0.2816959 0.38434127 5 4
## 305 0.018408029 0.09372087 0.3776387 0.50000000 5 4
## 308 0.024151760 0.13814472 0.2419496 0.35903463 5 4
## 311 0.008198472 0.07020128 0.2309308 0.80538692 5 5
## 320 0.014644361 0.08135214 0.2396642 0.79198194 5 2
## 322 0.020698047 0.06717452 0.2061664 0.89324217 5 5
## 330 0.012660500 0.07264244 0.1976266 0.84578430 5 4
## 332 0.030270027 0.10719641 0.4060413 0.17998966 4 4
## 333 0.020029206 0.07946882 0.2748141 0.79696074 5 5
## 339 0.009308403 0.09490192 0.2581795 0.67981961 5 5
## 341 0.019927286 0.09791916 0.4259198 0.17985423 4 4
## 344 0.025086695 0.07010483 0.3190742 0.73149307 5 5
## 349 0.009698833 0.06714504 0.1653868 0.91087586 5 5
## 355 0.012440108 0.05720770 0.2025275 0.93677900 5 5
## 356 0.027905673 0.08601825 0.2882003 0.60221850 5 3
## 365 0.011277395 0.13375912 0.2824516 0.36689528 5 3
## 366 0.008325038 0.12266957 0.3121282 0.47491721 5 4
## 369 0.007398590 0.07433960 0.3222977 0.43403714 5 4
## 371 0.009651876 0.07981347 0.2168568 0.85122560 5 5
## 373 0.013285824 0.06913943 0.3076453 0.78181093 5 5
## 389 0.043015673 0.10230682 0.2857322 0.38967938 5 2
## 390 0.035177997 0.12293714 0.2685294 0.41095283 5 4
## 396 0.010892282 0.06387390 0.3729171 0.37706690 5 4
## 412 0.005900847 0.07849760 0.4010352 0.33587574 4 5
## 413 0.007814346 0.10614232 0.3326566 0.30850152 4 3
## 415 0.009471610 0.12050424 0.3331822 0.41111998 5 4
## 422 0.020155630 0.15441490 0.3997204 0.19224287 4 5
## 425 0.017927333 0.05878618 0.1900615 0.95016327 5 5
## 434 0.012675364 0.05299373 0.2955342 0.48590569 5 5
## 438 0.005002268 0.08441154 0.2819459 0.73110061 5 4
## 441 0.065923243 0.16877365 0.3577106 0.14407533 4 5
## 442 0.023377313 0.08165659 0.1998990 0.71289101 5 5
## 445 0.010033707 0.09518258 0.4023962 0.47480945 5 5
## 447 0.033344309 0.14750803 0.3416606 0.09594671 4 3
## 453 0.083893816 0.12859772 0.4604940 0.17039171 4 4
## 454 0.122788265 0.10677410 0.2331023 0.13939656 4 5
## 462 0.006101032 0.05596089 0.2173711 0.83974507 5 5
## 474 0.040958983 0.10150962 0.3405297 0.38847884 5 3
## 476 0.011657052 0.16420354 0.2909361 0.28074862 4 3
## 493 0.008911849 0.12240648 0.2592187 0.46251680 5 5
## 502 0.016337894 0.21298737 0.4876286 0.15275984 4 4
## 503 0.183244099 0.04913485 0.2509922 0.20226163 4 5
## 506 0.015934542 0.12677168 0.2612792 0.60460471 5 5
## 508 0.011700285 0.05994788 0.3713655 0.78560140 5 5
## 512 0.020163185 0.10692991 0.2173027 0.69767460 5 5
## 513 0.022383389 0.12166685 0.1966970 0.38040046 5 5
## 521 0.177948288 0.09604614 0.2630601 0.50000000 5 2
## 524 0.044141182 0.12202923 0.3091491 0.27538217 4 5
# Confusion matrix: rows are the actual class, columns are the voted class.
# Both axes are pinned to classes 2..5 (the only values the label coding and
# the vote can produce), so the table stays square and diag() stays aligned
# even when some class is never predicted or never occurs in the test rows.
class_levels <- 2:5
CM <- table(
  factor(Evaluation$Actual, levels = class_levels),
  factor(Evaluation$Vote, levels = class_levels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 1 17 6
## 4 0 0 30 32
## 5 0 0 21 93
# Proportions: number of test rows overall and per actual class
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)
# diag(CM) only pairs up matching classes on a square table whose rows and
# columns list the same classes in the same order.
stopifnot(
  nrow(CM) == ncol(CM),
  identical(rownames(CM), colnames(CM))
)
# CM has the actual class in rows and the voted class in columns, therefore
#   rowSums(CM) = rows that truly belong to the class (its support)
#   colSums(CM) = rows that were voted into the class
class_support <- rowSums(CM)
# Accuracy
Accuracy <- sum(diag(CM)) / sum(CM)
# Precision: correct votes for a class / all votes for that class
class_precision <- diag(CM) / colSums(CM)
# A class that was never voted for gives 0/0; count it as precision 0.
class_precision[is.nan(class_precision)] <- 0
# Average over classes, weighted by class support
Precision <- sum(class_precision * class_support) / sum(CM)
# Recall: correct votes for a class / all rows that truly are that class
class_recall <- diag(CM) / class_support
# A class with no test rows gives 0/0; it has zero weight, so use 0.
class_recall[is.nan(class_recall)] <- 0
# Support-weighted recall equals the accuracy by construction.
Recall <- sum(class_recall * class_support) / sum(CM)
Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538