setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/50")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
Import actual labels.
# Load the labelled source data from Excel and keep its Score column as the
# raw label vector used to build every classification target below.
Labels <- read_excel(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels[["Score"]]
Import the TP feature set with a 50th percentile cut-off.
# Read the feature matrix from CSV and discard its first column.
# NOTE(review): the first column is presumably a row index written on export
# rather than a feature - confirm against the CSV.
Features <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/50/Feature Set 1 50th TP.csv"
)[-1]
# Class 2: one-vs-rest target, 1 when the score is 3 or 4 and 0 otherwise,
# stored as a factor.
# Vectorised over the whole score vector, so it no longer assumes exactly
# 1000 observations and does not grow a list element by element.
# NOTE(review): Label is expected to hold one score per row of Features
# (the previous loop only ever read the first 1000 entries) - confirm.
stopifnot(!anyNA(Label)) # the loop version also stopped on a missing score
Label2 <- as.factor(as.numeric(Label %in% c(3, 4)))
# Class 3: one-vs-rest target, 1 when the score is 5 or 6 and 0 otherwise,
# stored as a factor.
# Vectorised over the whole score vector, so it no longer assumes exactly
# 1000 observations and does not grow a list element by element.
# NOTE(review): Label is expected to hold one score per row of Features
# (the previous loop only ever read the first 1000 entries) - confirm.
stopifnot(!anyNA(Label)) # the loop version also stopped on a missing score
Label3 <- as.factor(as.numeric(Label %in% c(5, 6)))
# Class 4: one-vs-rest target, 1 when the score is 7 or 8 and 0 otherwise,
# stored as a factor.
# Vectorised over the whole score vector, so it no longer assumes exactly
# 1000 observations and does not grow a list element by element.
# NOTE(review): Label is expected to hold one score per row of Features
# (the previous loop only ever read the first 1000 entries) - confirm.
stopifnot(!anyNA(Label)) # the loop version also stopped on a missing score
Label4 <- as.factor(as.numeric(Label %in% c(7, 8)))
# Class 5: one-vs-rest target, 1 when the score is 9 or 10 and 0 otherwise,
# stored as a factor.
# Vectorised over the whole score vector, so it no longer assumes exactly
# 1000 observations and does not grow a list element by element.
# NOTE(review): Label is expected to hold one score per row of Features
# (the previous loop only ever read the first 1000 entries) - confirm.
stopifnot(!anyNA(Label)) # the loop version also stopped on a missing score
Label5 <- as.factor(as.numeric(Label %in% c(9, 10)))
# All labels: multi-class target with one class per score band, as a factor.
#   scores 9-10 -> 5, scores 7-8 -> 4, scores 5-6 -> 3, anything else -> 2.
# Vectorised over the whole score vector, so it no longer assumes exactly
# 1000 observations and does not grow a list element by element.
# NOTE(review): class 2 is the catch-all branch, so any score outside 5-10
# (not only 3-4) lands in it - confirm that is intended.
stopifnot(!anyNA(Label)) # the loop version also stopped on a missing score
All <- rep(2, length(Label))
All[Label %in% c(5, 6)] <- 3
All[Label %in% c(7, 8)] <- 4
All[Label %in% c(9, 10)] <- 5
All <- as.factor(All)
# Coerce every feature column to numeric (double).
# Uses lapply over the columns instead of a loop over a hard-coded column
# count, so it keeps working when the feature set has a different width;
# assigning into Features[] keeps the data.frame structure and column names.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 1101 variables:
## $ abl : num 1 0 0 0 0 0 0 0 0 0 ...
## $ absolut : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accept : num 0 0 0 0 0 0 0 0 0 0 ...
## $ access : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accommod : num 0 0 0 0 0 0 0 0 0 0 ...
## $ acknowledg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ across : num 0 0 0 0 0 0 0 0 0 0 ...
## $ actual : num 0 0 0 0 0 0 0 0 1 0 ...
## $ addit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adequ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adjac : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adult : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advanc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adverti : num 0 0 0 0 1 0 0 0 0 0 ...
## $ advi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ affect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ after : num 0 0 0 0 1 0 0 0 0 0 ...
## $ air : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aircondit : num 0 0 0 0 0 0 0 0 0 1 ...
## $ airi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ airport : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alarm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ all : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alloc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ allow : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alreadi : num 0 0 0 1 0 0 0 0 0 0 ...
## $ also : num 0 0 0 1 0 0 1 0 1 0 ...
## $ altern : num 0 0 0 0 0 0 0 0 0 0 ...
## $ although : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amaz : num 0 0 0 0 1 0 0 0 0 0 ...
## $ amen : num 0 1 0 0 0 0 0 0 0 0 ...
## $ american : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amsterdam : num 0 1 0 1 0 0 0 0 0 0 ...
## $ and : num 1 0 0 0 0 0 0 0 0 0 ...
## $ annoy : num 1 0 0 0 0 0 0 0 0 0 ...
## $ anoth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ answer : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anymor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyon : num 0 1 0 0 0 0 0 0 0 0 ...
## $ anyth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anyway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anywh : num 0 0 0 0 0 0 0 0 0 0 ...
## $ apart : num 0 0 0 0 0 0 0 1 0 0 ...
## $ apolog : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appear : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appoint : num 0 0 0 0 0 0 0 0 0 0 ...
## $ appreci : num 0 0 0 0 0 0 0 0 0 0 ...
## $ approach : num 0 0 0 0 0 0 0 0 0 0 ...
## $ architectur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ area : num 0 0 0 1 0 0 0 0 0 0 ...
## $ arena : num 0 0 0 0 0 0 0 0 0 0 ...
## $ around : num 0 0 0 1 0 0 0 0 0 0 ...
## $ arrang : num 0 0 0 0 0 0 0 0 0 0 ...
## $ arriv : num 1 0 0 0 1 0 0 0 0 0 ...
## $ ask : num 1 0 1 0 0 0 0 0 0 0 ...
## $ aspect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ assum : num 0 0 0 0 0 0 0 0 0 0 ...
## $ atm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ atmosph : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attend : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attitud : num 0 0 0 0 0 0 0 0 0 0 ...
## $ attract : num 0 0 0 0 0 0 0 0 0 0 ...
## $ avail : num 1 0 0 0 0 0 0 0 0 0 ...
## $ averag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ avoid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ away : num 0 0 0 0 0 0 0 0 0 0 ...
## $ awesom : num 0 0 0 0 0 1 0 0 0 0 ...
## $ back : num 0 1 1 0 0 0 0 0 0 0 ...
## $ bacon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bad : num 0 0 0 1 0 0 0 0 0 0 ...
## $ bag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bake : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bang : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bank : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bar : num 0 0 1 1 0 0 0 0 0 0 ...
## $ bare : num 0 0 0 0 0 0 0 0 0 0 ...
## $ base : num 0 0 0 0 0 0 0 0 0 0 ...
## $ basement : num 0 0 0 0 0 0 0 0 0 0 ...
## $ basic : num 0 0 1 0 0 0 0 0 0 0 ...
## $ bath : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bathroom : num 0 0 0 0 0 0 0 0 0 0 ...
## $ bathtub : num 0 0 0 0 0 0 0 0 0 0 ...
## $ beauti : num 1 0 0 0 0 0 1 0 0 0 ...
## $ bed : num 0 0 0 1 0 0 1 0 0 1 ...
## $ bedroom : num 0 0 0 0 0 0 0 0 0 0 ...
## $ beer : num 0 0 0 0 0 0 0 0 0 0 ...
## $ begin : num 1 0 0 0 0 0 0 0 0 0 ...
## $ behind : num 0 0 0 0 0 0 0 0 0 0 ...
## $ believ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ benefit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ besid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ best : num 1 0 0 0 0 0 0 0 0 0 ...
## $ better : num 0 0 0 0 0 0 0 0 0 0 ...
## $ big : num 1 0 0 0 0 0 0 0 0 1 ...
## [list output truncated]
# Train/test split: each row is independently assigned to the training set
# (ind == 1, probability 0.8) or the test set (ind == 2, probability 0.2),
# so the split is approximately, not exactly, 80/20.
# The seed is fixed so the assignment is reproducible.
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]

# Apply the same row assignment to every label vector so that features and
# labels stay aligned.
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
# One-vs-rest SVMs: for each class, copy the training features, attach that
# class's binary target as the `Score` column and fit svm() on all other
# columns. scale = FALSE passes the features through unscaled;
# probability = TRUE requests the probability model so that class
# probabilities can be obtained at prediction time.

# Class 2
train2 <- train
train2[["Score"]] <- train.labels.2
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)

# Class 3
train3 <- train
train3[["Score"]] <- train.labels.3
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)

# Class 4
train4 <- train
train4[["Score"]] <- train.labels.4
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)

# Class 5
train5 <- train
train5[["Score"]] <- train.labels.5
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
# Score the test set with each one-vs-rest model and pull the matrix of
# class probabilities out of the "probabilities" attribute of the prediction.

# Class 2
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

# Class 3
P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

# Class 4
P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

# Class 5
P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")
Use probabilities as an input for the voting procedure.
# Put the class-probability matrices of the four one-vs-rest SVMs side by
# side, in the order Prob2, Prob3, Prob4, Prob5.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label every column with the class level it really belongs to, read from the
# probability matrix's own column names, instead of hard-coding which column
# is the "1" and which the "0": that order is decided by the fitted model and
# a hard-coded header would silently mislabel the columns if it changed.
# The header prefixes (with and without a space after "Class") are kept
# exactly as before so the printed headers stay the same for the same order.
colnames(Voting.df) <- c(
  paste0(c("Class 2: ", "Class2: "), colnames(Prob2)),
  paste0(c("Class 3: ", "Class3: "), colnames(Prob3)),
  paste0(c("Class 4: ", "Class4: "), colnames(Prob4)),
  paste0(c("Class 5: ", "Class5: "), colnames(Prob5))
)
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.065601271 0.9343987 0.7675769 0.23242309 0.6385660 0.3614340
## 14 0.005602181 0.9943978 0.9330662 0.06693376 0.6995650 0.3004350
## 16 0.009361841 0.9906382 0.9276978 0.07230217 0.7964899 0.2035101
## 26 0.059216050 0.9407840 0.9201148 0.07988517 0.6339025 0.3660975
## 28 0.047756100 0.9522439 0.8650292 0.13497080 0.6831120 0.3168880
## 29 0.019548217 0.9804518 0.8148213 0.18517870 0.5858900 0.4141100
## Class 5: 0 Class5: 1
## 5 0.9211599 0.07884014
## 14 0.3811052 0.61889477
## 16 0.3686065 0.63139350
## 26 0.7018917 0.29810826
## 28 0.6296034 0.37039659
## 29 0.8837346 0.11626541
# Positions, within Voting.df, of the column that holds the probability of
# the positive class ("1") for each of the four classifiers.
# Voting.df is the column-wise concatenation of Prob2, Prob3, Prob4, Prob5,
# so each position is the classifier's column offset plus the index of its
# "1" column. Looking the index up by name replaces the fixed positions
# 1, 4, 6, 8, which were only right for one particular column order and would
# silently select negative-class probabilities if that order changed.
SEQ <- local({
  probs <- list(Prob2, Prob3, Prob4, Prob5)
  widths <- vapply(probs, ncol, integer(1))
  offsets <- cumsum(c(0L, widths[-length(widths)]))
  positive <- vapply(probs, function(p) match("1", colnames(p)), integer(1))
  # Fail loudly if a probability matrix has no column for the positive class
  stopifnot(!anyNA(positive))
  offsets + positive
})
# Keep only the positive-class probabilities and name them after the class
# (2..5) that each classifier votes for.
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.065601271 0.23242309 0.3614340 0.07884014
## 14 0.005602181 0.06693376 0.3004350 0.61889477
## 16 0.009361841 0.07230217 0.2035101 0.63139350
## 26 0.059216050 0.07988517 0.3660975 0.29810826
## 28 0.047756100 0.13497080 0.3168880 0.37039659
## 29 0.019548217 0.18517870 0.4141100 0.11626541
# Voting: for every test row take the position (1..4) of the largest
# positive-class probability and shift it by one so that it matches the
# class labels 2..5 used as column names.
Index <- 1 + as.numeric(
  apply(Transformed.Voting.df, MARGIN = 1, which.max)
)

# Evaluation table: the four probabilities, the voted class and the true
# class of each test row.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, 100)
## 2 3 4 5 Vote Actual
## 5 0.065601271 0.23242309 0.3614340 0.07884014 4 4
## 14 0.005602181 0.06693376 0.3004350 0.61889477 5 5
## 16 0.009361841 0.07230217 0.2035101 0.63139350 5 5
## 26 0.059216050 0.07988517 0.3660975 0.29810826 4 4
## 28 0.047756100 0.13497080 0.3168880 0.37039659 5 4
## 29 0.019548217 0.18517870 0.4141100 0.11626541 4 4
## 39 0.164007448 0.10157794 0.3465062 0.12075390 4 5
## 40 0.016210058 0.22490915 0.6621418 0.05063282 4 3
## 60 0.010361800 0.03937324 0.2549049 0.77568430 5 5
## 61 0.032643751 0.31140815 0.2855201 0.30885069 3 3
## 72 0.004156537 0.08190410 0.3331915 0.57841896 5 4
## 81 0.011141455 0.10255066 0.3934008 0.32886621 4 3
## 86 0.026327778 0.13395248 0.2215864 0.62983145 5 5
## 90 0.074854182 0.17562022 0.3437507 0.11232926 4 4
## 92 0.030657452 0.15748748 0.3282574 0.13226185 4 4
## 113 0.060322764 0.06702227 0.3410302 0.28695466 4 5
## 116 0.028574009 0.13287576 0.2227370 0.40510662 5 4
## 117 0.024010719 0.07800817 0.3101777 0.37402745 5 5
## 122 0.041958856 0.15093623 0.3591797 0.07235008 4 4
## 123 0.019362095 0.07686879 0.3633148 0.24736326 4 2
## 124 0.029507629 0.20634981 0.3440499 0.06251467 4 4
## 131 0.004088441 0.13170141 0.3186188 0.55368832 5 4
## 135 0.157151876 0.22426545 0.4728317 0.03847118 4 3
## 137 0.002733994 0.05963790 0.3555119 0.50000000 5 5
## 140 0.015001965 0.17232748 0.3362932 0.22382989 4 4
## 142 0.011432009 0.07760878 0.3116876 0.48754929 5 5
## 149 0.015428060 0.06463795 0.3434278 0.51481683 5 4
## 154 0.030784614 0.12960578 0.2413049 0.35187569 5 5
## 156 0.174179121 0.08922331 0.3535987 0.17185320 4 3
## 158 0.317430830 0.27835904 0.4497158 0.02137248 4 3
## 169 0.005951224 0.07836386 0.2451005 0.74791391 5 5
## 185 0.009580262 0.12719490 0.1949168 0.52361776 5 5
## 187 0.004053913 0.06400352 0.4590230 0.33691299 4 5
## 192 0.021519319 0.22035629 0.5693126 0.02579768 4 3
## 194 0.017372678 0.19653051 0.4150956 0.18689782 4 4
## 195 0.013534903 0.24141518 0.2812008 0.22835057 4 4
## 196 0.146262984 0.16273240 0.4775080 0.03580206 4 5
## 197 0.425794532 0.17893357 0.2765371 0.05090354 2 3
## 199 0.005327095 0.08763072 0.2420727 0.84405277 5 5
## 210 0.117968492 0.15321369 0.3962645 0.02571694 4 3
## 216 0.016026390 0.06176495 0.1497539 0.91033972 5 5
## 220 0.012714952 0.30157519 0.3192776 0.09521295 4 4
## 227 0.115383737 0.05567640 0.5465399 0.08599267 4 5
## 234 0.031984994 0.13141538 0.4658474 0.18353771 4 3
## 240 0.020057004 0.07855071 0.4549698 0.19872207 4 5
## 245 0.044355642 0.11328518 0.3145619 0.30786572 4 4
## 249 0.015138160 0.12574747 0.3384071 0.38207798 5 5
## 261 0.025451494 0.16929537 0.3275426 0.31504790 4 3
## 277 0.009636257 0.07028297 0.2460693 0.91527206 5 5
## 283 0.017644902 0.09650251 0.2617475 0.67468323 5 5
## 290 0.009366834 0.07205544 0.2080436 0.88651935 5 4
## 293 0.012504854 0.06849038 0.4025123 0.21026093 4 5
## 302 0.009178425 0.11540087 0.2830814 0.38834448 5 4
## 305 0.018764390 0.09157890 0.3808060 0.49215437 5 4
## 308 0.025632468 0.14121282 0.2418988 0.35805351 5 4
## 311 0.007925030 0.07077653 0.2271392 0.80425891 5 5
## 320 0.014301658 0.08217342 0.2405749 0.79203658 5 2
## 322 0.020846050 0.06701951 0.2082092 0.89265863 5 5
## 330 0.012135715 0.07447777 0.1969410 0.84388733 5 4
## 332 0.032627377 0.10922919 0.4053880 0.17926420 4 4
## 333 0.020310499 0.07848261 0.2751302 0.79735405 5 5
## 339 0.009021323 0.09402719 0.2589371 0.67862294 5 5
## 341 0.021463689 0.09796297 0.4319255 0.17971315 4 4
## 344 0.025455943 0.06661961 0.3287234 0.73067749 5 5
## 349 0.009391533 0.06761823 0.1645056 0.91085596 5 5
## 355 0.012141932 0.05652093 0.2032807 0.93703876 5 5
## 356 0.028826169 0.08447198 0.2890631 0.60138585 5 3
## 365 0.011687758 0.13767487 0.2808409 0.36595339 5 3
## 366 0.008183069 0.11748070 0.3216713 0.48006138 5 4
## 369 0.007731737 0.07374461 0.3288342 0.42916884 5 4
## 371 0.009560912 0.07963605 0.2187417 0.85143776 5 5
## 373 0.013199130 0.06692234 0.3079821 0.77981711 5 5
## 389 0.046665101 0.10471227 0.2871548 0.38972585 5 2
## 390 0.037075001 0.12668332 0.2700089 0.41049955 5 4
## 396 0.011081879 0.06371549 0.3748828 0.37499418 5 4
## 412 0.005419143 0.07273130 0.4045119 0.33027242 4 5
## 413 0.006918447 0.11061131 0.3192161 0.30562455 4 3
## 415 0.007745250 0.13066093 0.3386417 0.41445867 5 4
## 422 0.018067535 0.14511107 0.3995792 0.19312475 4 5
## 425 0.016659309 0.05854865 0.1909779 0.94926307 5 5
## 434 0.012888516 0.05192523 0.2998809 0.48627736 5 5
## 438 0.004448969 0.08095203 0.2863108 0.72996856 5 4
## 441 0.067347113 0.17366548 0.3469161 0.14250019 4 5
## 442 0.023577459 0.08233886 0.2011293 0.71268568 5 5
## 445 0.009628341 0.09361407 0.4092050 0.47168075 5 5
## 447 0.036049107 0.14769153 0.3389257 0.09356921 4 3
## 453 0.089237281 0.12744136 0.4521446 0.17329824 4 4
## 454 0.125413958 0.10176170 0.2309437 0.13893078 4 5
## 462 0.005555445 0.05383906 0.2198852 0.83858855 5 5
## 474 0.046658397 0.10231819 0.3183467 0.39444134 5 3
## 476 0.012122992 0.16343337 0.2997408 0.28304400 4 3
## 493 0.008668906 0.12177729 0.2629375 0.46007434 5 5
## 502 0.015688649 0.21770241 0.4806482 0.14992814 4 4
## 503 0.171914800 0.04266066 0.2324954 0.20795206 4 5
## 506 0.015872601 0.13046940 0.2628529 0.59907342 5 5
## 508 0.011968136 0.05884374 0.3673096 0.78243650 5 5
## 512 0.020365992 0.09426126 0.2171769 0.70349134 5 5
## 513 0.019901958 0.12679680 0.1924247 0.38743758 5 5
## 521 0.186497500 0.09241020 0.2584039 0.49410256 5 2
## 524 0.048886562 0.12223018 0.3099854 0.27418604 4 5
# Confusion matrix: rows are the actual classes, columns the voted classes.
# Both sides are given the full class set 2..5 as factor levels, so the table
# is always 4 x 4 with matching row/column order. Without this, a class that
# is never voted for would drop its column and diag(CM) would no longer line
# up with the per-class counts.
CM <- table(
  factor(Evaluation$Actual, levels = as.character(2:5)),
  factor(Evaluation$Vote, levels = as.character(2:5))
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 1 17 6
## 4 0 0 30 32
## 5 0 0 21 93
# Proportions: size of the test set and support (number of rows whose actual
# class is k) of every class.
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)

# The per-class formulas below index CM by position (class 2..5), so the
# matrix must be 4 x 4; stop instead of reporting misaligned numbers.
stopifnot(identical(dim(CM), c(4L, 4L)))

# Accuracy: share of test rows on the diagonal of the confusion matrix.
Accuracy <- sum(diag(CM)) / sum(CM)

# Precision: CM has the actual classes in rows and the voted classes in
# columns, so per-class precision is the diagonal over the COLUMN totals
# (correct votes for k / all votes for k). The previous version divided by
# the row totals here, which is recall. A class that is never voted for gives
# 0/0 and is counted as 0 rather than turning the average into NaN.
# The per-class values are averaged weighted by class support, dividing by
# the actual test-set size instead of a hard-coded 208.
Precision <- diag(CM) / colSums(CM)
Precision[is.nan(Precision)] <- 0
Precision <- sum(Precision * c(Length2, Length3, Length4, Length5)) / Overall

# Recall: diagonal over the ROW totals (correct votes for k / all rows whose
# actual class is k), averaged with the same support weights. Note that the
# support-weighted recall is by construction equal to the accuracy.
Recall <- diag(CM) / rowSums(CM)
Recall[is.nan(Recall)] <- 0
Recall <- sum(Recall * c(Length2, Length3, Length4, Length5)) / Overall

Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538