# Set the working directory to the folder of this feature-set experiment.
# NOTE(review): the read_excel()/read.csv() calls visible in this script use
# absolute "~/..." paths, so this call looks redundant - confirm before removing.
setwd("~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/Full")
#install.packages("naivebayes")
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.4.3
library(dplyr)
## Warning: Installed Rcpp (0.12.16) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(e1071)
library(readxl)
# Import feature set "Full" - the feature set with no cut-off.
# Load the label workbook; its "Score" column is the raw label vector.
Labels <- read_excel(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/Naive Bayes/1.Labels/Source Data.xlsx"
)
Label <- Labels[["Score"]]
# Load the feature table and discard its first column.
Features <- read.csv(
  "~/Google Drive/UM/Smart Services/Thesis/Thesis/Code/SVM/2.Feature Set 1/TP/Full/Feature Set 1 Full TP.csv"
)
Features <- Features[, -1, drop = FALSE]
# Class 2: binary indicator, 1 when the raw score is 3 or 4, otherwise 0.
# Vectorised over the whole of `Label`, so it no longer assumes exactly 1000
# observations; a missing score stays NA instead of aborting the script.
Label2 <- as.integer(Label == 3 | Label == 4)
# As factor
Label2 <- as.factor(Label2)
# Class 3: binary indicator, 1 when the raw score is 5 or 6, otherwise 0.
# Vectorised over the whole of `Label`, so it no longer assumes exactly 1000
# observations; a missing score stays NA instead of aborting the script.
Label3 <- as.integer(Label == 5 | Label == 6)
# As factor
Label3 <- as.factor(Label3)
# Class 4: binary indicator, 1 when the raw score is 7 or 8, otherwise 0.
# Vectorised over the whole of `Label`, so it no longer assumes exactly 1000
# observations; a missing score stays NA instead of aborting the script.
Label4 <- as.integer(Label == 7 | Label == 8)
# As factor
Label4 <- as.factor(Label4)
# Class 5: binary indicator, 1 when the raw score is 9 or 10, otherwise 0.
# Vectorised over the whole of `Label`, so it no longer assumes exactly 1000
# observations; a missing score stays NA instead of aborting the script.
Label5 <- as.integer(Label == 9 | Label == 10)
# As factor
Label5 <- as.factor(Label5)
# All labels: collapse the raw score into the multi-class target
#   9-10 -> 5, 7-8 -> 4, 5-6 -> 3, every other score -> 2.
# Vectorised over the whole of `Label` (no fixed 1000-row assumption); a
# missing score yields NA rather than stopping the script.
All <- ifelse(
  Label == 9 | Label == 10, 5,
  ifelse(
    Label == 7 | Label == 8, 4,
    ifelse(Label == 5 | Label == 6, 3, 2)
  )
)
# As factor
All <- as.factor(All)
# Convert every feature column to numeric (double).
# `Features[] <-` keeps the data.frame structure and column names while
# replacing each column, so the column count is no longer hard-coded.
Features[] <- lapply(Features, as.numeric)
str(Features)
## 'data.frame': 1000 obs. of 2672 variables:
## $ abil : num 0 0 0 0 0 0 0 0 0 0 ...
## $ abit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ abl : num 1 0 0 0 0 0 0 0 0 0 ...
## $ abnorm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ about : num 0 0 0 0 0 0 0 0 0 0 ...
## $ abov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ abrupt : num 0 0 0 0 0 0 0 0 0 0 ...
## $ absolut : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accept : num 0 0 0 0 0 0 0 0 0 0 ...
## $ access : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accommod : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accomplish : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ accustom : num 0 0 0 0 0 0 0 0 0 0 ...
## $ acess : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ach : num 0 0 0 0 0 0 0 0 0 0 ...
## $ acknowledg : num 0 0 0 0 0 0 0 0 0 0 ...
## $ acomod : num 0 0 0 0 0 0 0 0 0 0 ...
## $ across : num 0 0 0 0 0 0 0 0 0 0 ...
## $ activ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ actual : num 0 0 0 0 0 0 0 0 1 0 ...
## $ adaptor : num 0 0 0 0 0 0 0 0 0 0 ...
## $ add : num 0 0 0 0 0 0 0 0 0 0 ...
## $ addit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adequ : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adjac : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adjust : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adult : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advanc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advantag : num 0 0 0 0 0 0 0 0 0 0 ...
## $ adverti : num 0 0 0 0 1 0 0 0 0 0 ...
## $ advi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ advic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ affair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ affect : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afford : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afraid : num 0 0 0 1 0 0 0 0 0 0 ...
## $ africa : num 0 0 0 0 0 0 0 0 0 0 ...
## $ after : num 0 0 0 0 1 0 0 0 0 0 ...
## $ afterdinn : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afternoon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ afterward : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ago : num 0 0 0 0 0 0 0 0 0 0 ...
## $ agr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ agreeabl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ahead : num 0 0 0 0 0 0 0 0 0 0 ...
## $ air : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aircon : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aircondit : num 0 0 0 0 0 0 0 0 0 1 ...
## $ airi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ airless : num 0 0 0 0 0 0 0 0 0 0 ...
## $ airport : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alarm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ albeit : num 0 0 0 0 0 0 0 0 0 0 ...
## $ albert : num 0 0 0 0 0 0 0 0 0 0 ...
## $ albrt : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alcohol : num 0 0 0 0 0 0 0 0 0 0 ...
## $ aldo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alittl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ all : num 0 0 0 0 0 0 0 0 0 0 ...
## $ allevi : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alloc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ allow : num 0 0 0 0 0 0 0 0 0 0 ...
## $ almost : num 0 0 0 0 0 0 0 0 0 0 ...
## $ along : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alongsid : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alot : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alreadi : num 0 0 0 1 0 0 0 0 0 0 ...
## $ alright : num 0 0 0 0 0 0 0 0 0 0 ...
## $ also : num 0 0 0 1 0 0 1 0 1 0 ...
## $ altern : num 0 0 0 0 0 0 0 0 0 0 ...
## $ although : num 0 0 0 0 0 0 0 0 0 0 ...
## $ alway : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amaz : num 0 0 0 0 1 0 0 0 0 0 ...
## $ ambianc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ambienc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amen : num 0 1 0 0 0 0 0 0 0 0 ...
## $ amend : num 0 0 0 0 0 0 0 0 0 0 ...
## $ america : num 0 0 0 0 0 0 0 0 0 0 ...
## $ american : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amongst : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amount : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ampl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ amsterdam : num 0 1 0 1 0 0 0 0 0 0 ...
## $ and : num 1 0 0 0 0 0 0 0 0 0 ...
## $ angl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ angri : num 1 0 0 1 0 0 0 0 0 0 ...
## $ ann : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anna : num 0 0 0 0 0 0 0 0 0 0 ...
## $ annex : num 0 0 0 0 0 0 0 0 0 0 ...
## $ announc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ annoy : num 1 0 0 0 0 0 0 0 0 0 ...
## $ anoth : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ansterdam : num 0 0 0 0 0 0 0 0 0 0 ...
## $ answer : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
# Train/test split
# Each row is independently assigned to partition 1 (train, p = 0.8) or
# partition 2 (test, p = 0.2); the seed makes the assignment reproducible.
# The label vectors are subset with the same masks, so they must have one
# entry per feature row - otherwise logical indexing would silently misalign.
stopifnot(
  length(Label2) == nrow(Features),
  length(Label3) == nrow(Features),
  length(Label4) == nrow(Features),
  length(Label5) == nrow(Features),
  length(All) == nrow(Features)
)
set.seed(1234)
ind <- sample(2, nrow(Features), replace = TRUE, prob = c(0.8, 0.2))
train <- Features[ind == 1, ]
test <- Features[ind == 2, ]
# Binary one-vs-rest labels for classes 2 to 5
train.labels.2 <- Label2[ind == 1]
test.labels.2 <- Label2[ind == 2]
train.labels.3 <- Label3[ind == 1]
test.labels.3 <- Label3[ind == 2]
train.labels.4 <- Label4[ind == 1]
test.labels.4 <- Label4[ind == 2]
train.labels.5 <- Label5[ind == 1]
test.labels.5 <- Label5[ind == 2]
# Multi-class labels
train.labels <- All[ind == 1]
test.labels <- All[ind == 2]
# One binary SVM per class (2 to 5).
# Each training frame is the shared feature set plus that class's 0/1 label
# stored in a `Score` column, which the formula uses as the response.
train2 <- train
train3 <- train
train4 <- train
train5 <- train
train2[["Score"]] <- train.labels.2
train3[["Score"]] <- train.labels.3
train4[["Score"]] <- train.labels.4
train5[["Score"]] <- train.labels.5
# Fit without feature scaling and with probability estimates enabled.
SVM2 <- svm(Score ~ ., data = train2, scale = FALSE, probability = TRUE)
SVM3 <- svm(Score ~ ., data = train3, scale = FALSE, probability = TRUE)
SVM4 <- svm(Score ~ ., data = train4, scale = FALSE, probability = TRUE)
SVM5 <- svm(Score ~ ., data = train5, scale = FALSE, probability = TRUE)
# Score the test set with each binary model and pull out the class
# probability matrix that predict() attaches as the "probabilities" attribute.
P2 <- predict(SVM2, newdata = test, probability = TRUE)
Prob2 <- attr(P2, "probabilities")

P3 <- predict(SVM3, newdata = test, probability = TRUE)
Prob3 <- attr(P3, "probabilities")

P4 <- predict(SVM4, newdata = test, probability = TRUE)
Prob4 <- attr(P4, "probabilities")

P5 <- predict(SVM5, newdata = test, probability = TRUE)
Prob5 <- attr(P5, "probabilities")
# Use the probabilities as input for the voting procedure: choose the class with the highest probability.
# Place the probability matrices of the four binary models side by side.
Voting.df <- data.frame(Prob2, Prob3, Prob4, Prob5)
# Label each column from the matrix's own column names ("0"/"1") instead of
# assuming a fixed order: the order of the probability columns can differ
# between models (here "1" comes first for class 2 only), so hard-coded labels
# could silently mislabel a column. The "Class N: x" / "ClassN: x" spacing
# reproduces the labels used so far for the current column order.
colnames(Voting.df) <- unlist(Map(
  function(class.id, prob) {
    paste0(c("Class ", "Class"), class.id, ": ", colnames(prob))
  },
  2:5,
  list(Prob2, Prob3, Prob4, Prob5)
))
head(Voting.df)
## Class 2: 1 Class2: 0 Class 3: 0 Class3: 1 Class 4: 0 Class4: 1
## 5 0.059325858 0.9406741 0.7700948 0.22990517 0.6194502 0.3805498
## 14 0.006354669 0.9936453 0.9345275 0.06547254 0.6970027 0.3029973
## 16 0.010897097 0.9891029 0.9274546 0.07254541 0.7921826 0.2078174
## 26 0.064600556 0.9353994 0.9162288 0.08377122 0.6320345 0.3679655
## 28 0.046912467 0.9530875 0.8645258 0.13547417 0.6795935 0.3204065
## 29 0.019217561 0.9807824 0.8068216 0.19317842 0.5771262 0.4228738
## Class 5: 0 Class5: 1
## 5 0.9220958 0.0779042
## 14 0.3667004 0.6332996
## 16 0.3637557 0.6362443
## 26 0.6943750 0.3056250
## 28 0.6244933 0.3755067
## 29 0.8882729 0.1117271
# Keep, for every binary model, only the probability of the positive outcome
# ("1" = observation belongs to that class).
# The position of the "1" column is looked up by name in each probability
# matrix rather than hard-coded, because the column order can differ between
# models. Each matrix contributes two columns to Voting.df, hence the offsets
# 0, 2, 4, 6. For the current column order this yields c(1, 4, 6, 8).
SEQ <- c(0, 2, 4, 6) + vapply(
  list(Prob2, Prob3, Prob4, Prob5),
  function(prob) match("1", colnames(prob)),
  integer(1)
)
# Fail loudly if any model has no "1" probability column.
stopifnot(!anyNA(SEQ))
Transformed.Voting.df <- Voting.df[SEQ]
colnames(Transformed.Voting.df) <- c("2", "3", "4", "5")
head(Transformed.Voting.df)
## 2 3 4 5
## 5 0.059325858 0.22990517 0.3805498 0.0779042
## 14 0.006354669 0.06547254 0.3029973 0.6332996
## 16 0.010897097 0.07254541 0.2078174 0.6362443
## 26 0.064600556 0.08377122 0.3679655 0.3056250
## 28 0.046912467 0.13547417 0.3204065 0.3755067
## 29 0.019217561 0.19317842 0.4228738 0.1117271
# Vote: for every test row pick the column with the largest probability
# (which.max keeps the first one on ties). Columns 1..4 stand for classes
# 2..5, hence the + 1.
winning.column <- apply(as.matrix(Transformed.Voting.df), 1, which.max)
Index <- as.numeric(winning.column) + 1
# Collect probabilities, voted class and true class in one table.
Evaluation <- Transformed.Voting.df
Evaluation[["Vote"]] <- Index
Evaluation[["Actual"]] <- test.labels
head(Evaluation, n = 100)
## 2 3 4 5 Vote Actual
## 5 0.059325858 0.22990517 0.3805498 0.07790420 4 4
## 14 0.006354669 0.06547254 0.3029973 0.63329962 5 5
## 16 0.010897097 0.07254541 0.2078174 0.63624425 5 5
## 26 0.064600556 0.08377122 0.3679655 0.30562498 4 4
## 28 0.046912467 0.13547417 0.3204065 0.37550674 5 4
## 29 0.019217561 0.19317842 0.4228738 0.11172715 4 4
## 39 0.272512002 0.12850545 0.3663182 0.12824322 4 5
## 40 0.018545222 0.22653838 0.6740571 0.05007697 4 3
## 60 0.012632899 0.04120022 0.2661603 0.78576299 5 5
## 61 0.044779507 0.31313805 0.3005885 0.31083666 3 3
## 72 0.004672894 0.07918454 0.3315088 0.58196656 5 4
## 81 0.013290352 0.10000418 0.3826273 0.32063088 4 3
## 86 0.024525374 0.12327212 0.2276819 0.63359750 5 5
## 90 0.075871600 0.16849081 0.3338143 0.11128996 4 4
## 92 0.030338523 0.16910858 0.3207941 0.12850672 4 4
## 113 0.058825241 0.06453796 0.3406573 0.30764457 4 5
## 116 0.029721084 0.13065323 0.2179422 0.40272728 5 4
## 117 0.023561928 0.07864363 0.3150070 0.37217963 5 5
## 122 0.038610787 0.15027816 0.3629138 0.07163905 4 4
## 123 0.017662819 0.08045601 0.3611065 0.24640388 4 2
## 124 0.028115152 0.21818711 0.3564957 0.06372599 4 4
## 131 0.004200636 0.13179201 0.3055440 0.56463678 5 4
## 135 0.190719986 0.23354435 0.5000000 0.03526430 4 3
## 137 0.002869735 0.05937267 0.3478987 0.50000000 5 5
## 140 0.014853885 0.16637399 0.3375522 0.22184128 4 4
## 142 0.010873556 0.08231967 0.3093147 0.48526807 5 5
## 149 0.015964792 0.06717236 0.3546831 0.50550901 5 4
## 154 0.031740671 0.13253397 0.2479786 0.34972630 5 5
## 156 0.211044934 0.09165400 0.3560818 0.16720736 4 3
## 158 0.371902880 0.28150098 0.4597658 0.02142464 4 3
## 169 0.006200316 0.07553956 0.2418227 0.74631875 5 5
## 185 0.009887789 0.13094611 0.1979845 0.52836920 5 5
## 187 0.004682547 0.06415782 0.4609615 0.33017053 4 5
## 192 0.020835027 0.23143091 0.5534388 0.02499704 4 3
## 194 0.016519042 0.19124418 0.4090025 0.18695622 4 4
## 195 0.013522396 0.22357761 0.2883438 0.22303105 4 4
## 196 0.164979171 0.16741927 0.5000000 0.03466019 4 5
## 197 0.370850637 0.19965007 0.2785941 0.04937581 2 3
## 199 0.005449677 0.08194214 0.2449273 0.84897821 5 5
## 210 0.097546510 0.15006168 0.4158526 0.02502458 4 3
## 216 0.015185060 0.06662203 0.1524800 0.91245925 5 5
## 220 0.014014042 0.29796455 0.3130246 0.09473492 4 4
## 227 0.110743515 0.06062925 0.5289363 0.09233281 4 5
## 234 0.031126931 0.12841604 0.4587588 0.17803205 4 3
## 240 0.019187936 0.07999859 0.4493222 0.19559540 4 5
## 245 0.043781404 0.11299057 0.3149894 0.30896815 4 4
## 249 0.014657140 0.12358970 0.3339082 0.37875820 5 5
## 261 0.023515058 0.16404980 0.3147852 0.31096906 4 3
## 277 0.009961137 0.07169570 0.2418350 0.91441119 5 5
## 283 0.017808583 0.09433438 0.2642887 0.67668912 5 5
## 290 0.009189587 0.07021753 0.2063569 0.88608166 5 4
## 293 0.014243032 0.06742994 0.3948653 0.20602138 4 5
## 302 0.008718099 0.12063653 0.2836645 0.38228508 5 4
## 305 0.018660461 0.08997736 0.3718278 0.50000000 5 4
## 308 0.023504239 0.13800375 0.2409229 0.35836908 5 4
## 311 0.007957456 0.07030196 0.2305856 0.80535638 5 5
## 320 0.014435085 0.07965039 0.2398128 0.79000362 5 2
## 322 0.019763452 0.06560852 0.2074372 0.89360102 5 5
## 330 0.012249899 0.07225188 0.1998089 0.84632792 5 4
## 332 0.027888763 0.10706910 0.4006138 0.17864641 4 4
## 333 0.019442797 0.07898515 0.2738407 0.79517886 5 5
## 339 0.009140503 0.09384334 0.2579765 0.67890531 5 5
## 341 0.019947251 0.09218453 0.4233523 0.17817311 4 4
## 344 0.024994684 0.06792967 0.3182269 0.73180903 5 5
## 349 0.009532052 0.06444804 0.1663806 0.91021258 5 5
## 355 0.012068832 0.05497200 0.2006331 0.93662992 5 5
## 356 0.028095036 0.08425981 0.2869127 0.60002628 5 3
## 365 0.011546114 0.12969776 0.2830908 0.36269053 5 3
## 366 0.008633088 0.12119562 0.3122069 0.47376828 5 4
## 369 0.007649108 0.07444844 0.3214340 0.43648353 5 4
## 371 0.009637870 0.07596169 0.2168805 0.84973708 5 5
## 373 0.013568699 0.06734426 0.3039575 0.78289276 5 5
## 389 0.040270357 0.10330167 0.2843236 0.38591983 5 2
## 390 0.033173062 0.12325444 0.2700131 0.40851678 5 4
## 396 0.011096723 0.06136803 0.3729068 0.37599345 5 4
## 412 0.005822502 0.07852685 0.4041251 0.33833892 4 5
## 413 0.007953292 0.10396231 0.3232255 0.31854369 4 3
## 415 0.009493377 0.11998726 0.3384452 0.41191791 5 4
## 422 0.024026956 0.15813401 0.4129313 0.19837992 4 5
## 425 0.017661162 0.05752743 0.1913810 0.95119745 5 5
## 434 0.012644308 0.05569189 0.2926453 0.48341125 5 5
## 438 0.004975260 0.08397950 0.2866411 0.72875268 5 4
## 441 0.065576532 0.17515936 0.3670258 0.15062911 4 5
## 442 0.024188008 0.08304819 0.2043371 0.71408928 5 5
## 445 0.010731841 0.09579384 0.3998345 0.47148658 5 5
## 447 0.032233026 0.14908243 0.3442012 0.09573800 4 3
## 453 0.083043108 0.13139310 0.4609986 0.16772013 4 4
## 454 0.135064245 0.11283651 0.2402330 0.13935669 4 5
## 462 0.006097432 0.05435568 0.2142035 0.83949891 5 5
## 474 0.038500599 0.10407685 0.3429597 0.39211686 5 3
## 476 0.010962992 0.15977908 0.2908361 0.27789219 4 3
## 493 0.009186388 0.12450798 0.2584795 0.46376558 5 5
## 502 0.017351106 0.20331736 0.4895393 0.15253238 4 4
## 503 0.185962403 0.05047058 0.2642774 0.21112020 4 5
## 506 0.016522463 0.12484893 0.2644950 0.61113805 5 5
## 508 0.012280471 0.05655856 0.3735849 0.78871511 5 5
## 512 0.019792351 0.10665004 0.2190826 0.69643024 5 5
## 513 0.024443744 0.12794950 0.2008131 0.38010857 5 5
## 521 0.159807171 0.09561297 0.2657449 0.50000000 5 2
## 524 0.043229468 0.12150807 0.3117975 0.27520466 4 5
# Confusion matrix: rows = actual class, columns = voted class.
# `Vote` is numeric, so a plain table() only creates columns for classes that
# were actually predicted; a class that is never voted for would make the
# matrix non-square and diag(CM) would pair up the wrong cells. Both margins
# are therefore fixed to the full class set 2..5 used by the label coding and
# by the voting step.
class.levels <- as.character(2:5)
CM <- table(
  factor(as.character(Evaluation$Actual), levels = class.levels),
  factor(Evaluation$Vote, levels = class.levels)
)
CM
##
## 2 3 4 5
## 2 0 0 3 4
## 3 1 1 17 6
## 4 0 0 30 32
## 5 0 0 21 93
#Proportions
# Number of test observations overall and per actual class (class support).
Overall <- length(Evaluation$Actual)
Length2 <- sum(Evaluation$Actual == 2, na.rm = TRUE)
Length3 <- sum(Evaluation$Actual == 3, na.rm = TRUE)
Length4 <- sum(Evaluation$Actual == 4, na.rm = TRUE)
Length5 <- sum(Evaluation$Actual == 5, na.rm = TRUE)
Support <- c(Length2, Length3, Length4, Length5)
#Accuracy
Accuracy <- sum(diag(CM)) / sum(CM)
# CM holds the actual classes in its rows and the voted classes in its
# columns (classes 2..5 in that order). Therefore:
#   recall    = correct / all observations of the actual class -> rowSums
#   precision = correct / all observations voted into a class  -> colSums
# (These two were previously swapped, which made "Precision" equal Accuracy.)
Recall.by.class <- diag(CM) / rowSums(CM)
Precision.by.class <- diag(CM) / colSums(CM)
# A class that never occurs / is never voted for gives 0/0 = NaN; count it as
# 0 so one empty class does not turn the weighted average into NaN.
Recall.by.class[is.nan(Recall.by.class)] <- 0
Precision.by.class[is.nan(Precision.by.class)] <- 0
#Precision
# Support-weighted average over the classes; divides by the actual number of
# test observations instead of a hard-coded 208.
Precision <- sum(Precision.by.class * Support) / Overall
#Recall
Recall <- sum(Recall.by.class * Support) / Overall
Accuracy
## [1] 0.5961538
Precision
## [1] 0.6237044
Recall
## [1] 0.5961538