library(readr)
library(naivebayes)
## naivebayes 0.9.7 loaded
creditData <- read_csv("~/530/Lab 2/creditData.csv")
## Parsed with column specification:
## cols(
## .default = col_double()
## )
## See spec(...) for full column specifications.
View(creditData)
sum(is.na(creditData))
## [1] 0
#No missing value
creditData$Creditability <-as.factor(creditData$Creditability)
set.seed(12345)
credit_rand <-creditData[order(runif(1000)),]
credit_train <-credit_rand[1:750,]
credit_test <-credit_rand[751:1000,]
prop.table(table(credit_train$Creditability))
##
## 0 1
## 0.3146667 0.6853333
prop.table(table(credit_test$Creditability))
##
## 0 1
## 0.256 0.744
model1 <- naive_bayes(Creditability ~. , data = credit_train)
model1
##
## ================================== Naive Bayes ==================================
##
## Call:
## naive_bayes.formula(formula = Creditability ~ ., data = credit_train)
##
## ---------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ---------------------------------------------------------------------------------
##
## A priori probabilities:
##
## 0 1
## 0.3146667 0.6853333
##
## ---------------------------------------------------------------------------------
##
## Tables:
##
## ---------------------------------------------------------------------------------
## ::: Account Balance (Gaussian)
## ---------------------------------------------------------------------------------
##
## Account Balance 0 1
## mean 1.923729 2.793774
## sd 1.036826 1.252008
##
## ---------------------------------------------------------------------------------
## ::: Duration of Credit (month) (Gaussian)
## ---------------------------------------------------------------------------------
##
## Duration of Credit (month) 0 1
## mean 24.46610 19.20039
## sd 13.82208 11.13433
##
## ---------------------------------------------------------------------------------
## ::: Payment Status of Previous Credit (Gaussian)
## ---------------------------------------------------------------------------------
##
## Payment Status of Previous Credit 0 1
## mean 2.161017 2.665370
## sd 1.071649 1.045219
##
## ---------------------------------------------------------------------------------
## ::: Purpose (Gaussian)
## ---------------------------------------------------------------------------------
##
## Purpose 0 1
## mean 2.927966 2.803502
## sd 2.944722 2.633253
##
## ---------------------------------------------------------------------------------
## ::: Credit Amount (Gaussian)
## ---------------------------------------------------------------------------------
##
## Credit Amount 0 1
## mean 3964.195 2984.177
## sd 3597.093 2379.685
##
## ---------------------------------------------------------------------------------
##
## # ... and 15 more tables
##
## ---------------------------------------------------------------------------------
(Model_nat <- table(predict(model1,credit_test),credit_test$Creditability))
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
##
## 0 1
## 0 42 35
## 1 22 151
(Accuracy <- sum(diag(Model_nat))/sum(Model_nat)*100)
## [1] 77.2
##This model predicts the result with a 77.2% accuracy which is a good model in my opinion.With test dataset, the model predicted 151 TP and 42 TF.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
creditDataScaled <-scale(credit_rand[,2:ncol(credit_rand)],center=TRUE,scale = TRUE)
m <- cor(creditDataScaled)
highlycor <- findCorrelation(m, 0.30)
highlycor
## [1] 5 12 19 15 3
filteredData <- credit_rand[, -(highlycor[4]+1)]
filteredTraining <- filteredData[1:750, ]
filteredTest <- filteredData[751:1000, ]
nb_model <- naive_bayes(Creditability ~ ., data=filteredTraining)
filteredTestPred <- predict(nb_model, newdata = filteredTest)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
table(filteredTestPred, filteredTest$Creditability)
##
## filteredTestPred 0 1
## 0 41 34
## 1 23 152
Model_Q3<- table(filteredTestPred, filteredTest$Creditability)
(Accuracy <- sum(diag(Model_Q3))/sum(Model_Q3)*100)
## [1] 77.2
##The accuracy is the same so no improvement
#Laboratory 2: Naïve Bayes Classifiers, Part 3
letters <- read.csv("letterdata.csv")
str(letters)
## 'data.frame': 20000 obs. of 17 variables:
## $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
## $ xbox : int 2 5 4 7 2 4 4 1 2 11 ...
## $ ybox : int 8 12 11 11 1 11 2 1 2 15 ...
## $ width : int 3 3 6 6 3 5 5 3 4 13 ...
## $ height: int 5 7 8 6 1 8 4 2 4 9 ...
## $ onpix : int 1 2 6 3 1 3 4 1 2 7 ...
## $ xbar : int 8 10 10 5 8 8 8 8 10 13 ...
## $ ybar : int 13 5 6 9 6 8 7 2 6 2 ...
## $ x2bar : int 0 5 2 4 6 6 6 2 2 6 ...
## $ y2bar : int 6 4 6 6 6 9 6 2 6 2 ...
## $ xybar : int 6 13 10 4 6 5 7 8 12 12 ...
## $ x2ybar: int 10 3 3 4 5 6 6 2 4 1 ...
## $ xy2bar: int 8 9 7 10 9 6 6 8 8 9 ...
## $ xedge : int 0 2 3 6 1 0 2 1 1 8 ...
## $ xedgey: int 8 8 7 10 7 8 8 6 6 1 ...
## $ yedge : int 0 4 3 2 5 9 7 2 1 1 ...
## $ yedgex: int 8 10 9 8 10 7 10 7 7 8 ...
letters_train <- letters[1:18000, ]
letters_test <- letters[18001:20000, ]
library('kernlab')
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
letter_classifier <- ksvm(letter ~ ., data = letters_train, kernel = "vanilladot")
## Setting default kernel parameters
summary(letter_classifier)
## Length Class Mode
## 1 ksvm S4
letter_predictions <- predict(letter_classifier, letters_test)
(p <- table(letter_predictions,letters_test$letter))
##
## letter_predictions A B C D E F G H I J K L M N O P Q R S T
## A 73 0 0 0 0 0 0 0 0 1 0 0 0 0 3 0 4 0 0 1
## B 0 61 0 3 2 0 1 1 0 0 1 1 0 0 0 2 0 1 3 0
## C 0 0 64 0 2 0 4 2 1 0 1 2 0 0 1 0 0 0 0 0
## D 2 1 0 67 0 0 1 3 3 2 1 2 0 3 4 2 1 2 0 0
## E 0 0 1 0 64 1 1 0 0 0 2 2 0 0 0 0 2 0 6 0
## F 0 0 0 0 0 70 1 1 4 0 0 0 0 0 0 5 1 0 2 0
## G 1 1 2 1 3 2 68 1 0 0 0 1 0 0 0 0 4 1 3 2
## H 0 0 0 1 0 1 0 46 0 2 3 1 1 1 9 0 0 5 0 3
## I 0 0 0 0 0 0 0 0 65 3 0 0 0 0 0 0 0 0 2 0
## J 0 1 0 0 0 1 0 0 3 61 0 0 0 0 1 0 0 0 1 0
## K 0 1 4 0 0 0 0 5 0 0 56 0 0 2 0 0 0 4 0 1
## L 0 0 0 0 1 0 0 1 0 0 0 63 0 0 0 0 0 0 0 0
## M 0 0 1 0 0 0 1 0 0 0 0 0 70 2 0 0 0 0 0 0
## N 0 0 0 0 0 0 0 0 0 0 0 0 0 77 0 0 0 1 0 0
## O 0 0 1 1 0 0 0 1 0 1 0 0 0 0 49 1 2 0 0 0
## P 0 0 0 0 0 3 0 0 0 0 0 0 0 0 2 69 0 0 0 0
## Q 0 0 0 0 0 0 3 1 0 0 0 2 0 0 2 1 52 0 1 0
## R 0 4 0 0 1 0 0 3 0 0 3 0 0 0 1 0 0 64 0 1
## S 0 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 6 0 47 1
## T 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 83
## U 0 0 2 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## V 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
## W 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## X 0 1 0 0 1 0 0 1 0 0 1 4 0 0 0 0 0 1 0 0
## Y 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 1
## Z 1 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 5 1
##
## letter_predictions U V W X Y Z
## A 2 0 1 0 0 0
## B 0 0 0 0 0 0
## C 0 0 0 0 0 0
## D 0 0 0 0 1 0
## E 0 0 0 1 0 0
## F 0 1 0 0 2 0
## G 0 0 0 0 0 0
## H 0 2 0 0 1 0
## I 0 0 0 2 1 0
## J 0 0 0 1 0 4
## K 2 0 0 4 0 0
## L 0 0 0 0 0 0
## M 1 0 6 0 0 0
## N 1 0 2 0 0 0
## O 1 0 0 0 0 0
## P 0 0 0 0 1 0
## Q 0 0 0 0 0 0
## R 0 1 0 0 0 0
## S 0 0 0 1 0 6
## T 1 0 0 0 2 2
## U 83 0 0 0 0 0
## V 0 64 1 0 1 0
## W 0 3 59 0 0 0
## X 0 0 0 76 1 0
## Y 0 0 0 1 58 0
## Z 0 0 0 0 0 70
agreement <- letter_predictions == letters_test$letter
table(agreement)
## agreement
## FALSE TRUE
## 321 1679
(Accuracy <- sum(diag(p))/sum(p)*100)
## [1] 83.95
letter_classifier_PO <- ksvm(letter ~ ., data = letters_train, kernel ="polydot")
## Setting default kernel parameters
letter_predictions_po <- predict(letter_classifier_PO, letters_test)
(po <- table(letter_predictions_po,letters_test$letter))
##
## letter_predictions_po A B C D E F G H I J K L M N O P Q R S
## A 73 0 0 0 0 0 0 0 0 1 0 0 0 0 3 0 4 0 0
## B 0 61 0 3 2 0 1 1 0 0 1 1 0 0 0 2 0 1 3
## C 0 0 64 0 2 0 4 2 1 0 1 2 0 0 1 0 0 0 0
## D 2 1 0 67 0 0 1 3 3 2 1 2 0 3 4 2 1 2 0
## E 0 0 1 0 64 1 1 0 0 0 2 2 0 0 0 0 2 0 6
## F 0 0 0 0 0 70 1 1 4 0 0 0 0 0 0 5 1 0 2
## G 1 1 2 1 3 2 68 1 0 0 0 1 0 0 0 0 4 1 3
## H 0 0 0 1 0 1 0 46 0 2 3 1 1 1 9 0 0 5 0
## I 0 0 0 0 0 0 0 0 65 2 0 0 0 0 0 0 0 0 2
## J 0 1 0 0 0 1 0 0 3 62 0 0 0 0 1 0 0 0 1
## K 0 1 4 0 0 0 0 5 0 0 56 0 0 2 0 0 0 4 0
## L 0 0 0 0 1 0 0 1 0 0 0 63 0 0 0 0 0 0 0
## M 0 0 1 0 0 0 1 0 0 0 0 0 70 2 0 0 0 0 0
## N 0 0 0 0 0 0 0 0 0 0 0 0 0 77 0 0 0 1 0
## O 0 0 1 1 0 0 0 1 0 1 0 0 0 0 49 1 2 0 0
## P 0 0 0 0 0 3 0 0 0 0 0 0 0 0 2 69 0 0 0
## Q 0 0 0 0 0 0 3 1 0 0 0 2 0 0 2 1 52 0 1
## R 0 4 0 0 1 0 0 3 0 0 3 0 0 0 1 0 0 64 0
## S 0 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 6 0 47
## T 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1
## U 0 0 2 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## V 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## W 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## X 0 1 0 0 1 0 0 1 0 0 1 4 0 0 0 0 0 1 0
## Y 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0
## Z 1 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 5
##
## letter_predictions_po T U V W X Y Z
## A 1 2 0 1 0 0 0
## B 0 0 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 0 0 1 0
## E 0 0 0 0 1 0 0
## F 0 0 1 0 0 2 0
## G 2 0 0 0 0 0 0
## H 3 0 2 0 0 1 0
## I 0 0 0 0 2 1 0
## J 0 0 0 0 1 0 4
## K 1 2 0 0 4 0 0
## L 0 0 0 0 0 0 0
## M 0 1 0 6 0 0 0
## N 0 1 0 2 0 0 0
## O 0 1 0 0 0 0 0
## P 0 0 0 0 0 1 0
## Q 0 0 0 0 0 0 0
## R 1 0 1 0 0 0 0
## S 1 0 0 0 1 0 6
## T 83 1 0 0 0 2 2
## U 0 83 0 0 0 0 0
## V 0 0 64 1 0 1 0
## W 0 0 3 59 0 0 0
## X 0 0 0 0 76 1 0
## Y 1 0 0 0 1 58 0
## Z 1 0 0 0 0 0 70
(Accuracy <- sum(diag(po))/sum(po)*100)
## [1] 84
# for Polynomial the accuracy is 84% almost the same
letter_classifier_RBF <- ksvm(letter ~ ., data = letters_train, kernel ="rbfdot")
letter_predictions_RBF <- predict(letter_classifier_RBF, letters_test)
(RBF <- table(letter_predictions_RBF,letters_test$letter))
##
## letter_predictions_RBF A B C D E F G H I J K L M N O P Q R S
## A 75 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0
## B 0 67 0 2 0 1 0 0 0 0 0 1 0 1 0 2 1 1 1
## C 0 0 72 0 3 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## D 1 1 0 71 0 0 1 2 2 2 1 0 0 0 0 2 1 1 0
## E 0 0 0 0 70 2 0 0 0 1 0 2 0 0 0 0 0 0 0
## F 0 0 0 0 0 76 0 0 3 0 0 0 0 0 0 6 0 0 1
## G 0 0 1 0 3 0 76 1 0 0 0 0 0 0 0 0 0 0 0
## H 0 0 0 1 0 0 1 58 0 1 0 1 1 0 0 0 1 1 0
## I 0 0 0 0 0 0 0 0 69 1 0 0 0 0 0 0 0 0 0
## J 0 0 0 0 0 0 0 0 2 66 0 0 0 0 0 0 0 0 0
## K 0 0 0 0 0 0 0 3 0 0 62 0 0 1 0 0 0 2 0
## L 0 0 0 0 0 0 1 0 0 0 0 69 0 0 0 0 0 0 0
## M 0 0 0 0 0 0 1 0 0 0 0 0 71 1 0 0 0 0 0
## N 0 0 0 0 0 1 0 0 0 0 0 0 0 78 0 0 0 0 0
## O 0 0 1 0 0 0 0 0 0 1 0 0 0 2 67 1 2 0 0
## P 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 72 0 0 0
## Q 0 0 0 0 0 0 0 1 0 0 0 0 0 0 3 1 65 0 0
## R 0 1 0 0 0 0 1 1 0 0 4 0 0 2 1 0 0 74 0
## S 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 68
## T 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## U 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## V 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## W 0 0 1 0 0 0 0 0 0 0 0 0 1 0 2 0 0 0 0
## X 0 1 0 0 0 0 0 0 0 0 2 4 0 0 0 0 0 0 0
## Y 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Z 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1
##
## letter_predictions_RBF T U V W X Y Z
## A 1 0 0 0 0 0 0
## B 0 0 1 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 1 0 0 0 0 0
## E 0 0 0 0 0 0 0
## F 0 0 1 0 0 0 0
## G 0 0 0 0 0 0 0
## H 3 0 1 0 0 0 0
## I 0 0 0 0 2 0 0
## J 0 0 0 0 0 0 1
## K 0 0 0 0 0 0 0
## L 0 0 0 0 0 0 0
## M 0 0 0 2 0 0 0
## N 0 0 0 1 0 0 0
## O 0 0 0 0 0 0 0
## P 0 0 0 0 0 0 0
## Q 0 0 0 0 0 0 0
## R 1 0 0 0 0 0 0
## S 0 0 0 0 0 0 0
## T 88 0 0 0 0 1 0
## U 0 89 0 0 0 0 0
## V 0 0 68 0 0 1 0
## W 0 1 0 66 0 0 0
## X 0 0 0 0 84 1 0
## Y 1 0 0 0 0 65 0
## Z 0 0 0 0 0 0 81
# for rbf the accuracy is 93.35% which is much better
##Now apply the Naïve Bayes classifier and SVM that you saw in Parts 1 through 3 on News popularity data set from lab 1. Q5- Do you see any improvement compared to last three techniques? Please completely explain your results and analysis.
news <- read.csv("OnlineNewsPopularity.csv")
newsShort <- data.frame(news$n_tokens_title, news$n_tokens_content, news$n_unique_tokens, news$n_non_stop_words, news$num_hrefs, news$num_imgs, news$num_videos, news$average_token_length, news$num_keywords, news$kw_max_max, news$global_sentiment_polarity, news$avg_positive_polarity, news$title_subjectivity, news$title_sentiment_polarity, news$abs_title_subjectivity, news$abs_title_sentiment_polarity, news$shares)
colnames(newsShort) <- c("n_tokens_title", "n_tokens_content", "n_unique_tokens", "n_non_stop_words", "num_hrefs", "num_imgs", "num_videos", "average_token_length", "num_keywords", "kw_max_max", "global_sentiment_polarity", "avg_positive_polarity", "title_subjectivity", "title_sentiment_polarity", "abs_title_subjectivity", "abs_title_sentiment_polarity", "shares")
newsShort$popular = rep('na', nrow(newsShort))
for(i in 1:39644) {
if(newsShort$shares[i] >= 1400) {
newsShort$popular[i] = "yes"}
else {newsShort$popular[i] = "no"}
}
newsShort$shares = newsShort$popular
newsShort$shares <- as.factor(newsShort$shares)
news_rand <- newsShort[order(runif(10000)), ]
set.seed(12345)
#Split the data into training and test datasets
news_train <- news_rand[1:9000, ]
news_test <- news_rand[9001:10000, ]
nb_model <- naive_bayes(shares ~ ., data=news_train)
## Warning: naive_bayes(): Feature popular - zero probabilities are present.
## Consider Laplace smoothing.
nb_model
##
## ================================== Naive Bayes ==================================
##
## Call:
## naive_bayes.formula(formula = shares ~ ., data = news_train)
##
## ---------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ---------------------------------------------------------------------------------
##
## A priori probabilities:
##
## no yes
## 0.4287778 0.5712222
##
## ---------------------------------------------------------------------------------
##
## Tables:
##
## ---------------------------------------------------------------------------------
## ::: n_tokens_title (Gaussian)
## ---------------------------------------------------------------------------------
##
## n_tokens_title no yes
## mean 9.840891 9.697919
## sd 1.940023 1.988384
##
## ---------------------------------------------------------------------------------
## ::: n_tokens_content (Gaussian)
## ---------------------------------------------------------------------------------
##
## n_tokens_content no yes
## mean 453.4509 513.4272
## sd 351.7631 450.8013
##
## ---------------------------------------------------------------------------------
## ::: n_unique_tokens (Gaussian)
## ---------------------------------------------------------------------------------
##
## n_unique_tokens no yes
## mean 0.5707594 0.5538531
## sd 0.1125435 0.1235955
##
## ---------------------------------------------------------------------------------
## ::: n_non_stop_words (Gaussian)
## ---------------------------------------------------------------------------------
##
## n_non_stop_words no yes
## mean 0.99429904 0.99066329
## sd 0.07529892 0.09618384
##
## ---------------------------------------------------------------------------------
## ::: num_hrefs (Gaussian)
## ---------------------------------------------------------------------------------
##
## num_hrefs no yes
## mean 9.144079 10.620307
## sd 8.613435 11.641156
##
## ---------------------------------------------------------------------------------
##
## # ... and 12 more tables
##
## ---------------------------------------------------------------------------------
#Evaluate the model
news_Pred <-predict(nb_model,newdata = news_test)
## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.
news_Pred
## [1] no no no no no yes no no yes no yes no no no yes no yes yes
## [19] yes yes no no no no no yes yes yes no no yes yes no yes no yes
## [37] no yes no no no no no no yes no yes yes yes yes yes yes no no
## [55] yes yes no no yes yes yes no no yes no yes yes no yes no no no
## [73] yes yes no no no yes yes no yes no yes yes yes yes no no no yes
## [91] no yes no yes no no yes yes no no yes yes no no yes no yes no
## [109] no yes yes yes yes yes yes no no no yes no yes no yes no yes yes
## [127] no yes no no no yes yes yes yes no yes yes no no no yes yes no
## [145] no yes yes no yes yes yes yes no yes no yes yes yes yes yes yes yes
## [163] yes yes no yes no yes yes yes yes yes yes yes no yes no yes yes yes
## [181] yes yes no yes no no yes yes yes yes no yes yes no no no no yes
## [199] no yes yes yes no yes yes yes no yes no yes yes no no no yes yes
## [217] no no yes yes yes no yes no no no yes yes no no yes yes no no
## [235] yes no yes yes yes yes yes no yes yes yes no yes yes yes yes no yes
## [253] yes no no yes yes no yes yes no yes yes yes no yes no no no no
## [271] yes no no yes yes yes no yes yes yes yes yes yes yes no yes yes no
## [289] yes no yes no no yes no yes no yes yes no no no yes yes yes yes
## [307] no no no yes yes no yes no yes yes yes no yes no no no yes yes
## [325] no yes no no yes no yes no no no yes yes no yes no yes yes no
## [343] yes yes no no no no no no yes yes yes no no yes yes no yes yes
## [361] no no no yes yes no no no no no yes yes yes yes yes yes no no
## [379] yes no yes yes yes yes no yes no yes yes yes yes no yes no no yes
## [397] no yes yes no no yes no no yes yes no yes yes no no no no yes
## [415] yes no yes yes no no no yes no yes no yes yes yes yes yes yes yes
## [433] no no yes yes yes no no yes no yes yes no yes no no yes yes no
## [451] no no yes yes no yes yes yes no yes yes yes no no yes yes yes no
## [469] yes yes yes no no no yes no no yes yes no yes yes yes yes yes yes
## [487] no yes no yes yes no no no yes yes yes yes yes yes no no no yes
## [505] yes yes yes yes yes yes yes no yes no no no yes yes yes yes no no
## [523] no no no yes no no no no yes yes no yes no no yes no no no
## [541] yes yes yes yes yes yes yes no yes yes yes no no no yes yes no no
## [559] no yes no no no no no yes yes no yes yes yes yes yes yes yes yes
## [577] yes yes yes yes no yes yes no yes no yes yes yes yes yes yes no yes
## [595] no yes no no no yes yes no no no yes yes no no yes no no yes
## [613] yes yes no no no yes yes yes no no yes yes yes yes yes yes no yes
## [631] no yes no yes yes yes yes yes no no yes yes yes yes yes no yes no
## [649] yes yes no no yes yes yes no no no yes no no yes no yes no yes
## [667] yes no yes no no no yes yes yes no no yes yes no no no yes yes
## [685] yes yes no no yes yes yes no yes no yes yes no yes yes yes no yes
## [703] no no no yes no yes yes yes yes yes no yes no no yes no no yes
## [721] yes no no no yes yes yes no no yes yes no yes no yes no no yes
## [739] yes yes no yes yes no yes yes no yes no no no yes no no yes yes
## [757] yes no no yes yes yes no yes yes no no yes yes yes yes yes yes yes
## [775] no yes yes yes no yes yes yes yes yes yes yes no yes no no no no
## [793] yes yes yes yes no yes yes no no no yes yes yes yes yes yes yes no
## [811] no yes yes yes no yes yes yes no no yes yes no yes yes yes no yes
## [829] no no yes yes yes no yes no no yes no yes yes yes yes yes no yes
## [847] no yes yes no no no no yes yes yes yes no no yes yes no yes yes
## [865] yes yes yes yes no no yes no yes yes no no yes no yes yes yes yes
## [883] no yes no yes yes yes yes no no yes yes yes no no yes no yes yes
## [901] yes no yes no yes no yes yes no yes no yes no yes yes no no yes
## [919] yes yes yes no yes yes yes no no yes yes no yes yes no no no yes
## [937] no yes no yes yes no yes yes no yes no no yes no no yes yes yes
## [955] yes no no yes yes no yes no no yes no yes yes no yes yes no no
## [973] yes yes yes no yes no yes no no yes yes yes yes no yes yes yes no
## [991] no yes yes yes yes yes yes yes no no
## Levels: no yes
(new_model <- table(news_Pred, news_test$shares))
##
## news_Pred no yes
## no 424 0
## yes 9 567
(Accuracy <-sum(diag(new_model))/sum(new_model)*100)
## [1] 99.1
#The accuracy is much better the the last three techniques which is very close to 100%. It seems like over fitting and it may have high correlation between independent variables. Or presence of too much independent variables.