ANLY530_Lab2

library (C50)

## Warning: package 'C50' was built under R version 3.6.3

library (gmodels)

## Warning: package 'gmodels' was built under R version 3.6.3

library (rpart.plot)

## Warning: package 'rpart.plot' was built under R version 3.6.3

## Loading required package: rpart

library (ca)

## Warning: package 'ca' was built under R version 3.6.3

library (class)

## Warning: package 'class' was built under R version 3.6.3

library (ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.3

library (MASS)

## Warning: package 'MASS' was built under R version 3.6.3

library (colorspace)
library (bnlearn)

## Warning: package 'bnlearn' was built under R version 3.6.3

## 
## Attaching package: 'bnlearn'

## The following object is masked from 'package:stats':
## 
##     sigma

library (tm)

## Warning: package 'tm' was built under R version 3.6.3

## Loading required package: NLP

## 
## Attaching package: 'NLP'

## The following object is masked from 'package:ggplot2':
## 
##     annotate

library (e1071)

## 
## Attaching package: 'e1071'

## The following object is masked from 'package:bnlearn':
## 
##     impute

library (kernlab)

## 
## Attaching package: 'kernlab'

## The following object is masked from 'package:ggplot2':
## 
##     alpha

library (SnowballC)

## Warning: package 'SnowballC' was built under R version 3.6.3

library (klaR)

## Warning: package 'klaR' was built under R version 3.6.3

library (corrplot)

## corrplot 0.84 loaded

library (caret)

## Warning: package 'caret' was built under R version 3.6.3

## Loading required package: lattice

## Warning: package 'lattice' was built under R version 3.6.3

library (leaps)

## Warning: package 'leaps' was built under R version 3.6.3

library (dplyr)

## Warning: package 'dplyr' was built under R version 3.6.3

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library (colorspace)
library (naivebayes)

## Warning: package 'naivebayes' was built under R version 3.6.3

## naivebayes 0.9.7 loaded

library (psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:kernlab':
## 
##     alpha

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

#Read Data/ Data Collection

creditData <- read.csv("C:/Users/punthakur/Documents/HU - ANALYTICS/530-Machine Learning/creditData.csv")

str(creditData)

## 'data.frame':    1000 obs. of  21 variables:
##  $ Creditability                    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Account.Balance                  : int  1 1 2 1 1 1 1 1 4 2 ...
##  $ Duration.of.Credit..month.       : int  18 9 12 12 12 10 8 6 18 24 ...
##  $ Payment.Status.of.Previous.Credit: int  4 4 2 4 4 4 4 4 4 2 ...
##  $ Purpose                          : int  2 0 9 0 0 0 0 0 3 3 ...
##  $ Credit.Amount                    : int  1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
##  $ Value.Savings.Stocks             : int  1 1 2 1 1 1 1 1 1 3 ...
##  $ Length.of.current.employment     : int  2 3 4 3 3 2 4 2 1 1 ...
##  $ Instalment.per.cent              : int  4 2 2 3 4 1 1 2 4 1 ...
##  $ Sex...Marital.Status             : int  2 3 2 3 3 3 3 3 2 2 ...
##  $ Guarantors                       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Duration.in.Current.address      : int  4 2 4 2 4 3 4 4 4 4 ...
##  $ Most.valuable.available.asset    : int  2 1 1 1 2 1 1 1 3 4 ...
##  $ Age..years.                      : int  21 36 23 39 38 48 39 40 65 23 ...
##  $ Concurrent.Credits               : int  3 3 3 3 1 3 3 3 3 3 ...
##  $ Type.of.apartment                : int  1 1 1 1 2 1 2 2 2 1 ...
##  $ No.of.Credits.at.this.Bank       : int  1 2 1 2 2 2 2 1 2 1 ...
##  $ Occupation                       : int  3 3 2 2 2 2 2 2 1 1 ...
##  $ No.of.dependents                 : int  1 2 1 2 1 2 1 2 1 1 ...
##  $ Telephone                        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Foreign.Worker                   : int  1 1 1 2 2 2 2 2 1 1 ...

#Exploring and Preparing the Data

creditData$Creditability <- as.factor (creditData$Creditability)
sum (is.na(creditData)) #No missing values

## [1] 0

#Split data into 75% training and 25% testing (Note: there are no missing vales)

set.seed (12345)
credit_rand <- creditData[order(runif(1000)), ]
credit_train <- credit_rand[1:750, ]
credit_test <- credit_rand[751:1000, ]

#Check percentages for both training and testing datasets

prop.table(table(credit_train$Creditability))

## 
##         0         1 
## 0.3146667 0.6853333

#Model Training
naive_model1 <- naive_bayes (Creditability ~. , data = credit_train)
naive_model1

## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = Creditability ~ ., data = credit_train)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
##         0         1 
## 0.3146667 0.6853333 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: Account.Balance (Gaussian) 
## --------------------------------------------------------------------------------- 
##                
## Account.Balance        0        1
##            mean 1.923729 2.793774
##            sd   1.036826 1.252008
## 
## --------------------------------------------------------------------------------- 
##  ::: Duration.of.Credit..month. (Gaussian) 
## --------------------------------------------------------------------------------- 
##                           
## Duration.of.Credit..month.        0        1
##                       mean 24.46610 19.20039
##                       sd   13.82208 11.13433
## 
## --------------------------------------------------------------------------------- 
##  ::: Payment.Status.of.Previous.Credit (Gaussian) 
## --------------------------------------------------------------------------------- 
##                                  
## Payment.Status.of.Previous.Credit        0        1
##                              mean 2.161017 2.665370
##                              sd   1.071649 1.045219
## 
## --------------------------------------------------------------------------------- 
##  ::: Purpose (Gaussian) 
## --------------------------------------------------------------------------------- 
##        
## Purpose        0        1
##    mean 2.927966 2.803502
##    sd   2.944722 2.633253
## 
## --------------------------------------------------------------------------------- 
##  ::: Credit.Amount (Gaussian) 
## --------------------------------------------------------------------------------- 
##              
## Credit.Amount        0        1
##          mean 3964.195 2984.177
##          sd   3597.093 2379.685
## 
## ---------------------------------------------------------------------------------
## 
## # ... and 15 more tables
## 
## ---------------------------------------------------------------------------------

#Model evaluation

(conf_mat <- table(predict(naive_model1, credit_test), credit_test$Creditability))

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

##    
##       0   1
##   0  42  35
##   1  22 151

(Accuracy <- sum(diag(conf_mat))/sum(conf_mat)*100)

## [1] 77.2

#The model has an accuracy of 77.2%. 

#The model accurately predicts no-risk 42 times and inaccurately predicts no-risk 35 times, while accurately predicts rosk 151 times and inaccurately predicts risk 22 times.

#PART2
#Improve the Performance


#Exploring and preparing the Data
credit_rand <- creditData[order(runif(1000)), ]

creditDataScaled <- scale(credit_rand[,2:ncol(credit_rand)], center=TRUE, scale = TRUE)

m <- cor(creditDataScaled)

highlycor <- findCorrelation(m, 0.30)

filteredData <- credit_rand[, -(highlycor[5]+1)]
filteredTraining <- filteredData[1:750, ]
filteredTest <- filteredData[751:1000, ]

#Training a model

nb_model <- naive_bayes(Creditability ~ ., data=filteredTraining)

#Evaluate the Model

filteredTestPred <- predict(nb_model, newdata = filteredTest)

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

table(filteredTestPred, filteredTest$Creditability)

##                 
## filteredTestPred   0   1
##                0  49  44
##                1  21 136

(conf_mat2 <- table(filteredTestPred, filteredTest$Creditability))

##                 
## filteredTestPred   0   1
##                0  49  44
##                1  21 136

(Accuracy <- sum(diag(conf_mat2))/sum(conf_mat2)*100)

## [1] 74

#The accuracy reduces to 74%

#Laboratory 2: Support Vector Machine, Part 3

letters <- read.csv("letterdata.csv")
str (letters)

## 'data.frame':    20000 obs. of  17 variables:
##  $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
##  $ xbox  : int  2 5 4 7 2 4 4 1 2 11 ...
##  $ ybox  : int  8 12 11 11 1 11 2 1 2 15 ...
##  $ width : int  3 3 6 6 3 5 5 3 4 13 ...
##  $ height: int  5 7 8 6 1 8 4 2 4 9 ...
##  $ onpix : int  1 2 6 3 1 3 4 1 2 7 ...
##  $ xbar  : int  8 10 10 5 8 8 8 8 10 13 ...
##  $ ybar  : int  13 5 6 9 6 8 7 2 6 2 ...
##  $ x2bar : int  0 5 2 4 6 6 6 2 2 6 ...
##  $ y2bar : int  6 4 6 6 6 9 6 2 6 2 ...
##  $ xybar : int  6 13 10 4 6 5 7 8 12 12 ...
##  $ x2ybar: int  10 3 3 4 5 6 6 2 4 1 ...
##  $ xy2bar: int  8 9 7 10 9 6 6 8 8 9 ...
##  $ xedge : int  0 2 3 6 1 0 2 1 1 8 ...
##  $ xedgey: int  8 8 7 10 7 8 8 6 6 1 ...
##  $ yedge : int  0 4 3 2 5 9 7 2 1 1 ...
##  $ yedgex: int  8 10 9 8 10 7 10 7 7 8 ...

#Preparing the Data
letters_train <- letters[1:18000, ]
letters_test <- letters[18001:20000, ]


#Training a model

letter_classifier <- ksvm(letter ~ ., data = letters_train, kernel = "vanilladot")

##  Setting default kernel parameters

letter_classifier

## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Linear (vanilla) kernel function. 
## 
## Number of Support Vectors : 7886 
## 
## Objective Function Value : -15.3458 -21.3403 -25.7672 -6.8685 -8.8812 -35.9555 -59.5883 -18.1975 -65.6075 -41.5654 -18.8559 -39.3558 -36.9961 -60.3052 -15.1694 -42.144 -35.0941 -19.4069 -15.8234 -38.6718 -33.3013 -8.5298 -12.4387 -38.2194 -14.3682 -9.5508 -165.7154 -53.2778 -79.2163 -134.5053 -184.4809 -58.9285 -46.3252 -81.004 -28.1341 -29.6955 -27.5983 -38.1764 -47.2889 -137.0497 -208.1396 -239.2616 -23.8945 -10.9655 -64.228 -12.2139 -55.7818 -10.8001 -21.2407 -11.1795 -121.5639 -33.2229 -267.3926 -81.0708 -9.4937 -4.6577 -161.5171 -86.7114 -20.9146 -16.8272 -86.6582 -16.7205 -30.3036 -20.0054 -26.2331 -29.9289 -56.1072 -11.6335 -5.2564 -14.8153 -4.983 -4.8171 -8.5044 -43.2267 -55.9 -214.755 -47.0748 -49.6539 -50.2278 -18.3767 -19.1813 -97.6132 -113.6502 -42.4112 -32.5859 -127.4807 -33.7418 -30.7568 -40.0953 -18.6792 -5.4826 -49.3916 -10.6142 -20.0286 -63.8287 -183.8297 -57.0671 -43.3721 -35.2783 -85.4451 -145.9585 -11.8002 -6.1194 -12.5323 -33.5245 -155.2248 -57.2602 -194.0785 -111.0155 -10.8207 -16.7926 -3.7766 -77.3561 -7.9004 -106.5759 -52.523 -107.0402 -78.0148 -74.4773 -24.8166 -13.2372 -7.8706 -27.2788 -13.2342 -280.2869 -32.7288 -25.9531 -149.5447 -153.8495 -10.0146 -40.8917 -6.7333 -65.2053 -72.818 -35.1252 -246.7046 -38.0738 -16.9126 -158.18 -184.0021 -50.8427 -28.7686 -164.5969 -97.8359 -386.1426 -160.3188 -181.8759 -38.3648 -37.2272 -60.116 -28.2074 -53.7383 -7.8729 -12.3159 -37.8942 -72.6434 -211.8342 -58.5023 -105.1605 -176.7259 -685.8994 -142.8147 -159.635 -366.9437 -37.6409 -73.1357 -175.1906 -131.2833 -41.1464 -77.8404 -57.8131 -8.6365 -251.3728 -14.0836 -36.5144 -2.2292 -6.1598 -16.8011 -26.5165 -67.19 -21.3366 -221.4815 -22.9219 -4.2616 -4.7901 -0.8263 -134.7538 -8.8843 -83.1109 -23.1019 -14.4251 -5.7337 -17.5244 -29.7925 -23.9243 -88.9084 -28.6719 -106.0564 -16.4981 -10.6486 -7.9315 -1.5742 -91.1706 -7.3819 -118.2628 -117.5543 -48.5606 -26.6093 -71.2968 -30.4913 -63.5712 -279.2921 -46.3025 -50.4912 -37.9431 -21.5243 -11.6202 -134.9023 -7.516 -5.8131 -10.1595 -13.6329 -27.0293 -25.7282 -151.8511 -39.0524 -105.4861 -34.2434 -15.7051 -10.2304 -3.6687 -98.2094 -7.4666 -15.2668 -75.1283 -116.5382 -16.6429 -14.9215 -55.1062 -3.0636 -8.4262 -93.6829 -38.1162 -123.1859 -4.9078 -9.1612 -1.3077 -102.9021 -23.1138 -8.5262 -57.2623 -3.4297 -20.9579 -78.2019 -50.3741 -62.3531 -6.4908 -21.9308 -2.3736 -84.3835 -126.3997 -114.8723 -26.4109 -21.5589 -61.6405 -34.9162 -66.3243 -25.1148 -6.7203 -4.6695 -65.3518 -39.7924 -67.3505 -36.2154 -10.9031 -62.2195 -14.9491 -24.3238 -65.0847 -4.9657 -64.2797 -278.2873 -14.6902 -13.9198 -18.2059 -9.8972 -78.2645 -17.454 -49.5929 -55.7786 -28.7673 -15.9476 -47.531 -17.4379 -71.0516 -5.6899 -6.2519 -97.5508 -3.8196 -7.0502 -1.1238 -147.6952 -28.2018 -414.2586 -32.3275 -35.1191 -4.9605 -90.2307 -151.3409 -90.0329 -27.9491 -42.4688 -12.5118 -26.4828 -2.0045 -62.195 -9.1662 -178.4616 -1.9406 -1.9871 -11.3982 -0.5214 -29.6136 -35.0449 -6.7569 
## Training error : 0.1335

#Evaluating Model Performance

letter_predictions <- predict(letter_classifier, letters_test)
table(letter_predictions, letters_test$letter)

##                   
## letter_predictions  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T
##                  A 73  0  0  0  0  0  0  0  0  1  0  0  0  0  3  0  4  0  0  1
##                  B  0 61  0  3  2  0  1  1  0  0  1  1  0  0  0  2  0  1  3  0
##                  C  0  0 64  0  2  0  4  2  1  0  1  2  0  0  1  0  0  0  0  0
##                  D  2  1  0 67  0  0  1  3  3  2  1  2  0  3  4  2  1  2  0  0
##                  E  0  0  1  0 64  1  1  0  0  0  2  2  0  0  0  0  2  0  6  0
##                  F  0  0  0  0  0 70  1  1  4  0  0  0  0  0  0  5  1  0  2  0
##                  G  1  1  2  1  3  2 68  1  0  0  0  1  0  0  0  0  4  1  3  2
##                  H  0  0  0  1  0  1  0 46  0  2  3  1  1  1  9  0  0  5  0  3
##                  I  0  0  0  0  0  0  0  0 65  3  0  0  0  0  0  0  0  0  2  0
##                  J  0  1  0  0  0  1  0  0  3 61  0  0  0  0  1  0  0  0  1  0
##                  K  0  1  4  0  0  0  0  5  0  0 56  0  0  2  0  0  0  4  0  1
##                  L  0  0  0  0  1  0  0  1  0  0  0 63  0  0  0  0  0  0  0  0
##                  M  0  0  1  0  0  0  1  0  0  0  0  0 70  2  0  0  0  0  0  0
##                  N  0  0  0  0  0  0  0  0  0  0  0  0  0 77  0  0  0  1  0  0
##                  O  0  0  1  1  0  0  0  1  0  1  0  0  0  0 49  1  2  0  0  0
##                  P  0  0  0  0  0  3  0  0  0  0  0  0  0  0  2 69  0  0  0  0
##                  Q  0  0  0  0  0  0  3  1  0  0  0  2  0  0  2  1 52  0  1  0
##                  R  0  4  0  0  1  0  0  3  0  0  3  0  0  0  1  0  0 64  0  1
##                  S  0  1  0  0  1  1  1  0  1  1  0  0  0  0  0  0  6  0 47  1
##                  T  0  0  0  0  1  1  0  0  0  0  1  0  0  0  0  0  0  0  1 83
##                  U  0  0  2  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##                  V  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0
##                  W  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##                  X  0  1  0  0  1  0  0  1  0  0  1  4  0  0  0  0  0  1  0  0
##                  Y  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  4  0  0  0  1
##                  Z  1  0  0  0  2  0  0  0  0  2  0  0  0  0  0  0  0  0  5  1
##                   
## letter_predictions  U  V  W  X  Y  Z
##                  A  2  0  1  0  0  0
##                  B  0  0  0  0  0  0
##                  C  0  0  0  0  0  0
##                  D  0  0  0  0  1  0
##                  E  0  0  0  1  0  0
##                  F  0  1  0  0  2  0
##                  G  0  0  0  0  0  0
##                  H  0  2  0  0  1  0
##                  I  0  0  0  2  1  0
##                  J  0  0  0  1  0  4
##                  K  2  0  0  4  0  0
##                  L  0  0  0  0  0  0
##                  M  1  0  6  0  0  0
##                  N  1  0  2  0  0  0
##                  O  1  0  0  0  0  0
##                  P  0  0  0  0  1  0
##                  Q  0  0  0  0  0  0
##                  R  0  1  0  0  0  0
##                  S  0  0  0  1  0  6
##                  T  1  0  0  0  2  2
##                  U 83  0  0  0  0  0
##                  V  0 64  1  0  1  0
##                  W  0  3 59  0  0  0
##                  X  0  0  0 76  1  0
##                  Y  0  0  0  1 58  0
##                  Z  0  0  0  0  0 70

agreement <- letter_predictions == letters_test$letter

table(agreement)

## agreement
## FALSE  TRUE 
##   321  1679

(conf_mat3 <- table(agreement, letter_predictions))

##          letter_predictions
## agreement  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W
##     FALSE 12 15 13 28 16 17 22 30  8 12 23  2 11  4  8  6 10 14 20  9  4  4  4
##     TRUE  73 61 64 67 64 70 68 46 65 61 56 63 70 77 49 69 52 64 47 83 83 64 59
##          letter_predictions
## agreement  X  Y  Z
##     FALSE 10  8 11
##     TRUE  76 58 70

(Accuracy <- sum(diag(conf_mat3))/sum(conf_mat3)*100)

## [1] 3.65

#Polynomial Kernel

letter_classifier <- ksvm(letter ~ ., data = letters_train, kernel = "polydot")

##  Setting default kernel parameters

letter_classifier

## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Polynomial kernel function. 
##  Hyperparameters : degree =  1  scale =  1  offset =  1 
## 
## Number of Support Vectors : 7887 
## 
## Objective Function Value : -15.3458 -21.3403 -25.7672 -6.8685 -8.8812 -35.9555 -59.5883 -18.1975 -65.6075 -41.5654 -18.8559 -39.3558 -36.9961 -60.3052 -15.1694 -42.144 -35.0941 -19.4069 -15.8234 -38.6718 -33.3013 -8.5298 -12.4387 -38.2194 -14.3682 -9.5508 -165.7154 -53.2778 -79.2163 -134.5052 -184.481 -58.9285 -46.3252 -81.004 -28.1341 -29.6955 -27.5983 -38.1764 -47.2889 -137.0497 -208.1397 -239.2617 -23.8945 -10.9655 -64.2279 -12.2139 -55.7818 -10.8001 -21.2407 -11.1795 -121.5639 -33.2229 -267.3926 -81.0708 -9.4937 -4.6577 -161.5171 -86.7114 -20.9146 -16.8272 -86.6582 -16.7205 -30.3036 -20.0054 -26.233 -29.9289 -56.1072 -11.6335 -5.2564 -14.8153 -4.983 -4.8171 -8.5044 -43.2267 -55.9 -214.755 -47.0748 -49.6539 -50.2278 -18.3767 -19.1813 -97.6132 -113.6502 -42.4112 -32.5859 -127.4807 -33.7418 -30.7568 -40.0953 -18.6792 -5.4826 -49.3916 -10.6142 -20.0286 -63.8287 -183.8297 -57.0671 -43.3721 -35.2782 -85.4452 -145.9585 -11.8002 -6.1194 -12.5323 -33.5245 -155.2248 -57.2602 -194.0785 -111.0155 -10.8207 -16.7926 -3.7766 -77.3561 -7.9004 -106.5759 -52.523 -107.0402 -78.0148 -74.4773 -24.8166 -13.2372 -7.8706 -27.2788 -13.2342 -280.2868 -32.7288 -25.9531 -149.5447 -153.8495 -10.0146 -40.8917 -6.7333 -65.2053 -72.8179 -35.1252 -246.7046 -38.0738 -16.9126 -158.18 -184.0021 -50.8427 -28.7687 -164.597 -97.8359 -386.1426 -160.3188 -181.8759 -38.3648 -37.2272 -60.116 -28.2074 -53.7383 -7.8729 -12.3159 -37.8942 -72.6434 -211.8342 -58.5022 -105.1605 -176.7259 -685.8994 -142.8147 -159.635 -366.9437 -37.641 -73.1357 -175.1906 -131.2833 -41.1464 -77.8404 -57.8131 -8.6365 -251.3728 -14.0836 -36.5144 -2.2292 -6.1598 -16.8011 -26.5165 -67.19 -21.3366 -221.4815 -22.9219 -4.2616 -4.7901 -0.8263 -134.7538 -8.8843 -83.1109 -23.1019 -14.4251 -5.7337 -17.5244 -29.7925 -23.9243 -88.9083 -28.6719 -106.0564 -16.4981 -10.6486 -7.9315 -1.5742 -91.1706 -7.3819 -118.2628 -117.5543 -48.5606 -26.6093 -71.2968 -30.4913 -63.5712 -279.292 -46.3024 -50.4912 -37.9431 -21.5243 -11.6202 -134.9025 -7.516 -5.8131 -10.1595 -13.6329 -27.0294 -25.7282 -151.851 -39.0524 -105.4861 -34.2434 -15.7051 -10.2304 -3.6687 -98.2094 -7.4666 -15.2668 -75.1283 -116.5383 -16.6429 -14.9215 -55.1062 -3.0636 -8.4262 -93.6829 -38.1162 -123.1859 -4.9078 -9.1612 -1.3077 -102.9021 -23.1138 -8.5262 -57.2623 -3.4297 -20.9579 -78.2019 -50.3741 -62.3531 -6.4908 -21.9308 -2.3736 -84.3835 -126.3997 -114.8725 -26.4109 -21.5589 -61.6405 -34.9162 -66.3243 -25.1148 -6.7203 -4.6695 -65.3518 -39.7924 -67.3505 -36.2154 -10.9031 -62.2195 -14.9491 -24.3238 -65.0847 -4.9657 -64.2797 -278.2874 -14.6902 -13.9198 -18.2059 -9.8972 -78.2645 -17.454 -49.5929 -55.7786 -28.7673 -15.9476 -47.531 -17.4379 -71.0516 -5.6899 -6.2519 -97.5508 -3.8196 -7.0502 -1.1238 -147.6952 -28.2018 -414.2587 -32.3275 -35.1191 -4.9605 -90.2308 -151.3409 -90.0329 -27.9491 -42.4688 -12.5118 -26.4828 -2.0045 -62.195 -9.1662 -178.4617 -1.9406 -1.9871 -11.3982 -0.5214 -29.6136 -35.0449 -6.7569 
## Training error : 0.1335

#Evaluating Model Performance

letter_predictions <- predict(letter_classifier, letters_test)
table(letter_predictions, letters_test$letter)

##                   
## letter_predictions  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T
##                  A 73  0  0  0  0  0  0  0  0  1  0  0  0  0  3  0  4  0  0  1
##                  B  0 61  0  3  2  0  1  1  0  0  1  1  0  0  0  2  0  1  3  0
##                  C  0  0 64  0  2  0  4  2  1  0  1  2  0  0  1  0  0  0  0  0
##                  D  2  1  0 67  0  0  1  3  3  2  1  2  0  3  4  2  1  2  0  0
##                  E  0  0  1  0 64  1  1  0  0  0  2  2  0  0  0  0  2  0  6  0
##                  F  0  0  0  0  0 70  1  1  4  0  0  0  0  0  0  5  1  0  2  0
##                  G  1  1  2  1  3  2 68  1  0  0  0  1  0  0  0  0  4  1  3  2
##                  H  0  0  0  1  0  1  0 46  0  2  3  1  1  1  9  0  0  5  0  3
##                  I  0  0  0  0  0  0  0  0 65  2  0  0  0  0  0  0  0  0  2  0
##                  J  0  1  0  0  0  1  0  0  3 62  0  0  0  0  1  0  0  0  1  0
##                  K  0  1  4  0  0  0  0  5  0  0 56  0  0  2  0  0  0  4  0  1
##                  L  0  0  0  0  1  0  0  1  0  0  0 63  0  0  0  0  0  0  0  0
##                  M  0  0  1  0  0  0  1  0  0  0  0  0 70  2  0  0  0  0  0  0
##                  N  0  0  0  0  0  0  0  0  0  0  0  0  0 77  0  0  0  1  0  0
##                  O  0  0  1  1  0  0  0  1  0  1  0  0  0  0 49  1  2  0  0  0
##                  P  0  0  0  0  0  3  0  0  0  0  0  0  0  0  2 69  0  0  0  0
##                  Q  0  0  0  0  0  0  3  1  0  0  0  2  0  0  2  1 52  0  1  0
##                  R  0  4  0  0  1  0  0  3  0  0  3  0  0  0  1  0  0 64  0  1
##                  S  0  1  0  0  1  1  1  0  1  1  0  0  0  0  0  0  6  0 47  1
##                  T  0  0  0  0  1  1  0  0  0  0  1  0  0  0  0  0  0  0  1 83
##                  U  0  0  2  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##                  V  0  0  0  0  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0
##                  W  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##                  X  0  1  0  0  1  0  0  1  0  0  1  4  0  0  0  0  0  1  0  0
##                  Y  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  4  0  0  0  1
##                  Z  1  0  0  0  2  0  0  0  0  2  0  0  0  0  0  0  0  0  5  1
##                   
## letter_predictions  U  V  W  X  Y  Z
##                  A  2  0  1  0  0  0
##                  B  0  0  0  0  0  0
##                  C  0  0  0  0  0  0
##                  D  0  0  0  0  1  0
##                  E  0  0  0  1  0  0
##                  F  0  1  0  0  2  0
##                  G  0  0  0  0  0  0
##                  H  0  2  0  0  1  0
##                  I  0  0  0  2  1  0
##                  J  0  0  0  1  0  4
##                  K  2  0  0  4  0  0
##                  L  0  0  0  0  0  0
##                  M  1  0  6  0  0  0
##                  N  1  0  2  0  0  0
##                  O  1  0  0  0  0  0
##                  P  0  0  0  0  1  0
##                  Q  0  0  0  0  0  0
##                  R  0  1  0  0  0  0
##                  S  0  0  0  1  0  6
##                  T  1  0  0  0  2  2
##                  U 83  0  0  0  0  0
##                  V  0 64  1  0  1  0
##                  W  0  3 59  0  0  0
##                  X  0  0  0 76  1  0
##                  Y  0  0  0  1 58  0
##                  Z  0  0  0  0  0 70

agreement <- letter_predictions == letters_test$letter

table(agreement)

## agreement
## FALSE  TRUE 
##   320  1680

(conf_mat4 <- table(agreement, letter_predictions))

##          letter_predictions
## agreement  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W
##     FALSE 12 15 13 28 16 17 22 30  7 12 23  2 11  4  8  6 10 14 20  9  4  4  4
##     TRUE  73 61 64 67 64 70 68 46 65 62 56 63 70 77 49 69 52 64 47 83 83 64 59
##          letter_predictions
## agreement  X  Y  Z
##     FALSE 10  8 11
##     TRUE  76 58 70

(Accuracy <- sum(diag(conf_mat4))/sum(conf_mat4)*100)

## [1] 3.65

#RBF

letter_classifier <- ksvm(letter ~ ., data = letters_train, kernel = "rbfdot")

letter_classifier

## Support Vector Machine object of class "ksvm" 
## 
## SV type: C-svc  (classification) 
##  parameter : cost C = 1 
## 
## Gaussian Radial Basis kernel function. 
##  Hyperparameter : sigma =  0.0472856751333276 
## 
## Number of Support Vectors : 9522 
## 
## Objective Function Value : -45.068 -35.4757 -61.1237 -27.7907 -36.4603 -49.0627 -72.4834 -41.0048 -71.1378 -54.2509 -48.7404 -62.9416 -69.9208 -51.8761 -37.2656 -50.2077 -61.3152 -46.7269 -42.1529 -68.0496 -51.0068 -39.5543 -43.3192 -59.4686 -43.0489 -37.0312 -175.8505 -113.2333 -101.3396 -124.3197 -194.513 -85.2816 -82.3514 -106.6628 -67.7708 -66.3184 -60.4829 -74.7244 -86.8999 -105.7321 -255.1937 -202.9954 -54.6302 -43.7191 -83.1566 -44.631 -107.619 -41.4851 -64.9836 -35.1415 -156.9712 -46.4411 -233.9847 -85.4517 -36.736 -27.5488 -125.9081 -88.3635 -39.9353 -42.9862 -94.0438 -47.8249 -60.866 -50.3466 -53.5631 -63.0599 -91.8204 -38.5397 -27.7093 -51.5103 -28.9676 -29.688 -42.867 -77.6766 -86.7789 -229.0298 -78.5526 -93.4866 -87.7459 -43.4439 -54.9169 -126.981 -149.1979 -79.7537 -70.8292 -159.5355 -96.3539 -60.4871 -66.4617 -43.2212 -29.2534 -104.1644 -39.3303 -64.4109 -85.6034 -188.9203 -109.4053 -76.4357 -57.4581 -151.2605 -127.2811 -40.6934 -26.7698 -47.4028 -65.7925 -101.7349 -86.9243 -187.9294 -105.0938 -39.5062 -44.9652 -25.2312 -155.1584 -32.3489 -153.1351 -73.9524 -109.154 -103.7196 -91.8203 -55.0493 -40.5303 -34.2454 -53.6705 -38.7576 -242.8607 -44.7822 -58.9175 -136.1429 -143.3246 -36.5523 -61.5641 -32.2199 -84.9382 -82.8876 -55.8831 -174.4871 -62.5113 -51.4646 -130.9532 -127.289 -71.9613 -56.4582 -165.7605 -98.0346 -190.3282 -141.7568 -116.0067 -71.3192 -73.5917 -80.7562 -61.8935 -96.6942 -37.0531 -45.0699 -65.3472 -88.8936 -225.0344 -82.6592 -97.25 -183.8182 -228.777 -113.7945 -132.2202 -253.7498 -93.8184 -85.7501 -141.955 -96.7489 -66.3552 -142.9304 -67.299 -45.4005 -185.417 -50.4301 -63.0946 -20.1681 -28.8991 -39.4485 -63.743 -63.1508 -52.1296 -145.7413 -51.3411 -24.8724 -23.796 -14.9319 -133.4394 -31.292 -86.5325 -49.6102 -50.8745 -28.0592 -42.8325 -55.6295 -55.7383 -60.8545 -59.7283 -119.1918 -48.1552 -36.8772 -27.1907 -17.067 -99.2173 -32.0584 -89.8497 -101.8716 -67.3186 -67.1192 -62.2483 -53.5223 -63.1039 -195.2706 -78.4711 -71.501 -79.0982 -49.8285 -43.1706 -190.3124 -31.9702 -40.3842 -34.6775 -39.6438 -51.0436 -53.649 -93.3179 -86.2953 -104.2327 -53.7924 -39.4504 -33.9345 -24.2052 -91.3393 -30.5823 -51.2496 -105.6786 -70.8294 -42.6057 -41.8111 -80.5221 -28.5596 -35.9553 -97.5454 -71.6715 -128.2217 -27.5813 -33.9253 -16.5173 -110.1321 -55.923 -44.5816 -98.0165 -31.6628 -43.7738 -99.6683 -70.8738 -79.2039 -42.764 -49.5717 -22.8451 -84.0768 -210.4489 -101.4114 -57.7494 -49.0711 -101.9206 -58.5467 -81.8676 -79.1256 -29.7675 -30.6112 -67.198 -71.941 -74.9827 -64.5243 -36.8208 -75.6632 -49.5469 -51.9236 -78.3021 -29.4058 -88.5447 -108.1962 -42.3913 -48.9789 -43.8053 -37.8094 -83.1124 -49.4488 -67.7982 -94.1202 -60.4357 -49.4828 -75.9437 -51.6446 -105.2105 -31.0395 -39.2052 -106.5495 -31.5897 -36.3352 -18.8946 -160.026 -49.5644 -239.91 -61.561 -72.3322 -29.669 -92.6541 -166.4571 -80.3586 -67.2158 -79.2135 -50.4271 -55.907 -21.08 -113.6714 -34.7217 -192.5426 -19.4235 -20.4897 -43.8845 -11.1356 -47.3078 -107.6987 -36.577 
## Training error : 0.049833

#Evaluating Model Performance

letter_predictions <- predict(letter_classifier, letters_test)
table(letter_predictions, letters_test$letter)

##                   
## letter_predictions  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T
##                  A 75  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0  1
##                  B  0 67  0  2  0  1  0  0  0  0  0  1  0  1  0  2  1  1  1  0
##                  C  0  0 72  0  3  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
##                  D  1  1  0 71  0  0  1  2  2  2  1  0  0  0  0  2  1  1  0  0
##                  E  0  0  0  0 70  2  0  0  0  1  0  2  0  0  0  0  0  0  0  0
##                  F  0  0  0  0  0 76  0  0  3  0  0  0  0  0  0  6  0  0  1  0
##                  G  0  0  1  0  3  0 76  1  0  0  0  0  0  0  0  0  0  0  0  0
##                  H  0  0  0  1  0  0  1 58  0  1  0  1  1  0  0  0  1  1  0  3
##                  I  0  0  0  0  0  0  0  0 69  1  0  0  0  0  0  0  0  0  0  0
##                  J  0  0  0  0  0  0  0  0  2 66  0  0  0  0  0  0  0  0  0  0
##                  K  0  0  0  0  0  0  0  3  0  0 62  0  0  1  0  0  0  2  0  0
##                  L  0  0  0  0  0  0  1  0  0  0  0 69  0  0  0  0  0  0  0  0
##                  M  0  0  0  0  0  0  1  0  0  0  0  0 71  1  0  0  0  0  0  0
##                  N  0  0  0  0  0  1  0  0  0  0  0  0  0 78  0  0  0  0  0  0
##                  O  0  0  1  0  0  0  0  0  0  1  0  0  0  2 67  1  2  0  0  0
##                  P  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 72  0  0  0  0
##                  Q  0  0  0  0  0  0  0  1  0  0  0  0  0  0  3  1 65  0  0  0
##                  R  0  1  0  0  0  0  1  1  0  0  4  0  0  2  1  0  0 74  0  1
##                  S  0  1  0  0  0  0  0  0  1  1  0  0  0  0  0  0  0  0 68  0
##                  T  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 88
##                  U  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##                  V  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##                  W  0  0  1  0  0  0  0  0  0  0  0  0  1  0  2  0  0  0  0  0
##                  X  0  1  0  0  0  0  0  0  0  0  2  4  0  0  0  0  0  0  0  0
##                  Y  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1
##                  Z  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0
##                   
## letter_predictions  U  V  W  X  Y  Z
##                  A  0  0  0  0  0  0
##                  B  0  1  0  0  0  0
##                  C  0  0  0  0  0  0
##                  D  1  0  0  0  0  0
##                  E  0  0  0  0  0  0
##                  F  0  1  0  0  0  0
##                  G  0  0  0  0  0  0
##                  H  0  1  0  0  0  0
##                  I  0  0  0  2  0  0
##                  J  0  0  0  0  0  1
##                  K  0  0  0  0  0  0
##                  L  0  0  0  0  0  0
##                  M  0  0  2  0  0  0
##                  N  0  0  1  0  0  0
##                  O  0  0  0  0  0  0
##                  P  0  0  0  0  0  0
##                  Q  0  0  0  0  0  0
##                  R  0  0  0  0  0  0
##                  S  0  0  0  0  0  0
##                  T  0  0  0  0  1  0
##                  U 89  0  0  0  0  0
##                  V  0 68  0  0  1  0
##                  W  1  0 66  0  0  0
##                  X  0  0  0 84  1  0
##                  Y  0  0  0  0 65  0
##                  Z  0  0  0  0  0 81

agreement <- letter_predictions == letters_test$letter

table(agreement)

## agreement
## FALSE  TRUE 
##   133  1867

(conf_mat3 <- table(agreement, letter_predictions))

##          letter_predictions
## agreement  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W
##     FALSE  3 11  4 15  5 11  5 11  3  3  6  1  4  2  7  0  5 11  3  1  1  1  5
##     TRUE  75 67 72 71 70 76 76 58 69 66 62 69 71 78 67 72 65 74 68 88 89 68 66
##          letter_predictions
## agreement  X  Y  Z
##     FALSE  8  4  3
##     TRUE  84 65 81

(Accuracy <- sum(diag(conf_mat3))/sum(conf_mat3)*100)

## [1] 3.5

#Online News Popularity

news <- read.csv("OnlineNewsPopularity_for_R.csv")

str (news)

## 'data.frame':    39644 obs. of  61 variables:
##  $ url                          : Factor w/ 39644 levels "http://mashable.com/2013/01/07/amazon-instant-video-browser/",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ timedelta                    : num  731 731 731 731 731 731 731 731 731 731 ...
##  $ n_tokens_title               : num  12 9 9 9 13 10 8 12 11 10 ...
##  $ n_tokens_content             : num  219 255 211 531 1072 ...
##  $ n_unique_tokens              : num  0.664 0.605 0.575 0.504 0.416 ...
##  $ n_non_stop_words             : num  1 1 1 1 1 ...
##  $ n_non_stop_unique_tokens     : num  0.815 0.792 0.664 0.666 0.541 ...
##  $ num_hrefs                    : num  4 3 3 9 19 2 21 20 2 4 ...
##  $ num_self_hrefs               : num  2 1 1 0 19 2 20 20 0 1 ...
##  $ num_imgs                     : num  1 1 1 1 20 0 20 20 0 1 ...
##  $ num_videos                   : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ average_token_length         : num  4.68 4.91 4.39 4.4 4.68 ...
##  $ num_keywords                 : num  5 4 6 7 7 9 10 9 7 5 ...
##  $ data_channel_is_lifestyle    : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ data_channel_is_entertainment: num  1 0 0 1 0 0 0 0 0 0 ...
##  $ data_channel_is_bus          : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ data_channel_is_socmed       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ data_channel_is_tech         : num  0 0 0 0 1 1 0 1 1 0 ...
##  $ data_channel_is_world        : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ kw_min_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_min                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_max                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_min_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_max_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ kw_avg_avg                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ self_reference_min_shares    : num  496 0 918 0 545 8500 545 545 0 0 ...
##  $ self_reference_max_shares    : num  496 0 918 0 16000 8500 16000 16000 0 0 ...
##  $ self_reference_avg_sharess   : num  496 0 918 0 3151 ...
##  $ weekday_is_monday            : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ weekday_is_tuesday           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_wednesday         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_thursday          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_friday            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_saturday          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weekday_is_sunday            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ is_weekend                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ LDA_00                       : num  0.5003 0.7998 0.2178 0.0286 0.0286 ...
##  $ LDA_01                       : num  0.3783 0.05 0.0333 0.4193 0.0288 ...
##  $ LDA_02                       : num  0.04 0.0501 0.0334 0.4947 0.0286 ...
##  $ LDA_03                       : num  0.0413 0.0501 0.0333 0.0289 0.0286 ...
##  $ LDA_04                       : num  0.0401 0.05 0.6822 0.0286 0.8854 ...
##  $ global_subjectivity          : num  0.522 0.341 0.702 0.43 0.514 ...
##  $ global_sentiment_polarity    : num  0.0926 0.1489 0.3233 0.1007 0.281 ...
##  $ global_rate_positive_words   : num  0.0457 0.0431 0.0569 0.0414 0.0746 ...
##  $ global_rate_negative_words   : num  0.0137 0.01569 0.00948 0.02072 0.01213 ...
##  $ rate_positive_words          : num  0.769 0.733 0.857 0.667 0.86 ...
##  $ rate_negative_words          : num  0.231 0.267 0.143 0.333 0.14 ...
##  $ avg_positive_polarity        : num  0.379 0.287 0.496 0.386 0.411 ...
##  $ min_positive_polarity        : num  0.1 0.0333 0.1 0.1364 0.0333 ...
##  $ max_positive_polarity        : num  0.7 0.7 1 0.8 1 0.6 1 1 0.8 0.5 ...
##  $ avg_negative_polarity        : num  -0.35 -0.119 -0.467 -0.37 -0.22 ...
##  $ min_negative_polarity        : num  -0.6 -0.125 -0.8 -0.6 -0.5 -0.4 -0.5 -0.5 -0.125 -0.5 ...
##  $ max_negative_polarity        : num  -0.2 -0.1 -0.133 -0.167 -0.05 ...
##  $ title_subjectivity           : num  0.5 0 0 0 0.455 ...
##  $ title_sentiment_polarity     : num  -0.188 0 0 0 0.136 ...
##  $ abs_title_subjectivity       : num  0 0.5 0.5 0.5 0.0455 ...
##  $ abs_title_sentiment_polarity : num  0.188 0 0 0 0.136 ...
##  $ shares                       : int  593 711 1500 1200 505 855 556 891 3600 710 ...

#Data Preparation

newsShort <- data.frame(news$n_tokens_title, news$n_tokens_content, news$n_unique_tokens, news$n_non_stop_words, news$num_hrefs, news$num_imgs, news$num_videos, news$average_token_length, news$num_keywords, news$kw_max_max, news$global_sentiment_polarity, news$avg_positive_polarity, news$title_subjectivity, news$title_sentiment_polarity, news$abs_title_subjectivity, news$abs_title_sentiment_polarity, news$shares)



colnames(newsShort) <- c("n_tokens_title", "n_tokens_content", "n_unique_tokens", "n_non_stop_words", "num_hrefs", "num_imgs", "num_videos", "average_token_length", "num_keywords", "kw_max_max", "global_sentiment_polarity", "avg_positive_polarity", "title_subjectivity", "title_sentiment_polarity", "abs_title_subjectivity", "abs_title_sentiment_polarity", "shares")


newsShort$popular = rep('na', nrow(newsShort))
for(i in 1:39644) {
     if(newsShort$shares[i] >= 1400) {
         newsShort$popular[i] = "yes"} 
     else {newsShort$popular[i] = "no"}
}
newsShort$shares = newsShort$popular

newsShort$shares <- as.factor(newsShort$shares)

#Data Prep


news_rand <- newsShort[order(runif(10000)), ]
set.seed(12345)

#Split the data into training and test datasets
news_train <- news_rand[1:9000, ]
news_test <- news_rand[9001:10000, ]

#Model Design

nb_model <- naive_bayes(shares~., data = news_train)

## Warning: naive_bayes(): Feature popular - zero probabilities are present.
## Consider Laplace smoothing.

nb_model

## 
## ================================== Naive Bayes ================================== 
##  
##  Call: 
## naive_bayes.formula(formula = shares ~ ., data = news_train)
## 
## --------------------------------------------------------------------------------- 
##  
## Laplace smoothing: 0
## 
## --------------------------------------------------------------------------------- 
##  
##  A priori probabilities: 
## 
##        no       yes 
## 0.4248889 0.5751111 
## 
## --------------------------------------------------------------------------------- 
##  
##  Tables: 
## 
## --------------------------------------------------------------------------------- 
##  ::: n_tokens_title (Gaussian) 
## --------------------------------------------------------------------------------- 
##               
## n_tokens_title       no      yes
##           mean 9.836820 9.719668
##           sd   1.945381 1.985466
## 
## --------------------------------------------------------------------------------- 
##  ::: n_tokens_content (Gaussian) 
## --------------------------------------------------------------------------------- 
##                 
## n_tokens_content       no      yes
##             mean 455.5978 506.6084
##             sd   355.7531 439.7857
## 
## --------------------------------------------------------------------------------- 
##  ::: n_unique_tokens (Gaussian) 
## --------------------------------------------------------------------------------- 
##                
## n_unique_tokens        no       yes
##            mean 0.5693068 0.5552458
##            sd   0.1133274 0.1241536
## 
## --------------------------------------------------------------------------------- 
##  ::: n_non_stop_words (Gaussian) 
## --------------------------------------------------------------------------------- 
##                 
## n_non_stop_words         no        yes
##             mean 0.99372384 0.99053322
##             sd   0.07898349 0.09684504
## 
## --------------------------------------------------------------------------------- 
##  ::: num_hrefs (Gaussian) 
## --------------------------------------------------------------------------------- 
##          
## num_hrefs        no       yes
##      mean  9.131015 10.495363
##      sd    8.663762 11.341514
## 
## ---------------------------------------------------------------------------------
## 
## # ... and 12 more tables
## 
## ---------------------------------------------------------------------------------

#Evaluate the Model

news_Pred <- predict(nb_model, newdata = news_test)

## Warning: predict.naive_bayes(): more features in the newdata are provided as
## there are probability tables in the object. Calculation is performed based on
## features to be found in the tables.

(conf_nat <- table(news_Pred, news_test$shares))

##          
## news_Pred  no yes
##       no  466   0
##       yes   2 532

(Accuracy <- sum(diag(conf_nat))/sum(conf_nat)*100)

## [1] 99.8

ANLY530_Lab2

Punit Thakur

9/9/2020