Read Data

abalone <- read.csv("abalone.csv", header = TRUE, na.strings = c("", "NA"))
suppressWarnings(suppressMessages(library(dplyr)))
# create an age column: 1-13 rings = "young", 14-30 rings = "old"
train1 <- abalone %>%
  mutate(age = case_when(
    rings %in% 1:13 ~ "young",
    rings %in% 14:30 ~ "old"
  ))
# convert age into a factor
train1$age <- as.factor(train1$age)
set.seed(100)
# shuffle the rows so the train/test split is random
train1 <- train1[sample(nrow(train1)), ]
# Take a sample
# trainset = 60% (2506 rows), testset = 40% (1671 rows); the ratio is arbitrary
trainset <- train1[1:2506, ]
testset <- train1[2507:4177, ]
# remove rings, since the age factor above was derived from it
trainset1 <- trainset[-9]
str(trainset1)
'data.frame':   2506 obs. of  9 variables:
 $ sex     : Factor w/ 3 levels "F","I","M": 2 2 1 2 1 3 3 2 3 2 ...
 $ length  : num  0.5 0.42 0.55 0.295 0.645 0.505 0.74 0.37 0.51 0.295 ...
 $ diameter: num  0.375 0.3 0.415 0.225 0.51 0.385 0.535 0.27 0.405 0.225 ...
 $ height  : num  0.125 0.105 0.135 0.08 0.18 0.115 0.185 0.095 0.13 0.09 ...
 $ weight  : num  0.57 0.316 0.775 0.124 1.619 ...
 $ shucked : num  0.259 0.1255 0.302 0.0485 0.7815 ...
 $ viscera : num  0.124 0.07 0.179 0.032 0.322 ...
 $ shell   : num  0.157 0.103 0.26 0.04 0.468 ...
 $ age     : Factor w/ 2 levels "old","young": 2 2 1 2 2 2 2 2 2 2 ...
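As an aside, the 60/40 cut point used above does not have to be hardcoded. A minimal sketch of deriving it from the data, assuming the same shuffled train1 (n_train is an illustrative helper variable):

# derive the split index from a proportion instead of hardcoding 2506
n_train <- floor(0.6 * nrow(train1))            # floor(0.6 * 4177) = 2506
trainset <- train1[1:n_train, ]
testset <- train1[(n_train + 1):nrow(train1), ]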
suppressWarnings(suppressMessages(library(naivebayes)))
bayes2 <- naive_bayes(age ~ ., data = trainset1)
bayes2
===================== Naive Bayes ===================== 
Call: 
naive_bayes.formula(formula = age ~ ., data = trainset1)

A priori probabilities: 

      old     young 
0.1213089 0.8786911 

Tables: 
   
sex       old     young
  F 0.4375000 0.3024523
  I 0.1118421 0.3410536
  M 0.4506579 0.3564941

      
length        old      young
  mean 0.58437500 0.51701181
  sd   0.07543124 0.12109177

        
diameter       old     young
    mean 0.4627632 0.4015781
    sd   0.0634677 0.0994117

      
height        old      young
  mean 0.16828947 0.13650091
  sd   0.02945380 0.04363673

      
weight       old     young
  mean 1.1082434 0.7906989
  sd   0.4268078 0.4799089

# ... and 3 more tables

The a priori probabilities are shown below:

      old     young 
0.1213089 0.8786911 
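These priors are simply the class proportions in the training data, so they can be checked directly; a quick sketch, assuming the same trainset1:

# the a priori probabilities are the relative class frequencies in the training set
prop.table(table(trainset1$age))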

bayes2$tables
$sex
   
sex       old     young
  F 0.4375000 0.3024523
  I 0.1118421 0.3410536
  M 0.4506579 0.3564941

$length
      
length        old      young
  mean 0.58437500 0.51701181
  sd   0.07543124 0.12109177

$diameter
        
diameter       old     young
    mean 0.4627632 0.4015781
    sd   0.0634677 0.0994117

$height
      
height        old      young
  mean 0.16828947 0.13650091
  sd   0.02945380 0.04363673

$weight
      
weight       old     young
  mean 1.1082434 0.7906989
  sd   0.4268078 0.4799089

$shucked
       
shucked       old     young
   mean 0.4197533 0.3509986
   sd   0.1717971 0.2243031

$viscera
       
viscera        old      young
   mean 0.23354441 0.17352520
   sd   0.09201213 0.10859887

$shell
      
shell        old     young
  mean 0.3550789 0.2234444
  sd   0.1420152 0.1294044
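Each numeric table above is just the class-conditional mean and standard deviation of that predictor, since naive_bayes fits a Gaussian to each numeric feature by default. A minimal check for length, assuming the same trainset1:

# these should match the mean and sd rows of bayes2$tables$length
tapply(trainset1$length, trainset1$age, mean)
tapply(trainset1$length, trainset1$age, sd)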
# compute the accuracy on the testset
pred <- predict(bayes2, testset)
mean(pred == testset$age)
[1] 0.6648713

The classification model achieves roughly 66% accuracy on the test set.
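Beyond the hard class labels, predict() can also return posterior probabilities, which is useful for inspecting borderline cases; a minimal sketch, assuming the same bayes2 and testset:

# posterior class probabilities for the first few test observations
head(predict(bayes2, testset, type = "prob"))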

Use a confusion matrix to verify the 66% accuracy:

table(pred,testset$age)
       
pred    old young
  old   129   503
  young  57   982

Therefore the model's accuracy is the diagonal of the matrix (129 correctly classified old + 982 correctly classified young) divided by the total of 1671: (129 + 982) / 1671 = 0.6648713, which matches the figure above. The misclassification rate is 1 - 0.6649 = 0.3351, or about 33.5%.
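The same arithmetic in R, assuming the pred and testset objects from above (cm is an illustrative helper variable):

# correct predictions sit on the diagonal of the confusion matrix
cm <- table(pred, testset$age)
sum(diag(cm)) / sum(cm)        # accuracy: 0.6648713
1 - sum(diag(cm)) / sum(cm)    # misclassification rate: about 0.335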
