# Load the BreastCancer data set from mlbench and take a first look at it.
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
data(BreastCancer)

# Column types: an Id string, nine factor predictors and the Class outcome.
utils::str(BreastCancer)
## 'data.frame': 699 obs. of 11 variables:
## $ Id : chr "1000025" "1002945" "1015425" "1016277" ...
## $ Cl.thickness : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 5 5 3 6 4 8 1 2 2 4 ...
## $ Cell.size : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 1 1 2 ...
## $ Cell.shape : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 2 1 1 ...
## $ Marg.adhesion : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 5 1 1 3 8 1 1 1 1 ...
## $ Epith.c.size : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 2 7 2 3 2 7 2 2 2 2 ...
## $ Bare.nuclei : Factor w/ 10 levels "1","2","3","4",..: 1 10 2 4 1 10 10 1 1 1 ...
## $ Bl.cromatin : Factor w/ 10 levels "1","2","3","4",..: 3 3 3 3 3 9 3 3 1 2 ...
## $ Normal.nucleoli: Factor w/ 10 levels "1","2","3","4",..: 1 2 1 7 1 7 1 1 1 1 ...
## $ Mitoses : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 5 1 ...
## $ Class : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...

# Outcome labels, in factor-level order.
levels(BreastCancer[["Class"]])
## [1] "benign" "malignant"

# Per-column frequency summary (also reveals the NA count per column).
summary(BreastCancer)
## Id Cl.thickness Cell.size Cell.shape Marg.adhesion
## Length:699 1 :145 1 :384 1 :353 1 :407
## Class :character 5 :130 10 : 67 2 : 59 2 : 58
## Mode :character 3 :108 3 : 52 10 : 58 3 : 58
## 4 : 80 2 : 45 3 : 56 10 : 55
## 10 : 69 4 : 40 4 : 44 4 : 33
## 2 : 50 5 : 30 5 : 34 8 : 25
## (Other):117 (Other): 81 (Other): 95 (Other): 63
## Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses
## 2 :386 1 :402 2 :166 1 :443 1 :579
## 3 : 72 10 :132 3 :165 10 : 61 2 : 35
## 4 : 48 2 : 30 1 :152 3 : 44 3 : 33
## 1 : 47 5 : 30 7 : 73 2 : 36 10 : 14
## 6 : 41 3 : 28 4 : 40 8 : 24 4 : 12
## 5 : 39 (Other): 61 5 : 34 6 : 22 7 : 9
## (Other): 66 NA's : 16 (Other): 69 (Other): 69 (Other): 17
## Class
## benign :458
## malignant:241
##
##
##
##
##
library(mice) # multiple imputation of missing values
## Warning: package 'mice' was built under R version 4.4.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(caret) # confusionMatrix() for model evaluation
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice

# Impute the missing predictor values (the summary above shows 16 NA's in
# Bare.nuclei). Id and Class are excluded from the imputation model, and
# columns are selected by name rather than by position so the step does not
# depend on column order.
predictor_cols <- setdiff(names(BreastCancer), c("Id", "Class"))

# `seed` makes the imputation reproducible; without it every run produced a
# different data set because set.seed() is only called later in the script.
# `printFlag` is spelled out instead of relying on partial matching (`print`).
dataset_impute <- mice(
  BreastCancer[, predictor_cols],
  printFlag = FALSE,
  seed = 150
)

# Rebuild the working data set: Class first, followed by the predictors taken
# from the first completed (imputed) data set.
BreastCancer <- cbind(
  BreastCancer["Class"],
  mice::complete(dataset_impute, 1)
)

# NOTE: the summary recorded below this call came from an unseeded run, so the
# imputed Bare.nuclei counts may differ slightly after re-running.
summary(BreastCancer)
## Class Cl.thickness Cell.size Cell.shape Marg.adhesion
## benign :458 1 :145 1 :384 1 :353 1 :407
## malignant:241 5 :130 10 : 67 2 : 59 2 : 58
## 3 :108 3 : 52 10 : 58 3 : 58
## 4 : 80 2 : 45 3 : 56 10 : 55
## 10 : 69 4 : 40 4 : 44 4 : 33
## 2 : 50 5 : 30 5 : 34 8 : 25
## (Other):117 (Other): 81 (Other): 95 (Other): 63
## Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses
## 2 :386 1 :413 2 :166 1 :443 1 :579
## 3 : 72 10 :133 3 :165 10 : 61 2 : 35
## 4 : 48 2 : 31 1 :152 3 : 44 3 : 33
## 1 : 47 5 : 30 7 : 73 2 : 36 10 : 14
## 6 : 41 3 : 28 4 : 40 8 : 24 4 : 12
## 5 : 39 8 : 21 5 : 34 6 : 22 7 : 9
## (Other): 66 (Other): 43 (Other): 69 (Other): 69 (Other): 17
library(caTools) # sample.split() for the train/test partition
## Warning: package 'caTools' was built under R version 4.4.3
set.seed(150)

# Split 70/30 into training and test rows.
# sample.split() expects the vector of outcome labels. Passing the whole data
# frame made length(Y) equal the number of columns (10), so a 10-element mask
# was silently recycled down the 699 rows: a periodic pattern, not a random
# or stratified split. Passing Class gives one flag per row and keeps the
# benign/malignant ratio in both parts.
# NOTE: the confusion matrices recorded further down were produced with the
# previous, column-recycled split; re-run the script to refresh them.
split <- sample.split(BreastCancer$Class, SplitRatio = 0.7)
training_set <- BreastCancer[split, ] # training rows
test_set <- BreastCancer[!split, ] # held-out test rows

# Row counts are expected to stay 490 / 209 with the per-row split.
dim(training_set) # dimensions of the training data
## [1] 490 10
dim(test_set)
## [1] 209 10

# Predictors only: drop the target column before predicting.
topredict_set <- test_set[, names(test_set) != "Class", drop = FALSE]
dim(topredict_set)
## [1] 209 9
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3

# Naive Bayes: fit on the training rows, predict the held-out rows.
model_naive <- naiveBayes(Class ~ ., data = training_set)
preds_naive <- predict(model_naive, newdata = topredict_set)

# Rows are predictions, columns are the true classes.
(conf_matrix_naive <- table(preds_naive, test_set$Class))
##
## preds_naive benign malignant
## benign 129 2
## malignant 6 72

# caret defaults the positive class to the first factor level ("benign"),
# which makes Sensitivity report how well benign cases are found. For a
# cancer classifier the event of interest is "malignant", so set it
# explicitly. Overall rows (Accuracy, Kappa, McNemar) are unaffected; the
# class-wise rows below were recomputed from the table above.
confusionMatrix(conf_matrix_naive, positive = "malignant")
## Confusion Matrix and Statistics
##
##
## preds_naive benign malignant
## benign 129 2
## malignant 6 72
##
## Accuracy : 0.9617
## 95% CI : (0.926, 0.9833)
## No Information Rate : 0.6459
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9173
##
## Mcnemar's Test P-Value : 0.2888
##
## Sensitivity : 0.9730
## Specificity : 0.9556
## Pos Pred Value : 0.9231
## Neg Pred Value : 0.9847
## Prevalence : 0.3541
## Detection Rate : 0.3445
## Detection Prevalence : 0.3732
## Balanced Accuracy : 0.9643
##
## 'Positive' Class : malignant
##
# Random Forest
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin

# Fit the forest on the training rows and predict the held-out rows.
# NOTE(review): ntree = 5 is far below the package default (500), and the
# importance measures requested here are not used in the visible part of the
# script. Consider raising ntree or dropping importance = TRUE.
model_rf <- randomForest(
  Class ~ .,
  data = training_set,
  importance = TRUE,
  ntree = 5
)
preds_rf <- predict(model_rf, topredict_set)

# Rows are predictions, columns are the true classes.
(conf_matrix_forest <- table(preds_rf, test_set$Class))
##
## preds_rf benign malignant
## benign 128 2
## malignant 7 72

# Report class-wise statistics for "malignant" rather than caret's default
# (the first factor level, "benign"). Overall rows are unaffected; the
# class-wise rows below were recomputed from the table above.
confusionMatrix(conf_matrix_forest, positive = "malignant")
## Confusion Matrix and Statistics
##
##
## preds_rf benign malignant
## benign 128 2
## malignant 7 72
##
## Accuracy : 0.9569
## 95% CI : (0.9198, 0.9801)
## No Information Rate : 0.6459
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9073
##
## Mcnemar's Test P-Value : 0.1824
##
## Sensitivity : 0.9730
## Specificity : 0.9481
## Pos Pred Value : 0.9114
## Neg Pred Value : 0.9846
## Prevalence : 0.3541
## Detection Rate : 0.3445
## Detection Prevalence : 0.3780
## Balanced Accuracy : 0.9606
##
## 'Positive' Class : malignant
##
# Decision Tree classifier
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3

# Fit a classification tree on the training rows; type = "class" makes
# predict() return class labels instead of class probabilities.
model_dtree <- rpart(Class ~ ., data = training_set)
preds_dtree <- predict(model_dtree, newdata = topredict_set, type = "class")

# Rows are predictions, columns are the true classes.
(conf_matrix_dtree <- table(preds_dtree, test_set$Class))
##
## preds_dtree benign malignant
## benign 127 5
## malignant 8 69

# Report class-wise statistics for "malignant" rather than caret's default
# (the first factor level, "benign"). Overall rows are unaffected; the
# class-wise rows below were recomputed from the table above.
confusionMatrix(conf_matrix_dtree, positive = "malignant")
## Confusion Matrix and Statistics
##
##
## preds_dtree benign malignant
## benign 127 5
## malignant 8 69
##
## Accuracy : 0.9378
## 95% CI : (0.896, 0.9665)
## No Information Rate : 0.6459
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8652
##
## Mcnemar's Test P-Value : 0.5791
##
## Sensitivity : 0.9324
## Specificity : 0.9407
## Pos Pred Value : 0.8961
## Neg Pred Value : 0.9621
## Prevalence : 0.3541
## Detection Rate : 0.3301
## Detection Prevalence : 0.3684
## Balanced Accuracy : 0.9366
##
## 'Positive' Class : malignant
##