Memanggil data BreastCancer

library(mlbench)
library(neuralnet)
# Load the BreastCancer data set from the mlbench package into the workspace
# and print a per-column summary.
data("BreastCancer", package = "mlbench")
summary(BreastCancer)
##       Id             Cl.thickness   Cell.size     Cell.shape 
##  Length:699         1      :145   1      :384   1      :353  
##  Class :character   5      :130   10     : 67   2      : 59  
##  Mode  :character   3      :108   3      : 52   10     : 58  
##                     4      : 80   2      : 45   3      : 56  
##                     10     : 69   4      : 40   4      : 44  
##                     2      : 50   5      : 30   5      : 34  
##                     (Other):117   (Other): 81   (Other): 95  
##  Marg.adhesion  Epith.c.size  Bare.nuclei   Bl.cromatin  Normal.nucleoli
##  1      :407   2      :386   1      :402   2      :166   1      :443    
##  2      : 58   3      : 72   10     :132   3      :165   10     : 61    
##  3      : 58   4      : 48   2      : 30   1      :152   3      : 44    
##  10     : 55   1      : 47   5      : 30   7      : 73   2      : 36    
##  4      : 33   6      : 41   3      : 28   4      : 40   8      : 24    
##  8      : 25   5      : 39   (Other): 61   5      : 34   6      : 22    
##  (Other): 63   (Other): 66   NA's   : 16   (Other): 69   (Other): 69    
##     Mitoses          Class    
##  1      :579   benign   :458  
##  2      : 35   malignant:241  
##  3      : 33                  
##  10     : 14                  
##  4      : 12                  
##  7      :  9                  
##  (Other): 17

Membersihkan data dari nilai NA

# Row numbers that contain an NA in at least one column: collected column by
# column, flattened, then de-duplicated.
mvindex <- unique(unlist(
  lapply(BreastCancer, function(column) which(is.na(column)))
))
# Drop every row that has a missing value and summarise what is left.
data_cleaned <- na.omit(BreastCancer)
summary(data_cleaned)
##       Id             Cl.thickness   Cell.size     Cell.shape 
##  Length:683         1      :139   1      :373   1      :346  
##  Class :character   5      :128   10     : 67   2      : 58  
##  Mode  :character   3      :104   3      : 52   10     : 58  
##                     4      : 79   2      : 45   3      : 53  
##                     10     : 69   4      : 38   4      : 43  
##                     2      : 50   5      : 30   5      : 32  
##                     (Other):114   (Other): 78   (Other): 93  
##  Marg.adhesion  Epith.c.size  Bare.nuclei   Bl.cromatin  Normal.nucleoli
##  1      :393   2      :376   1      :402   3      :161   1      :432    
##  2      : 58   3      : 71   10     :132   2      :160   10     : 60    
##  3      : 58   4      : 48   2      : 30   1      :150   3      : 42    
##  10     : 55   1      : 44   5      : 30   7      : 71   2      : 36    
##  4      : 33   6      : 40   3      : 28   4      : 39   8      : 23    
##  8      : 25   5      : 39   8      : 21   5      : 34   6      : 22    
##  (Other): 61   (Other): 65   (Other): 40   (Other): 68   (Other): 68    
##     Mitoses          Class    
##  1      :563   benign   :444  
##  2      : 35   malignant:239  
##  3      : 33                  
##  10     : 14                  
##  4      : 12                  
##  7      :  9                  
##  (Other): 17

Penyusunan dan normalisasi data

# Prepare the predictors: take columns 2-10 of the cleaned data and turn each
# factor into the numeric value of its label (via character, so the labels
# themselves are used rather than the internal factor codes).
input <- data_cleaned[, 2:10]
indx <- vapply(input, is.factor, logical(1))
input <- as.data.frame(
  lapply(input, function(column) as.numeric(as.character(column)))
)
# Min-max normalisation: subtract each column's minimum and divide by its
# range, which maps every column onto [0, 1].
max_data <- vapply(input, max, numeric(1))
min_data <- vapply(input, min, numeric(1))
input_scaled <- as.data.frame(
  scale(input, center = min_data, scale = max_data - min_data)
)
# Build the target: one 0/1 indicator column per level of Class
# (the "+ 0" removes the intercept so every level gets its own column).
Cancer <- data.frame(Cancer = data_cleaned$Class)
Cancer <- data.frame(model.matrix(~ Cancer + 0, data = Cancer))
head(Cancer)
##   Cancerbenign Cancermalignant
## 1            1               0
## 2            1               0
## 3            1               0
## 4            1               0
## 5            1               0
## 6            0               1

Menggabungkan data Cancer dengan data input_scaled

# Place the scaled predictors and the class indicator columns side by side.
# cbind() on two data frames already returns a data frame.
final_data <- cbind(input_scaled, Cancer)
head(final_data)
##   Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei
## 1    0.4444444 0.0000000  0.0000000     0.0000000    0.1111111   0.0000000
## 2    0.4444444 0.3333333  0.3333333     0.4444444    0.6666667   1.0000000
## 3    0.2222222 0.0000000  0.0000000     0.0000000    0.1111111   0.1111111
## 4    0.5555556 0.7777778  0.7777778     0.0000000    0.2222222   0.3333333
## 5    0.3333333 0.0000000  0.0000000     0.2222222    0.1111111   0.0000000
## 6    0.7777778 1.0000000  1.0000000     0.7777778    0.6666667   1.0000000
##   Bl.cromatin Normal.nucleoli Mitoses Cancerbenign Cancermalignant
## 1   0.2222222       0.0000000       0            1               0
## 2   0.2222222       0.1111111       0            1               0
## 3   0.2222222       0.0000000       0            1               0
## 4   0.2222222       0.6666667       0            1               0
## 5   0.2222222       0.0000000       0            1               0
## 6   0.8888889       0.6666667       0            0               1

Membuat data training dan testing

# Draw 70% of the row numbers at random, without replacement, for training;
# the rows that were not drawn form the test set.
index <- sample.int(nrow(final_data), size = round(0.70 * nrow(final_data)))
train_data <- final_data[index, ]
test_data <- final_data[-index, ]

Membangun model neural network MLP

# Fit a multilayer perceptron with three hidden layers (10, 5 and 5 units)
# that maps the nine predictors to the two class indicator outputs.
# linear.output = FALSE applies the activation function to the output layer.
model <- neuralnet(
  formula = Cancerbenign + Cancermalignant ~ Cl.thickness + Cell.size +
    Cell.shape + Marg.adhesion + Epith.c.size + Bare.nuclei + Bl.cromatin +
    Normal.nucleoli + Mitoses,
  data = train_data,
  hidden = c(10, 5, 5),
  linear.output = FALSE
)
# Draw the fitted network.
library(NeuralNetTools)
plotnet(model)

Menguji model dengan data testing

# Run the test predictors (columns 1-9) through the fitted network.
prediksi.nn.test <- compute(model, test_data[, 1:9])
# Network outputs rounded to 0/1; kept for inspection only.
hasil.prediksi <- round(prediksi.nn.test$net.result, digits = 0)
# Choose the class from the RAW outputs. Taking which.max() after rounding
# turns rows such as (0.6, 0.9) or (0.3, 0.4) into ties, and which.max() then
# always returns the first column, i.e. "benign".
prediksi.nn <- c("benign", "malignant")[
  apply(prediksi.nn.test$net.result, 1, which.max)
]
# Cross-tabulate the actual class (rows) against the predicted class
# (columns). Fixing the levels keeps the table 2 x 2 even when one class is
# never predicted.
tabel.prediksi <- table(
  data_cleaned$Class[-index],
  prediksi.nn = factor(prediksi.nn, levels = c("benign", "malignant"))
)
tabel.prediksi
##            prediksi.nn
##             benign malignant
##   benign       131         3
##   malignant      4        67

Confusion matrix

library(gmodels)
# Cross-tabulation of the actual class (rows) against the predicted class
# (columns), including each cell's chi-square contribution.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
CrossTable(
  x = data_cleaned$Class[-index], y = prediksi.nn,
  prop.chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  205 
## 
##  
##                            | prediksi.nn 
## data_cleaned$Class[-index] |    benign | malignant | Row Total | 
## ---------------------------|-----------|-----------|-----------|
##                     benign |       131 |         3 |       134 | 
##                            |    20.716 |    39.953 |           | 
##                            |     0.978 |     0.022 |     0.654 | 
##                            |     0.970 |     0.043 |           | 
##                            |     0.639 |     0.015 |           | 
## ---------------------------|-----------|-----------|-----------|
##                  malignant |         4 |        67 |        71 | 
##                            |    39.098 |    75.404 |           | 
##                            |     0.056 |     0.944 |     0.346 | 
##                            |     0.030 |     0.957 |           | 
##                            |     0.020 |     0.327 |           | 
## ---------------------------|-----------|-----------|-----------|
##               Column Total |       135 |        70 |       205 | 
##                            |     0.659 |     0.341 |           | 
## ---------------------------|-----------|-----------|-----------|
## 
## 
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
# tabel.prediksi holds the actual class in rows and the prediction in columns,
# whereas confusionMatrix() expects predictions in rows and the reference in
# columns. Transpose first; otherwise sensitivity/specificity are reported in
# place of the predictive values and vice versa.
confusionMatrix(t(tabel.prediksi))
## Confusion Matrix and Statistics
## 
##            prediksi.nn
##             benign malignant
##   benign       131         3
##   malignant      4        67
##                                                 
##                Accuracy : 0.9658537             
##                  95% CI : (0.9309138, 0.9861632)
##     No Information Rate : 0.6585366             
##     P-Value [Acc > NIR] : <0.0000000000000002   
##                                                 
##                   Kappa : 0.9243343             
##                                                 
##  Mcnemar's Test P-Value : 1                     
##                                                 
##             Sensitivity : 0.9703704             
##             Specificity : 0.9571429             
##          Pos Pred Value : 0.9776119             
##          Neg Pred Value : 0.9436620             
##              Prevalence : 0.6585366             
##          Detection Rate : 0.6390244             
##    Detection Prevalence : 0.6536585             
##       Balanced Accuracy : 0.9637566             
##                                                 
##        'Positive' Class : benign                
##