Memanggil data BreastCancer
library(mlbench)
library(neuralnet)
# Load the BreastCancer data set into the workspace and print a per-column
# summary (level counts for the factor columns, NA counts where present).
data("BreastCancer")
print(summary(BreastCancer))
## Id Cl.thickness Cell.size Cell.shape
## Length:699 1 :145 1 :384 1 :353
## Class :character 5 :130 10 : 67 2 : 59
## Mode :character 3 :108 3 : 52 10 : 58
## 4 : 80 2 : 45 3 : 56
## 10 : 69 4 : 40 4 : 44
## 2 : 50 5 : 30 5 : 34
## (Other):117 (Other): 81 (Other): 95
## Marg.adhesion Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli
## 1 :407 2 :386 1 :402 2 :166 1 :443
## 2 : 58 3 : 72 10 :132 3 :165 10 : 61
## 3 : 58 4 : 48 2 : 30 1 :152 3 : 44
## 10 : 55 1 : 47 5 : 30 7 : 73 2 : 36
## 4 : 33 6 : 41 3 : 28 4 : 40 8 : 24
## 8 : 25 5 : 39 (Other): 61 5 : 34 6 : 22
## (Other): 63 (Other): 66 NA's : 16 (Other): 69 (Other): 69
## Mitoses Class
## 1 :579 benign :458
## 2 : 35 malignant:241
## 3 : 33
## 10 : 14
## 4 : 12
## 7 : 9
## (Other): 17
Membersihkan data dari nilai NA
# Row positions that hold at least one missing value, collected column by
# column and de-duplicated.
mvindex <- local({
  na_rows_by_column <- lapply(
    BreastCancer,
    function(column) which(is.na(column))
  )
  unique(unlist(na_rows_by_column))
})

# Drop every row that contains an NA, then summarise what is left.
data_cleaned <- na.omit(BreastCancer)
print(summary(data_cleaned))
## Id Cl.thickness Cell.size Cell.shape
## Length:683 1 :139 1 :373 1 :346
## Class :character 5 :128 10 : 67 2 : 58
## Mode :character 3 :104 3 : 52 10 : 58
## 4 : 79 2 : 45 3 : 53
## 10 : 69 4 : 38 4 : 43
## 2 : 50 5 : 30 5 : 32
## (Other):114 (Other): 78 (Other): 93
## Marg.adhesion Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli
## 1 :393 2 :376 1 :402 3 :161 1 :432
## 2 : 58 3 : 71 10 :132 2 :160 10 : 60
## 3 : 58 4 : 48 2 : 30 1 :150 3 : 42
## 10 : 55 1 : 44 5 : 30 7 : 71 2 : 36
## 4 : 33 6 : 40 3 : 28 4 : 39 8 : 23
## 8 : 25 5 : 39 8 : 21 5 : 34 6 : 22
## (Other): 61 (Other): 65 (Other): 40 (Other): 68 (Other): 68
## Mitoses Class
## 1 :563 benign :444
## 2 : 35 malignant:239
## 3 : 33
## 10 : 14
## 4 : 12
## 7 : 9
## (Other): 17
Penyusunan dan normalisasi data
# Prepare the data for normalisation: take columns 2 to 10 of the cleaned
# data and convert each one to numeric via its character representation.
input <- data_cleaned[, 2:10]
indx <- vapply(input, is.factor, logical(1))
input <- as.data.frame(
  lapply(input, function(column) as.numeric(as.character(column)))
)

# Min-max normalisation: subtract each column's minimum and divide by its
# range, so every column ends up in [0, 1].
max_data <- vapply(input, max, numeric(1))
min_data <- vapply(input, min, numeric(1))
input_scaled <- as.data.frame(
  scale(input, center = min_data, scale = max_data - min_data)
)
# Build the target data: one 0/1 indicator column per class level
# (Cancerbenign, Cancermalignant). The "+ 0" removes the intercept so that
# every level gets its own column.
Cancer <- data.frame(Cancer = data_cleaned$Class)
Cancer <- data.frame(model.matrix(~ Cancer + 0, data = Cancer))
print(head(Cancer))
## Cancerbenign Cancermalignant
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 0 1
Menggabungkan data Cancer dengan data input_scaled
# Put the scaled predictors and the class indicator columns side by side in
# a single data frame and show its first rows.
final_data <- data.frame(input_scaled, Cancer, check.names = FALSE)
print(head(final_data))
## Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei
## 1 0.4444444 0.0000000 0.0000000 0.0000000 0.1111111 0.0000000
## 2 0.4444444 0.3333333 0.3333333 0.4444444 0.6666667 1.0000000
## 3 0.2222222 0.0000000 0.0000000 0.0000000 0.1111111 0.1111111
## 4 0.5555556 0.7777778 0.7777778 0.0000000 0.2222222 0.3333333
## 5 0.3333333 0.0000000 0.0000000 0.2222222 0.1111111 0.0000000
## 6 0.7777778 1.0000000 1.0000000 0.7777778 0.6666667 1.0000000
## Bl.cromatin Normal.nucleoli Mitoses Cancerbenign Cancermalignant
## 1 0.2222222 0.0000000 0 1 0
## 2 0.2222222 0.1111111 0 1 0
## 3 0.2222222 0.0000000 0 1 0
## 4 0.2222222 0.6666667 0 1 0
## 5 0.2222222 0.0000000 0 1 0
## 6 0.8888889 0.6666667 0 0 1
Membuat data training dan testing
# Fix the random seed so the 70/30 split below (and any later random draws
# in this session, e.g. during model fitting) is the same on every run.
# The value itself is arbitrary; change it to obtain a different split.
set.seed(123)

# Draw 70% of the row positions at random for training; the remaining rows
# form the test set. seq_len() is used instead of 1:nrow() so that an empty
# data frame yields an empty sequence rather than c(1, 0).
index <- sample(
  seq_len(nrow(final_data)),
  round(0.70 * nrow(final_data))
)
train_data <- final_data[index, ]
test_data <- final_data[-index, ]
Membangun model neural network MLP
# Fit a multilayer perceptron with two output units (one per class) on the
# nine scaled predictors, using three hidden layers of 10, 5 and 5 units.
# linear.output = FALSE applies the activation function to the output
# units as well.
model <- neuralnet(
  formula = Cancerbenign + Cancermalignant ~
    Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size +
    Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses,
  data = train_data,
  hidden = c(10, 5, 5),
  linear.output = FALSE
)

# Draw the fitted network.
library(NeuralNetTools)
plotnet(model)
Menguji model dengan data testing
# Score the held-out rows; columns 1:9 of test_data are the scaled predictors.
prediksi.nn.test <- compute(model, test_data[, 1:9])

# Rounded network outputs, kept because later code may still read this
# variable. They are deliberately NOT used to choose the class any more.
hasil.prediksi <- round(prediksi.nn.test$net.result, digits = 0)

# Choose the class with the larger raw output. Taking which.max() on the
# rounded values ties whenever both outputs round to the same number
# (e.g. 0.40 vs 0.45), and a tie always resolves to the first column,
# i.e. "benign".
prediksi.nn <- c("benign", "malignant")[
  apply(prediksi.nn.test$net.result, 1, which.max)
]

# Cross-tabulate actual (rows) against predicted (columns). The explicit
# factor levels keep the table 2 x 2 even if only one class is predicted.
tabel.prediksi <- table(
  data_cleaned$Class[-index],
  prediksi.nn = factor(prediksi.nn, levels = c("benign", "malignant"))
)
tabel.prediksi
## prediksi.nn
## benign malignant
## benign 131 3
## malignant 4 67
Confusion matrix
library(gmodels)
# Detailed cross-tabulation of actual class (rows) against predicted class
# (columns), including each cell's chi-square contribution.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
CrossTable(
  x = data_cleaned$Class[-index],
  y = prediksi.nn,
  prop.chisq = TRUE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 205
##
##
## | prediksi.nn
## data_cleaned$Class[-index] | benign | malignant | Row Total |
## ---------------------------|-----------|-----------|-----------|
## benign | 131 | 3 | 134 |
## | 20.716 | 39.953 | |
## | 0.978 | 0.022 | 0.654 |
## | 0.970 | 0.043 | |
## | 0.639 | 0.015 | |
## ---------------------------|-----------|-----------|-----------|
## malignant | 4 | 67 | 71 |
## | 39.098 | 75.404 | |
## | 0.056 | 0.944 | 0.346 |
## | 0.030 | 0.957 | |
## | 0.020 | 0.327 | |
## ---------------------------|-----------|-----------|-----------|
## Column Total | 135 | 70 | 205 |
## | 0.659 | 0.341 | |
## ---------------------------|-----------|-----------|-----------|
##
##
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
# Compute the classification metrics from explicit prediction and reference
# vectors. tabel.prediksi has the actual classes in its rows, whereas
# confusionMatrix() reads the rows of a table as predictions, so passing the
# table directly swaps sensitivity/specificity with the predictive values.
# "malignant" is set as the positive class so that sensitivity is the share
# of malignant cases that were detected.
confusionMatrix(
  data = factor(prediksi.nn, levels = levels(data_cleaned$Class)),
  reference = data_cleaned$Class[-index],
  positive = "malignant"
)
## Confusion Matrix and Statistics
##
## prediksi.nn
## benign malignant
## benign 131 3
## malignant 4 67
##
## Accuracy : 0.9658537
## 95% CI : (0.9309138, 0.9861632)
## No Information Rate : 0.6585366
## P-Value [Acc > NIR] : <0.0000000000000002
##
## Kappa : 0.9243343
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9703704
## Specificity : 0.9571429
## Pos Pred Value : 0.9776119
## Neg Pred Value : 0.9436620
## Prevalence : 0.6585366
## Detection Rate : 0.6390244
## Detection Prevalence : 0.6536585
## Balanced Accuracy : 0.9637566
##
## 'Positive' Class : benign
##