Import Dataset yang akan digunakan
# Load the raw dataset from the Excel workbook.
# The workbook is addressed through an explicit path instead of setwd(), so
# importing the data no longer changes the session's working directory.
# NOTE(review): the directory is still machine-specific; adjust data_dir (or
# switch to a project-relative path) before running on another computer.
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.5
data_dir <- "D:/STIS/4SE/4. Data Mining/Tugas/Bismillah Dulu"
Data <- read_excel(file.path(data_dir, "dataya.xlsx"))
#View(Data)
Memuat semua library yang dibutuhkan
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.5
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.5
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(DT)
## Warning: package 'DT' was built under R version 4.0.5
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
library(rpart)
## Warning: package 'rpart' was built under R version 4.0.5
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.0.5
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(e1071)
## Warning: package 'e1071' was built under R version 4.0.5
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 4.0.5
library(ggplot2)
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(smotefamily)
## Warning: package 'smotefamily' was built under R version 4.0.5
# Coerce the imported table to a plain data.frame and inspect its columns.
dataset <- as.data.frame(x = Data)
str(object = dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ V102 : num 2 2 2 2 2 2 2 2 2 2 ...
## $ V106 : num 1 1 1 0 3 1 1 1 3 3 ...
## $ V190 : num 1 1 3 1 5 2 2 1 2 2 ...
## $ V212 : num 21 21 17 22 26 18 18 18 25 25 ...
## $ M14 : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ M19 : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol: num 6 0 9 9 7 9 0 9 4 0 ...
#View(dataset)
## PREPROCESSING

Pada tahap awal preprocessing, dilakukan data reduction dengan menyeleksi variabel yang akan digunakan dalam model. Dataset file Excel yang kami gunakan sudah melalui tahap data reduction sehingga tahap ini tidak dilakukan lagi. Selanjutnya, nama-nama variabel akan diubah agar data lebih mudah dipahami.
# Give the imported columns readable names in a single rename() call
# (syntax is new_name = old_name); M14nol keeps its original name.
dataset <- rename(
  dataset,
  Wilayah = V102,
  Pend = V106,
  SK = V190,
  Umur = V212,
  Ante = M14,
  Berat = M19
)
str(dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ Wilayah: num 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : num 1 1 1 0 3 1 1 1 3 3 ...
## $ SK : num 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
# Treat the first three columns (Wilayah, Pend, SK) as categorical variables.
factor_cols <- 1:3
dataset[factor_cols] <- lapply(dataset[factor_cols], as.factor)
str(dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 2 1 4 2 2 2 4 4 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
#View(dataset)
Selanjutnya, dilakukan tahapan data discretization, yaitu mengklasifikasikan berat bayi lahir ke dalam tiga kategori: BBLR (berat ≤ 2500), BBLN (berat > 2500 hingga 3999), dan BBLL (berat > 3999).
# Discretise birth weight (Berat) into three classes:
#   BBLR: Berat <= 2500
#   BBLN: 2500 < Berat <= 3999
#   BBLL: Berat > 3999
# cut() classifies the whole column at once, replacing the row-by-row loop,
# and yields NA for a missing Berat instead of aborting with
# "missing value where TRUE/FALSE needed".
berat_class <- cut(
  dataset$Berat,
  breaks = c(-Inf, 2500, 3999, Inf),
  labels = c("BBLR", "BBLN", "BBLL"),
  right = TRUE,
  include.lowest = TRUE
)
# Rebuild the factor from the labels so that, as before, its levels are the
# observed classes in alphabetical order.
dataset$BBL <- as.factor(as.character(berat_class))
str(dataset)
## 'data.frame': 17848 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 2 1 4 2 2 2 4 4 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
## $ BBL : Factor w/ 3 levels "BBLL","BBLN",..: 3 3 2 3 2 1 1 2 1 2 ...
Selanjutnya, semua baris yang mengandung missing value akan dibuang, lalu akan diperiksa apakah terdapat outlier pada variabel numerik yang digunakan.
# Drop every row that has a missing value in any column, then inspect the
# mother's age at delivery (Umur) for outliers: first as a boxplot, then as
# the list of values flagged by the boxplot rule.
dataset <- na.omit(dataset)
boxplot(dataset$Umur)
boxplot(dataset$Umur, plot=FALSE)$out #The outlying maternal ages at delivery still fall within the WUS category and can be regarded as normal ages, so the outliers of this variable are kept
## [1] 36 37 38 43 37 37 37 36 35 37 38 37 44 36 35 35 42 36 37 36 46 35 35 36 35
## [26] 35 36 41 36 36 39 36 35 39 37 37 38 35 39 39 35 38 41 42 39 35 35 42 37 35
## [51] 37 39 39 37 35 35 36 44 36 38 42 35 35 37 40 37 42 43 39 41 42 35 41 35 37
## [76] 38 38 42 38 39 42 40 38 40 36 39 39 38 38 35 37 38 39 41 37 35 37 35 37 38
## [101] 35 39 36 38 40 35 38 36 35 37 36 39 41 37 40 36 41 45 40 35 36 36 40 35 36
## [126] 38 41 35 37 36 35 35 42 41 36 37 37 36 35 36 40 38 39 43 35 38 37 37 37 37
## [151] 35 35 44 38 38 37 36 37 37 35 41 36 39 35 36 38 38 35 41 37 35 35 38 37 39
## [176] 35 37 38 38 38 35 35 37 43 42 39 35 41 40 37
# Inspect Ante for outliers: draw the boxplot, then list the flagged values.
# NOTE(review): many flagged values are 98/99, which look like special codes
# rather than real measurements - confirm against the data dictionary.
boxplot(dataset$Ante)
boxplot(dataset$Ante, plot=FALSE)$out #Outliers of this variable will be removed
## [1] 18 21 98 98 98 36 24 26 98 98 18 28 98 98 98 18 98 98 98 18 18 21 98 98 32
## [26] 98 18 32 98 98 98 18 17 17 18 26 17 17 18 17 17 98 17 21 20 19 18 18 17 18
## [51] 18 26 99 25 20 98 21 98 23 21 18 32 20 18 17 98 18 18 25 32 98 20 24 18 18
## [76] 18 19 18 17 22 98 19 17 98 33 98 18 27 98 98 99 18 18 17 40 20 18 18 18 18
## [101] 18 19 19 19 98 17 20 19 17 18 18 98 18 17 20 27 18 17 98 20 20 98 18 18 98
## [126] 21 20 21 17 18 20 17 18 17 98 98 17 99 18 24 24 20 19 20 20 22 18 21 18 20
## [151] 21 18 18 18 20 20 21 28 98 23 98 20 18 19 19 17 29 17 18 17 19 25 17 18 21
## [176] 98 18 24 18 17 32 20 19 20 17 19 21 22 20 18 17 18 18 24 17 21 18 18 21 17
## [201] 17 22 27 18 18 98 28 20 17 17 22 24 24 18 20 17 20 18 98 98 18 98 18 18 18
## [226] 18 98 18 23 98 17 21 98 98 99 98 18 98 98 98 18 27 22 24 32 17 18 17 98 17
## [251] 27 98 20 17 28 98 98 17 18 18 18 17 18 18 18 40 20 20 22 27 98 19 20 18 18
## [276] 17 18 26 28 98 98 98 24 98 98 20 98 22 98 98 27 17 25 17 99 23 18 24 40 22
## [301] 98 98 98 98 18 20 98 23 18 99 98 98 98 98 98 98 98 22 98 98 98 98 98
# Remove the rows whose Ante value is flagged as an outlier by boxplot().
# A logical mask is used instead of -which(...): when nothing is flagged,
# which() returns integer(0) and dataset[-integer(0), ] would drop every row.
outliers <- boxplot(dataset$Ante, plot = FALSE)$out
is_outlier <- dataset$Ante %in% outliers
dataset <- dataset[!is_outlier, ]
#View(dataset)
Berdasarkan tujuan penelitian kelompok kami, kami akan menggabungkan BBLR (Berat Bayi Lahir Rendah) dan BBLL (Berat Bayi Lahir Lebih) menjadi satu kategori yaitu BBLTN (Berat Bayi Lahir Tidak Normal)
# Work on a copy and collapse BBLR and BBLL into one "not normal" class,
# BBLTN; BBLN is left untouched. The result is stored as a factor again.
dataset1 <- dataset
merged_label <- gsub("BBL[LR]", "BBLTN", as.character(dataset1$BBL))
dataset1$BBL <- as.factor(merged_label)
Untuk melihat apakah dataset yang digunakan merupakan imbalance dataset maka akan dilihat grafik proporsi berat bayi lahir
# Class counts of the binary target, used to check for class imbalance.
propBBL <- table(dataset1$BBL)
propBBL
##
## BBLN BBLTN
## 11282 3752
# Class proportions (rounded to two decimals) drawn as a hatched bar chart.
perc <- round(prop.table(propBBL), digits = 2)
barplot(
  perc,
  names.arg = c("BBLN", "BBLTN"),
  main = "Berat Bayi Lahir",
  xlab = "Label",
  ylab = "proporsi",
  col = "brown",
  density = 10,
  angle = 45
)
Melihat jumlah data pada masing-masing kelas pada setiap variabel
# Print the frequency table of each categorical column of dataset1.
# Indexing with a single-column data frame keeps the column name as the
# table header, matching the output of table(<column>) inside with().
for (col_name in c("Wilayah", "Pend", "SK", "BBL")) {
  print(table(dataset1[col_name]))
}
## Wilayah
## 1 2
## 7384 7650
## Pend
## 0 1 2 3
## 197 3791 8466 2580
## SK
## 1 2 3 4 5
## 4014 2987 2826 2685 2522
## BBL
## BBLN BBLTN
## 11282 3752
Berdasarkan grafik proporsi berat bayi lahir, terlihat bahwa terjadi permasalahan imbalance dataset, sehingga akan dilakukan upsampling untuk mengatasi permasalahan tersebut
# Keep a separate copy (dataset3) as input for the resampling step and
# review its structure.
dataset3 <- dataset1
str(object = dataset3)
## 'data.frame': 15034 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 1 4 2 2 4 4 3 2 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 3 1 5 2 1 2 2 1 1 ...
## $ Umur : num 21 17 22 26 18 18 25 36 20 17 ...
## $ Ante : num 6 9 9 7 9 9 4 9 7 4 ...
## $ Berat : num 2100 3000 2500 3400 5000 3000 4700 4100 3500 2800 ...
## $ M14nol : num 6 9 9 7 9 9 4 9 7 4 ...
## $ BBL : Factor w/ 2 levels "BBLN","BBLTN": 2 1 2 1 2 1 2 2 1 1 ...
## - attr(*, "na.action")= 'omit' Named int [1:2491] 2 7 10 12 26 40 43 60 91 93 ...
## ..- attr(*, "names")= chr [1:2491] "2" "7" "10" "12" ...
# Balance the two BBL classes by upsampling with caret::upSample().
# upSample() draws rows at random, so the seed is fixed here to make the
# balanced data set reproducible between runs. The predictors are selected
# by name instead of by the hard-coded position of BBL (previously column 8).
set.seed(100)
is_predictor <- names(dataset3) != "BBL"
newdf <- upSample(dataset3[, is_predictor, drop = FALSE], dataset3$BBL, yname = "BBL") #upsampling
str(newdf)
## 'data.frame': 22564 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 4 2 3 2 2 3 3 3 3 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 3 5 1 1 1 2 4 2 4 2 ...
## $ Umur : num 17 26 18 20 17 19 18 27 21 30 ...
## $ Ante : num 9 7 9 7 4 7 4 9 3 8 ...
## $ Berat : num 3000 3400 3000 3500 2800 2800 3000 3500 3000 2800 ...
## $ M14nol : num 9 7 9 7 4 7 4 9 3 8 ...
## $ BBL : Factor w/ 2 levels "BBLN","BBLTN": 1 1 1 1 1 1 1 1 1 1 ...
# Print the frequency table of each categorical column after upsampling.
# Indexing with a single-column data frame keeps the column name as the
# table header, matching the output of table(<column>) inside with().
for (col_name in c("Wilayah", "Pend", "SK", "BBL")) {
  print(table(newdf[col_name]))
}
## Wilayah
## 1 2
## 10240 12324
## Pend
## 0 1 2 3
## 428 6370 12246 3520
## SK
## 1 2 3 4 5
## 7284 4328 3958 3666 3328
## BBL
## BBLN BBLTN
## 11282 11282
# Recompute the class proportions on the upsampled data and redraw the
# same hatched bar chart to confirm that the classes are now balanced.
propBBL <- table(newdf$BBL)
perc <- round(prop.table(propBBL), digits = 2)
barplot(
  perc,
  names.arg = c("BBLN", "BBLTN"),
  main = "Berat Bayi Lahir",
  xlab = "Label",
  ylab = "proporsi",
  col = "brown",
  density = 10,
  angle = 45
)
## DATA MINING

Membuat Model
# Model formula shared by all classifiers: BBL is explained by Pend, SK,
# Ante, Umur and Wilayah.
# attach(newdf) was removed: every modelling call in this script passes its
# data through data = / newdata =, so the columns never need to be on the
# search path, and attaching risks silently masking other objects.
model <- BBL ~ Pend + SK + Ante + Umur + Wilayah
Pada setiap metode yang digunakan, evaluasi dilakukan dengan dua cara: pembagian data menjadi data training dan data testing dengan proporsi 70:30, serta 10-fold cross validation.
# Draw 70% of the rows of newdf for training and keep the rest for testing;
# the seed fixes which rows are drawn.
# NOTE(review): newdf has already been upsampled, so copies of the same
# original row may end up in both train_set and test_set, which can make the
# test metrics optimistic - consider splitting before upsampling.
set.seed(100)
n_rows <- nrow(newdf)
sampling <- sample(seq_len(n_rows), 0.7 * n_rows)
train_set <- newdf[sampling, ]
test_set <- newdf[-sampling, ]
# 10-fold cross-validation settings reused by the caret train() calls.
myControl <- trainControl(method = "cv", number = 10, verboseIter = TRUE)
## Decision Tree dengan Splitting 70:30
# Fit a classification tree on the training split, plot it, and compare its
# class predictions for the test split with the true BBL labels.
dtree <- rpart(formula = model, data = train_set, method = "class")
rpart.plot(dtree, extra = 106)
pred_dtree <- predict(dtree, newdata = test_set, type = "class")
confusionMatrix(data = pred_dtree, reference = test_set$BBL)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2359 1482
## BBLTN 1021 1908
##
## Accuracy : 0.6303
## 95% CI : (0.6187, 0.6418)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2607
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.6979
## Specificity : 0.5628
## Pos Pred Value : 0.6142
## Neg Pred Value : 0.6514
## Prevalence : 0.4993
## Detection Rate : 0.3484
## Detection Prevalence : 0.5674
## Balanced Accuracy : 0.6304
##
## 'Positive' Class : BBLN
##
Decision Tree dengan Cross Validation
# Tune and fit an rpart tree with caret, using the cross-validation
# settings stored in myControl.
dtree_cv <- train(
  model,
  data = train_set,
  method = "rpart",
  trControl = myControl
)
## + Fold01: cp=0.003833
## - Fold01: cp=0.003833
## + Fold02: cp=0.003833
## - Fold02: cp=0.003833
## + Fold03: cp=0.003833
## - Fold03: cp=0.003833
## + Fold04: cp=0.003833
## - Fold04: cp=0.003833
## + Fold05: cp=0.003833
## - Fold05: cp=0.003833
## + Fold06: cp=0.003833
## - Fold06: cp=0.003833
## + Fold07: cp=0.003833
## - Fold07: cp=0.003833
## + Fold08: cp=0.003833
## - Fold08: cp=0.003833
## + Fold09: cp=0.003833
## - Fold09: cp=0.003833
## + Fold10: cp=0.003833
## - Fold10: cp=0.003833
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00383 on full training set
# Evaluate the cross-validated tree on the held-out test split.
pred_dtree_cv <- as.factor(predict(dtree_cv, newdata = test_set))
confusionMatrix(pred_dtree_cv, as.factor(test_set$BBL))
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2471 1815
## BBLTN 909 1575
##
## Accuracy : 0.5976
## 95% CI : (0.5858, 0.6093)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.1956
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7311
## Specificity : 0.4646
## Pos Pred Value : 0.5765
## Neg Pred Value : 0.6341
## Prevalence : 0.4993
## Detection Rate : 0.3650
## Detection Prevalence : 0.6331
## Balanced Accuracy : 0.5978
##
## 'Positive' Class : BBLN
##
Naive Bayes dengan Splitting 70:30
# Fit a naive Bayes classifier on the training split and show its a-priori
# and conditional probability tables.
nb <- naiveBayes(model, data = train_set)
print(x = nb)
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## BBLN BBLTN
## 0.5003166 0.4996834
##
## Conditional probabilities:
## Pend
## Y 0 1 2 3
## BBLN 0.006833713 0.220956720 0.585674513 0.186535054
## BBLTN 0.030283832 0.345919919 0.499746579 0.124049671
##
## SK
## Y 1 2 3 4 5
## BBLN 0.2158947 0.1979246 0.2003290 0.1974184 0.1884333
## BBLTN 0.4235935 0.1841105 0.1533198 0.1325393 0.1064369
##
## Ante
## Y [,1] [,2]
## BBLN 8.021893 3.041734
## BBLTN 6.422960 3.459346
##
## Umur
## Y [,1] [,2]
## BBLN 22.42356 4.323949
## BBLTN 21.46211 4.415467
##
## Wilayah
## Y 1 2
## BBLN 0.5234118 0.4765882
## BBLTN 0.3857070 0.6142930
# Predict the test split with the naive Bayes model and compare the
# predictions with the true BBL labels.
pred_nb <- predict(nb, newdata = test_set)
confusionMatrix(data = pred_nb, reference = test_set$BBL)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2257 1366
## BBLTN 1123 2024
##
## Accuracy : 0.6323
## 95% CI : (0.6207, 0.6438)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2648
##
## Mcnemar's Test P-Value : 1.23e-06
##
## Sensitivity : 0.6678
## Specificity : 0.5971
## Pos Pred Value : 0.6230
## Neg Pred Value : 0.6432
## Prevalence : 0.4993
## Detection Rate : 0.3334
## Detection Prevalence : 0.5352
## Balanced Accuracy : 0.6324
##
## 'Positive' Class : BBLN
##
Naive Bayes dengan Cross Validation
# Tune and fit a naive Bayes model with caret, using the cross-validation
# settings stored in myControl.
nb_cv <- train(
  model,
  data = train_set,
  method = "naive_bayes",
  trControl = myControl
)
## + Fold01: usekernel= TRUE, laplace=0, adjust=1
## - Fold01: usekernel= TRUE, laplace=0, adjust=1
## + Fold01: usekernel=FALSE, laplace=0, adjust=1
## - Fold01: usekernel=FALSE, laplace=0, adjust=1
## + Fold02: usekernel= TRUE, laplace=0, adjust=1
## - Fold02: usekernel= TRUE, laplace=0, adjust=1
## + Fold02: usekernel=FALSE, laplace=0, adjust=1
## - Fold02: usekernel=FALSE, laplace=0, adjust=1
## + Fold03: usekernel= TRUE, laplace=0, adjust=1
## - Fold03: usekernel= TRUE, laplace=0, adjust=1
## + Fold03: usekernel=FALSE, laplace=0, adjust=1
## - Fold03: usekernel=FALSE, laplace=0, adjust=1
## + Fold04: usekernel= TRUE, laplace=0, adjust=1
## - Fold04: usekernel= TRUE, laplace=0, adjust=1
## + Fold04: usekernel=FALSE, laplace=0, adjust=1
## - Fold04: usekernel=FALSE, laplace=0, adjust=1
## + Fold05: usekernel= TRUE, laplace=0, adjust=1
## - Fold05: usekernel= TRUE, laplace=0, adjust=1
## + Fold05: usekernel=FALSE, laplace=0, adjust=1
## - Fold05: usekernel=FALSE, laplace=0, adjust=1
## + Fold06: usekernel= TRUE, laplace=0, adjust=1
## - Fold06: usekernel= TRUE, laplace=0, adjust=1
## + Fold06: usekernel=FALSE, laplace=0, adjust=1
## - Fold06: usekernel=FALSE, laplace=0, adjust=1
## + Fold07: usekernel= TRUE, laplace=0, adjust=1
## - Fold07: usekernel= TRUE, laplace=0, adjust=1
## + Fold07: usekernel=FALSE, laplace=0, adjust=1
## - Fold07: usekernel=FALSE, laplace=0, adjust=1
## + Fold08: usekernel= TRUE, laplace=0, adjust=1
## - Fold08: usekernel= TRUE, laplace=0, adjust=1
## + Fold08: usekernel=FALSE, laplace=0, adjust=1
## - Fold08: usekernel=FALSE, laplace=0, adjust=1
## + Fold09: usekernel= TRUE, laplace=0, adjust=1
## - Fold09: usekernel= TRUE, laplace=0, adjust=1
## + Fold09: usekernel=FALSE, laplace=0, adjust=1
## - Fold09: usekernel=FALSE, laplace=0, adjust=1
## + Fold10: usekernel= TRUE, laplace=0, adjust=1
## - Fold10: usekernel= TRUE, laplace=0, adjust=1
## + Fold10: usekernel=FALSE, laplace=0, adjust=1
## - Fold10: usekernel=FALSE, laplace=0, adjust=1
## Aggregating results
## Selecting tuning parameters
## Fitting laplace = 0, usekernel = FALSE, adjust = 1 on full training set
# Show the resampling results and the tuning values selected by caret.
print(x = nb_cv)
## Naive Bayes
##
## 15794 samples
## 5 predictor
## 2 classes: 'BBLN', 'BBLTN'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 14215, 14215, 14215, 14215, 14213, 14214, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.6082052 0.2164282
## TRUE 0.5957967 0.1917538
##
## Tuning parameter 'laplace' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were laplace = 0, usekernel = FALSE
## and adjust = 1.
# Evaluate the cross-validated naive Bayes model on the held-out test split.
pred_nb_cv <- as.factor(predict(nb_cv, newdata = test_set))
confusionMatrix(pred_nb_cv, as.factor(test_set$BBL))
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2024 1235
## BBLTN 1356 2155
##
## Accuracy : 0.6173
## 95% CI : (0.6056, 0.6289)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.2345
##
## Mcnemar's Test P-Value : 0.0184
##
## Sensitivity : 0.5988
## Specificity : 0.6357
## Pos Pred Value : 0.6210
## Neg Pred Value : 0.6138
## Prevalence : 0.4993
## Detection Rate : 0.2990
## Detection Prevalence : 0.4814
## Balanced Accuracy : 0.6173
##
## 'Positive' Class : BBLN
##
Lalu akan dibuat kurva ROC untuk membandingkan kedua metode tersebut
# Compare both classifiers with ROC curves on the test split.
# The curves are built from the predicted probability of the BBLTN class
# (coded as the case, 1). Feeding hard 0/1 class labels to roc() yields only
# a single operating point per model rather than a curve, and an AUC that
# ignores how the predictions are ranked.
par(pty="s")
truth_bbltn <- ifelse(test_set$BBL == "BBLN", 0, 1)
# ROC for the decision tree
prob_dtree <- predict(dtree, newdata = test_set, type = "prob")[, "BBLTN"]
dtreeROC <- roc(truth_bbltn, prob_dtree,
                plot = TRUE, print.auc = TRUE, col = "green", lwd = 4,
                legacy.axes = TRUE, main = "ROC Curves")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# ROC for naive Bayes (type = "raw" returns the posterior class probabilities)
prob_nb <- predict(nb, newdata = test_set, type = "raw")[, "BBLTN"]
nbayesROC <- roc(truth_bbltn, prob_nb,
                 plot = TRUE, print.auc = TRUE, col = "blue", lwd = 4,
                 print.auc.y = 0.4, legacy.axes = TRUE, add = TRUE)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
legend("bottomright", legend=c("Decision Tree","Naive Bayes"),col=c("green", "blue"),lwd=4)