Import Dataset Kita
# Switch the working directory so the relative file name below resolves.
# NOTE(review): this is an absolute path on the author's machine; the script
# will stop here on any other computer.
setwd("D:/STIS/4SE/4. Data Mining/Tugas/Bismillah Dulu")
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.5
# Read the Excel workbook "dataya.xlsx" into `Data`.
Data <- read_excel("dataya.xlsx")
#View(Data)
Load Library Kita
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.5
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.5
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(DT)
## Warning: package 'DT' was built under R version 4.0.5
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
library(rpart)
## Warning: package 'rpart' was built under R version 4.0.5
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.0.5
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(e1071)
## Warning: package 'e1071' was built under R version 4.0.5
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 4.0.5
library(ggplot2)
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(smotefamily)
## Warning: package 'smotefamily' was built under R version 4.0.5
# Keep the imported table as a plain data.frame and inspect its columns.
dataset <- as.data.frame(x = Data)
str(object = dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ V102 : num 2 2 2 2 2 2 2 2 2 2 ...
## $ V106 : num 1 1 1 0 3 1 1 1 3 3 ...
## $ V190 : num 1 1 3 1 5 2 2 1 2 2 ...
## $ V212 : num 21 21 17 22 26 18 18 18 25 25 ...
## $ M14 : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ M19 : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol: num 6 0 9 9 7 9 0 9 4 0 ...
#View(dataset)
# Replace the survey variable codes with readable column names
# (new name = old name), all in a single rename() call.
dataset <- rename(
  dataset,
  Wilayah = V102,
  Pend = V106,
  SK = V190,
  Umur = V212,
  Ante = M14,
  Berat = M19
)
str(dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ Wilayah: num 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : num 1 1 1 0 3 1 1 1 3 3 ...
## $ SK : num 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
# Columns 1 to 3 (Wilayah, Pend, SK) hold category codes: store them as factors.
dataset[1:3] <- lapply(dataset[1:3], as.factor)
str(dataset)
## 'data.frame': 17848 obs. of 7 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 2 1 4 2 2 2 4 4 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
#View(dataset)
# Derive the birth-weight class BBL from Berat in one vectorised pass:
#   Berat <= 2500         -> "BBLR"
#   2500 < Berat <= 3999  -> "BBLN"
#   anything else         -> "BBLL"
# The former row-by-row if/else loop stopped with "missing value where
# TRUE/FALSE needed" on a missing Berat and failed on a zero-row data frame
# (1:nrow() counts 1, 0). Here a missing Berat simply yields a missing BBL.
dataset$BBL <- as.factor(
  ifelse(
    dataset$Berat <= 2500,
    "BBLR",
    ifelse(dataset$Berat <= 3999, "BBLN", "BBLL")
  )
)
str(dataset)
## 'data.frame': 17848 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 2 1 4 2 2 2 4 4 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 1 3 1 5 2 2 1 2 2 ...
## $ Umur : num 21 21 17 22 26 18 18 18 25 25 ...
## $ Ante : num 6 NA 9 9 7 9 NA 9 4 NA ...
## $ Berat : num 2100 2000 3000 2500 3400 5000 4500 3000 4700 3600 ...
## $ M14nol : num 6 0 9 9 7 9 0 9 4 0 ...
## $ BBL : Factor w/ 3 levels "BBLL","BBLN",..: 3 3 2 3 2 1 1 2 1 2 ...
Buang missing value dan cek outlier
# Drop every row that has a missing value in any column, then draw the
# boxplots of Umur and Ante.
dataset <- na.omit(dataset)
boxplot(dataset$Umur)
boxplot(dataset$Ante)
# Umur values that fall outside the boxplot whiskers.
boxplot.stats(dataset$Umur)$out
## [1] 36 37 38 43 37 37 37 36 35 37 38 37 44 36 35 35 42 36 37 36 46 35 35 36 35
## [26] 35 36 41 36 36 39 36 35 39 37 37 38 35 39 39 35 38 41 42 39 35 35 42 37 35
## [51] 37 39 39 37 35 35 36 44 36 38 42 35 35 37 40 37 42 43 39 41 42 35 41 35 37
## [76] 38 38 42 38 39 42 40 38 40 36 39 39 38 38 35 37 38 39 41 37 35 37 35 37 38
## [101] 35 39 36 38 40 35 38 36 35 37 36 39 41 37 40 36 41 45 40 35 36 36 40 35 36
## [126] 38 41 35 37 36 35 35 42 41 36 37 37 36 35 36 40 38 39 43 35 38 37 37 37 37
## [151] 35 35 44 38 38 37 36 37 37 35 41 36 39 35 36 38 38 35 41 37 35 35 38 37 39
## [176] 35 37 38 38 38 35 35 37 43 42 39 35 41 40 37
# Ante values that fall outside the boxplot whiskers.
boxplot.stats(dataset$Ante)$out
## [1] 18 21 98 98 98 36 24 26 98 98 18 28 98 98 98 18 98 98 98 18 18 21 98 98 32
## [26] 98 18 32 98 98 98 18 17 17 18 26 17 17 18 17 17 98 17 21 20 19 18 18 17 18
## [51] 18 26 99 25 20 98 21 98 23 21 18 32 20 18 17 98 18 18 25 32 98 20 24 18 18
## [76] 18 19 18 17 22 98 19 17 98 33 98 18 27 98 98 99 18 18 17 40 20 18 18 18 18
## [101] 18 19 19 19 98 17 20 19 17 18 18 98 18 17 20 27 18 17 98 20 20 98 18 18 98
## [126] 21 20 21 17 18 20 17 18 17 98 98 17 99 18 24 24 20 19 20 20 22 18 21 18 20
## [151] 21 18 18 18 20 20 21 28 98 23 98 20 18 19 19 17 29 17 18 17 19 25 17 18 21
## [176] 98 18 24 18 17 32 20 19 20 17 19 21 22 20 18 17 18 18 24 17 21 18 18 21 17
## [201] 17 22 27 18 18 98 28 20 17 17 22 24 24 18 20 17 20 18 98 98 18 98 18 18 18
## [226] 18 98 18 23 98 17 21 98 98 99 98 18 98 98 98 18 27 22 24 32 17 18 17 98 17
## [251] 27 98 20 17 28 98 98 17 18 18 18 17 18 18 18 40 20 20 22 27 98 19 20 18 18
## [276] 17 18 26 28 98 98 98 24 98 98 20 98 22 98 98 27 17 25 17 99 23 18 24 40 22
## [301] 98 98 98 98 18 20 98 23 18 99 98 98 98 98 98 98 98 22 98 98 98 98 98
# Remove the rows whose Ante value is flagged as an outlier by the boxplot rule.
# A logical mask replaces `-which(...)`: when nothing is flagged, which()
# returns integer(0) and `dataset[-integer(0), ]` would silently drop every
# row instead of keeping them all.
# NOTE(review): the flagged values printed above include 98 and 99, which look
# like special survey codes rather than real counts - confirm against the
# codebook.
outliers <- boxplot(dataset$Ante, plot = FALSE)$out
dataset <- dataset[!(dataset$Ante %in% outliers), ]
#View(dataset)
============================ DATASET1 COBA
# Work on a copy and collapse the three classes into two: BBLN is kept,
# BBLL and BBLR are both relabelled BBLTN.
dataset1 <- dataset
dataset1$BBL <- factor(gsub("BBL[LR]", "BBLTN", dataset1$BBL))
# Class counts after relabelling.
propBBL <- table(dataset1$BBL)
propBBL
##
## BBLN BBLTN
## 11282 3752
# Class shares rounded to two decimals, drawn as a hatched bar chart.
perc <- round(prop.table(propBBL), digits = 2)
barplot(
  height = perc,
  names.arg = c("BBLN", "BBLTN"),
  main = "Berat Bayi Lahir",
  xlab = "Label",
  ylab = "proporsi",
  col = "brown",
  density = 10,
  angle = 45
)
# Frequency table of each categorical column.
print(table(Wilayah = dataset1$Wilayah))
print(table(Pend = dataset1$Pend))
print(table(SK = dataset1$SK))
print(table(BBL = dataset1$BBL))
## Wilayah
## 1 2
## 7384 7650
## Pend
## 0 1 2 3
## 197 3791 8466 2580
## SK
## 1 2 3 4 5
## 4014 2987 2826 2685 2522
## BBL
## BBLN BBLTN
## 11282 3752
# dataset1 <- na.omit(dataset1)
#BBLN <- which(dataset1$BBL == "BBLN")
#BBLTN <- which(dataset1$BBL =="BBLTN")
#length(BBLN)
#length(BBLTN)
#BBLN.downsample <- sample(BBLN,length(BBLTN))
#dataset1.down <- dataset1[c(BBLN.downsample,BBLTN),]
#View(dataset1.down)
#str(dataset1.down)
library(caret)
# dataset3 is a copy of dataset1; it is the input of the up-sampling step below.
dataset3 <- dataset1
str(dataset3)
## 'data.frame': 15034 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 2 1 4 2 2 4 4 3 2 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 1 3 1 5 2 1 2 2 1 1 ...
## $ Umur : num 21 17 22 26 18 18 25 36 20 17 ...
## $ Ante : num 6 9 9 7 9 9 4 9 7 4 ...
## $ Berat : num 2100 3000 2500 3400 5000 3000 4700 4100 3500 2800 ...
## $ M14nol : num 6 9 9 7 9 9 4 9 7 4 ...
## $ BBL : Factor w/ 2 levels "BBLN","BBLTN": 2 1 2 1 2 1 2 2 1 1 ...
## - attr(*, "na.action")= 'omit' Named int [1:2491] 2 7 10 12 26 40 43 60 91 93 ...
## ..- attr(*, "names")= chr [1:2491] "2" "7" "10" "12" ...
# Class counts before balancing.
propBBL <- table(dataset3$BBL)
propBBL
##
## BBLN BBLTN
## 11282 3752
# Class shares rounded to two decimals, drawn as a hatched bar chart.
perc <- round(prop.table(propBBL), digits = 2)
barplot(perc,
  main = "Berat Bayi Lahir",
  xlab = "Label",
  ylab = "proporsi",
  col = "brown",
  density = 10,
  angle = 45,
  names.arg = c("BBLN", "BBLTN")
)
# Balance the two classes by up-sampling the smaller one.
# upSample() draws rows at random, so the seed is fixed first; without it
# newdf (and everything fitted on it) differs from run to run.
# The class column is dropped by name rather than by position 8, so the call
# keeps working if columns are added or reordered.
# NOTE(review): balancing happens before the train/test split further down,
# so copies of the same row can end up in both sets - consider splitting
# first and up-sampling the training part only.
set.seed(100)
newdf <- upSample(
  x = dataset3[, names(dataset3) != "BBL"],
  y = dataset3$BBL,
  yname = "BBL"
)
str(newdf)
## 'data.frame': 22564 obs. of 8 variables:
## $ Wilayah: Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ Pend : Factor w/ 4 levels "0","1","2","3": 2 4 2 3 2 2 3 3 3 3 ...
## $ SK : Factor w/ 5 levels "1","2","3","4",..: 3 5 1 1 1 2 4 2 4 2 ...
## $ Umur : num 17 26 18 20 17 19 18 27 21 30 ...
## $ Ante : num 9 7 9 7 4 7 4 9 3 8 ...
## $ Berat : num 3000 3400 3000 3500 2800 2800 3000 3500 3000 2800 ...
## $ M14nol : num 9 7 9 7 4 7 4 9 3 8 ...
## $ BBL : Factor w/ 2 levels "BBLN","BBLTN": 1 1 1 1 1 1 1 1 1 1 ...
# Frequency table of each categorical column in the balanced data.
print(table(Wilayah = newdf$Wilayah))
print(table(Pend = newdf$Pend))
print(table(SK = newdf$SK))
print(table(BBL = newdf$BBL))
## Wilayah
## 1 2
## 10200 12364
## Pend
## 0 1 2 3
## 450 6365 12208 3541
## SK
## 1 2 3 4 5
## 7212 4442 3961 3671 3278
## BBL
## BBLN BBLTN
## 11282 11282
# Class counts after balancing.
propBBL <- table(newdf$BBL)
propBBL
##
## BBLN BBLTN
## 11282 11282
# Class shares rounded to two decimals, drawn as a hatched bar chart.
perc <- round(prop.table(propBBL), digits = 2)
barplot(
  height = perc,
  names.arg = c("BBLN", "BBLTN"),
  main = "Berat Bayi Lahir",
  xlab = "Label",
  ylab = "proporsi",
  col = "brown",
  density = 10,
  angle = 45
)
===============================
===============================================##########################=====================
#propBBL<-table(dataset$BBL)
#propBBL
#perc<-round(prop.table(propBBL), digits=2)
#barplot(perc,
# main="Berat Bayi Lahir",
# xlab="Label",
# ylab="proporsi",
# col="brown",
# density = 10,
# angle = 45,
# names.arg = c("BBLL","BBLN","BBLR"))
=======================================##############################=============================
Membuat Model
# Formula shared by every classifier below: BBL as a function of Pend, SK,
# Ante, Umur and Wilayah.
# The former attach(newdf) is dropped: each fit and prediction passes its
# data frame explicitly, and attach() would only leave a stale copy of the
# columns on the search path where it can mask other objects.
model <- BBL ~ Pend + SK + Ante + Umur + Wilayah
Membuat Splitting Data Train dan Data Test dengan 70:30 dan Cross Validation
# Reproducible 70:30 split: the sampled row numbers form the training set,
# all remaining rows form the test set.
# NOTE(review): newdf has already been up-sampled at this point, so copies of
# the same row can fall on both sides of the split - confirm this is intended.
set.seed(100)
sampling <- sample.int(nrow(newdf), 0.7 * nrow(newdf))
train_set <- newdf[sampling, ]
test_set <- newdf[-sampling, ]
# Resampling scheme for caret::train(): 10-fold cross-validation with
# progress messages.
myControl <- trainControl(method = "cv", number = 10, verboseIter = TRUE)
Decision Tree Splitting 70:30
# Fit a classification tree on the training split and draw it.
dtree <- rpart(
  model,
  data = train_set,
  method = "class"
)
rpart.plot(dtree, extra = 106)
# Predict the class of every test row and compare with the true labels.
pred_dtree <- predict(dtree, newdata = test_set, type = "class")
confusionMatrix(data = pred_dtree, reference = test_set$BBL)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2594 1782
## BBLTN 786 1608
##
## Accuracy : 0.6207
## 95% CI : (0.609, 0.6323)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2417
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7675
## Specificity : 0.4743
## Pos Pred Value : 0.5928
## Neg Pred Value : 0.6717
## Prevalence : 0.4993
## Detection Rate : 0.3832
## Detection Prevalence : 0.6464
## Balanced Accuracy : 0.6209
##
## 'Positive' Class : BBLN
##
Decision Tree Cross Validation
# Tune and fit an rpart tree through caret using the resampling scheme in
# myControl.
dtree_cv <- train(
  model,
  data = train_set,
  method = "rpart",
  trControl = myControl
)
## + Fold01: cp=0.005406
## - Fold01: cp=0.005406
## + Fold02: cp=0.005406
## - Fold02: cp=0.005406
## + Fold03: cp=0.005406
## - Fold03: cp=0.005406
## + Fold04: cp=0.005406
## - Fold04: cp=0.005406
## + Fold05: cp=0.005406
## - Fold05: cp=0.005406
## + Fold06: cp=0.005406
## - Fold06: cp=0.005406
## + Fold07: cp=0.005406
## - Fold07: cp=0.005406
## + Fold08: cp=0.005406
## - Fold08: cp=0.005406
## + Fold09: cp=0.005406
## - Fold09: cp=0.005406
## + Fold10: cp=0.005406
## - Fold10: cp=0.005406
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00541 on full training set
# Evaluate the cross-validated tree on the test split.
confusionMatrix(
  data = as.factor(predict(dtree_cv, newdata = test_set)),
  reference = as.factor(test_set$BBL)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2315 1682
## BBLTN 1065 1708
##
## Accuracy : 0.5942
## 95% CI : (0.5824, 0.606)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.1887
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.6849
## Specificity : 0.5038
## Pos Pred Value : 0.5792
## Neg Pred Value : 0.6159
## Prevalence : 0.4993
## Detection Rate : 0.3419
## Detection Prevalence : 0.5904
## Balanced Accuracy : 0.5944
##
## 'Positive' Class : BBLN
##
#Random Forest dengan Splitting 70:30
#rf <- randomForest(model, data = train_set)
#print(rf)
#pred_rf <- predict(rf, newdata = test_set)
#confusionMatrix(pred_rf %>% as.factor(), test_set$BBL %>% as.factor())
#Random Forest dengan Cross Validation
#rf_cv <- train(model, data=train_set,
# method='rf',
# trControl=myControl)
#confusionMatrix(predict(rf_cv, newdata=test_set) %>% as.factor(),test_set$BBL %>% as.factor())
Naive Bayes dengan Splitting 70:30
# Fit a naive Bayes classifier on the training split and show its
# a-priori and conditional probability tables.
nb <- naiveBayes(
  model,
  data = train_set
)
print(nb)
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## BBLN BBLTN
## 0.5003166 0.4996834
##
## Conditional probabilities:
## Pend
## Y 0 1 2 3
## BBLN 0.006833713 0.220956720 0.585674513 0.186535054
## BBLTN 0.033831728 0.348454131 0.495818550 0.121895590
##
## SK
## Y 1 2 3 4 5
## BBLN 0.21589471 0.19792458 0.20032903 0.19741838 0.18843331
## BBLTN 0.42372022 0.19386721 0.15534719 0.12886467 0.09820071
##
## Ante
## Y [,1] [,2]
## BBLN 8.021893 3.041734
## BBLTN 6.375697 3.437017
##
## Umur
## Y [,1] [,2]
## BBLN 22.42356 4.323949
## BBLTN 21.48961 4.427969
##
## Wilayah
## Y 1 2
## BBLN 0.5234118 0.4765882
## BBLTN 0.3767106 0.6232894
# Predict the class of every test row with naive Bayes and compare with the
# true labels.
pred_nb <- predict(nb, newdata = test_set)
confusionMatrix(data = pred_nb, reference = test_set$BBL)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 2232 1406
## BBLTN 1148 1984
##
## Accuracy : 0.6227
## 95% CI : (0.6111, 0.6343)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2456
##
## Mcnemar's Test P-Value : 3.669e-07
##
## Sensitivity : 0.6604
## Specificity : 0.5853
## Pos Pred Value : 0.6135
## Neg Pred Value : 0.6335
## Prevalence : 0.4993
## Detection Rate : 0.3297
## Detection Prevalence : 0.5374
## Balanced Accuracy : 0.6228
##
## 'Positive' Class : BBLN
##
Naive Bayes dengan Cross Validation
# Tune and fit naive Bayes through caret using the resampling scheme in
# myControl.
nb_cv <- train(
  model,
  data = train_set,
  method = "naive_bayes",
  trControl = myControl
)
## + Fold01: usekernel= TRUE, laplace=0, adjust=1
## - Fold01: usekernel= TRUE, laplace=0, adjust=1
## + Fold01: usekernel=FALSE, laplace=0, adjust=1
## - Fold01: usekernel=FALSE, laplace=0, adjust=1
## + Fold02: usekernel= TRUE, laplace=0, adjust=1
## - Fold02: usekernel= TRUE, laplace=0, adjust=1
## + Fold02: usekernel=FALSE, laplace=0, adjust=1
## - Fold02: usekernel=FALSE, laplace=0, adjust=1
## + Fold03: usekernel= TRUE, laplace=0, adjust=1
## - Fold03: usekernel= TRUE, laplace=0, adjust=1
## + Fold03: usekernel=FALSE, laplace=0, adjust=1
## - Fold03: usekernel=FALSE, laplace=0, adjust=1
## + Fold04: usekernel= TRUE, laplace=0, adjust=1
## - Fold04: usekernel= TRUE, laplace=0, adjust=1
## + Fold04: usekernel=FALSE, laplace=0, adjust=1
## - Fold04: usekernel=FALSE, laplace=0, adjust=1
## + Fold05: usekernel= TRUE, laplace=0, adjust=1
## - Fold05: usekernel= TRUE, laplace=0, adjust=1
## + Fold05: usekernel=FALSE, laplace=0, adjust=1
## - Fold05: usekernel=FALSE, laplace=0, adjust=1
## + Fold06: usekernel= TRUE, laplace=0, adjust=1
## - Fold06: usekernel= TRUE, laplace=0, adjust=1
## + Fold06: usekernel=FALSE, laplace=0, adjust=1
## - Fold06: usekernel=FALSE, laplace=0, adjust=1
## + Fold07: usekernel= TRUE, laplace=0, adjust=1
## - Fold07: usekernel= TRUE, laplace=0, adjust=1
## + Fold07: usekernel=FALSE, laplace=0, adjust=1
## - Fold07: usekernel=FALSE, laplace=0, adjust=1
## + Fold08: usekernel= TRUE, laplace=0, adjust=1
## - Fold08: usekernel= TRUE, laplace=0, adjust=1
## + Fold08: usekernel=FALSE, laplace=0, adjust=1
## - Fold08: usekernel=FALSE, laplace=0, adjust=1
## + Fold09: usekernel= TRUE, laplace=0, adjust=1
## - Fold09: usekernel= TRUE, laplace=0, adjust=1
## + Fold09: usekernel=FALSE, laplace=0, adjust=1
## - Fold09: usekernel=FALSE, laplace=0, adjust=1
## + Fold10: usekernel= TRUE, laplace=0, adjust=1
## - Fold10: usekernel= TRUE, laplace=0, adjust=1
## + Fold10: usekernel=FALSE, laplace=0, adjust=1
## - Fold10: usekernel=FALSE, laplace=0, adjust=1
## Aggregating results
## Selecting tuning parameters
## Fitting laplace = 0, usekernel = FALSE, adjust = 1 on full training set
# Show the resampling results and the tuning values caret selected.
print(nb_cv)
## Naive Bayes
##
## 15794 samples
## 5 predictor
## 2 classes: 'BBLN', 'BBLTN'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 14215, 14215, 14215, 14215, 14213, 14214, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.6080146 0.2160635
## TRUE 0.5967474 0.1936645
##
## Tuning parameter 'laplace' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were laplace = 0, usekernel = FALSE
## and adjust = 1.
# Evaluate the cross-validated naive Bayes model on the test split.
confusionMatrix(
  data = as.factor(predict(nb_cv, newdata = test_set)),
  reference = as.factor(test_set$BBL)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction BBLN BBLTN
## BBLN 1988 1237
## BBLTN 1392 2153
##
## Accuracy : 0.6117
## 95% CI : (0.5999, 0.6233)
## No Information Rate : 0.5007
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2233
##
## Mcnemar's Test P-Value : 0.002669
##
## Sensitivity : 0.5882
## Specificity : 0.6351
## Pos Pred Value : 0.6164
## Neg Pred Value : 0.6073
## Prevalence : 0.4993
## Detection Rate : 0.2936
## Detection Prevalence : 0.4764
## Balanced Accuracy : 0.6116
##
## 'Positive' Class : BBLN
##
Membuat Kurva ROC
# ROC curves of both classifiers on the test split.
# The predictor passed to roc() is the predicted probability of class
# "BBLTN", not the hard 0/1 label used before: hard labels provide a single
# operating point, so the "curve" collapses to two straight segments and the
# AUC says nothing about how well the model ranks the cases.
par(pty = "s") # square plotting region
# True labels coded 0 = BBLN (control), 1 = BBLTN (case).
actual_bbltn <- ifelse(test_set$BBL == "BBLN", 0, 1)
# ROC for the decision tree
prob_dtree <- predict(dtree, newdata = test_set, type = "prob")[, "BBLTN"]
dtreeROC <- roc(actual_bbltn, prob_dtree,
  plot = TRUE, print.auc = TRUE, col = "green", lwd = 4,
  legacy.axes = TRUE, main = "ROC Curves"
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# ROC for naive Bayes, added to the same plot
prob_nb <- predict(nb, newdata = test_set, type = "raw")[, "BBLTN"]
nbayesROC <- roc(actual_bbltn, prob_nb,
  plot = TRUE, print.auc = TRUE, col = "blue", lwd = 4,
  print.auc.y = 0.4, legacy.axes = TRUE, add = TRUE
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
legend("bottomright",
  legend = c("Decision Tree", "Naive Bayes"),
  col = c("green", "blue"), lwd = 4
)