Importing the Dataset

setwd("D:/STIS/4SE/4. Data Mining/Tugas/FIX BENERAN YAK GUA GEBUG LUH")
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.5
Data <- read_excel("datafix.xlsx")
View(Data)
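
A more portable variant (a sketch, not part of the original run; data_dir is a hypothetical name) keeps the machine-specific part of the path in one place:

# Sketch: isolate the machine-specific folder so only one line needs editing
data_dir <- "D:/STIS/4SE/4. Data Mining/Tugas/FIX BENERAN YAK GUA GEBUG LUH"
Data <- read_excel(file.path(data_dir, "datafix.xlsx"))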

Loading the Libraries

library(dplyr)
library(tidyr)
library(data.table)
library(DT)
library(caret)
library(rpart)
library(rpart.plot)
library(randomForest)
library(e1071)
library(rmarkdown)
library(ggplot2)
library(pROC)
library(smotefamily)
dataset <- as.data.frame(Data)
str(dataset)
## 'data.frame':    2181 obs. of  7 variables:
##  $ BBL        : chr  "BBLR" "BBLR" "BBLR" "BBLR" ...
##  $ PEND       : chr  "Secondary" "Primary" "Primary" "Secondary" ...
##  $ KLASIFIKASI: chr  "Rural" "Rural" "Rural" "Rural" ...
##  $ SK         : chr  "Poorer" "Poorer" "Poorer" "Poorer" ...
##  $ ANTE       : num  9 3 3 9 0 5 1 6 9 8 ...
##  $ UMUR       : num  18 16 23 15 22 21 17 20 27 25 ...
##  $ M19        : chr  "BBLR" "BBLR" "BBLR" "BBLR" ...
# Convert the categorical columns (BBL, PEND, KLASIFIKASI, SK) and M19 to factors
for (i in 1:4) {
  dataset[, i] <- as.factor(dataset[, i])
}
dataset$M19 <- as.factor(dataset$M19)
str(dataset)
## 'data.frame':    2181 obs. of  7 variables:
##  $ BBL        : Factor w/ 2 levels "BBLN","BBLR": 2 2 2 2 2 2 2 2 2 2 ...
##  $ PEND       : Factor w/ 3 levels "Higher","Primary",..: 3 2 2 3 1 3 2 3 3 3 ...
##  $ KLASIFIKASI: Factor w/ 2 levels "Rural","Urban": 1 1 1 1 1 1 1 2 1 2 ...
##  $ SK         : Factor w/ 3 levels "Middle","Poorer",..: 2 2 2 2 2 2 2 2 3 2 ...
##  $ ANTE       : num  9 3 3 9 0 5 1 6 9 8 ...
##  $ UMUR       : num  18 16 23 15 22 21 17 20 27 25 ...
##  $ M19        : Factor w/ 3 levels "BBLL","BBLN",..: 3 3 3 3 3 3 3 3 3 3 ...
View(dataset)
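
The loop above can also be written in one step with dplyr (loaded earlier); an equivalent sketch:

# Sketch: convert all five categorical columns to factors at once
dataset <- dataset %>%
  mutate(across(c(BBL, PEND, KLASIFIKASI, SK, M19), as.factor))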
propBBL <- table(dataset$BBL)
propBBL
## 
## BBLN BBLR 
## 1454  727
perc <- round(prop.table(propBBL), digits = 2)
barplot(perc,
        main = "Birth Weight",
        xlab = "Label",
        ylab = "Proportion",
        col = "brown",
        density = 10,
        angle = 45,
        names.arg = c("BBLN", "BBLR"))

Defining the Model

attach(dataset)  # optional here: every model call below passes data = explicitly
model <- BBL ~ PEND + KLASIFIKASI + SK + ANTE + UMUR

Splitting the Data into 70% Training and 30% Test Sets, with Cross-Validation

set.seed(100)
# Draw 70% of the row indices for training; the remainder is the test set
sampling <- sample(1:nrow(dataset), 0.7 * nrow(dataset))
train_set <- dataset[sampling, ]
test_set <- dataset[-sampling, ]

myControl <- trainControl(
  method = "cv",    # 10-fold cross-validation
  number = 10,
  verboseIter = TRUE
)
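
Note that sample() draws rows without regard to class, so the 2:1 BBLN:BBLR ratio is only approximately preserved in each half. A stratified alternative (a sketch; train_strat and test_strat are hypothetical names) uses caret's createDataPartition(), which samples within each class:

# Sketch: stratified 70:30 split that preserves the class ratio
set.seed(100)
idx <- createDataPartition(dataset$BBL, p = 0.7, list = FALSE)
train_strat <- dataset[idx, ]
test_strat  <- dataset[-idx, ]
round(prop.table(table(train_strat$BBL)), 2)  # mirrors the full-data proportions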

Decision Tree with the 70:30 Split

dtree <- rpart(model, data = train_set, method = "class")
# extra = 106: show P(second class) and the percentage of observations per node
rpart.plot(dtree, extra = 106)

pred_dtree <- predict(dtree, newdata = test_set, type = "class")
confusionMatrix(pred_dtree,test_set$BBL)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  399  153
##       BBLR   38   65
##                                          
##                Accuracy : 0.7084         
##                  95% CI : (0.6719, 0.743)
##     No Information Rate : 0.6672         
##     P-Value [Acc > NIR] : 0.01332        
##                                          
##                   Kappa : 0.2434         
##                                          
##  Mcnemar's Test P-Value : < 2e-16        
##                                          
##             Sensitivity : 0.9130         
##             Specificity : 0.2982         
##          Pos Pred Value : 0.7228         
##          Neg Pred Value : 0.6311         
##              Prevalence : 0.6672         
##          Detection Rate : 0.6092         
##    Detection Prevalence : 0.8427         
##       Balanced Accuracy : 0.6056         
##                                          
##        'Positive' Class : BBLN           
## 
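
The confusion matrix shows high sensitivity for BBLN but poor specificity for BBLR. A threshold-free view is the ROC curve; a sketch using pROC (loaded above), with class probabilities from type = "prob":

# Sketch: ROC curve and AUC for the decision tree on the test set
prob_dtree <- predict(dtree, newdata = test_set, type = "prob")[, "BBLR"]
roc_dtree <- roc(test_set$BBL, prob_dtree, levels = c("BBLN", "BBLR"))
auc(roc_dtree)
plot(roc_dtree, main = "Decision tree ROC")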

Decision Tree with Cross-Validation

dtree_cv <- train(model, data=train_set,
                  method='rpart',
                  trControl=myControl)
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00786 on full training set
confusionMatrix(predict(dtree_cv, newdata = test_set) %>% as.factor(), test_set$BBL %>% as.factor())
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  399  153
##       BBLR   38   65
##                                          
##                Accuracy : 0.7084         
##                  95% CI : (0.6719, 0.743)
##     No Information Rate : 0.6672         
##     P-Value [Acc > NIR] : 0.01332        
##                                          
##                   Kappa : 0.2434         
##                                          
##  Mcnemar's Test P-Value : < 2e-16        
##                                          
##             Sensitivity : 0.9130         
##             Specificity : 0.2982         
##          Pos Pred Value : 0.7228         
##          Neg Pred Value : 0.6311         
##              Prevalence : 0.6672         
##          Detection Rate : 0.6092         
##    Detection Prevalence : 0.8427         
##       Balanced Accuracy : 0.6056         
##                                          
##        'Positive' Class : BBLN           
## 
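
Before moving on, the tuning results behind the chosen cp are worth a look; a sketch:

# Sketch: resampled accuracy for each complexity parameter caret tried
print(dtree_cv$results)
plot(dtree_cv)  # accuracy vs. cp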

Random Forest with the 70:30 Split

rf <- randomForest(model, data = train_set)
print(rf)
## 
## Call:
##  randomForest(formula = model, data = train_set) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 33.22%
## Confusion matrix:
##      BBLN BBLR class.error
## BBLN  879  138   0.1356932
## BBLR  369  140   0.7249509
pred_rf <- predict(rf, newdata = test_set)
confusionMatrix(pred_rf %>% as.factor(), test_set$BBL %>% as.factor())
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  375  142
##       BBLR   62   76
##                                           
##                Accuracy : 0.6885          
##                  95% CI : (0.6515, 0.7239)
##     No Information Rate : 0.6672          
##     P-Value [Acc > NIR] : 0.1312          
##                                           
##                   Kappa : 0.2277          
##                                           
##  Mcnemar's Test P-Value : 3.182e-08       
##                                           
##             Sensitivity : 0.8581          
##             Specificity : 0.3486          
##          Pos Pred Value : 0.7253          
##          Neg Pred Value : 0.5507          
##              Prevalence : 0.6672          
##          Detection Rate : 0.5725          
##    Detection Prevalence : 0.7893          
##       Balanced Accuracy : 0.6034          
##                                           
##        'Positive' Class : BBLN            
## 
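
A natural follow-up is to ask which predictors the forest leans on; a sketch using randomForest's built-in importance measures:

# Sketch: variable importance from the fitted forest
importance(rf)
varImpPlot(rf, main = "Mean decrease in Gini")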

Random Forest with Cross-Validation

rf_cv <- train(model, data=train_set,
               method='rf',
               trControl=myControl)
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 2 on full training set
confusionMatrix(predict(rf_cv, newdata = test_set) %>% as.factor(), test_set$BBL %>% as.factor())
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  398  153
##       BBLR   39   65
##                                           
##                Accuracy : 0.7069          
##                  95% CI : (0.6704, 0.7415)
##     No Information Rate : 0.6672          
##     P-Value [Acc > NIR] : 0.01651         
##                                           
##                   Kappa : 0.2404          
##                                           
##  Mcnemar's Test P-Value : 3.49e-16        
##                                           
##             Sensitivity : 0.9108          
##             Specificity : 0.2982          
##          Pos Pred Value : 0.7223          
##          Neg Pred Value : 0.6250          
##              Prevalence : 0.6672          
##          Detection Rate : 0.6076          
##    Detection Prevalence : 0.8412          
##       Balanced Accuracy : 0.6045          
##                                           
##        'Positive' Class : BBLN            
## 
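
caret tried mtry = 2, 4, and 7 by default and settled on 2. An explicit grid narrows the search; a sketch (rf_grid and rf_tuned are hypothetical names):

# Sketch: tune mtry over an explicit grid instead of caret's default values
rf_grid <- expand.grid(mtry = 2:5)
rf_tuned <- train(model, data = train_set, method = "rf",
                  trControl = myControl, tuneGrid = rf_grid)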

Naive Bayes with the 70:30 Split

nb <- naiveBayes(model, data = train_set)
print(nb)
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##      BBLN      BBLR 
## 0.6664482 0.3335518 
## 
## Conditional probabilities:
##       PEND
## Y          Higher    Primary  Secondary
##   BBLN 0.19370698 0.17404130 0.63225172
##   BBLR 0.07662083 0.32809430 0.59528487
## 
##       KLASIFIKASI
## Y          Rural     Urban
##   BBLN 0.5368732 0.4631268
##   BBLR 0.5717092 0.4282908
## 
##       SK
## Y         Middle    Poorer    Richer
##   BBLN 0.2104228 0.4650934 0.3244838
##   BBLR 0.1984283 0.5992141 0.2023576
## 
##       ANTE
## Y          [,1]     [,2]
##   BBLN 5.590954 3.221153
##   BBLR 5.137525 3.361539
## 
##       UMUR
## Y          [,1]     [,2]
##   BBLN 22.11701 4.287305
##   BBLR 21.50884 4.488658
pred_nb <- predict(nb, newdata = test_set)
confusionMatrix(pred_nb, test_set$BBL)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  393  151
##       BBLR   44   67
##                                           
##                Accuracy : 0.7023          
##                  95% CI : (0.6657, 0.7371)
##     No Information Rate : 0.6672          
##     P-Value [Acc > NIR] : 0.03019         
##                                           
##                   Kappa : 0.2356          
##                                           
##  Mcnemar's Test P-Value : 3.179e-14       
##                                           
##             Sensitivity : 0.8993          
##             Specificity : 0.3073          
##          Pos Pred Value : 0.7224          
##          Neg Pred Value : 0.6036          
##              Prevalence : 0.6672          
##          Detection Rate : 0.6000          
##    Detection Prevalence : 0.8305          
##       Balanced Accuracy : 0.6033          
##                                           
##        'Positive' Class : BBLN            
## 
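
Some of the conditional probabilities above are small; with rarer factor levels they could hit zero and veto a class outright. Laplace smoothing guards against this; a sketch (nb_smooth is a hypothetical name):

# Sketch: Naive Bayes with Laplace smoothing of the factor-level counts
nb_smooth <- naiveBayes(model, data = train_set, laplace = 1)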

Naive Bayes with Cross-Validation

nb_cv <- train(model, data=train_set,
               method='naive_bayes',
               trControl=myControl)
## Aggregating results
## Selecting tuning parameters
## Fitting laplace = 0, usekernel = TRUE, adjust = 1 on full training set
print(nb_cv)
## Naive Bayes 
## 
## 1526 samples
##    5 predictor
##    2 classes: 'BBLN', 'BBLR' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1373, 1373, 1373, 1375, 1374, 1373, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE      0.6710889  0.17381701
##    TRUE      0.6723441  0.04392668
## 
## Tuning parameter 'laplace' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were laplace = 0, usekernel = TRUE
##  and adjust = 1.
confusionMatrix(predict(nb_cv, newdata = test_set) %>% as.factor(), test_set$BBL %>% as.factor())
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction BBLN BBLR
##       BBLN  426  204
##       BBLR   11   14
##                                           
##                Accuracy : 0.6718          
##                  95% CI : (0.6343, 0.7076)
##     No Information Rate : 0.6672          
##     P-Value [Acc > NIR] : 0.4196          
##                                           
##                   Kappa : 0.0502          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.97483         
##             Specificity : 0.06422         
##          Pos Pred Value : 0.67619         
##          Neg Pred Value : 0.56000         
##              Prevalence : 0.66718         
##          Detection Rate : 0.65038         
##    Detection Prevalence : 0.96183         
##       Balanced Accuracy : 0.51952         
##                                           
##        'Positive' Class : BBLN            
##
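
The kernel variant won on raw accuracy, but its Kappa (0.044) and test-set specificity (about 6%) make it nearly useless for detecting BBLR, so accuracy alone is misleading on these imbalanced classes. A side-by-side look at the resampled metrics makes this visible; a sketch with caret::resamples() (strictly, the three models should share identical folds, e.g. via a common index in trainControl):

# Sketch: compare the cross-validated models on resampled Accuracy and Kappa
comp <- resamples(list(tree = dtree_cv, rf = rf_cv, nb = nb_cv))
summary(comp)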