library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data_tb <- read_excel("D:/FINAL TA/SUMBER REFRENSI/TA.xlsx", sheet = 1)
head(data_tb)
## # A tibble: 6 × 11
## NO `TANGGAL MASUK` `JENIS KELAMIN` `UMUR TAHUN` `KEADAAN KELUAR` PENYAKIT
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 18/02/2024 1 124 1 1
## 2 2 21/02/2024 1 23 0 1
## 3 3 19/04/2024 1 62 0 1
## 4 4 20/04/2024 1 42 0 1
## 5 5 29/04/2024 1 71 0 1
## 6 6 45478 1 63 0 1
## # ℹ 5 more variables: `LAMA RAWAT INAP` <dbl>, `PENYAKIT PENYERTA` <chr>,
## # LEOKOSIT <chr>, TROMBOSIT <chr>, HEMOGLOBIN <chr>
str(data_tb)
## tibble [322 × 11] (S3: tbl_df/tbl/data.frame)
## $ NO : num [1:322] 1 2 3 4 5 6 7 8 9 10 ...
## $ TANGGAL MASUK : chr [1:322] "18/02/2024" "21/02/2024" "19/04/2024" "20/04/2024" ...
## $ JENIS KELAMIN : num [1:322] 1 1 1 1 1 1 0 0 0 1 ...
## $ UMUR TAHUN : num [1:322] 124 23 62 42 71 63 51 42 27 66 ...
## $ KEADAAN KELUAR : num [1:322] 1 0 0 0 0 0 0 0 0 0 ...
## $ PENYAKIT : num [1:322] 1 1 1 1 1 1 1 1 1 1 ...
## $ LAMA RAWAT INAP : num [1:322] 3 1 3 3 3 3 4 7 3 5 ...
## $ PENYAKIT PENYERTA: chr [1:322] "0" "0" "0" "0" ...
## $ LEOKOSIT : chr [1:322] "0" "0" "0" "1" ...
## $ TROMBOSIT : chr [1:322] "435" "314" "478" "538" ...
## $ HEMOGLOBIN : chr [1:322] "1" "0" "1" "1" ...
data_tb <- na.omit(data_tb)
colnames(data_tb) <- c(
"NO",
"TANGGAL_MASUK",
"JENIS_KELAMIN",
"UMUR_TAHUN",
"KEADAAN_KELUAR",
"PENYAKIT",
"LAMA_RAWAT_INAP",
"PENYAKIT_PENYERTA",
"LEOKOSIT",
"TROMBOSIT",
"HEMOGLOBIN"
)
data_tb <- data_tb %>%
mutate(KODE_TROMBOSIT = ifelse(TROMBOSIT >= 150 & TROMBOSIT <= 450, 0, 1))
data_tb <- data_tb %>%
mutate(KODE_LAMA_RAWAT = ifelse(LAMA_RAWAT_INAP <= 5, 0, 1))
data_tb <- data_tb %>%
mutate(KODE_PENYERTA = ifelse(PENYAKIT_PENYERTA == 0, 0, 1))
data_tb$KEADAAN_KELUAR <- factor(data_tb$KEADAAN_KELUAR,
levels = c(0,1),
labels = c("HIDUP","MENINGGAL"))
data_tb$PENYAKIT <- factor(data_tb$PENYAKIT,
levels = c(0,1,2),
labels = c("TB_PARU","TB_PLEURA","TB_LAINNYA"))
data_tb$JENIS_KELAMIN <- factor(data_tb$JENIS_KELAMIN,
levels = c(0,1),
labels = c("P","L"))
summary(data_tb)
## NO TANGGAL_MASUK JENIS_KELAMIN UMUR_TAHUN
## Min. : 1.00 Length:322 P:109 Min. : 1.0
## 1st Qu.: 81.25 Class :character L:213 1st Qu.: 41.0
## Median :161.50 Mode :character Median : 54.0
## Mean :161.50 Mean : 51.4
## 3rd Qu.:241.75 3rd Qu.: 64.0
## Max. :322.00 Max. :124.0
## KEADAAN_KELUAR PENYAKIT LAMA_RAWAT_INAP PENYAKIT_PENYERTA
## HIDUP :291 TB_PARU :265 Min. : 1.000 Length:322
## MENINGGAL: 31 TB_PLEURA : 36 1st Qu.: 3.000 Class :character
## TB_LAINNYA: 21 Median : 4.000 Mode :character
## Mean : 4.339
## 3rd Qu.: 5.000
## Max. :15.000
## LEOKOSIT TROMBOSIT HEMOGLOBIN KODE_TROMBOSIT
## Length:322 Length:322 Length:322 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Mode :character Median :0.0000
## Mean :0.3292
## 3rd Qu.:1.0000
## Max. :1.0000
## KODE_LAMA_RAWAT KODE_PENYERTA
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.000
## Mean :0.1429 Mean :0.205
## 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.000
str(data_tb)
## tibble [322 × 14] (S3: tbl_df/tbl/data.frame)
## $ NO : num [1:322] 1 2 3 4 5 6 7 8 9 10 ...
## $ TANGGAL_MASUK : chr [1:322] "18/02/2024" "21/02/2024" "19/04/2024" "20/04/2024" ...
## $ JENIS_KELAMIN : Factor w/ 2 levels "P","L": 2 2 2 2 2 2 1 1 1 2 ...
## $ UMUR_TAHUN : num [1:322] 124 23 62 42 71 63 51 42 27 66 ...
## $ KEADAAN_KELUAR : Factor w/ 2 levels "HIDUP","MENINGGAL": 2 1 1 1 1 1 1 1 1 1 ...
## $ PENYAKIT : Factor w/ 3 levels "TB_PARU","TB_PLEURA",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LAMA_RAWAT_INAP : num [1:322] 3 1 3 3 3 3 4 7 3 5 ...
## $ PENYAKIT_PENYERTA: chr [1:322] "0" "0" "0" "0" ...
## $ LEOKOSIT : chr [1:322] "0" "0" "0" "1" ...
## $ TROMBOSIT : chr [1:322] "435" "314" "478" "538" ...
## $ HEMOGLOBIN : chr [1:322] "1" "0" "1" "1" ...
## $ KODE_TROMBOSIT : num [1:322] 0 0 1 1 0 1 1 1 1 0 ...
## $ KODE_LAMA_RAWAT : num [1:322] 0 0 0 0 0 0 0 1 0 0 ...
## $ KODE_PENYERTA : num [1:322] 0 0 0 0 1 1 0 0 0 0 ...
colSums(is.na(data_tb))
## NO TANGGAL_MASUK JENIS_KELAMIN UMUR_TAHUN
## 0 0 0 0
## KEADAAN_KELUAR PENYAKIT LAMA_RAWAT_INAP PENYAKIT_PENYERTA
## 0 0 0 0
## LEOKOSIT TROMBOSIT HEMOGLOBIN KODE_TROMBOSIT
## 0 0 0 0
## KODE_LAMA_RAWAT KODE_PENYERTA
## 0 0
View(data_tb)
# Frekuensi jenis TBC
tb_freq <- table(data_tb$PENYAKIT)
# Persentase
tb_percent <- prop.table(tb_freq) * 100
# Gabungkan
tb_deskriptif <- data.frame(
Jenis_TBC = names(tb_freq),
N = as.vector(tb_freq),
Persen = round(as.vector(tb_percent),2)
)
tb_deskriptif
## Jenis_TBC N Persen
## 1 TB_PARU 265 82.30
## 2 TB_PLEURA 36 11.18
## 3 TB_LAINNYA 21 6.52
pie(tb_freq,
main = "Diagram Lingkaran Jenis TBC",
col = c("orange","green","yellow"))

data_tb <- data_tb %>%
mutate(
USIA_KATEGORI = ifelse(UMUR_TAHUN <= 45, "≤45", ">45")
)
library(dplyr)
deskriptif <- function(data, var){
freq <- table(data[[var]])
persen <- prop.table(freq) * 100
hasil <- data.frame(
Variabel = var,
Kategori = names(freq),
N = as.vector(freq),
Persen = round(as.vector(persen), 2)
)
return(hasil)
}
d1 <- deskriptif(data_tb, "JENIS_KELAMIN")
d2 <- deskriptif(data_tb, "USIA_KATEGORI")
d3 <- deskriptif(data_tb, "LEOKOSIT")
d4 <- deskriptif(data_tb, "KODE_TROMBOSIT")
d5 <- deskriptif(data_tb, "HEMOGLOBIN")
d6 <- deskriptif(data_tb, "KODE_PENYERTA")
d7 <- deskriptif(data_tb, "KODE_LAMA_RAWAT")
d8 <- deskriptif(data_tb, "KEADAAN_KELUAR")
tabel_deskriptif <- bind_rows(d1,d2,d3,d4,d5,d6,d7,d8)
tabel_deskriptif
## Variabel Kategori N Persen
## 1 JENIS_KELAMIN P 109 33.85
## 2 JENIS_KELAMIN L 213 66.15
## 3 USIA_KATEGORI >45 213 66.15
## 4 USIA_KATEGORI ≤45 109 33.85
## 5 LEOKOSIT - 14 4.35
## 6 LEOKOSIT 0 139 43.17
## 7 LEOKOSIT 1 169 52.48
## 8 KODE_TROMBOSIT 0 216 67.08
## 9 KODE_TROMBOSIT 1 106 32.92
## 10 HEMOGLOBIN - 39 12.11
## 11 HEMOGLOBIN 0 120 37.27
## 12 HEMOGLOBIN 1 163 50.62
## 13 KODE_PENYERTA 0 256 79.50
## 14 KODE_PENYERTA 1 66 20.50
## 15 KODE_LAMA_RAWAT 0 276 85.71
## 16 KODE_LAMA_RAWAT 1 46 14.29
## 17 KEADAAN_KELUAR HIDUP 291 90.37
## 18 KEADAAN_KELUAR MENINGGAL 31 9.63
data_no_na <- na.omit(data_tb)
head(data_tb)
## # A tibble: 6 × 15
## NO TANGGAL_MASUK JENIS_KELAMIN UMUR_TAHUN KEADAAN_KELUAR PENYAKIT
## <dbl> <chr> <fct> <dbl> <fct> <fct>
## 1 1 18/02/2024 L 124 MENINGGAL TB_PLEURA
## 2 2 21/02/2024 L 23 HIDUP TB_PLEURA
## 3 3 19/04/2024 L 62 HIDUP TB_PLEURA
## 4 4 20/04/2024 L 42 HIDUP TB_PLEURA
## 5 5 29/04/2024 L 71 HIDUP TB_PLEURA
## 6 6 45478 L 63 HIDUP TB_PLEURA
## # ℹ 9 more variables: LAMA_RAWAT_INAP <dbl>, PENYAKIT_PENYERTA <chr>,
## # LEOKOSIT <chr>, TROMBOSIT <chr>, HEMOGLOBIN <chr>, KODE_TROMBOSIT <dbl>,
## # KODE_LAMA_RAWAT <dbl>, KODE_PENYERTA <dbl>, USIA_KATEGORI <chr>
str(data_tb)
## tibble [322 × 15] (S3: tbl_df/tbl/data.frame)
## $ NO : num [1:322] 1 2 3 4 5 6 7 8 9 10 ...
## $ TANGGAL_MASUK : chr [1:322] "18/02/2024" "21/02/2024" "19/04/2024" "20/04/2024" ...
## $ JENIS_KELAMIN : Factor w/ 2 levels "P","L": 2 2 2 2 2 2 1 1 1 2 ...
## $ UMUR_TAHUN : num [1:322] 124 23 62 42 71 63 51 42 27 66 ...
## $ KEADAAN_KELUAR : Factor w/ 2 levels "HIDUP","MENINGGAL": 2 1 1 1 1 1 1 1 1 1 ...
## $ PENYAKIT : Factor w/ 3 levels "TB_PARU","TB_PLEURA",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LAMA_RAWAT_INAP : num [1:322] 3 1 3 3 3 3 4 7 3 5 ...
## $ PENYAKIT_PENYERTA: chr [1:322] "0" "0" "0" "0" ...
## $ LEOKOSIT : chr [1:322] "0" "0" "0" "1" ...
## $ TROMBOSIT : chr [1:322] "435" "314" "478" "538" ...
## $ HEMOGLOBIN : chr [1:322] "1" "0" "1" "1" ...
## $ KODE_TROMBOSIT : num [1:322] 0 0 1 1 0 1 1 1 1 0 ...
## $ KODE_LAMA_RAWAT : num [1:322] 0 0 0 0 0 0 0 1 0 0 ...
## $ KODE_PENYERTA : num [1:322] 0 0 0 0 1 1 0 0 0 0 ...
## $ USIA_KATEGORI : chr [1:322] ">45" "≤45" ">45" "≤45" ...
data_tb[data_tb == "-"] <- NA
nrow(data_tb)
## [1] 322
nrow(data_no_na)
## [1] 322
data_no_na <- na.omit(data_tb)
X <- data_no_na[,c(
"JENIS_KELAMIN",
"USIA_KATEGORI",
"LEOKOSIT",
"KODE_TROMBOSIT",
"HEMOGLOBIN",
"KODE_PENYERTA",
"KODE_LAMA_RAWAT",
"KEADAAN_KELUAR"
)]
X <- data.frame(lapply(X, function(x) as.numeric(as.factor(x))))
str(X)
## 'data.frame': 281 obs. of 8 variables:
## $ JENIS_KELAMIN : num 2 2 2 2 2 1 1 2 2 2 ...
## $ USIA_KATEGORI : num 1 2 1 2 1 1 2 1 1 1 ...
## $ LEOKOSIT : num 1 1 1 2 2 2 2 2 1 2 ...
## $ KODE_TROMBOSIT : num 1 1 2 2 1 2 2 1 2 1 ...
## $ HEMOGLOBIN : num 2 1 2 2 2 2 2 1 2 1 ...
## $ KODE_PENYERTA : num 1 1 1 1 2 1 1 1 2 1 ...
## $ KODE_LAMA_RAWAT: num 1 1 1 1 1 1 2 1 1 1 ...
## $ KEADAAN_KELUAR : num 2 1 1 1 1 1 1 1 1 1 ...
View(data_no_na)
data_model <- data_tb[, c(
"PENYAKIT",
"JENIS_KELAMIN",
"USIA_KATEGORI",
"LEOKOSIT",
"KODE_TROMBOSIT",
"HEMOGLOBIN",
"KODE_PENYERTA",
"KODE_LAMA_RAWAT",
"KEADAAN_KELUAR"
)]
data_model$PENYAKIT <- as.factor(data_model$PENYAKIT)
View(data_model)
Naive Bayes dengan Imbalanced Data FULL
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
set.seed(1001)
train_index <- createDataPartition(data_model$PENYAKIT, p = 0.80, list = FALSE)
trainData <- data_model[train_index, ]
testData <- data_model[-train_index, ]
# Jumlah data training
n <- nrow(trainData)
n
## [1] 258
# Jumlah data training
n <- nrow(testData)
n
## [1] 64
trainData %>% count(PENYAKIT)
## # A tibble: 3 × 2
## PENYAKIT n
## <fct> <int>
## 1 TB_PARU 212
## 2 TB_PLEURA 29
## 3 TB_LAINNYA 17
testData %>% count(PENYAKIT)
## # A tibble: 3 × 2
## PENYAKIT n
## <fct> <int>
## 1 TB_PARU 53
## 2 TB_PLEURA 7
## 3 TB_LAINNYA 4
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
NBClassifier <- naiveBayes(PENYAKIT ~ ., data = trainData)
NBClassifier
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.82170543 0.11240310 0.06589147
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3301887 0.6698113
## TB_PLEURA 0.2758621 0.7241379
## TB_LAINNYA 0.6470588 0.3529412
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6839623 0.3160377
## TB_PLEURA 0.6896552 0.3103448
## TB_LAINNYA 0.2352941 0.7647059
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4328358 0.5671642
## TB_PLEURA 0.3571429 0.6428571
## TB_LAINNYA 0.8750000 0.1250000
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3349057 0.4730745
## TB_PLEURA 0.4827586 0.5085476
## TB_LAINNYA 0.2941176 0.4696682
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4408602 0.5591398
## TB_PLEURA 0.2692308 0.7307692
## TB_LAINNYA 0.5454545 0.4545455
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.2028302 0.4030588
## TB_PLEURA 0.1379310 0.3509312
## TB_LAINNYA 0.4117647 0.5072997
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.15566038 0.3633911
## TB_PLEURA 0.06896552 0.2578807
## TB_LAINNYA 0.17647059 0.3929526
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.90566038 0.09433962
## TB_PLEURA 0.86206897 0.13793103
## TB_LAINNYA 0.94117647 0.05882353
# Predict using Naive Bayes
testData$predicted <- predict(NBClassifier, testData)
# data aktual
testData$actual <- testData$PENYAKIT
library(caret)
confusionMatrix(
factor(testData$predicted),
factor(testData$actual)
)
## Warning in confusionMatrix.default(factor(testData$predicted),
## factor(testData$actual)): Levels are not in the same order for reference and
## data. Refactoring data to match.
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 53 7 4
## TB_PLEURA 0 0 0
## TB_LAINNYA 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.8281
## 95% CI : (0.7132, 0.911)
## No Information Rate : 0.8281
## P-Value [Acc > NIR] : 0.5796
##
## Kappa : 0
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 1.0000 0.0000 0.0000
## Specificity 0.0000 1.0000 1.0000
## Pos Pred Value 0.8281 NaN NaN
## Neg Pred Value NaN 0.8906 0.9375
## Prevalence 0.8281 0.1094 0.0625
## Detection Rate 0.8281 0.0000 0.0000
## Detection Prevalence 1.0000 0.0000 0.0000
## Balanced Accuracy 0.5000 0.5000 0.5000
Naive Bayes dengan Balanced data (Undersampling) FULL DATA
set.seed(1001)
down_train <- downSample(
x = trainData[, !colnames(trainData) %in% "PENYAKIT"],
y = trainData$PENYAKIT
)
names(down_train)[names(down_train) == "Class"] <- "PENYAKIT"
table(down_train$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 17 17 17
down_train %>%
count(PENYAKIT)
## PENYAKIT n
## 1 TB_PARU 17
## 2 TB_PLEURA 17
## 3 TB_LAINNYA 17
library(e1071)
NBClassifier3 <- naiveBayes(PENYAKIT ~ ., data = down_train)
NBClassifier3
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3333333 0.3333333 0.3333333
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.4705882 0.5294118
## TB_PLEURA 0.2941176 0.7058824
## TB_LAINNYA 0.6470588 0.3529412
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.7647059 0.2352941
## TB_PLEURA 0.7647059 0.2352941
## TB_LAINNYA 0.2352941 0.7647059
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.3529412 0.6470588
## TB_PLEURA 0.3125000 0.6875000
## TB_LAINNYA 0.8750000 0.1250000
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.4117647 0.5072997
## TB_PLEURA 0.5294118 0.5144958
## TB_LAINNYA 0.2941176 0.4696682
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.3750000 0.6250000
## TB_PLEURA 0.1875000 0.8125000
## TB_LAINNYA 0.5454545 0.4545455
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.05882353 0.2425356
## TB_PLEURA 0.11764706 0.3321056
## TB_LAINNYA 0.41176471 0.5072997
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.05882353 0.2425356
## TB_PLEURA 0.11764706 0.3321056
## TB_LAINNYA 0.17647059 0.3929526
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.94117647 0.05882353
## TB_PLEURA 0.82352941 0.17647059
## TB_LAINNYA 0.94117647 0.05882353
testData$predicted <- predict(NBClassifier3, testData)
testData$actual <- testData$PENYAKIT
library(caret)
confusionMatrix(
factor(testData$predicted),
factor(testData$actual)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 34 3 1
## TB_PLEURA 7 1 1
## TB_LAINNYA 12 3 2
##
## Overall Statistics
##
## Accuracy : 0.5781
## 95% CI : (0.4482, 0.7006)
## No Information Rate : 0.8281
## P-Value [Acc > NIR] : 0.999999
##
## Kappa : 0.1143
##
## Mcnemar's Test P-Value : 0.007706
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.6415 0.14286 0.50000
## Specificity 0.6364 0.85965 0.75000
## Pos Pred Value 0.8947 0.11111 0.11765
## Neg Pred Value 0.2692 0.89091 0.95745
## Prevalence 0.8281 0.10938 0.06250
## Detection Rate 0.5312 0.01562 0.03125
## Detection Prevalence 0.5938 0.14062 0.26562
## Balanced Accuracy 0.6389 0.50125 0.62500
Naive Bayes dengan Balanced data (Oversampling) FULL DATA
set.seed(1001)
up_train <- upSample(
x = trainData[, !colnames(trainData) %in% "PENYAKIT"],
y = trainData$PENYAKIT
)
names(up_train)[names(up_train) == "Class"] <- "PENYAKIT"
table(up_train$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 212 212 212
up_train %>%
count(PENYAKIT)
## PENYAKIT n
## 1 TB_PARU 212
## 2 TB_PLEURA 212
## 3 TB_LAINNYA 212
library(e1071)
NBClassifier1 <- naiveBayes(PENYAKIT ~ ., data = up_train)
NBClassifier1
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3333333 0.3333333 0.3333333
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3301887 0.6698113
## TB_PLEURA 0.2830189 0.7169811
## TB_LAINNYA 0.5896226 0.4103774
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6839623 0.3160377
## TB_PLEURA 0.7311321 0.2688679
## TB_LAINNYA 0.2405660 0.7594340
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4328358 0.5671642
## TB_PLEURA 0.3399015 0.6600985
## TB_LAINNYA 0.8168317 0.1831683
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3349057 0.4730745
## TB_PLEURA 0.4622642 0.4997540
## TB_LAINNYA 0.2924528 0.4559658
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4408602 0.5591398
## TB_PLEURA 0.2538860 0.7461140
## TB_LAINNYA 0.6122449 0.3877551
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.2028302 0.4030588
## TB_PLEURA 0.1273585 0.3341632
## TB_LAINNYA 0.4245283 0.4954411
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.15566038 0.3633911
## TB_PLEURA 0.06132075 0.2404856
## TB_LAINNYA 0.20283019 0.4030588
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.90566038 0.09433962
## TB_PLEURA 0.83490566 0.16509434
## TB_LAINNYA 0.91981132 0.08018868
testData$predicted <- predict(NBClassifier1, testData)
testData$actual <- testData$PENYAKIT
library(caret)
confusionMatrix(
factor(testData$predicted),
factor(testData$actual)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 15 2 1
## TB_PLEURA 30 3 1
## TB_LAINNYA 8 2 2
##
## Overall Statistics
##
## Accuracy : 0.3125
## 95% CI : (0.2024, 0.4406)
## No Information Rate : 0.8281
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.014
##
## Mcnemar's Test P-Value : 1.206e-06
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.2830 0.42857 0.50000
## Specificity 0.7273 0.45614 0.83333
## Pos Pred Value 0.8333 0.08824 0.16667
## Neg Pred Value 0.1739 0.86667 0.96154
## Prevalence 0.8281 0.10938 0.06250
## Detection Rate 0.2344 0.04688 0.03125
## Detection Prevalence 0.2812 0.53125 0.18750
## Balanced Accuracy 0.5051 0.44236 0.66667
data_model_no_na <- data_no_na[, c(
"PENYAKIT",
"JENIS_KELAMIN",
"USIA_KATEGORI",
"LEOKOSIT",
"KODE_TROMBOSIT",
"HEMOGLOBIN",
"KODE_PENYERTA",
"KODE_LAMA_RAWAT",
"KEADAAN_KELUAR"
)]
data_model_no_na $PENYAKIT <- factor(data_model_no_na $PENYAKIT)
library(caret)
set.seed(123)
index <- createDataPartition(data_model_no_na $PENYAKIT, p = 0.8, list = FALSE)
trainData <- data_model_no_na [index, ]
testData <- data_model_no_na [-index, ]
n<-nrow(trainData)
n
## [1] 227
n<-nrow(testData)
n
## [1] 54
# sebelum
table(trainData$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 188 27 12
Naive Bayes dengan Balanced data (SMOTE) dengan data tanpa NA
library(themis)
## Warning: package 'themis' was built under R version 4.4.3
## Loading required package: recipes
## Warning: package 'recipes' was built under R version 4.4.3
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(recipes)
rec <- recipe(PENYAKIT ~ ., data = trainData) %>%
step_smotenc(PENYAKIT)
train_smote <- prep(rec) %>%
juice()
NB_TB_smote <- naiveBayes(
PENYAKIT ~ .,
data = train_smote,
laplace = 1
)
library(e1071)
NB_TB_smote <- naiveBayes(
PENYAKIT ~ .,
data = train_smote,
laplace = 1
)
NB_TB_smote
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3333333 0.3333333 0.3333333
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3421053 0.6578947
## TB_PLEURA 0.1631579 0.8368421
## TB_LAINNYA 0.7263158 0.2736842
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6947368 0.3052632
## TB_PLEURA 0.8631579 0.1368421
## TB_LAINNYA 0.4052632 0.5947368
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.42631579 0.57368421
## TB_PLEURA 0.56842105 0.43157895
## TB_LAINNYA 0.97894737 0.02105263
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3138298 0.4652872
## TB_PLEURA 0.3486680 0.4514144
## TB_LAINNYA 0.2760590 0.3896022
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4263158 0.5736842
## TB_PLEURA 0.1947368 0.8052632
## TB_LAINNYA 0.3473684 0.6526316
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.1808511 0.3859225
## TB_PLEURA 0.1315456 0.3189518
## TB_LAINNYA 0.4160202 0.4399221
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.14361702 0.3516374
## TB_PLEURA 0.03961402 0.1645487
## TB_LAINNYA 0.24698177 0.3770241
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.90526316 0.09473684
## TB_PLEURA 0.94210526 0.05789474
## TB_LAINNYA 0.98947368 0.01052632
testData$predicted_smote <- predict(NB_TB_smote, testData)
testData$actual <- testData$PENYAKIT
library(caret)
confusionMatrix(
factor(testData$predicted_smote),
factor(testData$actual)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 12 2 1
## TB_PLEURA 23 4 0
## TB_LAINNYA 11 0 1
##
## Overall Statistics
##
## Accuracy : 0.3148
## 95% CI : (0.1952, 0.4555)
## No Information Rate : 0.8519
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0206
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.2609 0.66667 0.50000
## Specificity 0.6250 0.52083 0.78846
## Pos Pred Value 0.8000 0.14815 0.08333
## Neg Pred Value 0.1282 0.92593 0.97619
## Prevalence 0.8519 0.11111 0.03704
## Detection Rate 0.2222 0.07407 0.01852
## Detection Prevalence 0.2778 0.50000 0.22222
## Balanced Accuracy 0.4429 0.59375 0.64423
Naive Bayes Balanced data dengan Weighted training dengan data no
NA
library(dplyr)
library(caret)
library(e1071)
data <- data_model_no_na
table(data$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 234 33 14
prop.table(table(data$PENYAKIT))
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.83274021 0.11743772 0.04982206
class_freq <- table(data$PENYAKIT)
class_weight <- sum(class_freq) / (length(class_freq) * class_freq)
class_weight
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.4002849 2.8383838 6.6904762
data$weight <- class_weight[data$PENYAKIT]
set.seed(123)
trainIndex <- createDataPartition(data$PENYAKIT, p = 0.8, list = FALSE)
trainData <- data[trainIndex, ]
testData <- data[-trainIndex, ]
x_train <- trainData %>% select(-PENYAKIT, -weight)
y_train <- trainData$PENYAKIT
set.seed(123)
train_weighted <- trainData %>%
slice_sample(
n = nrow(trainData),
replace = TRUE,
weight_by = weight
)
table(down_train$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 17 17 17
down_train %>% count(PENYAKIT)
## PENYAKIT n
## 1 TB_PARU 17
## 2 TB_PLEURA 17
## 3 TB_LAINNYA 17
train_weighted <- train_weighted %>%
select(-weight)
testData <- testData %>%
select(-weight)
model_nb <- naiveBayes(PENYAKIT ~ ., data = train_weighted)
model_nb
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3612335 0.3171806 0.3215859
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3170732 0.6829268
## TB_PLEURA 0.1944444 0.8055556
## TB_LAINNYA 0.4657534 0.5342466
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6585366 0.3414634
## TB_PLEURA 0.7638889 0.2361111
## TB_LAINNYA 0.3561644 0.6438356
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4024390 0.5975610
## TB_PLEURA 0.4305556 0.5694444
## TB_LAINNYA 0.6849315 0.3150685
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3048780 0.4631887
## TB_PLEURA 0.3194444 0.4695334
## TB_LAINNYA 0.3972603 0.4927171
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4024390 0.5975610
## TB_PLEURA 0.1805556 0.8194444
## TB_LAINNYA 0.5068493 0.4931507
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.1341463 0.3429068
## TB_PLEURA 0.1111111 0.3164751
## TB_LAINNYA 0.4931507 0.5034130
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.24390244 0.4320773
## TB_PLEURA 0.09722222 0.2983392
## TB_LAINNYA 0.39726027 0.4927171
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.92682927 0.07317073
## TB_PLEURA 0.90277778 0.09722222
## TB_LAINNYA 0.93150685 0.06849315
prediksi <- predict(model_nb, testData)
confusionMatrix(prediksi, testData$PENYAKIT)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 23 2 0
## TB_PLEURA 15 3 0
## TB_LAINNYA 8 1 2
##
## Overall Statistics
##
## Accuracy : 0.5185
## 95% CI : (0.3784, 0.6566)
## No Information Rate : 0.8519
## P-Value [Acc > NIR] : 1.0000000
##
## Kappa : 0.1418
##
## Mcnemar's Test P-Value : 0.0002812
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.5000 0.50000 1.00000
## Specificity 0.7500 0.68750 0.82692
## Pos Pred Value 0.9200 0.16667 0.18182
## Neg Pred Value 0.2069 0.91667 1.00000
## Prevalence 0.8519 0.11111 0.03704
## Detection Rate 0.4259 0.05556 0.03704
## Detection Prevalence 0.4630 0.33333 0.20370
## Balanced Accuracy 0.6250 0.59375 0.91346
Naive Bayes dengan Balanced data dengan n menentukan sendiri
library(dplyr)
library(e1071)
library(caret)
set.seed(1001)
# jumlah data yang diinginkan per kelas
n_sample <- 258
down_train <- trainData %>%
group_by(PENYAKIT) %>%
sample_n(size = n_sample, replace = TRUE) %>%
ungroup()
# cek distribusi
table(down_train$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 258 258 258
down_train %>% count(PENYAKIT)
## # A tibble: 3 × 2
## PENYAKIT n
## <fct> <int>
## 1 TB_PARU 258
## 2 TB_PLEURA 258
## 3 TB_LAINNYA 258
NB_down <- naiveBayes(PENYAKIT ~ ., data = down_train)
NB_down
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3333333 0.3333333 0.3333333
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3100775 0.6899225
## TB_PLEURA 0.2209302 0.7790698
## TB_LAINNYA 0.4767442 0.5232558
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6705426 0.3294574
## TB_PLEURA 0.7093023 0.2906977
## TB_LAINNYA 0.3875969 0.6124031
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4457364 0.5542636
## TB_PLEURA 0.4069767 0.5930233
## TB_LAINNYA 0.7558140 0.2441860
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3217054 0.4680386
## TB_PLEURA 0.3914729 0.4890284
## TB_LAINNYA 0.3139535 0.4649998
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4496124 0.5503876
## TB_PLEURA 0.2364341 0.7635659
## TB_LAINNYA 0.4728682 0.5271318
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.1976744 0.3990192
## TB_PLEURA 0.1744186 0.3802066
## TB_LAINNYA 0.4496124 0.4984215
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.1589147 0.3663071
## TB_PLEURA 0.1162791 0.3211823
## TB_LAINNYA 0.3255814 0.4695024
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.93023256 0.06976744
## TB_PLEURA 0.86821705 0.13178295
## TB_LAINNYA 0.92635659 0.07364341
##
## weight
## Y [,1] [,2]
## TB_PARU 0.4002849 0
## TB_PLEURA 2.8383838 0
## TB_LAINNYA 6.6904762 0
testData$predicted <- predict(NB_down, testData)
## Warning in predict.naiveBayes(NB_down, testData): Type mismatch between
## training and new data for variable 'weight'. Did you use factors with numeric
## labels for training, and numeric values for new data?
testData$actual <- testData$PENYAKIT
confusionMatrix(
factor(testData$predicted),
factor(testData$actual)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 19 2 0
## TB_PLEURA 16 3 0
## TB_LAINNYA 11 1 2
##
## Overall Statistics
##
## Accuracy : 0.4444
## 95% CI : (0.3092, 0.586)
## No Information Rate : 0.8519
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.104
##
## Mcnemar's Test P-Value : 4.259e-05
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.4130 0.50000 1.00000
## Specificity 0.7500 0.66667 0.76923
## Pos Pred Value 0.9048 0.15789 0.14286
## Neg Pred Value 0.1818 0.91429 1.00000
## Prevalence 0.8519 0.11111 0.03704
## Detection Rate 0.3519 0.05556 0.03704
## Detection Prevalence 0.3889 0.35185 0.25926
## Balanced Accuracy 0.5815 0.58333 0.88462
Naive Bayes Balanced data dengan Weighted training dengan data no
NA
library(dplyr)
library(caret)
library(e1071)
# Gunakan data tanpa missing value
data <- data_model_no_na
table(data$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 234 33 14
prop.table(table(data$PENYAKIT))
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.83274021 0.11743772 0.04982206
class_freq <- table(data$PENYAKIT)
class_weight <- sum(class_freq) / (length(class_freq) * class_freq)
class_weight
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.4002849 2.8383838 6.6904762
data <- data %>%
mutate(weight = class_weight[PENYAKIT])
set.seed(123)
trainIndex <- createDataPartition(data$PENYAKIT, p = 0.8, list = FALSE)
trainData <- data[trainIndex, ]
testData <- data[-trainIndex, ]
set.seed(123)
train_weighted <- trainData %>%
slice_sample(
n = nrow(trainData),
replace = TRUE,
weight_by = weight
)
train_weighted <- train_weighted %>%
select(-weight)
testData <- testData %>%
select(-weight)
train_weighted %>%
count(PENYAKIT)
## # A tibble: 3 × 2
## PENYAKIT n
## <fct> <int>
## 1 TB_PARU 82
## 2 TB_PLEURA 72
## 3 TB_LAINNYA 73
model_nb_weight <- naiveBayes(
PENYAKIT ~ .,
data = train_weighted,
laplace = 1
)
model_nb_weight
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3612335 0.3171806 0.3215859
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.3214286 0.6785714
## TB_PLEURA 0.2027027 0.7972973
## TB_LAINNYA 0.4666667 0.5333333
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6707317 0.3536585
## TB_PLEURA 0.7777778 0.2500000
## TB_LAINNYA 0.3698630 0.6575342
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4146341 0.6097561
## TB_PLEURA 0.4444444 0.5833333
## TB_LAINNYA 0.6986301 0.3287671
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.3048780 0.4631887
## TB_PLEURA 0.3194444 0.4695334
## TB_LAINNYA 0.3972603 0.4927171
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4146341 0.6097561
## TB_PLEURA 0.1944444 0.8333333
## TB_LAINNYA 0.5205479 0.5068493
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.1341463 0.3429068
## TB_PLEURA 0.1111111 0.3164751
## TB_LAINNYA 0.4931507 0.5034130
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.24390244 0.4320773
## TB_PLEURA 0.09722222 0.2983392
## TB_LAINNYA 0.39726027 0.4927171
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.91666667 0.08333333
## TB_PLEURA 0.89189189 0.10810811
## TB_LAINNYA 0.92000000 0.08000000
prediksi <- predict(model_nb_weight, testData)
confusionMatrix(prediksi, testData$PENYAKIT)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 20 2 0
## TB_PLEURA 18 3 0
## TB_LAINNYA 8 1 2
##
## Overall Statistics
##
## Accuracy : 0.463
## 95% CI : (0.3262, 0.6039)
## No Information Rate : 0.8519
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1082
##
## Mcnemar's Test P-Value : 7.179e-05
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.4348 0.50000 1.00000
## Specificity 0.7500 0.62500 0.82692
## Pos Pred Value 0.9091 0.14286 0.18182
## Neg Pred Value 0.1875 0.90909 1.00000
## Prevalence 0.8519 0.11111 0.03704
## Detection Rate 0.3704 0.05556 0.03704
## Detection Prevalence 0.4074 0.38889 0.20370
## Balanced Accuracy 0.5924 0.56250 0.91346
Naive Bayes Balanced data dengan Weighted training dengan DATA
FULL
library(dplyr)
library(caret)
library(e1071)
# Gunakan data tanpa missing value
data <- data_model
table(data$PENYAKIT)
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 265 36 21
prop.table(table(data$PENYAKIT))
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.82298137 0.11180124 0.06521739
class_freq <- table(data$PENYAKIT)
class_weight <- sum(class_freq) / (length(class_freq) * class_freq)
class_weight
##
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.4050314 2.9814815 5.1111111
data <- data %>%
mutate(weight = class_weight[PENYAKIT])
set.seed(123)
trainIndex <- createDataPartition(data$PENYAKIT, p = 0.8, list = FALSE)
trainData <- data[trainIndex, ]
testData <- data[-trainIndex, ]
set.seed(123)
train_weighted <- trainData %>%
slice_sample(
n = nrow(trainData),
replace = TRUE,
weight_by = weight
)
train_weighted <- train_weighted %>%
select(-weight)
testData <- testData %>%
select(-weight)
train_weighted %>%
count(PENYAKIT)
## # A tibble: 3 × 2
## PENYAKIT n
## <fct> <int>
## 1 TB_PARU 91
## 2 TB_PLEURA 78
## 3 TB_LAINNYA 89
model_nb_weight <- naiveBayes(
PENYAKIT ~ .,
data = train_weighted,
laplace = 1
)
model_nb_weight
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## TB_PARU TB_PLEURA TB_LAINNYA
## 0.3527132 0.3023256 0.3449612
##
## Conditional probabilities:
## JENIS_KELAMIN
## Y P L
## TB_PARU 0.2903226 0.7096774
## TB_PLEURA 0.2750000 0.7250000
## TB_LAINNYA 0.4505495 0.5494505
##
## USIA_KATEGORI
## Y >45 ≤45
## TB_PARU 0.6703297 0.3516484
## TB_PLEURA 0.6538462 0.3717949
## TB_LAINNYA 0.3595506 0.6629213
##
## LEOKOSIT
## Y 0 1
## TB_PARU 0.4470588 0.5764706
## TB_PLEURA 0.2631579 0.7631579
## TB_LAINNYA 0.7831325 0.2409639
##
## KODE_TROMBOSIT
## Y [,1] [,2]
## TB_PARU 0.2417582 0.4305206
## TB_PLEURA 0.4230769 0.4972452
## TB_LAINNYA 0.3483146 0.4791357
##
## HEMOGLOBIN
## Y 0 1
## TB_PARU 0.4683544 0.5569620
## TB_PLEURA 0.4520548 0.5753425
## TB_LAINNYA 0.3787879 0.6515152
##
## KODE_PENYERTA
## Y [,1] [,2]
## TB_PARU 0.2197802 0.4163919
## TB_PLEURA 0.1923077 0.3966644
## TB_LAINNYA 0.3707865 0.4857521
##
## KODE_LAMA_RAWAT
## Y [,1] [,2]
## TB_PARU 0.05494505 0.2291354
## TB_PLEURA 0.02564103 0.1590850
## TB_LAINNYA 0.23595506 0.4269999
##
## KEADAAN_KELUAR
## Y HIDUP MENINGGAL
## TB_PARU 0.89247312 0.10752688
## TB_PLEURA 0.93750000 0.06250000
## TB_LAINNYA 0.93406593 0.06593407
prediksi <- predict(model_nb_weight, testData)
confusionMatrix(prediksi, testData$PENYAKIT)
## Confusion Matrix and Statistics
##
## Reference
## Prediction TB_PARU TB_PLEURA TB_LAINNYA
## TB_PARU 19 4 0
## TB_PLEURA 20 1 2
## TB_LAINNYA 14 2 2
##
## Overall Statistics
##
## Accuracy : 0.3438
## 95% CI : (0.2295, 0.473)
## No Information Rate : 0.8281
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0166
##
## Mcnemar's Test P-Value : 1.813e-05
##
## Statistics by Class:
##
## Class: TB_PARU Class: TB_PLEURA Class: TB_LAINNYA
## Sensitivity 0.3585 0.14286 0.50000
## Specificity 0.6364 0.61404 0.73333
## Pos Pred Value 0.8261 0.04348 0.11111
## Neg Pred Value 0.1707 0.85366 0.95652
## Prevalence 0.8281 0.10938 0.06250
## Detection Rate 0.2969 0.01562 0.03125
## Detection Prevalence 0.3594 0.35938 0.28125
## Balanced Accuracy 0.4974 0.37845 0.61667