Willibrordus Bayu 12/11/2021
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
library("openxlsx")

# Read the credit scoring workbook directly from its hosted location.
dataCreditRating <- read.xlsx(
  xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx"
)

# Structure of the raw data (risk_rating is still numeric here).
str(dataCreditRating)
## 'data.frame': 900 obs. of 7 variables:
## $ kode_kontrak : chr "AGR-000001" "AGR-000011" "AGR-000030" "AGR-000043" ...
## $ pendapatan_setahun_juta: num 295 271 159 210 165 220 70 88 163 100 ...
## $ kpr_aktif : chr "YA" "YA" "TIDAK" "YA" ...
## $ durasi_pinjaman_bulan : num 48 36 12 12 36 24 36 48 48 36 ...
## $ jumlah_tanggungan : num 5 5 0 3 0 5 3 3 5 6 ...
## $ rata_rata_overdue : chr "61 - 90 days" "61 - 90 days" "0 - 30 days" "46 - 60 days" ...
## $ risk_rating : num 4 4 1 3 2 1 2 2 2 2 ...

# Convert the risk_rating column to a factor.
dataCreditRating[["risk_rating"]] <- as.factor(dataCreditRating[["risk_rating"]])

# Structure after the conversion.
str(dataCreditRating)
## 'data.frame': 900 obs. of 7 variables:
## $ kode_kontrak : chr "AGR-000001" "AGR-000011" "AGR-000030" "AGR-000043" ...
## $ pendapatan_setahun_juta: num 295 271 159 210 165 220 70 88 163 100 ...
## $ kpr_aktif : chr "YA" "YA" "TIDAK" "YA" ...
## $ durasi_pinjaman_bulan : num 48 36 12 12 36 24 36 48 48 36 ...
## $ jumlah_tanggungan : num 5 5 0 3 0 5 3 3 5 6 ...
## $ rata_rata_overdue : chr "61 - 90 days" "61 - 90 days" "0 - 30 days" "46 - 60 days" ...
## $ risk_rating : Factor w/ 5 levels "1","2","3","4",..: 4 4 1 3 2 1 2 2 2 2 ...
library("openxlsx")

# Read the credit scoring workbook directly from its hosted location.
dataCreditRating <- read.xlsx(
  xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx"
)

# Turn the class variable into a factor, then inspect the result.
dataCreditRating[["risk_rating"]] <- as.factor(dataCreditRating[["risk_rating"]])
str(dataCreditRating)
## 'data.frame': 900 obs. of 7 variables:
## $ kode_kontrak : chr "AGR-000001" "AGR-000011" "AGR-000030" "AGR-000043" ...
## $ pendapatan_setahun_juta: num 295 271 159 210 165 220 70 88 163 100 ...
## $ kpr_aktif : chr "YA" "YA" "TIDAK" "YA" ...
## $ durasi_pinjaman_bulan : num 48 36 12 12 36 24 36 48 48 36 ...
## $ jumlah_tanggungan : num 5 5 0 3 0 5 3 3 5 6 ...
## $ rata_rata_overdue : chr "61 - 90 days" "61 - 90 days" "0 - 30 days" "46 - 60 days" ...
## $ risk_rating : Factor w/ 5 levels "1","2","3","4",..: 4 4 1 3 2 1 2 2 2 2 ...

# Keep only the two columns used as model inputs; every other column is dropped.
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[input_columns]
str(datafeed)
## 'data.frame': 900 obs. of 2 variables:
## $ durasi_pinjaman_bulan: num 48 36 12 12 36 24 36 48 48 36 ...
## $ jumlah_tanggungan : num 5 5 0 3 0 5 3 3 5 6 ...
library("openxlsx")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Draw 800 random row indices for the training set. The seed makes the draw
# reproducible; the population size comes from the data itself instead of a
# hard-coded 900, so the split still covers every row if the workbook changes.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

# Training inputs, training class labels, and the held-out testing inputs
# (all rows that were not sampled for training).
input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

str(input_training_set)
## 'data.frame': 800 obs. of 2 variables:
## $ durasi_pinjaman_bulan: num 36 24 36 36 36 24 12 48 48 12 ...
## $ jumlah_tanggungan : num 1 1 5 1 5 3 3 3 0 0 ...
str(class_training_set)
## Factor w/ 5 levels "1","2","3","4",..: 1 1 4 1 5 3 3 3 2 1 ...
str(input_testing_set)
## 'data.frame': 100 obs. of 2 variables:
## $ durasi_pinjaman_bulan: num 12 36 48 36 48 48 12 12 12 12 ...
## $ jumlah_tanggungan : num 0 0 3 3 6 5 0 0 0 4 ...
library("openxlsx")
library("C50")
## Warning: package 'C50' was built under R version 4.1.2

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the C5.0 decision tree and print its summary.
risk_rating_model <- C5.0(input_training_set, class_training_set)
summary(risk_rating_model)
##
## Call:
## C5.0.default(x = input_training_set, y = class_training_set)
##
##
## C5.0 [Release 2.07 GPL Edition] Sat Dec 11 16:10:19 2021
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 800 cases (3 attributes) from undefined.data
##
## Decision tree:
##
## jumlah_tanggungan > 4:
## :...durasi_pinjaman_bulan <= 24: 4 (105/30)
## : durasi_pinjaman_bulan > 24: 5 (120/51)
## jumlah_tanggungan <= 4:
## :...jumlah_tanggungan > 2: 3 (216/20)
## jumlah_tanggungan <= 2:
## :...durasi_pinjaman_bulan <= 36: 1 (264/80)
## durasi_pinjaman_bulan > 36:
## :...jumlah_tanggungan <= 0: 2 (37/7)
## jumlah_tanggungan > 0: 3 (58/4)
##
##
## Evaluation on training data (800 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 6 192(24.0%) <<
##
##
## (a) (b) (c) (d) (e) <-classified as
## ---- ---- ---- ---- ----
## 184 2 5 6 6 (a): class 1
## 80 30 19 6 11 (b): class 2
## 3 250 (c): class 3
## 2 75 34 (d): class 4
## 18 69 (e): class 5
##
##
## Attribute usage:
##
## 100.00% jumlah_tanggungan
## 73.00% durasi_pinjaman_bulan
##
##
## Time: 0.0 secs
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the C5.0 decision tree and draw it.
risk_rating_model <- C5.0(input_training_set, class_training_set)
plot(risk_rating_model)

# Start of the next section. This call used to be fused onto the plot() line,
# which made the line unparseable.
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model; the control label replaces the default class attribute name
# in the summary ("Risk Rating" instead of "outcome").
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))
summary(risk_rating_model)
##
## Call:
## C5.0.default(x = input_training_set, y = class_training_set, control
## = C5.0Control(label = "Risk Rating"))
##
##
## C5.0 [Release 2.07 GPL Edition] Sat Dec 11 16:10:20 2021
## -------------------------------
##
## Class specified by attribute `Risk Rating'
##
## Read 800 cases (3 attributes) from undefined.data
##
## Decision tree:
##
## jumlah_tanggungan > 4:
## :...durasi_pinjaman_bulan <= 24: 4 (105/30)
## : durasi_pinjaman_bulan > 24: 5 (120/51)
## jumlah_tanggungan <= 4:
## :...jumlah_tanggungan > 2: 3 (216/20)
## jumlah_tanggungan <= 2:
## :...durasi_pinjaman_bulan <= 36: 1 (264/80)
## durasi_pinjaman_bulan > 36:
## :...jumlah_tanggungan <= 0: 2 (37/7)
## jumlah_tanggungan > 0: 3 (58/4)
##
##
## Evaluation on training data (800 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 6 192(24.0%) <<
##
##
## (a) (b) (c) (d) (e) <-classified as
## ---- ---- ---- ---- ----
## 184 2 5 6 6 (a): class 1
## 80 30 19 6 11 (b): class 2
## 3 250 (c): class 3
## 2 75 34 (d): class 4
## 18 69 (e): class 5
##
##
## Attribute usage:
##
## 100.00% jumlah_tanggungan
## 73.00% durasi_pinjaman_bulan
##
##
## Time: 0.0 secs
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
# This variant adds kpr_aktif as a third input column.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan", "kpr_aktif")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 780 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 780)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label and print its summary.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))
summary(risk_rating_model)
##
## Call:
## C5.0.default(x = input_training_set, y = class_training_set, control
## = C5.0Control(label = "Risk Rating"))
##
##
## C5.0 [Release 2.07 GPL Edition] Sat Dec 11 16:10:20 2021
## -------------------------------
##
## Class specified by attribute `Risk Rating'
##
## Read 780 cases (4 attributes) from undefined.data
##
## Decision tree:
##
## kpr_aktif = TIDAK:
## :...durasi_pinjaman_bulan <= 36: 1 (293/103)
## : durasi_pinjaman_bulan > 36: 2 (47/7)
## kpr_aktif = YA:
## :...jumlah_tanggungan <= 4: 3 (247/4)
## jumlah_tanggungan > 4:
## :...durasi_pinjaman_bulan <= 24: 4 (91/17)
## durasi_pinjaman_bulan > 24: 5 (102/34)
##
##
## Evaluation on training data (780 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 5 165(21.2%) <<
##
##
## (a) (b) (c) (d) (e) <-classified as
## ---- ---- ---- ---- ----
## 190 5 2 (a): class 1
## 101 40 1 (b): class 2
## 1 1 243 (c): class 3
## 2 74 33 (d): class 4
## 1 1 17 68 (e): class 5
##
##
## Attribute usage:
##
## 100.00% kpr_aktif
## 68.33% durasi_pinjaman_bulan
## 56.41% jumlah_tanggungan
##
##
## Time: 0.0 secs
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Replace the numeric ratings with text labels before building the factor.
# The first assignment coerces the whole column to character, so the later
# comparisons against "2".."5" are string comparisons. as.factor() then sorts
# the levels alphabetically (dua, empat, lima, satu, tiga), which is the order
# the confusion matrix in the summary uses.
dataCreditRating$risk_rating[dataCreditRating$risk_rating == "1"] <- "satu"
dataCreditRating$risk_rating[dataCreditRating$risk_rating == "2"] <- "dua"
dataCreditRating$risk_rating[dataCreditRating$risk_rating == "3"] <- "tiga"
dataCreditRating$risk_rating[dataCreditRating$risk_rating == "4"] <- "empat"
dataCreditRating$risk_rating[dataCreditRating$risk_rating == "5"] <- "lima"
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)

# Select the input variables.
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label and print its summary.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))
summary(risk_rating_model)
##
## Call:
## C5.0.default(x = input_training_set, y = class_training_set, control
## = C5.0Control(label = "Risk Rating"))
##
##
## C5.0 [Release 2.07 GPL Edition] Sat Dec 11 16:10:20 2021
## -------------------------------
##
## Class specified by attribute `Risk Rating'
##
## Read 800 cases (3 attributes) from undefined.data
##
## Decision tree:
##
## jumlah_tanggungan > 4:
## :...durasi_pinjaman_bulan <= 24: empat (105/30)
## : durasi_pinjaman_bulan > 24: lima (120/51)
## jumlah_tanggungan <= 4:
## :...jumlah_tanggungan > 2: tiga (216/20)
## jumlah_tanggungan <= 2:
## :...durasi_pinjaman_bulan <= 36: satu (264/80)
## durasi_pinjaman_bulan > 36:
## :...jumlah_tanggungan <= 0: dua (37/7)
## jumlah_tanggungan > 0: tiga (58/4)
##
##
## Evaluation on training data (800 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 6 192(24.0%) <<
##
##
## (a) (b) (c) (d) (e) <-classified as
## ---- ---- ---- ---- ----
## 30 6 11 80 19 (a): class dua
## 2 75 34 (b): class empat
## 18 69 (c): class lima
## 2 6 6 184 5 (d): class satu
## 3 250 (e): class tiga
##
##
## Attribute usage:
##
## 100.00% jumlah_tanggungan
## 73.00% durasi_pinjaman_bulan
##
##
## Time: 0.0 secs
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))

# Predict the risk rating for every row of the testing set.
predict(risk_rating_model, input_testing_set)
## [1] 1 1 3 3 5 5 1 1 1 3 1 2 1 1 3 3 1 3 3 3 3 3 1 5 1 1 3 1 3 5 1 1 2 1 5 1 1
## [38] 5 3 3 3 3 4 3 3 1 3 5 2 3 2 5 3 5 1 5 4 5 3 4 1 3 4 4 3 5 5 5 3 1 1 1 1 3
## [75] 5 1 4 5 3 1 3 3 3 3 3 1 3 3 5 4 5 3 3 3 1 1 5 5 3 3
## Levels: 1 2 3 4 5
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))

# Store the true rating next to the inputs, then add the model's prediction.
# Only the predictor columns are handed to predict(), so the true label that
# was just attached is never part of the prediction input.
input_testing_set$risk_rating <- dataCreditRating$risk_rating[-indeks_training_set]
input_testing_set$hasil_prediksi <- predict(risk_rating_model, input_testing_set[, input_columns])
print(input_testing_set)
## durasi_pinjaman_bulan jumlah_tanggungan risk_rating hasil_prediksi
## 3 12 0 1 1
## 5 36 0 2 1
## 8 48 3 2 3
## 40 36 3 2 3
## 41 48 6 2 5
## 44 48 5 2 5
## 58 12 0 1 1
## 70 12 0 1 1
## 109 12 0 1 1
## 110 12 4 3 3
## 122 12 0 1 1
## 151 48 0 2 2
## 179 36 1 1 1
## 180 36 1 2 1
## 182 24 4 3 3
## 195 48 3 3 3
## 200 24 0 1 1
## 217 12 4 3 3
## 230 48 2 3 3
## 231 12 3 3 3
## 234 24 3 3 3
## 236 24 4 3 3
## 238 24 0 1 1
## 245 36 5 4 5
## 252 24 0 1 1
## 253 24 0 1 1
## 260 48 1 3 3
## 265 36 0 2 1
## 275 12 3 3 3
## 279 36 6 5 5
## 285 36 1 1 1
## 295 24 0 1 1
## 317 48 0 2 2
## 343 24 0 1 1
## 350 48 6 5 5
## 352 12 1 1 1
## 356 36 2 2 1
## 369 48 6 5 5
## 373 48 3 3 3
## 375 48 2 3 3
## 384 24 3 3 3
## 388 36 3 3 3
## 399 24 6 4 4
## 419 48 3 3 3
## 433 24 4 3 3
## 437 36 1 1 1
## 446 24 3 3 3
## 455 48 5 5 5
## 493 48 0 2 2
## 496 12 3 3 3
## 501 48 0 3 2
## 521 48 5 4 5
## 524 48 2 3 3
## 527 36 5 5 5
## 534 36 1 1 1
## 536 48 6 5 5
## 544 12 5 4 4
## 548 48 6 5 5
## 561 12 3 3 3
## 565 12 6 4 4
## 574 24 1 1 1
## 577 48 2 3 3
## 587 12 6 4 4
## 594 12 6 4 4
## 612 24 4 3 3
## 616 48 6 5 5
## 621 36 5 5 5
## 632 48 6 5 5
## 641 36 4 3 3
## 645 12 2 2 1
## 657 12 2 1 1
## 675 12 2 1 1
## 687 12 2 1 1
## 697 36 4 3 3
## 704 48 6 5 5
## 707 12 2 1 1
## 716 12 5 4 4
## 721 36 5 5 5
## 729 48 1 3 3
## 737 12 2 1 1
## 743 36 3 3 3
## 748 48 1 3 3
## 749 36 4 3 3
## 786 48 1 3 3
## 799 12 3 3 3
## 801 24 2 1 1
## 806 24 4 3 3
## 814 36 3 3 3
## 825 36 6 5 5
## 831 24 6 4 4
## 861 48 5 5 5
## 863 12 3 3 3
## 869 48 3 3 3
## 870 48 3 3 3
## 872 24 2 1 1
## 880 36 1 2 1
## 888 48 5 5 5
## 890 48 5 5 5
## 893 48 3 3 3
## 897 48 2 3 3
library("openxlsx")
library("C50")
library("reshape2")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))

# Store the true rating and the model's prediction alongside the inputs.
# Only the predictor columns are handed to predict().
input_testing_set$risk_rating <- dataCreditRating$risk_rating[-indeks_training_set]
input_testing_set$hasil_prediksi <- predict(risk_rating_model, input_testing_set[, input_columns])

# Confusion matrix: one row per predicted rating, one column per true rating,
# each cell counting the matching test cases. The value column and the
# aggregation are spelled out so dcast() no longer has to guess them and emit
# "Using ... as value column" / "Aggregation function missing" messages.
dcast(
  hasil_prediksi ~ risk_rating,
  data = input_testing_set,
  value.var = "hasil_prediksi",
  fun.aggregate = length
)
## hasil_prediksi 1 2 3 4 5
## 1 1 24 6 0 0 0
## 2 2 0 3 1 0 0
## 3 3 0 2 37 0 0
## 4 4 0 0 0 7 0
## 5 5 0 2 0 2 16
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))

# Store the true rating and the model's prediction alongside the inputs.
# Only the predictor columns are handed to predict().
input_testing_set$risk_rating <- dataCreditRating$risk_rating[-indeks_training_set]
input_testing_set$hasil_prediksi <- predict(risk_rating_model, input_testing_set[, input_columns])

# Number of correct predictions: count the rows where prediction and truth agree.
sum(input_testing_set$risk_rating == input_testing_set$hasil_prediksi)
## [1] 87
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the model with a readable class label.
risk_rating_model <- C5.0(input_training_set, class_training_set, control = C5.0Control(label = "Risk Rating"))

# Store the true rating and the model's prediction alongside the inputs.
# Only the predictor columns are handed to predict().
input_testing_set$risk_rating <- dataCreditRating$risk_rating[-indeks_training_set]
input_testing_set$hasil_prediksi <- predict(risk_rating_model, input_testing_set[, input_columns])

# Number of wrong predictions: count the rows where prediction and truth differ.
sum(input_testing_set$risk_rating != input_testing_set$hasil_prediksi)
## [1] 13

# Build a one-row data frame describing a new loan application.
aplikasi_baru <- data.frame(jumlah_tanggungan = 6, durasi_pinjaman_bulan = 12)
print(aplikasi_baru)
## jumlah_tanggungan durasi_pinjaman_bulan
## 1 6 12
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the C5.0 decision tree (no summary is printed in this section).
risk_rating_model <- C5.0(input_training_set, class_training_set)

# New loan application: 6 dependants, 12-month loan duration.
aplikasi_baru <- data.frame(jumlah_tanggungan = 6, durasi_pinjaman_bulan = 12)

# Predict the risk rating for the new application.
predict(risk_rating_model, aplikasi_baru)
## [1] 4
## Levels: 1 2 3 4 5
library("openxlsx")
library("C50")

# Load the credit scoring data set.
dataCreditRating <- read.xlsx(xlsxFile = "https://storage.googleapis.com/dqlab-dataset/credit_scoring_dqlab.xlsx")

# Prepare the class variable (as a factor) and the input variables.
dataCreditRating$risk_rating <- as.factor(dataCreditRating$risk_rating)
input_columns <- c("durasi_pinjaman_bulan", "jumlah_tanggungan")
datafeed <- dataCreditRating[, input_columns]

# Reproducible train/test split: 800 rows for training, the rest for testing.
# The population size is read from the data rather than hard-coded as 900.
set.seed(100)
indeks_training_set <- sample(nrow(dataCreditRating), 800)

input_training_set <- datafeed[indeks_training_set, ]
class_training_set <- dataCreditRating$risk_rating[indeks_training_set]
input_testing_set <- datafeed[-indeks_training_set, ]

# Fit the C5.0 decision tree (no summary is printed in this section).
risk_rating_model <- C5.0(input_training_set, class_training_set)

# New loan application: 6 dependants, 64-month loan duration.
aplikasi_baru <- data.frame(jumlah_tanggungan = 6, durasi_pinjaman_bulan = 64)

# Predict the risk rating for the new application.
predict(risk_rating_model, aplikasi_baru)
## [1] 5
## Levels: 1 2 3 4 5