Load the necessary packages.
library(rsample) # stratified sampling
library(cdata) # data wrangling
## Loading required package: wrapr
library(ggplot2) # beautiful plot
library(GGally) # grouped scatter plot matrix
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(magrittr) # pipe
library(dplyr) # data wrangling
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:wrapr':
##
## coalesce
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret) # data preprocessing and transform
## Loading required package: lattice
library(e1071) # svm
##
## Attaching package: 'e1071'
## The following object is masked from 'package:rsample':
##
## permutations
library(WVPlots) # double density plot and ROC curve
library(knitr) # tidy table
library(sigr) # calculate AUC
Import the data saved as .RData and split it into training and test data by stratified sampling.
# load() restores the objects stored in credit.RData into the workspace; the
# rest of the script works on the data frame `dat` that comes from it.
load("credit.RData")
# Inspect the column types of the loaded data.
str(dat)
## 'data.frame': 1000 obs. of 21 variables:
## $ status : Factor w/ 4 levels "no checking account",..: 1 1 2 1 1 1 1 1 4 2 ...
## $ duration : int 18 9 12 12 12 10 8 6 18 24 ...
## $ credit_history : Factor w/ 5 levels "delay in paying off in the past",..: 5 5 3 5 5 5 5 5 5 3 ...
## $ purpose : Factor w/ 11 levels "others","car (new)",..: 3 1 10 1 1 1 1 1 4 4 ...
## $ amount : int 1049 2799 841 2122 2171 2241 3398 1361 1098 3758 ...
## $ savings : Factor w/ 5 levels "unknown/no savings account",..: 1 1 2 1 1 1 1 1 1 3 ...
## $ employment_duration : Factor w/ 5 levels "unemployed","< 1 yr",..: 2 3 4 3 3 2 4 2 1 1 ...
## $ installment_rate : Ord.factor w/ 4 levels ">= 35"<"25 <= ... < 35"<..: 4 2 2 3 4 1 1 2 4 1 ...
## $ personal_status_sex : Factor w/ 4 levels "male : divorced/separated",..: 2 3 2 3 3 3 3 3 2 2 ...
## $ other_debtors : Factor w/ 3 levels "none","co-applicant",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ present_residence : Ord.factor w/ 4 levels "< 1 yr"<"1 <= ... < 4 yrs"<..: 4 2 4 2 4 3 4 4 4 4 ...
## $ property : Factor w/ 4 levels "unknown / no property",..: 2 1 1 1 2 1 1 1 3 4 ...
## $ age : int 21 36 23 39 38 48 39 40 65 23 ...
## $ other_installment_plans: Factor w/ 3 levels "bank","stores",..: 3 3 3 3 1 3 3 3 3 3 ...
## $ housing : Factor w/ 3 levels "for free","rent",..: 1 1 1 1 2 1 2 2 2 1 ...
## $ number_credits : Ord.factor w/ 4 levels "1"<"2-3"<"4-5"<..: 1 2 1 2 2 2 2 1 2 1 ...
## $ job : Factor w/ 4 levels "unemployed/unskilled - non-resident",..: 3 3 2 2 2 2 2 2 1 1 ...
## $ people_liable : Factor w/ 2 levels "3 or more","0 to 2": 2 1 2 1 2 1 2 1 2 2 ...
## $ telephone : Factor w/ 2 levels "no","yes (under customer name)": 1 1 1 1 1 1 1 1 1 1 ...
## $ foreign_worker : Factor w/ 2 levels "yes","no": 2 2 2 1 1 1 1 1 2 2 ...
## $ credit_risk : Factor w/ 2 levels "bad","good": 2 2 2 2 2 2 2 2 2 2 ...
summary(dat)
## status duration
## no checking account :274 Min. : 4.0
## ... < 0 DM :269 1st Qu.:12.0
## 0<= ... < 200 DM : 63 Median :18.0
## ... >= 200 DM / salary for at least 1 year:394 Mean :20.9
## 3rd Qu.:24.0
## Max. :72.0
##
## credit_history purpose
## delay in paying off in the past : 40 furniture/equipment:280
## critical account/other credits elsewhere : 49 others :234
## no credits taken/all credits paid back duly:530 car (used) :181
## existing credits paid back duly till now : 88 car (new) :103
## all credits at this bank paid back duly :293 retraining : 97
## repairs : 50
## (Other) : 55
## amount savings employment_duration
## Min. : 250 unknown/no savings account:603 unemployed : 62
## 1st Qu.: 1366 ... < 100 DM :103 < 1 yr :172
## Median : 2320 100 <= ... < 500 DM : 63 1 <= ... < 4 yrs:339
## Mean : 3271 500 <= ... < 1000 DM : 48 4 <= ... < 7 yrs:174
## 3rd Qu.: 3972 ... >= 1000 DM :183 >= 7 yrs :253
## Max. :18424
##
## installment_rate personal_status_sex
## >= 35 :136 male : divorced/separated : 50
## 25 <= ... < 35:231 female : non-single or male : single:310
## 20 <= ... < 25:157 male : married/widowed :548
## < 20 :476 female : single : 92
##
##
##
## other_debtors present_residence
## none :907 < 1 yr :130
## co-applicant: 41 1 <= ... < 4 yrs:308
## guarantor : 52 4 <= ... < 7 yrs:149
## >= 7 yrs :413
##
##
##
## property age
## unknown / no property :282 Min. :19.00
## car or other :232 1st Qu.:27.00
## building soc. savings agr./life insurance:332 Median :33.00
## real estate :154 Mean :35.54
## 3rd Qu.:42.00
## Max. :75.00
##
## other_installment_plans housing number_credits
## bank :139 for free:179 1 :633
## stores: 47 rent :714 2-3 :333
## none :814 own :107 4-5 : 28
## >= 6: 6
##
##
##
## job people_liable
## unemployed/unskilled - non-resident : 22 3 or more:155
## unskilled - resident :200 0 to 2 :845
## skilled employee/official :630
## manager/self-empl./highly qualif. employee:148
##
##
##
## telephone foreign_worker credit_risk
## no :596 yes: 37 bad :300
## yes (under customer name):404 no :963 good:700
##
##
##
##
##
# no missing values
response <- "credit_risk"
set.seed(10)
# Stratified 80/20 split on the outcome column.
# all_of() states explicitly that `response` is an external character vector
# holding the column name, which avoids tidyselect's ambiguity note.
split <- initial_split(dat, prop = 0.8, strata = all_of(response))
train <- training(split)
test <- testing(split)
Based on the output of the str() function, there are 17 categorical predictors and 3 continuous predictors. There are 700 good credits and 300 bad credits. The paper (Grömping, 2019) states that the bad credit instances are oversampled; normally, the prevalence of bad credit instances is approximately 5%.
# Continuous predictors
idx_con <- c("duration", "amount", "age")

# Grouped boxplots: reshape the continuous columns (plus the outcome) from
# wide to long format so that each variable gets its own facet.
train_con <- cbind(train[, idx_con], credit_risk = train[[response]])
train_con_long <- unpivot_to_blocks(
  train_con,
  nameForNewKeyColumn = "variables",
  nameForNewValueColumn = "values",
  columnsToTakeFrom = idx_con
)
train_con_long %>%
  ggplot(aes(x = credit_risk, y = values)) +
  geom_boxplot(
    color = "blue", fill = "blue", alpha = 0.2, notch = TRUE,
    outlier.color = "red", outlier.fill = "red", outlier.size = 2
  ) +
  facet_wrap(~variables, ncol = 3, scales = "free")

# Scatter plot matrix of the three continuous predictors, coloured by outcome.
ggpairs(
  train_con,
  mapping = ggplot2::aes(colour = credit_risk),
  columns = 1:3
)
# Two ways of visualising a pair of categorical variables.
# Method 1: point size encodes the number of observations per combination.
ggplot(train, aes(x = housing, y = credit_risk)) +
  geom_count()
# Method 2: heat map of the counts per combination.
risk_by_history <- count(train, credit_history, credit_risk)
ggplot(risk_by_history, aes(x = credit_history, y = credit_risk)) +
  geom_tile(aes(fill = n)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Chi-square independence test
# Categorical predictors: use the chi-square test of independence to
# investigate the association between each categorical feature and the
# response/outcome.
train_cat <- train[, !(colnames(train) %in% idx_con)]
# (tt=chisq.test(train$status,train$credit_risk))

# Run chisq.test(x, y) once and report both its p-value and whether it raised
# a warning (e.g. "Chi-squared approximation may be incorrect"). The warning
# is muffled here and returned as a flag instead.
chisq_with_flag <- function(x, y) {
  warned <- FALSE
  res <- withCallingHandlers(
    chisq.test(x, y),
    warning = function(w) {
      warned <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  list(p.value = res$p.value, warned = warned)
}

# Every column of train_cat except the last one (the response) is tested.
# The tests run on the columns of train_cat itself: `train` still contains the
# continuous predictors, so its column positions do not line up with
# train_cat's and must not be indexed with the same i.
n_pred <- ncol(train_cat) - 1
tests <- lapply(
  train_cat[seq_len(n_pred)],
  chisq_with_flag,
  y = train$credit_risk
)
# t: p-value per categorical predictor (name kept for the code that follows,
# although it masks base::t in the workspace).
t <- unname(vapply(tests, function(r) r$p.value, numeric(1)))
# idx: positions (in train_cat) of the predictors whose test raised a warning.
idx <- unname(which(vapply(tests, function(r) r$warned, logical(1))))
## [1] "TRUE"
## Warning in chisq.test(train_cat[, i], train$credit_risk): Chi-squared
## approximation may be incorrect
## [1] "TRUE"
## [1] "TRUE"
## Warning in chisq.test(train_cat[, i], train$credit_risk): Chi-squared
## approximation may be incorrect
## [1] "TRUE"
## [1] "TRUE"
# Predictors significant at the 5% level, minus those flagged in `idx`.
idx_sig <- which(t <= 0.05)
idx_int <- !is.element(idx_sig, idx)
names(train_cat)[idx_sig[idx_int]]
## [1] "status" "purpose"
## [3] "personal_status_sex" "property"
## [5] "other_installment_plans" "housing"
## [7] "foreign_worker"
# ---- Training data ----
# One-hot encoding of the categorical (nominal) predictors. The encoder is
# built from the training data and reused for the test data below.
var_cat <- c(
  "status", "credit_history", "purpose", "savings", "employment_duration",
  "personal_status_sex", "other_debtors", "property",
  "other_installment_plans", "housing", "job", "people_liable", "telephone",
  "foreign_worker"
)
train_cat <- train[, names(train) %in% var_cat]
dummy <- dummyVars("~.", data = train_cat)
newdata <- data.frame(predict(dummy, newdata = train_cat))

# Label encoding of the ordinal predictors: each level is replaced by its
# zero-based position in the level order.
var_ord <- c("installment_rate", "present_residence", "number_credits")
train_cont <- train[, names(train) %in% var_ord]
train_cont[var_ord] <- lapply(
  train_cont[var_ord],
  function(lv) as.numeric(lv) - 1
)

# Min-max normalization of the continuous predictors; the range is learned
# from the training data only.
var_cont <- c("amount", "age", "duration")
dat_cont <- train[, names(train) %in% var_cont]
process <- preProcess(dat_cont, method = "range")
scaled_dat_cont <- predict(process, dat_cont)

# Bind all encoded predictors and the response column-wise.
train_new <- cbind(
  newdata,
  train_cont,
  scaled_dat_cont,
  credit_risk = train$credit_risk
)

# ---- Test data (reuses `dummy` and `process` fitted on the training data) ----
# One-hot encoding
test_cat <- test[, names(test) %in% var_cat]
newdata <- data.frame(predict(dummy, newdata = test_cat))

# Label encoding
test_cont <- test[, names(test) %in% var_ord]
test_cont[var_ord] <- lapply(
  test_cont[var_ord],
  function(lv) as.numeric(lv) - 1
)

# Min-max normalization
dat_cont <- test[, names(test) %in% var_cont]
scaled_dat_cont <- predict(process, dat_cont)

# Bind by columns
test_new <- cbind(
  newdata,
  test_cont,
  scaled_dat_cont,
  credit_risk = test$credit_risk
)
# Cost function used for hyper-parameter tuning.
# cost_matrix is laid out like table(truth, pred): entry [i, j] is the cost of
# predicting class j when the true class is i (5 for [1, 2], 1 for [2, 1],
# 0 on the diagonal).
cost_matrix <- matrix(c(0, 1, 5, 0), ncol = 2)

# Total misclassification cost of a set of predictions.
#   truth: true class labels
#   pred:  predicted class labels
# Returns the sum of the confusion-table counts weighted by cost_matrix.
err <- function(truth, pred) {
  confusion <- table(truth = truth, pred = pred)
  sum(confusion * cost_matrix)
}
# Exponents of the candidate costs: 2^-10, 2^-8, ..., 2^10.
range_exp <- seq(-10, 10, by = 2)
set.seed(200) # for reproducibility
# Linear-kernel SVM tuned by 5-fold cross-validation on the misclassification
# cost `err`. scale = FALSE because the predictors were already scaled
# beforehand. Class weights are set inversely proportional to the number of
# samples in each class.
svm_tune <- tune(
  svm, credit_risk ~ .,
  data = train_new,
  kernel = "linear",
  scale = FALSE,
  probability = TRUE,
  class.weights = "inverse",
  ranges = list(cost = 2^range_exp),
  tunecontrol = tune.control(cross = 5, error.fun = err)
)
summary(svm_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost
## 16
##
## - best performance: 98.8
##
## - Detailed performance results:
## cost error dispersion
## 1 9.765625e-04 172.8 79.672454
## 2 3.906250e-03 172.8 79.672454
## 3 1.562500e-02 172.8 79.672454
## 4 6.250000e-02 172.8 79.672454
## 5 2.500000e-01 172.8 79.672454
## 6 1.000000e+00 108.0 9.273618
## 7 4.000000e+00 102.2 5.890671
## 8 1.600000e+01 98.8 13.663821
## 9 6.400000e+01 102.4 9.044335
## 10 2.560000e+02 105.2 6.610598
## 11 1.024000e+03 106.8 12.029131
# Cost value with the lowest cross-validated misclassification cost.
perf_lin <- svm_tune$performances
min_cost <- perf_lin$cost[which.min(perf_lin$error)]
# Visualization: tuning curve with the best cost marked by a dashed red line.
ggplot(perf_lin, aes(x = cost, y = error)) +
  geom_line() +
  scale_x_continuous(name = "cost, C", trans = "log2") +
  ylab("misclassification cost") +
  geom_vline(xintercept = min_cost, color = "red", linetype = 2)
# Keep the best model in terms of misclassification cost.
svm_lin <- svm_tune$best.model
Coefficients of linear kernel SVM
# The coefficients of a linear SVM give insight into how each individual
# predictor affects the outcome. The first element of coef() is dropped.
w_lin <- coef(svm_lin)[-1]
coef_lin <- data.frame(names = names(w_lin), coef = w_lin)
# Ten coefficients with the largest absolute value, largest first.
top10 <- order(-abs(coef_lin$coef))[1:10]
coef_lin_10 <- coef_lin[top10, ]
rownames(coef_lin_10) <- NULL
library(knitr)
kable(coef_lin_10)
| names | coef |
|---|---|
| status........200.DM...salary.for.at.least.1.year | -0.7908579 |
| credit_history.all.credits.at.this.bank.paid.back.duly | -0.5707810 |
| status.no.checking.account | 0.5532820 |
| purpose.car..new. | -0.3861087 |
| duration | 0.3832446 |
| savings.unknown.no.savings.account | 0.3404894 |
| property.unknown...no.property | -0.3105557 |
| amount | 0.3055987 |
| other_debtors.guarantor | -0.2966404 |
| other_debtors.co.applicant | 0.2952381 |
# Visualization of the coefficient estimates: one point per predictor with a
# segment drawn from zero to the estimate.
ggplot(coef_lin_10, aes(x = names, y = coef, ymin = 0, ymax = coef)) +
  geom_pointrange() +
  coord_flip() +
  theme_classic() +
  ylab("coefficient estimates")
# Coarse grid search for the RBF-kernel SVM over cost (2^range_exp) and
# gamma (2^-5, 2^-3, ..., 2^5), again with 5-fold cross-validation on `err`.
range_exp_sigma <- seq(-5, 5, by = 2)
set.seed(300)
svm_tune <- tune(
  svm, credit_risk ~ .,
  data = train_new,
  kernel = "radial",
  scale = FALSE,
  probability = TRUE,
  class.weights = "inverse",
  ranges = list(cost = 2^range_exp, gamma = 2^range_exp_sigma),
  tunecontrol = tune.control(cross = 5, error.fun = err)
)
summary(svm_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost gamma
## 64 0.03125
##
## - best performance: 100.6
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 9.765625e-04 0.03125 213.2 58.789455
## 2 3.906250e-03 0.03125 213.2 58.789455
## 3 1.562500e-02 0.03125 213.2 58.789455
## 4 6.250000e-02 0.03125 213.2 58.789455
## 5 2.500000e-01 0.03125 213.2 58.789455
## 6 1.000000e+00 0.03125 213.2 58.789455
## 7 4.000000e+00 0.03125 213.2 58.789455
## 8 1.600000e+01 0.03125 151.8 33.169263
## 9 6.400000e+01 0.03125 100.6 11.216060
## 10 2.560000e+02 0.03125 104.6 8.324662
## 11 1.024000e+03 0.03125 110.4 15.009997
## 12 9.765625e-04 0.12500 213.2 58.789455
## 13 3.906250e-03 0.12500 213.2 58.789455
## 14 1.562500e-02 0.12500 213.2 58.789455
## 15 6.250000e-02 0.12500 213.2 58.789455
## 16 2.500000e-01 0.12500 213.2 58.789455
## 17 1.000000e+00 0.12500 213.2 58.789455
## 18 4.000000e+00 0.12500 213.2 58.789455
## 19 1.600000e+01 0.12500 191.0 45.667275
## 20 6.400000e+01 0.12500 106.2 15.722595
## 21 2.560000e+02 0.12500 123.2 10.545141
## 22 1.024000e+03 0.12500 138.2 23.530831
## 23 9.765625e-04 0.50000 213.2 58.789455
## 24 3.906250e-03 0.50000 213.2 58.789455
## 25 1.562500e-02 0.50000 213.2 58.789455
## 26 6.250000e-02 0.50000 213.2 58.789455
## 27 2.500000e-01 0.50000 213.2 58.789455
## 28 1.000000e+00 0.50000 213.2 58.789455
## 29 4.000000e+00 0.50000 213.2 58.789455
## 30 1.600000e+01 0.50000 213.2 58.789455
## 31 6.400000e+01 0.50000 213.2 58.789455
## 32 2.560000e+02 0.50000 224.6 17.444197
## 33 1.024000e+03 0.50000 228.8 18.780309
## 34 9.765625e-04 2.00000 213.2 58.789455
## 35 3.906250e-03 2.00000 213.2 58.789455
## 36 1.562500e-02 2.00000 213.2 58.789455
## 37 6.250000e-02 2.00000 213.2 58.789455
## 38 2.500000e-01 2.00000 213.2 58.789455
## 39 1.000000e+00 2.00000 213.2 58.789455
## 40 4.000000e+00 2.00000 213.2 58.789455
## 41 1.600000e+01 2.00000 213.2 58.789455
## 42 6.400000e+01 2.00000 213.2 58.789455
## 43 2.560000e+02 2.00000 238.0 14.832397
## 44 1.024000e+03 2.00000 238.0 14.832397
## 45 9.765625e-04 8.00000 213.2 58.789455
## 46 3.906250e-03 8.00000 213.2 58.789455
## 47 1.562500e-02 8.00000 213.2 58.789455
## 48 6.250000e-02 8.00000 213.2 58.789455
## 49 2.500000e-01 8.00000 213.2 58.789455
## 50 1.000000e+00 8.00000 213.2 58.789455
## 51 4.000000e+00 8.00000 213.2 58.789455
## 52 1.600000e+01 8.00000 213.2 58.789455
## 53 6.400000e+01 8.00000 213.2 58.789455
## 54 2.560000e+02 8.00000 238.0 14.832397
## 55 1.024000e+03 8.00000 238.0 14.832397
## 56 9.765625e-04 32.00000 213.2 58.789455
## 57 3.906250e-03 32.00000 213.2 58.789455
## 58 1.562500e-02 32.00000 213.2 58.789455
## 59 6.250000e-02 32.00000 213.2 58.789455
## 60 2.500000e-01 32.00000 213.2 58.789455
## 61 1.000000e+00 32.00000 213.2 58.789455
## 62 4.000000e+00 32.00000 213.2 58.789455
## 63 1.600000e+01 32.00000 213.2 58.789455
## 64 6.400000e+01 32.00000 213.2 58.789455
## 65 2.560000e+02 32.00000 238.0 14.832397
## 66 1.024000e+03 32.00000 238.0 14.832397
# Best parameters from the grid search: row with the lowest error.
idx_min <- which.min(svm_tune$performances[, "error"])
best_row <- svm_tune$performances[idx_min, ]
# Contour plot of the cross-validated error over the (cost, gamma) grid.
ggplot(svm_tune$performances, aes(x = cost, y = gamma)) +
  geom_raster(aes(fill = error)) +
  geom_contour(aes(z = error), color = "white") +
  scale_x_continuous(name = "cost", trans = "log2") +
  scale_y_continuous(name = "gamma", trans = "log2") +
  # Mark the best grid point once, using the single-row data frame instead of
  # recycling a scalar over every row of the grid.
  geom_point(data = best_row, shape = 19, color = "red", size = 2) +
  # color is a fixed setting, so it goes outside aes(); inside aes() the
  # string would be mapped as a data value and the text would not be yellow.
  geom_text(
    data = best_row,
    aes(
      y = 1.05 * gamma,
      label = sprintf("cost of misclassification: %.2f", error)
    ),
    color = "yellow",
    show.legend = FALSE
  )
# Fine grid search for the RBF-kernel SVM: cost in 2^4 ... 2^10 and gamma in
# 2^-6 ... 2^-2, in steps of one exponent.
range_exp <- seq(4, 10, by = 1)
range_exp_gamma <- seq(-6, -2, by = 1)
set.seed(400)
svm_tune <- tune(
  svm, credit_risk ~ .,
  data = train_new,
  kernel = "radial",
  scale = FALSE,
  probability = TRUE,
  class.weights = "inverse",
  ranges = list(cost = 2^range_exp, gamma = 2^range_exp_gamma),
  tunecontrol = tune.control(cross = 5, error.fun = err)
)
summary(svm_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost gamma
## 128 0.03125
##
## - best performance: 97.8
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 16 0.015625 190.0 74.551995
## 2 32 0.015625 105.4 6.985700
## 3 64 0.015625 104.2 12.930584
## 4 128 0.015625 99.6 10.014989
## 5 256 0.015625 98.2 6.906519
## 6 512 0.015625 102.6 10.502381
## 7 1024 0.015625 108.4 13.011533
## 8 16 0.031250 141.8 33.700148
## 9 32 0.031250 108.8 16.946976
## 10 64 0.031250 104.2 13.971399
## 11 128 0.031250 97.8 10.305338
## 12 256 0.031250 104.4 10.597169
## 13 512 0.031250 112.4 13.867228
## 14 1024 0.031250 116.8 12.853015
## 15 16 0.062500 132.6 31.627520
## 16 32 0.062500 110.8 16.006249
## 17 64 0.062500 103.0 13.583078
## 18 128 0.062500 104.4 10.549882
## 19 256 0.062500 114.4 21.454603
## 20 512 0.062500 118.8 19.715476
## 21 1024 0.062500 122.6 29.271146
## 22 16 0.125000 176.2 69.142606
## 23 32 0.125000 109.4 11.414903
## 24 64 0.125000 108.4 13.867228
## 25 128 0.125000 109.0 14.949916
## 26 256 0.125000 115.0 24.545875
## 27 512 0.125000 127.6 34.997143
## 28 1024 0.125000 148.8 40.517897
## 29 16 0.250000 190.0 74.551995
## 30 32 0.250000 186.0 73.426834
## 31 64 0.250000 124.2 21.707142
## 32 128 0.250000 133.0 33.726844
## 33 256 0.250000 150.8 36.792662
## 34 512 0.250000 170.6 42.799533
## 35 1024 0.250000 176.6 44.410584
# Keep the best RBF model found by the grid search.
svm_rbf <- svm_tune$best.model
idx_min <- which.min(svm_tune$performances[, "error"])
best_row <- svm_tune$performances[idx_min, ]
# Contour plot of the cross-validated error over the (cost, gamma) grid.
ggplot(svm_tune$performances, aes(x = cost, y = gamma)) +
  geom_raster(aes(fill = error)) +
  geom_contour(aes(z = error), color = "white") +
  scale_x_continuous(name = "cost", trans = "log2") +
  scale_y_continuous(name = "gamma", trans = "log2") +
  # Mark the best grid point once, using the single-row data frame instead of
  # recycling a scalar over every row of the grid.
  geom_point(data = best_row, shape = 19, color = "red", size = 2) +
  # color is a fixed setting, so it goes outside aes(); inside aes() the
  # string would be mapped as a data value and the text would not be yellow.
  geom_text(
    data = best_row,
    aes(
      y = 1.05 * gamma,
      label = sprintf("cost of misclassification: %.2f", error)
    ),
    color = "yellow",
    show.legend = FALSE
  )
Performance metrics calculation function
# Summary metrics for a set of class predictions.
#   truth: factor of true classes
#   pred:  predicted classes carrying a "decision.values" attribute
# The first row/column of the confusion table is treated as the positive
# class for recall, precision and F1. The misclassification cost uses the
# workspace-level `cost_matrix`.
# Returns a named numeric vector (accuracy, misclass_cost, recall, precision,
# f1_measure, AUC) rounded to 4 decimals.
performance <- function(truth, pred) {
  confusion <- table(truth = truth, prediction = pred)
  n_hit <- confusion[1, 1]
  sens <- n_hit / sum(confusion[1, ])
  ppv <- n_hit / sum(confusion[, 1])
  # Factor codes shifted to 0/1; calcAUC is told that the target value is FALSE.
  truth01 <- as.numeric(truth) - 1
  scores <- attr(pred, "decision.values")
  metrics <- c(
    accuracy = sum(diag(confusion)) / sum(confusion),
    misclass_cost = sum(cost_matrix * confusion),
    recall = sens,
    precision = ppv,
    f1_measure = (2 * ppv * sens) / (ppv + sens),
    AUC = calcAUC(scores, truth01, yTarget = FALSE)
  )
  round(metrics, 4)
}
# Predict the test set with both models, keeping the decision values.
pred_svm_lin <- predict(svm_lin, newdata = test_new, decision.values = TRUE)
pred_svm_rbf <- predict(svm_rbf, newdata = test_new, decision.values = TRUE)

# One row per test observation: true outcome plus the decision value of each
# model.
n_test <- nrow(test_new)
dat_plot <- data.frame(
  outcome = test_new$credit_risk,
  dv_svm_linear = attr(pred_svm_lin, "decision.values")[seq_len(n_test)],
  dv_svm_rbf = attr(pred_svm_rbf, "decision.values")[seq_len(n_test)]
)

library(WVPlots)
# Score distributions per true class; the dashed line marks a score of 0.
DoubleDensityPlot(
  dat_plot,
  xvar = "dv_svm_linear", truthVar = "outcome",
  title = "Distribution of linear svm scores (test data)"
) +
  geom_vline(xintercept = 0, color = "red", linetype = 2)
DoubleDensityPlot(
  dat_plot,
  xvar = "dv_svm_rbf", truthVar = "outcome",
  title = "Distribution of RBF svm scores (test data)"
) +
  geom_vline(xintercept = 0, color = "red", linetype = 2)

# ROC curves of both models, with "bad" as the target class.
ROCPlotPair(
  dat_plot,
  xvar1 = "dv_svm_linear", xvar2 = "dv_svm_rbf", truthVar = "outcome",
  truthTarget = "bad", title = "ROC plots for svm models (test data)"
)

# Metrics table, one row per model.
perf_svm_lin <- performance(test_new$credit_risk, pred_svm_lin)
perf_svm_rbf <- performance(test_new$credit_risk, pred_svm_rbf)
tab_perf <- rbind(
  "linear svm" = perf_svm_lin,
  "rbf svm" = perf_svm_rbf
)
kable(tab_perf)
| | accuracy | misclass_cost | recall | precision | f1_measure | AUC |
|---|---|---|---|---|---|---|
| linear svm | 0.710 | 118 | 0.7500 | 0.5114 | 0.6081 | 0.7626 |
| rbf svm | 0.685 | 119 | 0.7667 | 0.4842 | 0.5935 | 0.7613 |
Grömping, U. (2019). South German Credit Data: Correcting a Widely Used Data Set. Report 4/2019, Reports in Mathematics, Physics and Chemistry, Department II, Beuth University of Applied Sciences Berlin.